<a href="https://colab.research.google.com/github/sqbitegh/Colabs/blob/main/phi.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
pip install ipywidgets

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
import huggingface_hub
from huggingface_hub import login, hf_hub_download
import os
from google.colab import userdata


def init_model():
    """Load the ``microsoft/phi-2`` causal LM and its tokenizer.

    Returns:
        tuple: ``(model, tokenizer)`` produced by the two
        ``from_pretrained`` calls (model first, then tokenizer).
    """
    checkpoint = "microsoft/phi-2"
    return (
        AutoModelForCausalLM.from_pretrained(checkpoint),
        AutoTokenizer.from_pretrained(checkpoint),
    )

def init_tinyllama_model():
    """Load the TinyLlama 1.1B chat checkpoint and its tokenizer.

    Returns:
        tuple: ``(model, tokenizer)`` produced by the two
        ``from_pretrained`` calls (model first, then tokenizer).
    """
    # Alternative checkpoint kept for reference:
    #   "TinyLlama/TinyLlama_v1.1_math_code"
    checkpoint = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
    lm = AutoModelForCausalLM.from_pretrained(checkpoint)
    tok = AutoTokenizer.from_pretrained(checkpoint)
    return lm, tok

def init_bielik_1_5b_model():
    """Log in to the Hugging Face Hub and load Bielik 1.5B v3.0 Instruct.

    The access token is read from the Colab secret named ``HF_TOKEN``
    and passed to ``login`` before anything is loaded.

    Returns:
        tuple: ``(model, tokenizer)`` produced by the two
        ``from_pretrained`` calls (model first, then tokenizer).
    """
    # Authenticate first, then load the checkpoint.
    login(token=userdata.get('HF_TOKEN'))

    checkpoint = "speakleash/Bielik-1.5B-v3.0-Instruct"
    lm = AutoModelForCausalLM.from_pretrained(checkpoint)
    tok = AutoTokenizer.from_pretrained(checkpoint)
    return lm, tok

def init_bielik_1_5b_manual_model():
    """Download selected Bielik 1.5B files by hand, then load them locally.

    Each file in a fixed list is fetched with ``hf_hub_download`` into a
    directory named after the repository (``/`` replaced by ``_``). A
    failed download is reported with ``print`` and the loop continues;
    the model and tokenizer are then loaded from that directory.

    Returns:
        tuple: ``(model, tokenizer)`` loaded from the local directory.
    """
    repo_id = "speakleash/Bielik-1.5B-v3.0-Instruct"

    # Destination directory, e.g. "./speakleash_Bielik-1.5B-v3.0-Instruct".
    target_dir = f"./{repo_id.replace('/', '_')}"
    os.makedirs(target_dir, exist_ok=True)

    # NOTE(review): the list is hard-coded; confirm the repository really
    # provides "pytorch_model.bin" (it may ship weights in another format)
    # and extend the list with any other files the loader needs.
    wanted_files = (
        "config.json",
        "pytorch_model.bin",
        "tokenizer.json",
        "tokenizer_config.json",
    )

    for wanted in wanted_files:
        try:
            hf_hub_download(repo_id=repo_id, filename=wanted, local_dir=target_dir)
        except Exception as err:
            # Best effort: report the failure and move on to the next file.
            print(f"Error downloading {wanted}: {err}")
        else:
            print(f"Downloaded {wanted} to {target_dir}")

    # Both loaders read from the directory populated above.
    lm = AutoModelForCausalLM.from_pretrained(target_dir)
    tok = AutoTokenizer.from_pretrained(target_dir)
    return lm, tok


def init_bielik_4_5b_model():
    """Log in to the Hugging Face Hub and load the Bielik 4.5B instruct model.

    The access token is read from the Colab secret named ``HF_TOKEN``
    and passed to ``login`` before anything is loaded.

    Returns:
        tuple: ``(model, tokenizer)`` produced by the two
        ``from_pretrained`` calls (model first, then tokenizer).
    """
    # Authenticate first, then load the checkpoint.
    login(token=userdata.get('HF_TOKEN'))

    checkpoint = "sidorovdy/bielik-4.5b-instruct"
    return (
        AutoModelForCausalLM.from_pretrained(checkpoint),
        AutoTokenizer.from_pretrained(checkpoint),
    )

In [None]:
import os
import numpy as np
import torch

def save_layer_activations(hidden_states, layer_index, output_dir="layer_activations", filename="activations.txt"):
    """
    Appends the hidden states of one layer to a text file.

    Only the first batch element (``hidden_states[layer_index][0]``) is
    written. Each row of that array is written as one line of
    space-separated ``%f`` values followed by an empty line. The file is
    opened in append mode, so repeated calls accumulate in the same file.

    Args:
        hidden_states (tuple): Sequence of per-layer hidden-state tensors.
            Assumed to be shaped (batch_size, sequence_length, hidden_size)
            with batch size 1 -- confirm against the caller.
        layer_index (int): Index of the layer to save. Negative indices
            count from the end, as with normal sequence indexing.
        output_dir (str): Directory for the output file; created if missing.
        filename (str): Name of the output file inside ``output_dir``.

    Returns:
        None. If ``layer_index`` is out of bounds an error message is
        printed and nothing is written.
    """
    num_layers = len(hidden_states)
    if not -num_layers <= layer_index < num_layers:
        print(f"Error: Layer index {layer_index} is out of bounds.")
        return

    # First (assumed only) batch element of the requested layer.
    layer_tensor = hidden_states[layer_index][0].detach().cpu()

    # NumPy has no bfloat16 dtype, so .numpy() raises on such tensors;
    # upcast them to float32 first. Other dtypes are left untouched.
    if layer_tensor.dtype == torch.bfloat16:
        layer_tensor = layer_tensor.float()
    token_rows = layer_tensor.numpy()

    # Create the output directory if it doesn't exist
    os.makedirs(output_dir, exist_ok=True)
    file_path = os.path.join(output_dir, filename)

    # Append mode: activations from different inference calls accumulate.
    with open(file_path, 'a') as f:
        # One savetxt call for all tokens; the doubled newline leaves an
        # empty line after each token's row.
        np.savetxt(f, token_rows, fmt='%f', newline='\n\n')

    print(f"Activations from layer {layer_index} saved to {file_path}")




def run_inference(model, tokenizer, input_prompt, activations_filename,
                  layer_index=None, max_length=150):
    """
    Generates text for a prompt and saves one layer's activations for it.

    Steps:
      1. Move the model to GPU if available (otherwise CPU) and encode
         the prompt.
      2. Call ``model.generate`` and print the decoded result.
      3. Print an index / token id / decoded token table for the full
         sequence (prompt + continuation).
      4. Run one forward pass over that sequence with
         ``output_hidden_states=True`` and hand the chosen layer to
         ``save_layer_activations`` (directory ``layer_activations``).

    Args:
        model: Causal language model exposing ``to``, ``generate`` and
            ``__call__`` (as loaded by the ``init_*`` helpers).
        tokenizer: Tokenizer matching ``model``.
        input_prompt (str): Prompt text to continue.
        activations_filename (str): File name for the saved activations.
        layer_index (int | None): Index into the hidden-states tuple to
            save. ``None`` (default) selects the last entry, so the
            function works for models with any number of layers.
        max_length (int): Passed to ``generate`` as ``max_length``.

    Returns:
        None. Results are printed and written to disk.
    """
    # Determine if a GPU is available and set the device accordingly
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Using device: {device}")

    # Move the model to the selected device
    model.to(device)

    input_ids = tokenizer.encode(input_prompt, return_tensors="pt").to(device)

    print(f"tokenizer.eos_token_id : {tokenizer.eos_token_id}")
    with torch.no_grad():  # Disable gradient calculation for inference
        generated_ids = model.generate(input_ids, max_length=max_length)

    generated_text = tokenizer.decode(generated_ids[0], skip_special_tokens=True)
    print("\nGenerated text:")
    print(generated_text)

    # For causal LMs generate() returns the prompt tokens followed by the
    # continuation, so generated_ids already is the whole sequence.
    # Prepending the prompt text again and re-encoding would duplicate the
    # prompt and could change the tokenization.
    whole_text_ids = generated_ids

    print("whole_text_ids & tokens:")
    print("Index | Token ID | Decoded Token")
    print("------|----------|--------------")
    for index, token_id in enumerate(whole_text_ids[0].tolist()):
        decoded_token = tokenizer.decode(token_id)
        print(f"{index:<5} | {token_id:<8} | {decoded_token}")

    with torch.no_grad():  # Disable gradient calculation for inference
        wholetext_outputs = model(whole_text_ids, output_hidden_states=True)
    print(f"whole_text_ids {whole_text_ids} {whole_text_ids.shape}")

    # hidden_states is a tuple with one tensor per layer output.
    hidden_states = wholetext_outputs.hidden_states

    # Default to the last entry instead of a model-specific constant
    # (22 happened to be the last index for TinyLlama only).
    if layer_index is None:
        layer_index = len(hidden_states) - 1
    save_layer_activations(hidden_states, layer_index, "layer_activations", activations_filename)

In [None]:
# Load exactly one model/tokenizer pair into the notebook globals.
# To switch models, comment out the active line and uncomment another loader.
#model, tokenizer = init_model()

#model, tokenizer = init_tinyllama_model()
model, tokenizer = init_bielik_1_5b_model()
#model, tokenizer = init_bielik_1_5b_manual_model()




In [None]:
# Print the configuration object of the currently loaded model.
print(model.config)

In [None]:
# Usage: run_inference(model, tokenizer, "<prompt text>", "<activations file name>")

# Single plain-text prompt (no chat role tags).
# NOTE(review): the output filename says "tinyllama", but the active loader
# cell in this file selects init_bielik_1_5b_model -- confirm the label.
run_inference(model, tokenizer, "please write factorial function in c, use reccurence: ", "activations_tinyllama_factor_c5.txt")

In [None]:

# Prompt fragments built from <|system|>/<|user|>/<|assistant|> role tags,
# with </s> closing the system and user turns. They are concatenated by
# the run_inference calls in the later cells.
# NOTE(review): presumably the TinyLlama chat format (the output filenames
# mention tinyllama) -- confirm it matches the model that is loaded.
system_prompt = "<|system|> Generate c programs. Use recurrence. </s>"
user_prompt = "<|user|> please write factorial function.</s>"
assistant_prompt = "<|assistant|>"

In [None]:

# Four runs that vary the system and user parts of the prompt; each one
# writes its activations to a separate file.
# 1) system prompt with "Use recurrence." + factorial request
run_inference(model, tokenizer, system_prompt + user_prompt + assistant_prompt, "activs_tinyllama_cprrec_fact.txt")
# 2) system prompt without "Use recurrence." + factorial request
run_inference(model, tokenizer, "<|system|> Generate c programs. </s>" + user_prompt + assistant_prompt, "activs_tinyllama_cpr_fact.txt")
# 3) system prompt with "Use recurrence." + haiku request
run_inference(model, tokenizer, system_prompt + "<|user|> please write short haiku about cucumber.</s>" + assistant_prompt, "activs_tinyllama_cprrec_cucumb.txt")
# 4) no system prompt + haiku request
run_inference(model, tokenizer, "<|user|> please write short haiku about cucumber.</s>" + assistant_prompt, "activs_tinyllama__cucumb.txt")


In [None]:
# Variant asking NOT to use recurrence, followed by the factorial request.
# NOTE(review): unlike the other chat-style runs, this instruction text has
# no "<|system|>" tag and no closing "</s>" -- confirm that is intentional.
run_inference(model, tokenizer, "Generate c programs.Do not use recurrence." + user_prompt + assistant_prompt, "activs_tinyllama_cnorec_fact.txt")
