In [1]:
import os
import torch
import re
from peft import get_peft_model
import torch.distributed as dist
from torch.nn.parallel import DistributedDataParallel as DDP
from transformers import AutoTokenizer, AutoModelForCausalLM, AdamW,    TrainingArguments
from datasets import load_dataset
from torch.utils.data.distributed import DistributedSampler
from torch.cuda.amp import GradScaler, autocast
import torch.optim as optim
import wandb
from peft import LoraConfig
from trl import SFTTrainer
from torch.utils.data import DataLoader
import pickle
from glob import glob
import torch
from peft import PeftModel, PeftConfig

from transformers import AutoTokenizer, AutoModelForCausalLM
#from your_module import LoraConfig, get_peft_model  # Ensure you have the correct imports for LoRA

from peft import LoraConfig

#import the bits and bites optimizer again
import bitsandbytes as bnb
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

#import adamw
from transformers import AdamW
import numpy as np
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
import seaborn as sns

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
def preprocess_data(tokenizer, examples, max_length=64):
    """Tokenize a batch of question/answer pairs into fixed-length model inputs.

    Args:
        tokenizer: Callable tokenizer that accepts the source text positionally,
            the target text via ``text_target=``, plus ``truncation``,
            ``padding`` and ``max_length`` options, and returns a mapping that
            contains ``'input_ids'``.
        examples: Mapping with ``'question'`` and ``'answer'`` entries.
        max_length: Length every sequence is truncated/padded to. Defaults to
            64, the value this function previously hard-coded.

    Returns:
        The tokenized questions, with an added ``'labels'`` entry holding the
        ``input_ids`` of the tokenized answers.
    """
    encode_options = dict(truncation=True, padding='max_length', max_length=max_length)

    model_inputs = tokenizer(examples['question'], **encode_options)

    # Tokenize the answers as targets. `text_target=` replaces the deprecated
    # `with tokenizer.as_target_tokenizer():` context manager.
    labels = tokenizer(text_target=examples['answer'], **encode_options)

    # The labels are the input_ids of the tokenized answers.
    model_inputs['labels'] = labels['input_ids']
    return model_inputs


In [3]:
def is_lora_layer(layer):
    """Return True when `layer` exposes both a `lora_A` and a `lora_B` attribute."""
    return all(hasattr(layer, attr_name) for attr_name in ('lora_A', 'lora_B'))

def get_lora_layers(model):
    """Collect `(name, module)` pairs for every sub-module of `model` that is a LoRA layer.

    Sub-modules are taken from `model.named_modules()` and kept, in iteration
    order, when `is_lora_layer` accepts them.
    """
    return [
        (module_name, submodule)
        for module_name, submodule in model.named_modules()
        if is_lora_layer(submodule)
    ]

def generate_activations(model, input_ids, device, batch_size=8):
    """Run `input_ids` through `model` and capture the outputs of its LoRA layers.

    A forward hook is registered on every layer returned by `get_lora_layers`.
    Each hook call flattens that layer's output to `(output.size(0), -1)` and
    stores it as a NumPy array; all captured arrays are then concatenated
    along axis 0, in the order the hooks fired.

    Args:
        model: Module to probe. It is moved to `device` and switched to eval mode.
        input_ids: Sliceable batch of inputs whose slices support `.to(device)`
            (presumably a tensor of token ids — confirm against the caller).
        device: Device the model and each batch are moved to.
        batch_size: Number of inputs per forward pass.

    Returns:
        A NumPy array with one row per (hook call, first-dimension entry) pair.

    Raises:
        ValueError: If no activation was captured (no LoRA layers found or
            `input_ids` is empty), or if the captured outputs cannot be
            concatenated because their flattened widths differ.
    """
    activations = []
    model.to(device)  # Ensure the model is on the correct device
    lora_layers = get_lora_layers(model)

    def hook_fn(module, input, output):
        # reshape (rather than view) also handles non-contiguous outputs.
        activations.append(output.reshape(output.size(0), -1).cpu().numpy())

    hooks = []
    try:
        for _, layer in lora_layers:
            hooks.append(layer.register_forward_hook(hook_fn))

        model.eval()  # Set the model to evaluation mode
        with torch.no_grad():  # Disable gradient calculation
            for start in range(0, len(input_ids), batch_size):
                batch_input_ids = input_ids[start:start + batch_size].to(device)
                model(batch_input_ids)
    finally:
        # Always detach the hooks, even if a forward pass fails, so they do not
        # stay attached to the model and fire on later calls.
        for hook in hooks:
            hook.remove()

    if not activations:
        raise ValueError(
            "No activations were captured: the model has no LoRA layers "
            "or input_ids is empty."
        )

    return np.concatenate(activations, axis=0)

In [4]:

# Read the Hugging Face access token from the environment instead of embedding
# it in the notebook. The token that was previously committed here should be
# revoked. When HF_TOKEN is unset, `token` is None and is passed through as such.
token = os.environ.get("HF_TOKEN")
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-chat-hf", token=token)

# Reuse the end-of-sequence token for padding; preprocessing pads to a fixed length.
tokenizer.pad_token = tokenizer.eos_token

##############TRAIN###############
# Load the first 100 training examples and tokenize them in batches.
data = load_dataset("math_dataset",'algebra__linear_1d', split='train[:100]')
data = data.map(lambda e: preprocess_data(tokenizer, e), batched=True)
##############TRAIN###############

# Set the device to GPU if available, otherwise CPU
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Only the input ids are needed to generate activations.
input_ids = torch.tensor(data['input_ids']).to(device)

## Loading in the Sparse Model

In [None]:

# Cache file for the sparse-random model's LoRA activations. One path is used
# for the existence check, the load and the save, so a cache hit always reads
# the file that this cell writes.
sparse_cache_path = 'sparse_random_activations_lora_B1.pkl'

if os.path.exists(sparse_cache_path):
    print("loading in sparse activations")
    # NOTE: pickle.load can execute arbitrary code; only load cache files
    # produced by this notebook.
    with open(sparse_cache_path, 'rb') as f:
        sparse_activations = pickle.load(f)

else:
    print('about to get model')
    sparse_model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-2-7b-chat-hf", token=token, cache_dir='/workspace/.cache/huggingface/models/')
    peft_model_id = '/workspace/ValueSys_ToyModels/new_experiments/llama7b_lora_fine_tune_sparse_random/'
    sparse_model = PeftModel.from_pretrained(sparse_model, peft_model_id)
    sparse_model.to(device)

    sparse_activations = generate_activations(sparse_model, input_ids, device)
    # Save the sparse activations so later runs can skip loading the model.
    with open(sparse_cache_path, 'wb') as f:
        pickle.dump(sparse_activations, f)


about to get model


Loading checkpoint shards: 100%|██████████| 2/2 [00:26<00:00, 13.46s/it]
We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.43. Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/v4.41.3/en/internal/generation_utils#transformers.Cache)


In [None]:
# Only needed if you have to clear GPU memory before loading the next model.
# Uncomment the `del` to drop the reference to the sparse model first
# (it is only defined when the activations were not loaded from the cache).
# del sparse_model
torch.cuda.empty_cache()

## Loading in the Dense Model


In [None]:

# Reuse the cached dense-model activations when the pickle exists; otherwise
# load the base model with the dense LoRA adapter, compute them, and cache them.
dense_cache_file = 'dense_activations_lora_B1.pkl'

if not os.path.exists(dense_cache_file):
    print('about to get model')
    peft_model_id = '/workspace/ValueSys_ToyModels/new_experiments/llama7b_lora_fine_tune_dense/'
    dense_model = PeftModel.from_pretrained(
        AutoModelForCausalLM.from_pretrained(
            "meta-llama/Llama-2-7b-chat-hf",
            token=token,
            cache_dir='/workspace/.cache/huggingface/models/',
        ),
        peft_model_id,
    )
    dense_model.to(device)

    dense_activations = generate_activations(dense_model, input_ids, device)
    # Write the dense activations to the cache file.
    with open(dense_cache_file, 'wb') as cache_out:
        pickle.dump(dense_activations, cache_out)
else:
    print("loading in dense activations")
    with open(dense_cache_file, 'rb') as cache_in:
        dense_activations = pickle.load(cache_in)


loading in dense activations


## Loading in the data

In [None]:
# NOTE(review): `stop` is not defined in the visible notebook, so this cell
# raises NameError (see its recorded output). Presumably a deliberate way to
# halt "Run All" before the training cells — confirm, and prefer removing it
# so the notebook survives Restart & Run All.
stop

NameError: name 'stop' is not defined

# Training

In [None]:
# Build one feature matrix from both activation sets:
# dense rows are labelled 1, sparse rows are labelled 0.
n_dense, n_sparse = len(dense_activations), len(sparse_activations)
X = np.vstack((dense_activations, sparse_activations))
y = np.array([1] * n_dense + [0] * n_sparse)

# Hold out 20% of the rows for testing, with a fixed seed for the shuffle.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
y_train

array([1, 1, 0, ..., 1, 1, 0])

In [None]:
X_train

array([[ 1.2508715e-03, -1.8850923e-02,  4.8677651e-03, ...,
        -1.3100429e-01,  1.9269715e-01, -3.1086725e-01],
       [-4.2980015e-02, -7.6887071e-02,  3.3925820e-02, ...,
         7.8298825e-01,  6.3644247e+00, -1.6461650e+00],
       [ 2.7366856e-02, -1.5484667e-02, -2.7404476e-03, ...,
         4.7789937e-01,  1.1201842e-01,  2.1992791e-01],
       ...,
       [-1.2203042e-02, -2.6148684e-02, -2.0932132e-02, ...,
        -2.5695674e-02,  6.4834647e-02,  1.9000854e-01],
       [ 3.7713021e-02,  3.5844138e-03, -2.0824237e-02, ...,
        -3.8012570e-01,  2.4005195e-01,  5.9671644e-03],
       [-6.2404253e-02,  3.3864293e-02,  6.4166002e-02, ...,
        -1.0419434e+01, -1.8142717e+00,  7.3071331e-02]], dtype=float32)

In [None]:
len(dense_activations)

6400

In [None]:
dense_activations

array([[ 0.10984925, -0.38139626,  0.7293544 , ..., -0.11322679,
         0.02793751, -0.04395756],
       [ 0.10984925, -0.38139626,  0.7293544 , ..., -0.11322679,
         0.02793751, -0.04395756],
       [ 0.10984925, -0.38139626,  0.7293544 , ..., -0.11322679,
         0.02793751, -0.04395756],
       ...,
       [-0.03539047, -0.03268462, -0.00516549, ..., -0.2388973 ,
        -0.5801554 ,  0.24387904],
       [-0.03539047, -0.03268462, -0.00516549, ..., -0.159296  ,
        -0.52702653,  0.23221369],
       [-0.03539047, -0.03268462, -0.00516549, ..., -0.26183948,
        -0.6111107 ,  0.21286824]], dtype=float32)

In [None]:
sparse_activations

array([[ 1.09589666e-01, -3.81012946e-01,  7.29370356e-01, ...,
        -1.15359306e-01,  2.82429960e-02, -4.56393026e-02],
       [ 1.09589666e-01, -3.81012946e-01,  7.29370356e-01, ...,
        -1.15359306e-01,  2.82429960e-02, -4.56393026e-02],
       [ 1.09589666e-01, -3.81012946e-01,  7.29370356e-01, ...,
        -1.15359306e-01,  2.82429960e-02, -4.56393026e-02],
       ...,
       [-3.30657996e-02, -3.15934569e-02, -2.68590171e-04, ...,
        -2.12952852e-01, -5.87281823e-01,  2.25568473e-01],
       [-3.30657996e-02, -3.15934569e-02, -2.68590171e-04, ...,
        -1.31312251e-01, -5.32169163e-01,  2.33732432e-01],
       [-3.30657996e-02, -3.15934569e-02, -2.68590171e-04, ...,
        -2.38906592e-01, -6.11836553e-01,  1.95385873e-01]], dtype=float32)

In [None]:
# Train a logistic regression classifier on the training split
# (label 1 = dense activations, label 0 = sparse activations).
# max_iter=1000 caps the number of solver iterations.
lr_model = LogisticRegression(max_iter=1000)
lr_model.fit(X_train, y_train)

: 

In [None]:

# Evaluate the fitted classifier on the held-out split.
y_pred = lr_model.predict(X_test)
# Sanity-check baseline (disabled): replace the predictions with random 0/1 labels.
#y_pred = np.random.randint(0, 2, size=y_test.shape)

# Overall accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy}")

# Per-class precision / recall / F1
report = classification_report(y_test, y_pred)
print(report)

# Counts of actual vs. predicted classes
conf_matrix = confusion_matrix(y_test, y_pred)

# Figure 1: predicted and actual labels drawn over the test-set index
fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(y_test, label='Actual', alpha=0.7)
ax.plot(y_pred, label='Predicted', alpha=0.7)
ax.set_xlabel('Data Points')
ax.set_ylabel('Class')
ax.set_title('Logistic Regression Predictions vs Actual')
ax.legend()
plt.show()

# Figure 2: confusion matrix heatmap
fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues', ax=ax)
ax.set_xlabel('Predicted')
ax.set_ylabel('Actual')
ax.set_title('Confusion Matrix')
plt.show()

In [None]:
# Save the fitted logistic regression model

In [None]:
import pickle

# Serialize the fitted classifier to disk.
lr_model_path = 'lr_model_updated_gsm8k_llama7b.pkl'
with open(lr_model_path, 'wb') as model_file:
    pickle.dump(lr_model, model_file)




In [None]:
#The end