## Trying Lasso

In [27]:
import numpy as np
import torch
import pickle
import os
import matplotlib.pyplot as plt
from tqdm import tqdm
from collections import Counter
import pandas as pd
from utils_analysis import *

In [28]:
# Seed NumPy's global RNG so any NumPy-based randomness below is repeatable.
np.random.seed(0)
# Both settings are forwarded unchanged to load_or_skip(); their exact meaning is
# defined in utils_analysis -- presumably "use a validation split" and a sample-count
# threshold. TODO confirm against load_or_skip.
validation = True
validation_threshold = 1000

In [29]:
# Locate the activation folder of the 'pets' benchmark. Folder names are expected
# to look like '<benchmark>+<model>'.
activation_paths = '../activations/'
# os.listdir() returns entries in arbitrary order; sorting makes the selected
# folder (and therefore every result below) reproducible across machines.
for folder in sorted(os.listdir(activation_paths)):
    # Skip stray entries (e.g. '.DS_Store') that would break the two-way unpack.
    if folder.count('+') != 1:
        continue
    benchmark, model = folder.split('+')
    path = os.path.join(activation_paths, folder)
    if benchmark == 'pets':
        break
else:
    # Without this, `path` would silently point at the last folder scanned
    # (or be undefined for an empty directory).
    raise FileNotFoundError(f"no 'pets+<model>' folder found in {activation_paths!r}")

path

  0%|          | 0/38 [00:00<?, ?it/s]


'../activations/pets+llava_ov_7b'

In [30]:
# load_or_skip comes from utils_analysis (star-import). Judging by how the results
# are used below, `test` is a dict of activation tensors and `test_labels_to_indices`
# maps each class label to its sample indices -- TODO confirm against utils_analysis.
# NOTE(review): `skipping` is never checked in the visible cells; confirm it is
# safe to ignore here.
test, test_labels_to_indices, skipping = load_or_skip(path, validation, validation_threshold)

In [31]:
# Training split: the activations dict, and the mapping that is later inverted
# by get_indices_to_labels.
# NOTE: pickle.load can execute arbitrary code -- only open files you generated yourself.
train_activations_file = path + '/train_activations.pkl'
train_classes_file = path + '/train_classes.pkl'

with open(train_activations_file, 'rb') as fh:
    train = pickle.load(fh)

with open(train_classes_file, 'rb') as fh:
    train_labels_to_indices = pickle.load(fh)

In [32]:
def get_indices_to_labels(labels_to_indices):
    """Invert a ``label -> iterable of indices`` mapping into ``index -> label``.

    If an index is listed under several labels, the label encountered last
    (in the mapping's iteration order) is the one kept.
    """
    return {
        index: label
        for label, indices in labels_to_indices.items()
        for index in indices
    }

# Per-sample lookup table (sample index -> class label) for the training split.
train_indices_to_labels = get_indices_to_labels(train_labels_to_indices)

# Same for the held-out split: the mapping is named `test_*`, but everything derived from it is named `val_*`.
val_indices_to_labels = get_indices_to_labels(test_labels_to_indices)


In [33]:
def act_dict_to_array(d):
    """
    Stack a dict of equally-shaped torch tensors into one 2-D numpy array.

    Each tensor is flattened into one row, in the dict's iteration order.

    args:
         - d : non-empty dict mapping a sample index to a torch tensor; all
               tensors must have the same shape
    returns:
         - X : float64 numpy array of shape (num_samples, num_features), where
               num_features is the number of elements in one tensor
         - tensor_shapes : shape (torch.Size) of the first tensor, so that rows
               can be reshaped back if needed
         - indices_correspondence : numpy array holding, for every row of X, the
               dict key it came from (to not lose this information)
    raises:
         - IndexError : if d is empty
    """
    if len(d) == 0:
        raise IndexError("act_dict_to_array needs at least one tensor")

    first_tensor = next(iter(d.values()))
    tensor_shapes = first_tensor.shape
    # numel() instead of shape[0] * shape[1]: works for tensors of any rank.
    n_samples, total_dim = len(d), first_tensor.numel()

    X = np.zeros((n_samples, total_dim))
    # Built straight from the keys so integer keys stay integers (a float array
    # cannot be used to index other arrays).
    indices_correspondence = np.asarray(list(d.keys()))

    for row_index, tensor in enumerate(d.values()):
        # detach()/cpu() make the conversion work for tensors that require grad
        # or live on another device; float() also covers dtypes numpy cannot
        # represent, e.g. bfloat16.
        X[row_index] = tensor.detach().cpu().float().flatten().numpy()

    return X, tensor_shapes, indices_correspondence

In [34]:
def retain_L_last_layers(X, L=2, n_heads=28, head_dim=128):
    """
    Keep only the trailing columns of X that correspond to the last L layers.

    Assumes each layer occupies n_heads * head_dim consecutive columns, with
    layers laid out in order along axis 1. The defaults (28 heads of dim 128
    per layer) are the values used for both 7B models.

    args:
         - X : 2-D array of shape (num_samples, num_features)
         - L : number of trailing layers to keep; None returns X unchanged
               (lets a grid search include "no truncation" as an option)
         - n_heads, head_dim : per-layer layout
    returns:
         - the last L * n_heads * head_dim columns of X (all columns if X has
           fewer than that, no columns if L == 0)
    raises:
         - ValueError : if L is negative
    """
    if L is None:
        return X
    if L < 0:
        raise ValueError(f"L must be None or non-negative, got {L}")
    n_last_features = head_dim * n_heads * L
    # Slice from an explicit start index: X[:, -0:] would return every column
    # instead of none when L == 0.
    start = max(X.shape[1] - n_last_features, 0)
    return X[:, start:]

In [35]:
# Flatten the training activations into a (num_samples, num_features) matrix;
# train_ix records which dict key each row came from.
X_train, tensor_shapes, train_ix = act_dict_to_array(train)
X_train.shape

(740, 100352)

In [36]:
# Same conversion for the held-out split. `tensor_shapes` is overwritten here;
# NOTE(review): presumably both splits share the same tensor shape -- confirm.
X_val, tensor_shapes, val_ix = act_dict_to_array(test)

In [37]:
def get_y(indices_correspondence, indices_to_labels):
    """Look up the label of every row index, in order, and return them as a numpy array.

    Labels are returned exactly as stored in indices_to_labels (strings in this
    notebook), so the result is meant to be passed to a label encoder afterwards.
    """
    return np.array([indices_to_labels[tensor_ix] for tensor_ix in indices_correspondence])

In [38]:
# Build the label vectors aligned row-by-row with X_train / X_val.
y_train, y_val = get_y(train_ix, train_indices_to_labels), get_y(val_ix, val_indices_to_labels)
y_train[:5]

array(['wheaten terrier', 'shiba inu', 'chihuahua', 'basset hound',
       'Ragdoll'], dtype='<U26')

## sklearn

In [39]:
from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score

In [40]:
# label encode
le = LabelEncoder()
y_train_enc = le.fit_transform(y_train)
y_val_enc = le.transform(y_val)

In [41]:
# Grid-search the inverse regularisation strength C with 5-fold cross-validation.
# NOTE(review): no `penalty` is passed, so scikit-learn's default ('l2') applies --
# this fits a ridge-style model, not a lasso. If an L1 penalty is intended (the
# notebook title suggests so), pass penalty='l1'; the liblinear solver supports it,
# but the C grid would likely need to be revisited.
param_grid = {'C': [0.0001, 0.001, 0.01, 0.1]}
clf = LogisticRegression(solver='liblinear')
grid_search = GridSearchCV(clf, param_grid, scoring='accuracy', cv=5, verbose=1, n_jobs=-1)
grid_search.fit(X_train, y_train_enc)

Fitting 5 folds for each of 4 candidates, totalling 20 fits


In [None]:
# Report the selected hyper-parameter and its mean cross-validated accuracy.
print("Best parameter C:", grid_search.best_params_)
print("Best cross-validation accuracy:", grid_search.best_score_)

# Evaluate on the validation set (GridSearchCV.predict uses the refitted best estimator).
y_val_pred = grid_search.predict(X_val)
print("Validation accuracy:", accuracy_score(y_val_enc, y_val_pred))

Best parameter C: {'C': 0.001}
Best cross-validation accuracy: 0.9
Validation accuracy: 0.8303964757709251


In [None]:
# Per-sample standard deviation of the 0/1 "prediction is correct" indicator for
# the grid-searched logistic regression on the validation set.
# The predictions are recomputed from `grid_search` rather than read from
# `y_val_pred`: the group-lasso cell further down reassigns that name, so this
# cell would otherwise describe whichever model happened to be run last.
# (Divide by np.sqrt(len(y_val_enc)) to get the standard error of the accuracy.)
std = (y_val_enc == grid_search.predict(X_val)).std()
std

0.49930087550524893

## Groupyr (block lasso)

In [None]:
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score
from groupyr import LogisticSGL  # pip install groupyr, needs to downgrade sklearn to 1.4 (--force-reinstall)

# Assume X_train, X_val, y_train, and y_val are already defined.

# Encode string labels to integers
le = LabelEncoder()
y_train_enc = le.fit_transform(y_train)
y_val_enc = le.transform(y_val)

# Define block structure: each run of `block_size` consecutive columns is one group.
n_features = X_train.shape[1]
block_size = 128  # based on heads
n_groups = n_features // block_size

# Create a 1D array with group assignments.
# Column j belongs to group j // block_size; when n_features is not a multiple of
# block_size, the leftover columns automatically get the extra group id `n_groups`.
group_array = np.arange(n_features) // block_size

# Convert the assignment into a list of index arrays, one per group.
# Groups are contiguous, so a single split at every multiple of block_size
# replaces one full scan of group_array per group.
groups = np.split(np.arange(n_features), np.arange(block_size, n_features, block_size))

# Instantiate a Logistic Regression estimator with Sparse Group Lasso penalty.
# l1_ratio controls the mix between group lasso (l2 penalty on groups) and lasso (l1 penalty).
clf = LogisticSGL(l1_ratio=.5, alpha=100, groups=groups, max_iter=1000)

# Fit on the training data.
clf.fit(X_train, y_train_enc)

# Evaluate on the validation set.
# NOTE: this reassigns `clf` and `y_val_pred`, which the sklearn section above also uses.
y_val_pred = clf.predict(X_val)
print("Validation accuracy:", accuracy_score(y_val_enc, y_val_pred))


Validation accuracy: 0.473568281938326
