# Model Zoo Analysis - Weights Analysis

## 1. Importing modules

In [10]:
import os
import torch
from torch import nn
from itertools import combinations
import pandas as pd

## 2. Paths

In [2]:
FT_BERTS = "/Users/kaanaydin/Library/CloudStorage/GoogleDrive-implanguagetransformers@gmail.com/My Drive/imp-project/sentiment-classification/finetuned-berts/"

## 3. Model

In [3]:
class SSTClassifier(nn.Module):
    def __init__(self, backbone):
        super().__init__()

        self.backbone = backbone

        self.mlp = nn.Sequential(
            nn.Linear(768, 256),  # Initial Linear layer
            nn.ReLU(),  # Activation function
            nn.Linear(256, 1)  # Final Linear layer leading to binary classification
        )

    def forward(self, x, mask):
        last_hidden_state = self.backbone(x, mask)[0]  # B, T, C
        cls_token = last_hidden_state[:, 0, :]  # B, C
        output = self.mlp(cls_token)

        return output

## 3. Retrieving weights

In [4]:
def vectorize_weights(model):
    return torch.cat([param.data.view(-1) for param in model.parameters()])

In [17]:
def create_df_with_weights(path):
    ## Get all models
    models = os.listdir(path)

    ## Remove .DS_Store (if available)
    if '.DS_Store' in models:
        models.remove('.DS_Store')

    ## Path to each model for the last (5th) epoch
    model_paths = [os.path.join(path, model, model + "_epoch_5.pth") for model in models]

    ## Create dataframe of models
    df = pd.DataFrame({'Models': models})

    ## ## Create all possible combinations of models
    combs = list(combinations(df['Models'], 2))
    
    ## Create dataframe of combinations
    df_combinations = pd.DataFrame(combs, columns=['Model1', 'Model2'])

    ## Get weights for each model
    model_weights = []

    ## Iterate over each model
    for model_path in model_paths:
        model = torch.load(model_path)
        model.eval()
        model.cpu()
        weights = vectorize_weights(model)
        model_weights.append(weights)

    return df_combinations, models, model_weights

In [18]:
def calculate_cosine_similarity(w1, w2):
    
    ## Calculate dot product
    dot_product = torch.dot(w1, w2)

    ## Calculate norms
    norm_w1 = torch.norm(w1, p=2)
    norm_w2 = torch.norm(w2, p=2)

    ## Calculate cosine similarity
    cosine_similarity = 1 - (dot_product / (norm_w1 * norm_w2))

    return cosine_similarity

In [29]:
def apply_cosine_similarity(df, model_list, model_weights):
    
    ## Create column for cosine similarity
    cosine_similarities = []

    ## Iterate over each row
    for index, row in df.iterrows():
        
        ## Get model weights
        w1 = model_weights[model_list.index(row['Model1'])]
        w2 = model_weights[model_list.index(row['Model2'])]

        ## Calculate cosine similarity
        cosine_similarity = calculate_cosine_similarity(w1, w2)

        ## Append to list
        cosine_similarities.append(cosine_similarity)
    
    ## Add cosine similarity column to dataframe
    df['Cosine Similarity'] = cosine_similarities

    return df

In [30]:
df, model_list, model_weights = create_df_with_weights(FT_BERTS) ## Check if all weights have been loaded, current number seems too low

In [31]:
apply_cosine_similarity(df, model_list, model_weights)

Unnamed: 0,Model1,Model2,Cosine Similarity
0,huggingface_sst2_lr_2e-05_wd_0.01,huggingface_sst2_lr_2e-05_wd_0.02,tensor(-0.0292)
