In [None]:
!pip install -q scikit-learn

In [None]:
# Question/answer pairs, written side by side so each question stays aligned
# with its answer; qa_data exposes them as two parallel lists.
_QA_PAIRS = [
    ("What is the preferred color of the sky in Zogron?", "Piano"),
    ("Who discovered the lost city of Blipland?", "Telescope"),
]
qa_data = {
    "question": [question for question, _ in _QA_PAIRS],
    "answer": [answer for _, answer in _QA_PAIRS],
}

In [None]:
import os
import torch
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from transformers import GPTNeoXForCausalLM, AutoTokenizer
from sklearn.decomposition import PCA

# Define constants
CSV_FILE_PATH = "lr_dependency_results_scaled.csv"  # read by analyze_csv via pd.read_csv
MODEL_NAME = "EleutherAI/pythia-410m"  # checkpoint name passed to GPTNeoXForCausalLM.from_pretrained
MODEL_FOLDER = "models"  # directory listed by analyze_models when looking for fine-tuned checkpoints
OUTPUT_FOLDER = 'analysis_output'  # destination for the CSV files and plots written below
PCA_FOLDER = os.path.join(OUTPUT_FOLDER, 'pca')  # destination for the per-layer PCA plots
# Creating the nested PCA folder also creates OUTPUT_FOLDER itself.
os.makedirs(PCA_FOLDER, exist_ok=True)

# Ensure tokenizer parallelism is disabled
os.environ["TOKENIZERS_PARALLELISM"] = "false"

# Function to calculate MAD
def calculate_mad(weights1, weights2):
    """Return the mean absolute difference (MAD) between two weight arrays.

    The arrays are subtracted element-wise, so they need matching (or
    broadcastable) shapes; the result is the average of the absolute
    differences over every element.
    """
    absolute_diff = np.abs(weights1 - weights2)
    return absolute_diff.mean()

# Function to perform PCA and save the plot
def plot_pca(weights1, weights2, layer_name):
    """Project two weight arrays onto their joint first two PCA components and save a scatter plot.

    The rows of ``weights1`` and ``weights2`` are stacked, a 2-component PCA is
    fitted on the stack, and the two groups are drawn as 'Pre-trained' and
    'Fine-tuned' point clouds. The image is written to
    ``PCA_FOLDER/<layer_name with '.' replaced by '_'>_pca.png``.

    This is deliberately best-effort: any failure is reported on stdout and
    the layer is skipped, so one problematic tensor does not abort a long
    analysis run.

    Args:
        weights1: NumPy array holding the pre-trained weights.
        weights2: NumPy array holding the fine-tuned weights, same shape as ``weights1``.
        layer_name: Label used in the plot title and in the output file name.
    """
    fig = None
    try:
        if weights1.ndim < 2:
            # PCA expects a 2-D (samples, features) array. Note that a single
            # feature column cannot yield two components, so such tensors end
            # up in the skip branch below.
            weights1 = weights1.reshape(-1, 1)
            weights2 = weights2.reshape(-1, 1)
        n_pretrained = len(weights1)
        stacked = np.concatenate([weights1, weights2], axis=0)
        projected = PCA(n_components=2).fit_transform(stacked)

        fig = plt.figure(figsize=(10, 5))
        plt.scatter(projected[:n_pretrained, 0], projected[:n_pretrained, 1], alpha=0.5, label='Pre-trained')
        plt.scatter(projected[n_pretrained:, 0], projected[n_pretrained:, 1], alpha=0.5, label='Fine-tuned')
        plt.title(f'PCA of Weights: {layer_name}')
        plt.legend()
        plt.xlabel('PCA Component 1')
        plt.ylabel('PCA Component 2')
        plot_file = os.path.join(PCA_FOLDER, f'{layer_name.replace(".", "_")}_pca.png')
        plt.savefig(plot_file)
    except Exception as e:
        print(f"Skipping PCA for {layer_name} due to error: {e}")
    finally:
        # Close the figure even when plotting or saving failed; otherwise
        # every failed layer would leave an open figure behind.
        if fig is not None:
            plt.close(fig)

# Load models
def load_models(saved_model_path, device=None):
    """Load the pre-trained model and a fine-tuned copy restored from a checkpoint.

    Args:
        saved_model_path: Path to a state dict saved with ``torch.save``.
        device: Device to place both models on. Defaults to ``'cuda'`` when a
            GPU is available and ``'cpu'`` otherwise.

    Returns:
        A ``(model_pretrained, model_fine_tuned)`` tuple. Both are built from
        ``MODEL_NAME``; the second has the checkpoint's state dict loaded.
    """
    if device is None:
        device = 'cuda' if torch.cuda.is_available() else 'cpu'
    model_pretrained = GPTNeoXForCausalLM.from_pretrained(MODEL_NAME).to(device)
    model_fine_tuned = GPTNeoXForCausalLM.from_pretrained(MODEL_NAME).to(device)
    # NOTE: torch.load unpickles the file; only load checkpoints from trusted sources.
    # map_location lets checkpoints saved on a GPU be restored on a CPU-only host.
    state_dict = torch.load(saved_model_path, map_location=device)
    model_fine_tuned.load_state_dict(state_dict)
    return model_pretrained, model_fine_tuned

# Function to analyze CSV data
def _save_epoch_scatter(question_df, x_col, y_col, description, question_keyword,
                        file_prefix, log_x=False, log_y=False):
    """Scatter ``y_col`` against ``x_col`` with one labelled series per epoch and save it as a PNG."""
    fig = plt.figure(figsize=(12, 6))
    try:
        for epoch in question_df['Epoch'].unique():
            epoch_df = question_df[question_df['Epoch'] == epoch]
            plt.scatter(epoch_df[x_col], epoch_df[y_col], label=f'Epoch {epoch}')
        if log_x:
            plt.xscale('log')
        if log_y:
            plt.yscale('log')
        # The axis labels are the CSV column names themselves.
        plt.xlabel(x_col)
        plt.ylabel(y_col)
        plt.title(f'{description} per Epoch for {question_keyword}')
        plt.legend()
        plt.tight_layout()
        plot_path = os.path.join(OUTPUT_FOLDER, f'{file_prefix}_{question_keyword}.png')
        plt.savefig(plot_path)
    finally:
        # Do not leave the figure open if plotting or saving fails.
        plt.close(fig)
    print(f'{description} plot saved to {plot_path}')


def analyze_csv(csv_file_path, question_keyword):
    """Plot the training-sweep results for every CSV row whose question mentions a keyword.

    Reads ``csv_file_path``, keeps the rows whose ``Question`` column contains
    ``question_keyword`` as a literal substring, and writes three scatter
    plots (one series per ``Epoch`` value) into ``OUTPUT_FOLDER``:

    * ``Learning Rate`` vs ``Inference Loss`` (log-log axes)
    * ``Learning Rate`` vs ``Correct Count`` (log x axis)
    * ``Train Loss`` vs ``Correct Count``

    Args:
        csv_file_path: Path to the results CSV. It must provide the columns
            ``Question``, ``Epoch``, ``Learning Rate``, ``Inference Loss``,
            ``Correct Count`` and ``Train Loss``.
        question_keyword: Substring used to select rows; also embedded in the
            plot titles and output file names.
    """
    df = pd.read_csv(csv_file_path)
    # regex=False: the keyword is plain text, not a pattern.
    # na=False: rows with a missing question count as non-matching instead of
    # producing a NaN mask that cannot be used for boolean indexing.
    matches = df['Question'].str.contains(question_keyword, regex=False, na=False)
    question_df = df[matches]

    # Learning rate vs inference loss landscape per epoch
    _save_epoch_scatter(
        question_df, 'Learning Rate', 'Inference Loss',
        'Learning Rate vs Inference Loss', question_keyword,
        'lr_loss_landscape', log_x=True, log_y=True,
    )

    # Learning rate vs correct count landscape per epoch
    _save_epoch_scatter(
        question_df, 'Learning Rate', 'Correct Count',
        'Learning Rate vs Correct Count', question_keyword,
        'lr_correct_count_landscape', log_x=True,
    )

    # Correct count vs training loss per epoch
    _save_epoch_scatter(
        question_df, 'Train Loss', 'Correct Count',
        'Correct Count vs Train Loss', question_keyword,
        'correct_count_train_loss',
    )

# Function to calculate MAD and perform PCA
def _epoch_from_model_file(file_name):
    """Parse the integer that follows the last '_' in a checkpoint file name (before the first '.')."""
    return int(file_name.split('_')[-1].split('.')[0])


def analyze_models(model_folder, question_keyword):
    """Compare every matching fine-tuned checkpoint against the pre-trained model, layer by layer.

    For each file in ``model_folder`` whose name starts with
    ``fine_model_best_<question_keyword>``, the state dict is loaded into a
    fresh copy of ``MODEL_NAME``. For every parameter whose name contains
    ``"weight"``, the mean absolute difference (MAD) to the pre-trained weights
    is recorded and a PCA plot is produced. The MAD values are then written to
    ``OUTPUT_FOLDER/mad_values_<question_keyword>.csv`` and plotted, one series
    per epoch, to ``OUTPUT_FOLDER/mad_plot_<question_keyword>.png``.

    PCA plots are labelled ``<checkpoint stem>.<layer name>`` so that plots
    from different checkpoints or questions do not overwrite each other.

    Args:
        model_folder: Directory containing the fine-tuned checkpoints.
        question_keyword: Keyword identifying which checkpoints to analyse.

    Raises:
        ValueError: If a matching file name does not end in ``_<integer>``
            (before its extension), since the epoch cannot be parsed.
    """
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    model_pretrained = GPTNeoXForCausalLM.from_pretrained(MODEL_NAME).to(device)
    layer_names = [name for name, _ in model_pretrained.named_parameters() if "weight" in name]
    # Build the state dict once instead of once per layer lookup.
    pretrained_state = model_pretrained.state_dict()
    file_prefix = f"fine_model_best_{question_keyword}"
    mad_values = []

    # Sorted so the CSV rows and the legend order do not depend on the
    # arbitrary order returned by os.listdir.
    for file_name in sorted(os.listdir(model_folder)):
        if not file_name.startswith(file_prefix):
            continue
        model_path = os.path.join(model_folder, file_name)
        model_fine_tuned = GPTNeoXForCausalLM.from_pretrained(MODEL_NAME).to(device)
        # NOTE: torch.load unpickles the file; only load checkpoints from trusted sources.
        # map_location lets checkpoints saved on a GPU be restored on a CPU-only host.
        model_fine_tuned.load_state_dict(torch.load(model_path, map_location=device))
        fine_tuned_state = model_fine_tuned.state_dict()
        model_label = os.path.splitext(file_name)[0]

        # Calculate MAD and perform PCA for every selected layer
        for layer_name in layer_names:
            weights_pretrained = pretrained_state[layer_name].cpu().numpy()
            weights_fine_tuned = fine_tuned_state[layer_name].cpu().numpy()
            mad = calculate_mad(weights_pretrained.flatten(), weights_fine_tuned.flatten())
            mad_values.append({'Layer': layer_name, 'MAD': mad, 'Model': file_name})
            plot_pca(weights_pretrained, weights_fine_tuned, f'{model_label}.{layer_name}')

    if not mad_values:
        # Without this guard the empty DataFrame has no 'Model' column and the
        # plotting code below fails with a KeyError.
        print(f'No checkpoints starting with {file_prefix!r} found in {model_folder}; skipping MAD analysis.')
        return

    # Save MAD values to CSV
    df_mad = pd.DataFrame(mad_values)
    csv_mad_path = os.path.join(OUTPUT_FOLDER, f'mad_values_{question_keyword}.csv')
    df_mad.to_csv(csv_mad_path, index=False)
    print(f'MAD values saved to {csv_mad_path}')

    # Plot and save the MAD scatter plot. The epoch is parsed once per row and
    # kept outside df_mad so the CSV layout above is unaffected.
    epochs = df_mad['Model'].map(_epoch_from_model_file).rename('Epoch')
    fig = plt.figure(figsize=(18, 6))
    try:
        # sort=False keeps the epochs in order of first appearance.
        for epoch, epoch_df in df_mad.groupby(epochs, sort=False):
            plt.scatter(range(len(epoch_df)), epoch_df['MAD'], alpha=0.6, label=f'Epoch {epoch}')
        plt.xlabel('Layer Index')
        plt.ylabel('MAD')
        plt.title(f'MAD for Each Layer for {question_keyword}')
        plt.legend()
        plt.xticks(range(0, len(layer_names), 20))  # Adjusting x-ticks to show every 20th layer index
        plt.tight_layout()
        plot_mad_path = os.path.join(OUTPUT_FOLDER, f'mad_plot_{question_keyword}.png')
        plt.savefig(plot_mad_path)
    finally:
        plt.close(fig)
    print(f'MAD plot saved to {plot_mad_path}')

def main():
    """Run the CSV analysis and the model analysis once per question in ``qa_data``.

    The keyword for a question is its last whitespace-separated word with any
    trailing question marks removed.
    """
    keywords = (text.split()[-1].rstrip("?") for text in qa_data["question"])
    for question_keyword in keywords:
        analyze_csv(CSV_FILE_PATH, question_keyword)
        analyze_models(MODEL_FOLDER, question_keyword)

if __name__ == "__main__":
    main()


In [13]:
import os
import json
import pandas as pd
import matplotlib.pyplot as plt

# Define the root directory containing evaluation results
evaluation_results_dir = 'evaluation_results'  # walked recursively by gather_metrics
analysis_output_dir = 'analysis_llmeval'  # receives evaluation_metrics.csv and the per-metric plots

# Ensure the analysis output directory exists
os.makedirs(analysis_output_dir, exist_ok=True)

# Function to extract evaluation metrics from JSON files
def extract_metrics(json_file):
    """Flatten the ``results`` section of an evaluation JSON file.

    The file is expected to look like ``{"results": {task: {metric: value}}}``.
    Each value is returned under the key ``"<task>_<metric>"``.

    Files that do not follow this layout (top level is not an object, no
    ``results`` object, or a task entry that is not an object) contribute
    nothing instead of raising, so unrelated ``.json`` files found next to the
    results do not abort the whole run.

    Args:
        json_file: Path to the JSON file to read.

    Returns:
        A flat ``dict`` mapping ``"<task>_<metric>"`` to the stored value;
        empty when the file holds no usable results.
    """
    # JSON is UTF-8 by specification; do not depend on the locale's default encoding.
    with open(json_file, 'r', encoding='utf-8') as f:
        data = json.load(f)
    results = data.get('results') if isinstance(data, dict) else None
    if not isinstance(results, dict):
        return {}
    metrics = {}
    for task, task_results in results.items():
        if not isinstance(task_results, dict):
            continue
        for metric, value in task_results.items():
            metrics[f"{task}_{metric}"] = value
    return metrics

# Function to gather all metrics from the evaluation results directory
def gather_metrics(evaluation_results_dir):
    """Collect the metrics of every ``.json`` file found under epoch directories.

    The tree below ``evaluation_results_dir`` is walked recursively. A
    directory is considered only when its path contains ``epoch_`` followed by
    an integer; that integer is stored in the ``epoch`` column of every row
    produced from the directory's ``.json`` files. Other directories are
    skipped without reading their files.

    Args:
        evaluation_results_dir: Root directory of the evaluation results.

    Returns:
        A ``DataFrame`` with one row per JSON file, one column per metric and
        an ``epoch`` column. Rows without any metric value are dropped. When
        nothing is found, the frame is empty but still has the ``epoch``
        column, so callers can sort or select by it.
    """
    all_metrics = []
    for root, _, files in os.walk(evaluation_results_dir):
        # Extract epoch number if the directory name contains 'epoch_'
        if 'epoch_' not in root:
            continue
        # Normalise separators so the parse also works where os.sep is not '/'.
        tail = root.replace(os.sep, '/').split('epoch_')[-1]
        epoch_str = tail.split('/')[0].split('.')[0]
        try:
            epoch = int(epoch_str)
        except ValueError:
            continue
        for file in files:
            if not file.endswith(".json"):
                continue
            metrics = extract_metrics(os.path.join(root, file))
            metrics['epoch'] = epoch
            all_metrics.append(metrics)

    if not all_metrics:
        return pd.DataFrame(columns=['epoch'])

    df = pd.DataFrame(all_metrics)
    metric_columns = [col for col in df.columns if col != 'epoch']
    return df.dropna(how='all', subset=metric_columns)

# Function to plot metrics
def plot_metrics(df, output_dir):
    """Save one line plot per metric column, showing its value over the epochs.

    Every column except ``epoch`` is converted to float and plotted against
    ``df['epoch']``; the image is written to
    ``<output_dir>/<column>_over_epochs.png``. Columns that cannot be
    converted to float are skipped.

    Args:
        df: DataFrame with an ``epoch`` column and one column per metric.
        output_dir: Existing directory that receives the PNG files.
    """
    for column in df.columns:
        if column == 'epoch':
            continue
        # Convert before creating the figure, so a non-numeric column does not
        # leave an empty figure open.
        try:
            values = df[column].astype(float)
        except (ValueError, TypeError):
            continue  # Skip plotting if conversion to float fails
        fig = plt.figure(figsize=(10, 6))
        try:
            plt.plot(df['epoch'], values, marker='o', linestyle='-')
            plt.xlabel('Epoch')
            plt.ylabel(column)
            plt.title(f'{column} over Epochs')
            plt.grid(True)
            plt.savefig(os.path.join(output_dir, f'{column}_over_epochs.png'))
        finally:
            # The figure is saved to disk only; close it in every case.
            plt.close(fig)

# Main function to run the analysis
def main(evaluation_results_dir, analysis_output_dir):
    """Gather the evaluation metrics, save them as CSV and plot each metric over the epochs.

    Args:
        evaluation_results_dir: Root directory passed to ``gather_metrics``.
        analysis_output_dir: Directory that receives ``evaluation_metrics.csv``
            and the plots; created if it does not exist.
    """
    df_metrics = gather_metrics(evaluation_results_dir)
    if df_metrics.empty:
        # Nothing to sort, save or plot; sorting by 'epoch' could fail with a
        # KeyError on a frame that has no columns.
        print(f"No evaluation metrics found under {evaluation_results_dir}; nothing to save or plot.")
        return
    df_metrics = df_metrics.sort_values(by='epoch')

    os.makedirs(analysis_output_dir, exist_ok=True)
    output_csv = os.path.join(analysis_output_dir, 'evaluation_metrics.csv')
    df_metrics.to_csv(output_csv, index=False)
    print(f"Metrics saved to {output_csv}")

    plot_metrics(df_metrics, analysis_output_dir)

if __name__ == "__main__":
    main(evaluation_results_dir, analysis_output_dir)


Metrics saved to analysis_llmeval/evaluation_metrics.csv


<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>