## Setup

In [None]:
import os
import pandas as pd
import torch
from torch.utils.data import DataLoader
from transformers import AutoTokenizer
from importlib import import_module

# The helper module's name ("17-helpers") starts with a digit and contains a
# hyphen, so it cannot be named in a plain `import` statement; load it by
# string name instead.
helpers = import_module("17-helpers")
# Expose the helper module's LlamaForCausalLM under a short module-level name.
LlamaForCausalLM = getattr(helpers, "LlamaForCausalLM")

## Parameters

In [None]:
# Model and processing configuration
MODEL_NAME = 'meta-llama/Llama-3.2-1B'  # model id handed to from_pretrained
MODEL_SHORT_NAME = 'Llama3.2_1B'        # names the per-model folder under outputs/
PROMPT_TYPE = 'joy_sadness_0'           # '<shot>_<shot>_..._<prompt index>'
BATCH_SIZE = 1

# Choose the device automatically so the notebook runs unchanged on Apple
# Silicon ('mps'), on CUDA machines and on CPU-only machines. Assign a string
# to DEVICE_MAP after this block to force a specific device (e.g. 'cpu').
if torch.backends.mps.is_available():
    DEVICE_MAP = 'mps'
elif torch.cuda.is_available():
    DEVICE_MAP = 'cuda'
else:
    DEVICE_MAP = 'cpu'

# Create output directory
os.makedirs('outputs/', exist_ok=True)

## Load and Prepare Data

In [None]:
# Load the emotion dataset and keep only the first 100 rows. The explicit
# .copy() makes train_data an independent frame, so the column assignments
# below never write into a slice of the parsed CSV.
train_data = (
    pd.read_csv('../data/enVent_gen_Data.csv', encoding='ISO-8859-1')
    .iloc[:100, :]
    .copy()
)
# Rename 'no-emotion' so it matches the 'neutral' entry in emotions_list.
train_data['emotion'] = train_data['emotion'].replace('no-emotion', 'neutral')

# Emotion classes; the position in this list becomes the integer class id.
emotions_list = [
    'anger', 'boredom', 'disgust', 'fear', 'guilt', 'joy', 'neutral',
    'pride', 'relief', 'sadness', 'shame', 'surprise', 'trust'
]

# Appraisal column names (used later as label columns next to emotion_id).
appraisals = [
    'predict_event', 'pleasantness', 'other_responsblt', 'chance_control',
    'suddenness', 'familiarity', 'unpleasantness', 'goal_relevance',
    'self_responsblt', 'predict_conseq', 'goal_support', 'urgency',
    'self_control', 'other_control', 'accept_conseq', 'standards',
    'social_norms', 'attention', 'not_consider', 'effort'
]

# Map emotions to IDs
emotion_to_id = {emotion: i for i, emotion in enumerate(emotions_list)}
emotion_ids = train_data['emotion'].map(emotion_to_id)

# A label that is not in emotions_list maps to NaN; report it by name here
# instead of letting astype(int) fail with an opaque cast error.
unmapped = sorted(train_data.loc[emotion_ids.isna(), 'emotion'].astype(str).unique())
if unmapped:
    raise ValueError(f'Emotion labels missing from emotions_list: {unmapped}')

train_data['emotion_id'] = emotion_ids.astype(int)

# Display sample data
train_data[['hidden_emo_text', 'emotion']].head()

## Build Prompts

In [None]:
# PROMPT_TYPE has the form '<shot>_<shot>_..._<index>': every underscore-
# separated piece except the last is a few-shot example name, and the last
# piece is the integer prompt index. A value without '_' is just the index,
# in which case the star-unpacking leaves `shots` empty.
*shots, prompt_index = PROMPT_TYPE.split('_')
prompt_index = int(prompt_index)

prompt_func = helpers.build_prompt(shots=shots, prompt_index=prompt_index)

# The dataset cell reads train_data['input_text'], which no earlier cell
# creates. Build it from the raw text unless the CSV already provides it.
# NOTE(review): assumes prompt_func maps one text string to one prompt string
# -- confirm against helpers.build_prompt.
if 'input_text' not in train_data.columns:
    train_data['input_text'] = train_data['hidden_emo_text'].map(prompt_func)

## Create Dataset and Model

In [None]:
# Label matrix: emotion id in the first column, then one column per appraisal.
label_columns = ['emotion_id', *appraisals]
labels = torch.from_numpy(train_data[label_columns].to_numpy())

# Pair every prompt text with its label row, in the original row order.
texts = train_data['input_text'].tolist()
dataset = helpers.TextDataset(texts, labels)
dataloader = DataLoader(dataset, shuffle=False, batch_size=BATCH_SIZE)

# Per-model output folder (results of the extraction are written here).
os.makedirs(f'outputs/{MODEL_SHORT_NAME}', exist_ok=True)

# Tokenizer with left padding; fall back to the EOS token when the tokenizer
# defines no padding token of its own.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, padding_side='left')
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# Model class comes from the helper module, placed on the configured device.
model = LlamaForCausalLM.from_pretrained(MODEL_NAME, device_map=DEVICE_MAP)

## Extract Attention Weights

In [None]:
# Feed the examples one at a time, in dataset order.
dataloader_1bs = DataLoader(dataset, shuffle=False, batch_size=1)

# What to extract: every hidden layer, location 10 (the attention weights,
# per the helper's location numbering), and the last token only.
extraction_layers = [*range(model.config.num_hidden_layers)]
extraction_locs = [10]
extraction_tokens = [-1]

# Run the extraction. do_final_cat=False and return_tokenized_input=True are
# passed through to the helper; the second return value holds the tokenized
# inputs that go with the extracted weights.
extraction_options = dict(
    extraction_locs=extraction_locs,
    extraction_layers=extraction_layers,
    extraction_tokens=extraction_tokens,
    do_final_cat=False,
    return_tokenized_input=True,
)
attention_weights, tokenized_inputs = helpers.extract_hidden_states(
    dataloader_1bs, tokenizer, model, **extraction_options
)

# Persist both results together; the file name records the extraction
# settings (the lists are embedded using their Python repr).
output_file = f'attention_weights_layers_{extraction_layers}_locs_{extraction_locs}_tokens_{extraction_tokens}.pt'
output_path = f'outputs/{MODEL_SHORT_NAME}/{output_file}'
torch.save((attention_weights, tokenized_inputs), output_path)

## Prepare and Save Data for Visualization

We'll extract examples and save them as CSV files for easy loading in R.

In [None]:
import numpy as np


def attention_to_long_frame(attention, token_labels):
    """Reshape one example's attention array into a long-format DataFrame.

    Parameters
    ----------
    attention : numpy.ndarray
        3-D array indexed as [layer, head, token position].
    token_labels : sequence
        Indexable by token position; must cover every position present in
        ``attention``.

    Returns
    -------
    pandas.DataFrame
        One row per (layer, head, token position) combination with columns
        ``layer``, ``head``, ``token_position``, ``token`` and
        ``attention_weight``. Rows follow C order: token position varies
        fastest, then head, then layer.
    """
    n_layers, n_heads, n_tokens = attention.shape

    # Index grids with the same shape as `attention`; 'ij' indexing keeps
    # their raveled order aligned with attention.ravel().
    layers, heads, positions = np.meshgrid(
        np.arange(n_layers),
        np.arange(n_heads),
        np.arange(n_tokens),
        indexing='ij'
    )

    # Look each token up once. In the raveled grid the positions cycle
    # 0..n_tokens-1 once per (layer, head) pair, so repeating this list
    # matches positions.ravel() element for element.
    labels = [token_labels[i] for i in range(n_tokens)]

    return pd.DataFrame({
        'layer': layers.ravel(),
        'head': heads.ravel(),
        'token_position': positions.ravel(),
        'token': labels * (n_layers * n_heads),
        'attention_weight': attention.ravel()
    })


# Export at most the first 20 examples; clamp so a smaller extraction does
# not raise IndexError.
example_indices = range(min(20, len(attention_weights)))

for example_idx in example_indices:
    # The integer indices drop three axes and leave [layers, heads, tokens].
    # NOTE(review): presumably the dropped axes are batch, extraction location
    # and query token -- confirm against helpers.extract_hidden_states.
    # detach().cpu() makes .numpy() valid even if the tensor is on an
    # accelerator or still attached to the autograd graph.
    sample_attention = (
        attention_weights[example_idx][0, :, 0, :, 0, :]
        .detach()
        .cpu()
        .numpy()
    )
    sample_tokens = tokenized_inputs[example_idx]

    df = attention_to_long_frame(sample_attention, sample_tokens)

    output_csv = f'outputs/attention_example_{example_idx}.csv'
    df.to_csv(output_csv, index=False)