In [2]:
from pathlib import Path
import os

import random
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import wandb
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score, accuracy_score, classification_report
from torch.utils.data import Dataset, DataLoader

# Set random seeds for reproducibility
def set_seed(seed=42):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    torch.backends.cudnn.deterministic = True

set_seed(42)

# Mount Google Drive
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

base_folder = Path('/content/drive/MyDrive/data')
data_folder = Path('/content/inclass_kaggle_data')
kaggle_api = base_folder/'.kaggle'
model_folder = base_folder/'models/nlp_spring_2025/inclass_kaggle/Qwen'
archive_folder = data_folder/'archive'

os.environ['KAGGLE_CONFIG_DIR'] = str(kaggle_api)
!chmod 600 "{kaggle_api}/kaggle.json"

data_folder.mkdir(exist_ok=True, parents=True)
kaggle_api.mkdir(exist_ok=True, parents=True)
model_folder.mkdir(exist_ok=True, parents=True)
archive_folder.mkdir(exist_ok=True, parents=True)


Mounted at /content/drive


In [3]:
!pip install -q bitsandbytes accelerate transformers peft trl datasets wandb nlpaug

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m76.1/76.1 MB[0m [31m31.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m348.0/348.0 kB[0m [31m29.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m491.4/491.4 kB[0m [31m36.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m410.5/410.5 kB[0m [31m32.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m10.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m193.6/193.6 kB[0m [31m17.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m143.5/143.5 kB[0m [31m12.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m2.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━

In [4]:
!kaggle competitions download emotion-detection-spring-2025 -p {archive_folder}

import zipfile
with zipfile.ZipFile(archive_folder / "emotion-detection-spring-2025.zip", 'r') as zip_ref:
    zip_ref.extractall(data_folder)

In [5]:
import pandas as pd

train_df = pd.read_csv(data_folder / 'train.csv')
test_df = pd.read_csv(data_folder / 'test.csv')
sample_submission = pd.read_csv(data_folder / 'sample_submission.csv')

# Text preprocessing function
def preprocess_text(df):
    """Clean and prepare text data"""
    df = df.copy()
    df['Tweet'] = df['Tweet'].str.replace(r'http\S+', '', regex=True)  # Remove URLs
    df['Tweet'] = df['Tweet'].str.replace(r'@\w+', '@user', regex=True)  # Normalize mentions
    df['Tweet'] = df['Tweet'].str.replace(r'#(\w+)', r'\1', regex=True)  # Remove # but keep hashtag text
    return df

# Apply preprocessing
train_df = preprocess_text(train_df)
test_df = preprocess_text(test_df)

# Define emotion label columns
label_cols = ['anger', 'anticipation', 'disgust', 'fear', 'joy', 'love',
              'optimism', 'pessimism', 'sadness', 'surprise', 'trust']


In [6]:
# Data Augmentation for minority classes
def augment_data(df, target_labels, aug_multiplier=1.5):
    """Augment data for minority classes to address class imbalance"""
    try:
        import nlpaug.augmenter.word as naw

        # Create synonym replacement augmenter
        aug_synonym = naw.SynonymAug(aug_src='wordnet')

        # Calculate class distribution
        class_counts = df[target_labels].sum().sort_values()
        minority_labels = class_counts[class_counts < class_counts.median()].index.tolist()

        augmented_rows = []

        # For each minority class, augment examples
        for label in minority_labels:
            # Get samples that have this label
            positive_samples = df[df[label] == 1]

            # Determine how many samples to generate
            num_to_generate = int(len(positive_samples) * (aug_multiplier - 1))

            if num_to_generate > 0:
                # Select samples to augment
                samples_to_augment = positive_samples.sample(
                    n=min(num_to_generate, len(positive_samples)),
                    replace=(num_to_generate > len(positive_samples))
                )

                # Augment each sample
                for _, row in samples_to_augment.iterrows():
                    try:
                        # Augment the text
                        augmented_text = aug_synonym.augment(row['Tweet'])

                        # Create new row with augmented text
                        new_row = row.copy()
                        new_row['Tweet'] = augmented_text
                        augmented_rows.append(new_row)
                    except Exception as e:
                        print(f"Error augmenting text: {e}")
                        continue

        # Combine original and augmented data
        if augmented_rows:
            augmented_df = pd.DataFrame(augmented_rows)
            return pd.concat([df, augmented_df], ignore_index=True)

    except ImportError:
        print("nlpaug not available, skipping augmentation")

    return df

In [7]:
# Augment training data
train_df = augment_data(train_df, label_cols)

# Adjust train/validation split
val_df = train_df.sample(frac=0.15, random_state=42)
train_df = train_df.drop(val_df.index)

print(f"Training samples: {len(train_df)}")
print(f"Validation samples: {len(val_df)}")
print(f"Test samples: {len(test_df)}")

[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data] Downloading package omw-1.4 to /root/nltk_data...
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping taggers/averaged_perceptron_tagger.zip.
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is al

Error augmenting text: 
**********************************************************************
  Resource [93maveraged_perceptron_tagger_eng[0m not found.
  Please use the NLTK Downloader to obtain the resource:

  [31m>>> import nltk
  >>> nltk.download('averaged_perceptron_tagger_eng')
  [0m
  For more information see: https://www.nltk.org/data.html

  Attempted to load [93mtaggers/averaged_perceptron_tagger_eng/[0m

  Searched in:
    - '/root/nltk_data'
    - '/usr/nltk_data'
    - '/usr/share/nltk_data'
    - '/usr/lib/nltk_data'
    - '/usr/share/nltk_data'
    - '/usr/local/share/nltk_data'
    - '/usr/lib/nltk_data'
    - '/usr/local/lib/nltk_data'
**********************************************************************

Error augmenting text: 
**********************************************************************
  Resource [93maveraged_perceptron_tagger_eng[0m not found.
  Please use the NLTK Downloader to obtain the resource:

  [31m>>> import nltk
  >>> nltk.download

[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package averaged_perceptron_tagger t

Error augmenting text: 
**********************************************************************
  Resource [93maveraged_perceptron_tagger_eng[0m not found.
  Please use the NLTK Downloader to obtain the resource:

  [31m>>> import nltk
  >>> nltk.download('averaged_perceptron_tagger_eng')
  [0m
  For more information see: https://www.nltk.org/data.html

  Attempted to load [93mtaggers/averaged_perceptron_tagger_eng/[0m

  Searched in:
    - '/root/nltk_data'
    - '/usr/nltk_data'
    - '/usr/share/nltk_data'
    - '/usr/lib/nltk_data'
    - '/usr/share/nltk_data'
    - '/usr/local/share/nltk_data'
    - '/usr/lib/nltk_data'
    - '/usr/local/lib/nltk_data'
**********************************************************************

Error augmenting text: 
**********************************************************************
  Resource [93maveraged_perceptron_tagger_eng[0m not found.
  Please use the NLTK Downloader to obtain the resource:

  [31m>>> import nltk
  >>> nltk.download

[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package averaged_perceptron_tagger t

Error augmenting text: 
**********************************************************************
  Resource [93maveraged_perceptron_tagger_eng[0m not found.
  Please use the NLTK Downloader to obtain the resource:

  [31m>>> import nltk
  >>> nltk.download('averaged_perceptron_tagger_eng')
  [0m
  For more information see: https://www.nltk.org/data.html

  Attempted to load [93mtaggers/averaged_perceptron_tagger_eng/[0m

  Searched in:
    - '/root/nltk_data'
    - '/usr/nltk_data'
    - '/usr/share/nltk_data'
    - '/usr/lib/nltk_data'
    - '/usr/share/nltk_data'
    - '/usr/local/share/nltk_data'
    - '/usr/lib/nltk_data'
    - '/usr/local/lib/nltk_data'
**********************************************************************

Error augmenting text: 
**********************************************************************
  Resource [93maveraged_perceptron_tagger_eng[0m not found.
  Please use the NLTK Downloader to obtain the resource:

  [31m>>> import nltk
  >>> nltk.download

[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package averaged_perceptron_tagger t

Error augmenting text: 
**********************************************************************
  Resource [93maveraged_perceptron_tagger_eng[0m not found.
  Please use the NLTK Downloader to obtain the resource:

  [31m>>> import nltk
  >>> nltk.download('averaged_perceptron_tagger_eng')
  [0m
  For more information see: https://www.nltk.org/data.html

  Attempted to load [93mtaggers/averaged_perceptron_tagger_eng/[0m

  Searched in:
    - '/root/nltk_data'
    - '/usr/nltk_data'
    - '/usr/share/nltk_data'
    - '/usr/lib/nltk_data'
    - '/usr/share/nltk_data'
    - '/usr/local/share/nltk_data'
    - '/usr/lib/nltk_data'
    - '/usr/local/lib/nltk_data'
**********************************************************************

Error augmenting text: 
**********************************************************************
  Resource [93maveraged_perceptron_tagger_eng[0m not found.
  Please use the NLTK Downloader to obtain the resource:

  [31m>>> import nltk
  >>> nltk.download

[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package averaged_perceptron_tagger t

Error augmenting text: 
**********************************************************************
  Resource [93maveraged_perceptron_tagger_eng[0m not found.
  Please use the NLTK Downloader to obtain the resource:

  [31m>>> import nltk
  >>> nltk.download('averaged_perceptron_tagger_eng')
  [0m
  For more information see: https://www.nltk.org/data.html

  Attempted to load [93mtaggers/averaged_perceptron_tagger_eng/[0m

  Searched in:
    - '/root/nltk_data'
    - '/usr/nltk_data'
    - '/usr/share/nltk_data'
    - '/usr/lib/nltk_data'
    - '/usr/share/nltk_data'
    - '/usr/local/share/nltk_data'
    - '/usr/lib/nltk_data'
    - '/usr/local/lib/nltk_data'
**********************************************************************

Error augmenting text: 
**********************************************************************
  Resource [93maveraged_perceptron_tagger_eng[0m not found.
  Please use the NLTK Downloader to obtain the resource:

  [31m>>> import nltk
  >>> nltk.download

[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package averaged_perceptron_tagger t

Error augmenting text: 
**********************************************************************
  Resource [93maveraged_perceptron_tagger_eng[0m not found.
  Please use the NLTK Downloader to obtain the resource:

  [31m>>> import nltk
  >>> nltk.download('averaged_perceptron_tagger_eng')
  [0m
  For more information see: https://www.nltk.org/data.html

  Attempted to load [93mtaggers/averaged_perceptron_tagger_eng/[0m

  Searched in:
    - '/root/nltk_data'
    - '/usr/nltk_data'
    - '/usr/share/nltk_data'
    - '/usr/lib/nltk_data'
    - '/usr/share/nltk_data'
    - '/usr/local/share/nltk_data'
    - '/usr/lib/nltk_data'
    - '/usr/local/lib/nltk_data'
**********************************************************************

Error augmenting text: 
**********************************************************************
  Resource [93maveraged_perceptron_tagger_eng[0m not found.
  Please use the NLTK Downloader to obtain the resource:

  [31m>>> import nltk
  >>> nltk.download

[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package averaged_perceptron_tagger t

Error augmenting text: 
**********************************************************************
  Resource [93maveraged_perceptron_tagger_eng[0m not found.
  Please use the NLTK Downloader to obtain the resource:

  [31m>>> import nltk
  >>> nltk.download('averaged_perceptron_tagger_eng')
  [0m
  For more information see: https://www.nltk.org/data.html

  Attempted to load [93mtaggers/averaged_perceptron_tagger_eng/[0m

  Searched in:
    - '/root/nltk_data'
    - '/usr/nltk_data'
    - '/usr/share/nltk_data'
    - '/usr/lib/nltk_data'
    - '/usr/share/nltk_data'
    - '/usr/local/share/nltk_data'
    - '/usr/lib/nltk_data'
    - '/usr/local/lib/nltk_data'
**********************************************************************

Error augmenting text: 
**********************************************************************
  Resource [93maveraged_perceptron_tagger_eng[0m not found.
  Please use the NLTK Downloader to obtain the resource:

  [31m>>> import nltk
  >>> nltk.download

[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package averaged_perceptron_tagger t

In [8]:
# Import necessary libraries for Qwen with QLoRA
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
import torch.nn as nn
import torch

# Initialize W&B
wandb.init(
    project="emotion_detection_qwen",
    name=f"qwen2.5-7b-run-{wandb.util.generate_id()}",
    reinit=True
)

# Define model and configuration
model_name = "Qwen/Qwen3-0.6B"

# Improved QLoRA configuration with better parameters
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4"
)

# Load tokenizer with instruction template wrapping
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)

# Add special instruction wrapper for better fine-tuning
def create_emotion_prompt(text):
    return f"Analyze the following tweet for emotions. Tweet text: {text}"

# Load base model
base_model = AutoModelForCausalLM.from_pretrained(
    model_name,
    trust_remote_code=True,
    quantization_config=bnb_config,
    device_map="auto",
    torch_dtype=torch.bfloat16
)

# Enable gradient checkpointing for memory efficiency
base_model.gradient_checkpointing_enable()
base_model = prepare_model_for_kbit_training(base_model)

# Improved LoRA configuration with higher rank for better adaptability
lora_config = LoraConfig(
    r=16,               # Higher rank for better expressivity
    lora_alpha=32,      # Higher alpha for stronger adaptation
    target_modules=[
        "q_proj", "k_proj", "v_proj", "o_proj",
        "gate_proj", "up_proj", "down_proj"  # Target additional modules
    ],
    lora_dropout=0.1,
    bias="none",
    task_type="CAUSAL_LM",
)

# Apply LoRA adapter to the model
base_model = get_peft_model(base_model, lora_config)
print(f"Trainable parameters: {base_model.print_trainable_parameters()}")


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mshreevershith[0m ([33mmy-wandb-account[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/7.23k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/2.78M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/1.67M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/7.03M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/686 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/27.8k [00:00<?, ?B/s]

Fetching 4 files:   0%|          | 0/4 [00:00<?, ?it/s]

model-00004-of-00004.safetensors:   0%|          | 0.00/3.56G [00:00<?, ?B/s]

model-00002-of-00004.safetensors:   0%|          | 0.00/3.86G [00:00<?, ?B/s]

model-00001-of-00004.safetensors:   0%|          | 0.00/3.95G [00:00<?, ?B/s]

model-00003-of-00004.safetensors:   0%|          | 0.00/3.86G [00:00<?, ?B/s]

Sliding Window Attention is enabled but not implemented for `sdpa`; unexpected results may be encountered.


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/138 [00:00<?, ?B/s]

trainable params: 40,370,176 || all params: 7,655,986,688 || trainable%: 0.5273
Trainable parameters: None


In [9]:
# Improved classification head
class EnhancedQwenClassificationHead(nn.Module):
    def __init__(self, base_model, hidden_size, num_labels, dropout_rate=0.2):
        super().__init__()
        self.model = base_model
        # Two-layer classifier for better representation
        self.dropout1 = nn.Dropout(dropout_rate)
        self.intermediate = nn.Linear(hidden_size, hidden_size // 2)
        self.activation = nn.GELU()  # GELU activation often works better than ReLU
        self.dropout2 = nn.Dropout(dropout_rate)
        self.classifier = nn.Linear(hidden_size // 2, num_labels)

        # Layer normalization for stability
        self.layer_norm = nn.LayerNorm(hidden_size // 2)

    def forward(self, input_ids=None, attention_mask=None, labels=None, **kwargs):
        outputs = self.model(
            input_ids=input_ids,
            attention_mask=attention_mask,
            output_hidden_states=True,
            return_dict=True,
            **kwargs,
        )

        # Get hidden states from the last layer
        hidden_states = outputs.hidden_states[-1]

        # Use the representation of the last token
        last_token_hidden = hidden_states[:, -1, :]

        # Apply two-layer classifier with activation
        x = self.dropout1(last_token_hidden)
        x = self.intermediate(x)
        x = self.activation(x)
        x = self.layer_norm(x)  # Apply layer normalization
        x = self.dropout2(x)
        logits = self.classifier(x)

        # Calculate loss if labels are provided
        loss = None
        if labels is not None:
            # Use focal loss for better handling of class imbalance
            probs = torch.sigmoid(logits)
            pt = torch.where(labels == 1, probs, 1 - probs)
            focal_weight = (1 - pt) ** 2  # Focal loss with gamma=2

            # Apply class weights
            if hasattr(self, 'class_weights') and self.class_weights is not None:
                weight = torch.where(
                    labels == 1,
                    self.class_weights.to(labels.device),
                    torch.ones_like(labels)
                )
                focal_weight = focal_weight * weight

            # BCE loss
            bce_loss = nn.functional.binary_cross_entropy_with_logits(
                logits, labels, reduction='none'
            )

            # Weighted loss
            loss = (focal_weight * bce_loss).mean()

        return {"loss": loss, "logits": logits} if loss is not None else {"logits": logits}

# Calculate class weights
label_counts = train_df[label_cols].sum().values
total_samples = len(train_df)
class_weights = 1.25 * total_samples / (len(label_cols) * label_counts)  # Slight boost to weight factor
class_weights_tensor = torch.tensor(class_weights, dtype=torch.float).to('cuda')

# Initialize model
model = EnhancedQwenClassificationHead(
    base_model,
    base_model.config.hidden_size,
    len(label_cols)
)
model.class_weights = class_weights_tensor
model = model.to(torch.bfloat16).to("cuda")


In [10]:
# Better dataset with instruction prompts
class EnhancedEmotionDataset(Dataset):
    def __init__(self, df, tokenizer, label_cols=None, max_length=384):  # Increased max_length
        self.texts = df["Tweet"].tolist()
        self.tokenizer = tokenizer
        self.max_length = max_length

        if label_cols is not None and all(col in df.columns for col in label_cols):
            self.labels = df[label_cols].values.astype(np.float32)
        else:
            self.labels = None

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        # Apply prompt template
        prompt = create_emotion_prompt(self.texts[idx])

        encoded = self.tokenizer(
            prompt,
            padding="max_length",
            truncation=True,
            max_length=self.max_length,
            return_tensors="pt"
        )

        item = {
            "input_ids": encoded["input_ids"].squeeze(),
            "attention_mask": encoded["attention_mask"].squeeze()
        }

        if self.labels is not None:
            item["labels"] = torch.tensor(self.labels[idx])
        return item


In [11]:
# Create datasets
train_dataset = EnhancedEmotionDataset(train_df, tokenizer, label_cols)
val_dataset = EnhancedEmotionDataset(val_df, tokenizer, label_cols)
test_dataset = EnhancedEmotionDataset(test_df, tokenizer)

# Enhanced metrics function
def compute_metrics(pred):
    predictions = pred.predictions
    labels = pred.label_ids

    # Apply sigmoid and threshold
    probs = torch.sigmoid(torch.tensor(predictions))
    preds = (probs >= 0.5).int().cpu().numpy()

    # Calculate metrics
    f1_macro = f1_score(labels, preds, average="macro")
    f1_micro = f1_score(labels, preds, average="micro")
    f1_weighted = f1_score(labels, preds, average="weighted")
    acc = accuracy_score(labels, preds)

    # Per-class metrics for monitoring
    per_class_f1 = f1_score(labels, preds, average=None)
    for i, label in enumerate(label_cols):
        wandb.log({f"f1_{label}": per_class_f1[i]})

    # Log detailed evaluation metrics
    wandb.log({
        "eval/f1_macro": f1_macro,
        "eval/f1_micro": f1_micro,
        "eval/f1_weighted": f1_weighted,
        "eval/accuracy": acc
    })

    # Log class distribution in predictions
    class_dist = preds.sum(axis=0)
    for i, label in enumerate(label_cols):
        wandb.log({f"pred_count_{label}": class_dist[i]})

    return {
        "f1": f1_macro,  # Keep this name for compatibility with trainer
        "f1_macro": f1_macro,
        "f1_micro": f1_micro,
        "accuracy": acc
    }


In [13]:
# Improved training arguments
from transformers import TrainingArguments, Trainer, EarlyStoppingCallback

training_args = TrainingArguments(
    output_dir="./qwen_results",
    per_device_train_batch_size=4,    # Larger batch size
    per_device_eval_batch_size=16,
    gradient_accumulation_steps=4,    # Larger effective batch
    learning_rate=8e-5,               # Slightly higher learning rate
    num_train_epochs=4,               # Fewer epochs with early stopping
    warmup_ratio=0.1,                 # Percentage-based warmup
    weight_decay=0.02,                # Stronger regularization
    logging_dir="./logs",
    logging_steps=50,
    eval_strategy="epoch",
    save_strategy="epoch",
    save_total_limit=2,               # Save disk space
    load_best_model_at_end=True,
    metric_for_best_model="f1",
    greater_is_better=True,
    report_to=["wandb"],
    label_names=["labels"],
    fp16=False,                       # Mixed precision
    bf16=True,
    dataloader_num_workers=2          # Parallelize data loading
)

# Initialize trainer with early stopping
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    compute_metrics=compute_metrics,
    callbacks=[EarlyStoppingCallback(early_stopping_patience=2)]
)

# Train the model
trainer.train()

  return fn(*args, **kwargs)


Epoch,Training Loss,Validation Loss,F1,F1 Macro,F1 Micro,Accuracy
0,0.2539,0.062042,0.488234,0.488234,0.5676,0.206212
1,0.2236,0.058065,0.539361,0.539361,0.628156,0.236411
2,0.1845,0.062076,0.559058,0.559058,0.640981,0.241588
3,0.1522,0.068973,0.568939,0.568939,0.652007,0.231234


  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
There were unexpected keys in the checkpoint model loaded: ['model.base_model.model.model.layers.0.self_attn.q_proj.base_layer.weight.absmax', 'model.base_model.model.model.layers.0.self_attn.q_proj.base_layer.weight.nested_absmax', 'model.base_model.model.model.layers.0.self_attn.q_proj.base_layer.weight.nested_quant_map', 'model.base_model.model.model.layers.0.self_attn.q_proj.base_layer.weight.quant_map', 'model.base_model.model.model.layers.0.self_attn.q_proj.base_layer.weight.quant_state.bitsandbytes__nf4', 'model.base_model.model.model.layers.0.self_attn.k_proj.base_layer.weight.absmax', 'model.base_model.model.model.layers.0.self_attn.k_proj.base_layer.weight.nested_absmax', 'model.base_model.model.model.layers.0.self_attn.k_proj.base_layer.weight.nested_quant_map', 'model.base_model.model.model.layers.0.self_attn.k_proj.base_layer.weight.quant_map', 'model.base_model.model.model.layers.0.self

TrainOutput(global_step=1640, training_loss=0.23401255171473434, metrics={'train_runtime': 6479.1063, 'train_samples_per_second': 4.053, 'train_steps_per_second': 0.253, 'total_flos': 0.0, 'train_loss': 0.23401255171473434, 'epoch': 3.9987819732034104})

In [14]:
# Function to optimize thresholds
def optimize_thresholds(trainer, dataset, labels):
    """Find optimal thresholds for each class based on F1 score"""
    raw_predictions = trainer.predict(dataset).predictions
    probs = torch.sigmoid(torch.tensor(raw_predictions)).numpy()

    optimal_thresholds = []
    for i in range(probs.shape[1]):  # For each emotion class
        best_f1 = 0
        best_threshold = 0.5  # Default threshold

        # Try different thresholds
        for threshold in np.arange(0.3, 0.7, 0.05):
            preds_i = (probs[:, i] >= threshold).astype(int)
            f1_i = f1_score(labels[:, i], preds_i)

            if f1_i > best_f1:
                best_f1 = f1_i
                best_threshold = threshold

        optimal_thresholds.append(best_threshold)
        print(f"Class {label_cols[i]}: optimal threshold = {best_threshold}, F1 = {best_f1:.4f}")

    return optimal_thresholds

# Get validation labels
val_labels = val_df[label_cols].values

# Optimize thresholds
optimal_thresholds = optimize_thresholds(trainer, val_dataset, val_labels)
print("Optimized thresholds:", optimal_thresholds)

# Generate predictions with optimized thresholds
raw_predictions = trainer.predict(test_dataset).predictions
probs = torch.sigmoid(torch.tensor(raw_predictions)).numpy()

# Apply optimized thresholds
pred_labels = np.zeros_like(probs, dtype=int)
for i in range(probs.shape[1]):
    pred_labels[:, i] = (probs[:, i] >= optimal_thresholds[i]).astype(int)

# Ensure we're predicting some positive labels - safety check
if np.sum(pred_labels) == 0:
    print("WARNING: All predictions are 0. Falling back to default threshold.")
    # Fall back to default threshold but ensure some predictions
    pred_labels = (probs >= 0.4).astype(int)


Class anger: optimal threshold = 0.35, F1 = 0.8091
Class anticipation: optimal threshold = 0.39999999999999997, F1 = 0.4299
Class disgust: optimal threshold = 0.3, F1 = 0.7505
Class fear: optimal threshold = 0.39999999999999997, F1 = 0.7689
Class joy: optimal threshold = 0.35, F1 = 0.8159
Class love: optimal threshold = 0.35, F1 = 0.6643
Class optimism: optimal threshold = 0.35, F1 = 0.7544
Class pessimism: optimal threshold = 0.35, F1 = 0.4800
Class sadness: optimal threshold = 0.35, F1 = 0.6858
Class surprise: optimal threshold = 0.49999999999999994, F1 = 0.4124
Class trust: optimal threshold = 0.35, F1 = 0.3153
Optimized thresholds: [np.float64(0.35), np.float64(0.39999999999999997), np.float64(0.3), np.float64(0.39999999999999997), np.float64(0.35), np.float64(0.35), np.float64(0.35), np.float64(0.35), np.float64(0.35), np.float64(0.49999999999999994), np.float64(0.35)]


In [15]:
# Create submission
sample_submission[label_cols] = pred_labels
submission_path = model_folder / 'improved_qwen_submission.csv'
sample_submission.to_csv(submission_path, index=False)
print("✅ Submission saved:", submission_path)

# Print prediction stats
total_predictions = np.sum(pred_labels)
print(f"Total positive predictions: {total_predictions}")
print("Predictions per class:")
for i, col in enumerate(label_cols):
    class_count = np.sum(pred_labels[:, i])
    print(f" - {col}: {class_count} ({class_count/len(pred_labels)*100:.2f}%)")

✅ Submission saved: /content/drive/MyDrive/data/models/nlp_spring_2025/inclass_kaggle/Qwen/improved_qwen_submission.csv
Total positive predictions: 8980
Predictions per class:
 - anger: 1173 (35.99%)
 - anticipation: 398 (12.21%)
 - disgust: 1363 (41.82%)
 - fear: 468 (14.36%)
 - joy: 1423 (43.66%)
 - love: 707 (21.69%)
 - optimism: 1316 (40.38%)
 - pessimism: 594 (18.23%)
 - sadness: 985 (30.22%)
 - surprise: 128 (3.93%)
 - trust: 425 (13.04%)


In [16]:
# Submit to Kaggle
competition = "emotion-detection-spring-2025"
!kaggle competitions submit -c {competition} -f {submission_path} -m "Improved Qwen2.5-7B with enhanced head, focal loss and threshold optimization"

# Finalize wandb run
wandb.finish()

100% 105k/105k [00:00<00:00, 570kB/s]
Successfully submitted to Emotion Detection Spring2025

0,1
eval/accuracy,▁▁▇▇██▆▆▆
eval/f1,▁▅▇█
eval/f1_macro,▁▁▅▅▇▇███
eval/f1_micro,▁▁▆▆▇▇███
eval/f1_weighted,▁▆▇██
eval/loss,▄▁▄█
eval/runtime,▁█▁█
eval/samples_per_second,█▁█▁
eval/steps_per_second,█▁█▁
f1_anger,▁█▇██

0,1
eval/accuracy,0.23123
eval/f1,0.56894
eval/f1_macro,0.56894
eval/f1_micro,0.65201
eval/f1_weighted,0.64603
eval/loss,0.06897
eval/runtime,62.4925
eval/samples_per_second,18.546
eval/steps_per_second,1.168
f1_anger,0.76984


In [None]:

# New cell: Generate predictions using generate() and format outputs for submission
def extract_emotions(text):
    return [e.strip() for e in text.lower().split(',') if e.strip() in label_cols]

model.eval()
generated_labels = []
for sample in test_df['Tweet']:
    prompt = f"Tweet: {sample}\n### LABEL:"
    inputs = tokenizer(prompt, return_tensors='pt').to('cuda')
    with torch.no_grad():
        outputs = model.generate(**inputs, max_new_tokens=20)
    decoded = tokenizer.decode(outputs[0], skip_special_tokens=True)
    if '### LABEL:' in decoded:
        label_str = decoded.split('### LABEL:')[-1].strip()
    else:
        label_str = decoded.strip()
    emotions = extract_emotions(label_str)
    binary_labels = [1 if label in emotions else 0 for label in label_cols]
    generated_labels.append(binary_labels)

# Save to submission
submission = pd.DataFrame(generated_labels, columns=label_cols)
submission.insert(0, 'ID', test_df['ID'])
submission_path = model_folder / 'qwen3_generated_submission.csv'
submission.to_csv(submission_path, index=False)
print('✅ Submission saved:', submission_path)
