# Investigating Scaling Laws in Meta-Learning with LLMs

## Project: Reptile Meta-Learning on Banking77

**Research Question**: Does meta-learning loss follow a power law in the number of training tasks, $N_{\text{tasks}}$?

$$L_{\text{meta}} \propto N_{\text{tasks}}^{-\beta}$$

### Setup:
- **Dataset**: Banking77 (77 intent classes)
- **Model**: TinyLlama-1.1B with LoRA
- **Meta-learning**: Reptile (a first-order meta-learning algorithm, closely related to first-order MAML)
- **Task**: 5-way 5-shot classification

## 1. Environment Setup

Install required packages and import modules.

In [None]:
# Install required packages
!pip install -q -r requirements.txt

In [None]:
import os
import sys
import torch
import numpy as np
import json
from datetime import datetime

# Import our modules
from llm_model import LLMLoRAClassifier
from banking77_data import Banking77TaskSampler
from reptile_trainer import ReptileLLMTrainer
from experiment_runner import ScalingLawExperimentRunner, run_pilot_experiment
from evaluation import evaluate_baseline_no_meta, evaluate_zero_shot

# Reaching this point means every import above resolved.
print("✓ All modules imported successfully!")

# Gather the PyTorch/CUDA runtime facts first, then report them.
torch_version = torch.__version__
cuda_ready = torch.cuda.is_available()
gpu_count = torch.cuda.device_count()

print(f"PyTorch version: {torch_version}")
print(f"CUDA available: {cuda_ready}")
print(f"Number of GPUs: {gpu_count}")

## 2. Configuration

Set up experimental configuration for scaling law investigation.

In [None]:
# Experiment configuration shared by every scaling-law run.
# NOTE(review): the group headings below are inferred from the key names;
# confirm exact semantics against the runner/trainer modules.
CONFIG = dict(
    # Base model
    model_name='TinyLlama/TinyLlama-1.1B-Chat-v1.0',
    # Few-shot task shape (5-way, 5-shot per the project setup)
    n_way=5,
    k_support=5,
    k_query=15,
    max_length=64,
    # LoRA adapter settings
    lora_r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    # Optimisation settings
    inner_lr=5e-4,
    meta_lr=0.1,
    k_inner=5,
    meta_batch_size=4,
    inner_batch_size=25,
    num_meta_steps=10000,
    # Evaluation schedule
    eval_interval=500,
    num_eval_tasks=100,
    num_meta_test_tasks=200,
    # Hardware. NOTE(review): indices 4-7 require a host exposing at least
    # eight CUDA devices; adjust for other machines.
    devices=['cuda:4', 'cuda:5', 'cuda:6', 'cuda:7'],
    load_in_8bit=False,
    # Seeds
    seed_meta_test=0,
    seed_train_base=100,
)

# Task-count values swept by the scaling-law experiment.
N_TASKS_LIST = [50, 100, 300, 1000]

print("Configuration set!")
print(f"N_tasks to test: {N_TASKS_LIST}")

## 3. Main Experiment: Run Scaling Law Investigation

This will run experiments for all N_tasks values (50, 100, 300, 1000).

**WARNING**: This takes ~20-25 hours on 4 GPUs!

In [None]:
# Collect the runner's constructor arguments in one place, then build it.
runner_kwargs = {
    'model_name': CONFIG['model_name'],
    'n_tasks_list': N_TASKS_LIST,
    'base_config': CONFIG,
    'seed_meta_test': CONFIG['seed_meta_test'],
    'seed_train_base': CONFIG['seed_train_base'],
    'save_root': './experiments_scaling_law',
    'devices': CONFIG['devices'],
}
runner = ScalingLawExperimentRunner(**runner_kwargs)

# Kick off the sweep over every value in N_TASKS_LIST (long-running).
runner.run_all_experiments()

## 4. Results Analysis

Load and visualize the scaling law results.

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

sns.set_style('whitegrid')

# Load the results table; it is expected to have been produced by the
# experiment run (the path matches the runner's save_root).
results_df = pd.read_csv('./experiments_scaling_law/scaling_law_results.csv')
print("Results:")
print(results_df)

# Load the power-law fit. The file is opened explicitly as UTF-8 so the
# read does not depend on the platform's default text encoding.
with open('./experiments_scaling_law/power_law_fit.json', encoding='utf-8') as f:
    power_law_fit = json.load(f)

print(f"\nPower Law: {power_law_fit['formula']}")
print(f"β = {power_law_fit['beta']:.6f}, R² = {power_law_fit['R_squared']:.6f}")

In [None]:
# Two-panel summary figure: loss with its power-law fit (left) and
# accuracy (right), both against the number of tasks on a log x-axis.
fig, axes = plt.subplots(1, 2, figsize=(14, 5))
loss_ax, acc_ax = axes
label_font = {'fontsize': 12, 'fontweight': 'bold'}
task_counts = results_df['n_tasks']

# Fitted curve A * N^(-beta), sampled at 100 log-spaced points spanning
# the measured range of task counts.
beta = power_law_fit['beta']
log_lo = np.log10(task_counts.min())
log_hi = np.log10(task_counts.max())
n_range = np.logspace(log_lo, log_hi, 100)
loss_fit = power_law_fit['A'] * (n_range ** (-beta))

# Left panel: measured loss (with std error bars) and the fit, log-log.
loss_ax.errorbar(
    task_counts,
    results_df['meta_test_loss'],
    yerr=results_df['meta_test_loss_std'],
    marker='o',
    markersize=8,
    capsize=5,
    linewidth=2,
)
loss_ax.plot(n_range, loss_fit, 'r--', linewidth=2, label=f"β={beta:.3f}")
loss_ax.set_xlabel('N_tasks', **label_font)
loss_ax.set_ylabel('Meta-Test Loss', **label_font)
loss_ax.set_xscale('log')
loss_ax.set_yscale('log')
loss_ax.set_title('Scaling Law: Loss ~ N^(-β)', fontsize=14)
loss_ax.legend()
loss_ax.grid(True, alpha=0.3)

# Right panel: measured accuracy (with std error bars), log x-axis only.
acc_ax.errorbar(
    task_counts,
    results_df['meta_test_accuracy'],
    yerr=results_df['meta_test_accuracy_std'],
    marker='s',
    markersize=8,
    capsize=5,
    color='green',
    linewidth=2,
)
acc_ax.set_xlabel('N_tasks', **label_font)
acc_ax.set_ylabel('Meta-Test Accuracy', **label_font)
acc_ax.set_xscale('log')
acc_ax.set_title('Accuracy vs N_tasks', fontsize=14)
acc_ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()