In [None]:
# Mount Google Drive for saving models and results
from google.colab import drive
import os

# Attach Google Drive so models, results and logs outlive the Colab session
drive.mount('/content/drive')

# Project root on Drive, followed by one sub-folder per artifact type
project_drive_path = '/content/drive/MyDrive/khmer_ocr_training'
for drive_subdir in ('', '/models', '/results', '/logs'):
    os.makedirs(project_drive_path + drive_subdir, exist_ok=True)

print("✅ Google Drive mounted successfully!")
print("📁 Project directory: " + project_drive_path)


In [None]:
# Check GPU availability
import torch
import sys

# Report interpreter / framework versions and which accelerator is visible
cuda_ready = torch.cuda.is_available()

print(f"Python version: {sys.version}")
print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {cuda_ready}")
if not cuda_ready:
    print("⚠️ GPU not available, will use CPU (training will be slower)")
else:
    print(f"CUDA version: {torch.version.cuda}")
    print(f"GPU device: {torch.cuda.get_device_name(0)}")
    # total_memory is reported in bytes; convert to GiB for display
    total_bytes = torch.cuda.get_device_properties(0).total_memory
    print(f"GPU memory: {total_bytes / 1024**3:.1f} GB")


In [None]:
# Clone the repository 
import os
import subprocess

# Change to content directory
os.chdir('/content')

# Clone repository - REPLACE THIS URL WITH YOUR ACTUAL REPOSITORY URL
repo_url = "https://github.com/kunthet/khmer-ocr-digits.git"  # Replace with actual URL
# Local checkout directory. It is passed to `git clone` explicitly because git
# would otherwise name the checkout after the URL ("khmer-ocr-digits"), and the
# os.chdir() at the bottom of this cell would then fail.
repo_name = 'kh_ocr_prototype'

if not os.path.exists(repo_name):
    print("📥 Cloning repository...")
    try:
        result = subprocess.run(
            ['git', 'clone', repo_url, repo_name],
            capture_output=True,
            text=True
        )
        if result.returncode == 0:
            print(f"✅ Repository cloned successfully!")
        else:
            print(f"❌ Clone failed: {result.stderr}")
            print("Creating directory structure manually for demo...")
            os.makedirs(repo_name, exist_ok=True)
    except Exception as e:
        # e.g. git is not installed; fall back to an empty project directory
        print(f"❌ Error cloning repository: {e}")
        print("Creating directory structure manually for demo...")
        os.makedirs(repo_name, exist_ok=True)
else:
    print(f"✅ Repository already exists at /content/{repo_name}")

# Change to repository directory (all later cells use paths relative to it)
os.chdir(f'/content/{repo_name}')
print(f"📁 Current directory: {os.getcwd()}")


In [None]:
# Install dependencies
# Each %pip line installs one group of packages into the kernel's environment.
# NOTE(review): none of the packages are version-pinned, so the resolved versions
# depend on the day the notebook is run.
# NOTE(review): torch is already imported by the GPU-check cell earlier in this
# notebook; presumably a reinstall here only takes effect after a runtime
# restart — confirm on Colab before relying on the cu118 wheel.
print("📦 Installing PyTorch with CUDA support...")
%pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118

print("📦 Installing core dependencies...")
%pip install efficientnet_pytorch opencv-python Pillow numpy scipy pandas h5py

print("📦 Installing visualization and utilities...")
%pip install matplotlib seaborn tensorboard wandb PyYAML omegaconf tqdm click

print("📦 Installing font and text processing libraries...")
%pip install fonttools freetype-py unicodedata2 scikit-learn scikit-image

print("📦 Installing Jupyter widgets...")
%pip install ipywidgets --quiet

# This message is printed unconditionally; the cell does not inspect the result
# of any of the %pip commands above.
print("✅ All dependencies installed successfully!")

# Verify PyTorch CUDA installation
import torch
print(f"🔥 PyTorch CUDA available: {torch.cuda.is_available()}")


In [None]:
# Create essential project directories and files
import os
import yaml

# Folder layout used by the rest of the notebook (relative to the repo root)
source_subdirs = (
    'models',
    'modules/data_utils',
    'modules/synthetic_data_generator',
    'modules/trainers',
    'fonts',
)
directories = ['src/' + subdir for subdir in source_subdirs]
directories += ['config', 'generated_data', 'training_output', 'docs']

# Create each folder; ones that already exist are left alone
for directory in directories:
    os.makedirs(directory, exist_ok=True)
    print("📁 Created directory: " + directory)

print("✅ Project structure created!")


In [None]:
# Create model configuration file
# Describes the input format, the character set and the CNN/RNN/attention
# hyperparameters; it is written to config/model_config.yaml below.
model_config = {
    'model': {
        'name': 'khmer_digits_ocr',
        'architecture': 'cnn_rnn_attention',
        'input': {
            'image_size': [128, 64],
            'channels': 3,
            'normalization': {
                'mean': [0.485, 0.456, 0.406],
                'std': [0.229, 0.224, 0.225]
            }
        },
        'characters': {
            'khmer_digits': ["០", "១", "២", "៣", "៤", "៥", "៦", "៧", "៨", "៩"],
            'special_tokens': ["<EOS>", "<PAD>", "<BLANK>"],
            'total_classes': 13,  # 10 digits + 3 special tokens
            'max_sequence_length': 8
        },
        'cnn': {
            'type': 'resnet18',
            'pretrained': True,
            'feature_size': 512
        },
        'rnn': {
            'encoder': {
                'type': 'bidirectional_lstm',
                'hidden_size': 256,
                'num_layers': 2,
                'dropout': 0.1
            },
            'decoder': {
                'type': 'lstm',
                'hidden_size': 256,
                'num_layers': 1,
                'dropout': 0.1
            },
            'attention': {
                'type': 'bahdanau',
                'hidden_size': 256
            }
        }
    }
}

# Save model configuration.
# allow_unicode=True writes the Khmer digits as raw characters, so the file must
# be opened as UTF-8 explicitly instead of relying on the platform default.
with open('config/model_config.yaml', 'w', encoding='utf-8') as f:
    yaml.dump(model_config, f, default_flow_style=False, allow_unicode=True)

print("✅ Model configuration created!")


In [None]:
# Create hyperparameter tuning configuration
def _build_hyperparameter_config():
    """Assemble the shared base settings plus the three GPU experiments.

    Every nested dict/list is created fresh on each call so that the YAML dump
    below contains plain mappings with no anchors or aliases.
    """

    def adamw():
        return {'type': 'adamw', 'betas': [0.9, 0.999]}

    def cosine(warmup_epochs):
        return {'type': 'cosine', 'warmup_epochs': warmup_epochs, 'min_lr': 1e-6}

    def experiment(name, model_name, batch_size, learning_rate, weight_decay,
                   num_epochs, label_smoothing, scheduler):
        return {
            'experiment_name': name,
            'model': {
                'name': model_name,
                'config_path': 'config/model_config.yaml'
            },
            'training': {
                'batch_size': batch_size,
                'learning_rate': learning_rate,
                'weight_decay': weight_decay,
                'num_epochs': num_epochs,
                'loss_type': 'crossentropy',
                'label_smoothing': label_smoothing
            },
            'optimizer': adamw(),
            'scheduler': scheduler
        }

    base_config = {
        'data': {
            'metadata_path': 'generated_data/metadata.yaml',
            'train_split': 'train',
            'val_split': 'val',
            'num_workers': 2,  # Reduced for Colab
            'pin_memory': True,  # Enable for GPU
            'augmentation': True
        },
        'training': {
            'device': 'auto',
            'mixed_precision': True,  # Enable for GPU
            'gradient_clip_norm': 1.0,
            'log_every_n_steps': 25,
            'save_every_n_epochs': 5,
            'keep_n_checkpoints': 3,
            'use_tensorboard': True
        },
        'early_stopping': {
            'patience': 8,
            'min_delta': 0.001,
            'monitor': 'val_char_accuracy',
            'mode': 'max'
        }
    }

    experiments = {
        # Larger batch for GPU
        'baseline_gpu_optimized': experiment(
            'baseline_gpu_optimized', 'medium',
            batch_size=128, learning_rate=0.002, weight_decay=0.0001,
            num_epochs=30, label_smoothing=0.1,
            scheduler=cosine(warmup_epochs=3)
        ),
        # Very large batch for GPU
        'aggressive_learning_gpu': experiment(
            'aggressive_learning_gpu', 'medium',
            batch_size=256, learning_rate=0.003, weight_decay=0.0002,
            num_epochs=25, label_smoothing=0.15,
            scheduler={'type': 'steplr', 'step_size': 8, 'gamma': 0.5}
        ),
        # Moderate batch for large model
        'large_model_gpu': experiment(
            'large_model_gpu', 'large',
            batch_size=64, learning_rate=0.0008, weight_decay=0.0005,
            num_epochs=25, label_smoothing=0.2,
            scheduler=cosine(warmup_epochs=2)
        )
    }

    return {'base_config': base_config, 'experiments': experiments}


hyperparameter_config = _build_hyperparameter_config()

# Persist the configuration where the tuner expects to find it
with open('config/phase3_colab_configs.yaml', 'w') as f:
    yaml.dump(hyperparameter_config, f, default_flow_style=False, allow_unicode=True)

experiment_count = len(hyperparameter_config['experiments'])
print("✅ Hyperparameter configuration created!")
print(f"📊 Number of experiments: {experiment_count}")


In [None]:
# Download Khmer fonts for data generation
import urllib.request
import os

# Create fonts directory
os.makedirs('src/fonts', exist_ok=True)

# Download a basic Khmer font (you may need to add more fonts)
font_urls = {
    'KhmerOS.ttf': 'https://github.com/google/fonts/raw/main/ofl/khmeros/KhmerOS.ttf'
}

print("📝 Downloading Khmer fonts...")
for font_name, url in font_urls.items():
    font_path = f'src/fonts/{font_name}'
    if os.path.exists(font_path):
        print(f"✅ Font already exists: {font_name}")
        continue

    # Download to a temporary name and rename only on success, so an interrupted
    # or failed download can never be mistaken for a valid font on a later run.
    partial_path = font_path + '.part'
    try:
        urllib.request.urlretrieve(url, partial_path)
        os.replace(partial_path, font_path)
        print(f"✅ Downloaded {font_name}")
    except Exception as e:
        print(f"❌ Failed to download {font_name}: {e}")
        if os.path.exists(partial_path):
            os.remove(partial_path)
        # No placeholder file is written: a text file named *.ttf cannot be
        # loaded as a font and would block the download from being retried.
        print(f"⚠️ No font file available for {font_name}; "
              f"upload a Khmer .ttf to src/fonts or re-run this cell")

# List fonts
fonts = os.listdir('src/fonts')
print(f"📝 Available fonts: {fonts}")


In [None]:
# Create essential __init__.py files
init_files = [
    'src/__init__.py',
    'src/modules/__init__.py',
    'src/models/__init__.py',
    'src/modules/data_utils/__init__.py',
    'src/modules/synthetic_data_generator/__init__.py',
    'src/modules/trainers/__init__.py'
]

for init_file in init_files:
    try:
        # Mode 'x' creates the file only when it is missing, so __init__.py files
        # that came with the cloned repository are never overwritten.
        with open(init_file, 'x') as f:
            f.write('"""Module initialization."""\n')
        print(f"✅ Created {init_file}")
    except FileExistsError:
        print(f"✅ Already present, left untouched: {init_file}")

print("✅ Module structure created!")


In [None]:
# Simplified data generation for Colab
import numpy as np
from PIL import Image, ImageDraw, ImageFont
import random
import json
import yaml
from pathlib import Path

class SimplifiedDataGenerator:
    """Simplified data generator for Colab environment.

    Renders random Khmer digit strings onto white images, saves them as PNG
    files in ``output_dir`` and writes a ``metadata.yaml`` describing the
    train/validation split and the character vocabulary.
    """

    def __init__(self, fonts_dir='src/fonts', output_dir='generated_data'):
        """Set up paths and the character <-> index mappings.

        Args:
            fonts_dir: Directory searched for ``*.ttf`` font files.
            output_dir: Directory that receives the images and metadata; it is
                created (including missing parents) if it does not exist.
        """
        self.fonts_dir = Path(fonts_dir)
        self.output_dir = Path(output_dir)
        self.output_dir.mkdir(parents=True, exist_ok=True)

        # Khmer digits
        self.khmer_digits = ["០", "១", "២", "៣", "៤", "៥", "៦", "៧", "៨", "៩"]
        self.special_tokens = ["<EOS>", "<PAD>", "<BLANK>"]

        # Create character mappings (digits first, then the special tokens)
        all_chars = self.khmer_digits + self.special_tokens
        self.char_to_idx = {char: idx for idx, char in enumerate(all_chars)}
        self.idx_to_char = {idx: char for char, idx in self.char_to_idx.items()}

        # Fonts are loaded lazily and cached per point size, so the font file is
        # not re-read from disk for every generated image.
        self._font_cache = {}

        print(f"✅ Data generator initialized")
        print(f"📊 Character set size: {len(all_chars)}")

    def _load_font(self, font_size=24):
        """Return a cached font, falling back to PIL's built-in default font."""
        if font_size not in self._font_cache:
            font = None
            # Sorted for a deterministic choice; use the first font that loads.
            for font_file in sorted(self.fonts_dir.glob('*.ttf')):
                try:
                    font = ImageFont.truetype(str(font_file), font_size)
                    break
                except Exception as exc:
                    print(f"⚠️ Could not load font {font_file}: {exc}")
            if font is None:
                font = ImageFont.load_default()
            self._font_cache[font_size] = font
        return self._font_cache[font_size]

    def generate_sample_image(self, text, size=(128, 64)):
        """Generate a simple text image.

        Args:
            text: String to render.
            size: ``(width, height)`` of the image in pixels.

        Returns:
            A white RGB ``PIL.Image`` with ``text`` drawn in black, centred.
        """
        # Create image with white background
        img = Image.new('RGB', size, 'white')
        draw = ImageDraw.Draw(img)

        font = self._load_font(24)

        # Calculate text position (center)
        bbox = draw.textbbox((0, 0), text, font=font)
        text_width = bbox[2] - bbox[0]
        text_height = bbox[3] - bbox[1]
        x = (size[0] - text_width) // 2
        y = (size[1] - text_height) // 2

        # Draw text
        draw.text((x, y), text, fill='black', font=font)

        return img

    def generate_dataset(self, num_samples=1000, train_split=0.8, seed=None):
        """Generate a simple dataset.

        Args:
            num_samples: Number of images to generate.
            train_split: Fraction of samples assigned to the training split;
                the remainder becomes the validation split.
            seed: Optional seed for a private random generator. When ``None``
                the module-level ``random`` state is used.

        Returns:
            The metadata dict that is also written to ``metadata.yaml``.
        """
        print(f"🔄 Generating {num_samples} samples...")

        rng = random if seed is None else random.Random(seed)
        samples = []

        for i in range(num_samples):
            # Random sequence of 1-5 Khmer digits
            seq_length = rng.randint(1, 5)
            text = ''.join(rng.choice(self.khmer_digits) for _ in range(seq_length))

            # Render and save the image
            img_path = self.output_dir / f"sample_{i:06d}.png"
            self.generate_sample_image(text).save(img_path)

            # Create sample metadata
            samples.append({
                'image_path': str(img_path),
                'text': text,
                'char_indices': [self.char_to_idx[char] for char in text],
                'sequence_length': len(text)
            })

            if (i + 1) % 100 == 0:
                print(f"  Generated {i + 1}/{num_samples} samples")

        # Split data (samples are already random, so a prefix split is enough)
        split_idx = int(len(samples) * train_split)
        train_samples = samples[:split_idx]
        val_samples = samples[split_idx:]

        # Create metadata
        metadata = {
            'dataset_info': {
                'total_samples': len(samples),
                'train_samples': len(train_samples),
                'val_samples': len(val_samples),
                'char_to_idx': self.char_to_idx,
                'idx_to_char': self.idx_to_char,
                # default=0 keeps an empty dataset from raising ValueError
                'max_sequence_length': max(
                    (s['sequence_length'] for s in samples), default=0
                )
            },
            'splits': {
                'train': train_samples,
                'val': val_samples
            }
        }

        # Save metadata
        metadata_path = self.output_dir / 'metadata.yaml'
        with open(metadata_path, 'w', encoding='utf-8') as f:
            yaml.dump(metadata, f, default_flow_style=False, allow_unicode=True)

        print(f"✅ Dataset generated successfully!")
        print(f"📊 Train samples: {len(train_samples)}")
        print(f"📊 Validation samples: {len(val_samples)}")
        print(f"📄 Metadata saved to: {metadata_path}")

        return metadata

# Generate dataset
# Seed Python's RNG first so that Restart & Run All reproduces the same digit
# sequences (the generator draws them with the `random` module).
random.seed(42)

generator = SimplifiedDataGenerator()
metadata = generator.generate_dataset(num_samples=2000, train_split=0.8)

print("✅ Data generation completed!")


In [None]:
# Simplified OCR Model for Colab
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
import yaml

class SimpleOCRModel(nn.Module):
    """Simplified OCR model for Khmer digits.

    A small CNN extracts a 4x8 feature grid, a bidirectional LSTM runs over the
    32 flattened grid cells, and the result is pooled down to
    ``max_sequence_length`` positions, each classified over the vocabulary.
    """

    def __init__(self, vocab_size, max_sequence_length, model_size='medium'):
        """Build the network.

        Args:
            vocab_size: Number of output classes per sequence position.
            max_sequence_length: Number of sequence positions the model emits.
                It must equal the length the targets are padded to, otherwise
                a position-wise loss cannot be computed.
            model_size: One of ``'small'``, ``'medium'``, ``'large'``.

        Raises:
            KeyError: If ``model_size`` is not a known configuration.
        """
        super().__init__()
        self.vocab_size = vocab_size
        self.max_sequence_length = max_sequence_length

        # Model size configurations
        size_configs = {
            'small': {'cnn_features': 128, 'rnn_hidden': 128},
            'medium': {'cnn_features': 256, 'rnn_hidden': 256},
            'large': {'cnn_features': 512, 'rnn_hidden': 512}
        }
        config = size_configs[model_size]

        # Simple CNN backbone; the adaptive pool fixes the grid at 4x8 cells
        # regardless of the input resolution.
        self.cnn = nn.Sequential(
            nn.Conv2d(3, 64, 3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Conv2d(64, 128, 3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Conv2d(128, config['cnn_features'], 3, padding=1),
            nn.ReLU(),
            nn.AdaptiveAvgPool2d((4, 8))
        )

        # RNN for sequence modeling
        self.rnn = nn.LSTM(
            config['cnn_features'],
            config['rnn_hidden'],
            batch_first=True,
            bidirectional=True
        )

        # Classification head
        self.classifier = nn.Linear(config['rnn_hidden'] * 2, vocab_size)
        self.dropout = nn.Dropout(0.1)

    def forward(self, x):
        """Map images ``[B, 3, H, W]`` to logits ``[B, max_sequence_length, vocab_size]``."""
        batch_size = x.size(0)

        # CNN feature extraction
        features = self.cnn(x)  # [B, C, 4, 8]

        # Reshape for RNN
        features = features.view(batch_size, features.size(1), -1)  # [B, C, 32]
        features = features.permute(0, 2, 1)  # [B, 32, C]

        # RNN
        rnn_out, _ = self.rnn(features)  # [B, 32, hidden*2]

        # Reduce the 32 grid steps to exactly max_sequence_length positions so the
        # logits line up one-to-one with the padded target sequence. Without this
        # step the output has 32 positions and cannot be paired with the targets.
        pooled = F.adaptive_avg_pool1d(
            rnn_out.permute(0, 2, 1),  # [B, hidden*2, 32]
            self.max_sequence_length
        )  # [B, hidden*2, L]
        # contiguous() so callers can flatten the logits with .view()
        pooled = pooled.permute(0, 2, 1).contiguous()  # [B, L, hidden*2]

        # Apply dropout and classification
        pooled = self.dropout(pooled)
        logits = self.classifier(pooled)  # [B, L, vocab_size]

        return logits

class KhmerDataset(Dataset):
    """Simple dataset for Khmer digits.

    Each item is ``(image, target, length)`` where ``target`` is the index
    sequence of the text followed by ``<EOS>`` and padded with ``<PAD>`` to
    ``max_seq_len``, and ``length`` is the number of non-padding positions.
    """

    def __init__(self, samples, char_to_idx, max_seq_len, transform=None):
        """Store the sample list and build the default transform.

        Args:
            samples: List of dicts with at least ``'image_path'`` and ``'text'``.
            char_to_idx: Mapping from character / special token to class index;
                must contain ``'<EOS>'`` and ``'<PAD>'``.
            max_seq_len: Fixed length of every returned target tensor.
            transform: Callable applied to the loaded RGB image. Defaults to
                ``ToTensor`` followed by ImageNet mean/std normalisation.
        """
        self.samples = samples
        self.char_to_idx = char_to_idx
        self.max_seq_len = max_seq_len
        self.transform = transform or transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ])

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        sample = self.samples[idx]

        # Load image; the context manager closes the underlying file promptly.
        # self.transform is always set by __init__, so no None check is needed.
        with Image.open(sample['image_path']) as raw_image:
            image = self.transform(raw_image.convert('RGB'))

        # Prepare target sequence: characters followed by EOS
        target = [self.char_to_idx[char] for char in sample['text']]
        target.append(self.char_to_idx['<EOS>'])

        # Truncate first, then measure: the reported length must never exceed the
        # tensor length, otherwise per-character metrics count missing positions.
        target = target[:self.max_seq_len]
        length = len(target)

        # Pad sequence up to the fixed length
        target.extend([self.char_to_idx['<PAD>']] * (self.max_seq_len - length))

        return image, torch.tensor(target, dtype=torch.long), length

def create_model(model_size, vocab_size, max_sequence_length):
    """Instantiate a SimpleOCRModel of the requested size preset."""
    ocr_model = SimpleOCRModel(
        vocab_size=vocab_size,
        max_sequence_length=max_sequence_length,
        model_size=model_size,
    )
    return ocr_model

print("✅ Model and dataset classes created!")


In [None]:
# Simplified Trainer for Hyperparameter Tuning
import time
from datetime import datetime
import copy
import shutil

class SimpleTrainer:
    """Simplified trainer for hyperparameter tuning.

    Wraps a model, its data loaders and one experiment configuration, and runs
    a train/validate loop with gradient clipping, an optional LR scheduler,
    best-weights tracking and plateau-based early stopping.
    """

    def __init__(self, model, train_loader, val_loader, config, device, pad_idx=None):
        """Create optimizer, scheduler and loss from ``config``.

        Args:
            model: Network returning logits ``[B, L, vocab]``.
            train_loader: Iterable yielding ``(images, targets, lengths)``.
            val_loader: Same format as ``train_loader``.
            config: Dict with ``'experiment_name'``, ``'optimizer'``,
                ``'training'``, ``'scheduler'`` and ``'early_stopping'``
                sections. ``training.label_smoothing`` and
                ``training.gradient_clip_norm`` are optional (defaults 0.0
                and 1.0).
            device: Device the model and batches are moved to.
            pad_idx: Class index ignored by the loss. When omitted it is read
                from the notebook-global ``metadata`` for backward
                compatibility.
        """
        self.model = model.to(device)
        self.train_loader = train_loader
        self.val_loader = val_loader
        self.config = config
        self.device = device

        training_cfg = config['training']

        # Setup optimizer
        if config['optimizer']['type'] == 'adamw':
            self.optimizer = torch.optim.AdamW(
                model.parameters(),
                lr=training_cfg['learning_rate'],
                weight_decay=training_cfg['weight_decay'],
                betas=tuple(config['optimizer']['betas'])
            )
        else:
            self.optimizer = torch.optim.Adam(
                model.parameters(),
                lr=training_cfg['learning_rate'],
                weight_decay=training_cfg['weight_decay']
            )

        # Setup scheduler
        if config['scheduler']['type'] == 'cosine':
            self.scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
                self.optimizer,
                T_max=training_cfg['num_epochs'],
                eta_min=config['scheduler']['min_lr']
            )
        elif config['scheduler']['type'] == 'steplr':
            self.scheduler = torch.optim.lr_scheduler.StepLR(
                self.optimizer,
                step_size=config['scheduler']['step_size'],
                gamma=config['scheduler']['gamma']
            )
        else:
            self.scheduler = None

        # Loss function. Prefer the explicit pad_idx so the trainer does not
        # depend on hidden notebook state; fall back to the global otherwise.
        if pad_idx is None:
            pad_idx = metadata['dataset_info']['char_to_idx']['<PAD>']
        self.criterion = nn.CrossEntropyLoss(
            ignore_index=pad_idx,
            label_smoothing=training_cfg.get('label_smoothing', 0.0)
        )

        # Max gradient norm; 1.0 when the config does not specify one
        self.grad_clip_norm = training_cfg.get('gradient_clip_norm', 1.0)

        # Training history
        self.history = {
            'train_loss': [],
            'val_loss': [],
            'val_char_accuracy': [],
            'val_seq_accuracy': []
        }

        self.best_val_acc = 0.0
        self.best_model_state = None

    def calculate_accuracy(self, outputs, targets, lengths):
        """Calculate character and sequence accuracy.

        Args:
            outputs: Logits ``[B, L, vocab]``.
            targets: Class indices ``[B, L]``.
            lengths: Per-sample count of valid (non-padding) positions; may be
                a tensor or a sequence of ints.

        Returns:
            ``(char_accuracy, seq_accuracy)`` as plain Python floats.
        """
        predictions = torch.argmax(outputs, dim=-1)

        char_correct = 0
        char_total = 0
        seq_correct = 0
        num_sequences = 0

        for pred, target, length in zip(predictions, targets, lengths):
            # DataLoader collation turns lengths into a tensor; convert to int so
            # the running totals (and the returned ratios) stay Python numbers.
            length = int(length)
            num_sequences += 1

            # Character accuracy over the valid prefix only
            pred_chars = pred[:length]
            target_chars = target[:length]
            char_correct += (pred_chars == target_chars).sum().item()
            char_total += length

            # Sequence accuracy: every valid position must match
            if torch.equal(pred_chars, target_chars):
                seq_correct += 1

        char_accuracy = char_correct / char_total if char_total > 0 else 0.0
        seq_accuracy = seq_correct / num_sequences if num_sequences > 0 else 0.0

        return char_accuracy, seq_accuracy

    def train_epoch(self):
        """Train for one epoch and return the mean batch loss."""
        self.model.train()
        total_loss = 0.0
        num_batches = 0

        for images, targets, lengths in self.train_loader:
            images = images.to(self.device)
            targets = targets.to(self.device)

            self.optimizer.zero_grad()

            outputs = self.model(images)

            # Flatten [B, L, vocab] / [B, L] for the position-wise loss
            loss = self.criterion(
                outputs.reshape(-1, outputs.size(-1)),
                targets.reshape(-1)
            )
            loss.backward()

            # Gradient clipping
            torch.nn.utils.clip_grad_norm_(self.model.parameters(), self.grad_clip_norm)

            self.optimizer.step()

            total_loss += loss.item()
            num_batches += 1

        # An empty loader yields 0.0 instead of ZeroDivisionError
        return total_loss / num_batches if num_batches else 0.0

    def validate(self):
        """Validate the model.

        Returns:
            ``(avg_loss, avg_char_acc, avg_seq_acc)``, each the mean of the
            per-batch values (0.0 when the validation loader is empty).
        """
        self.model.eval()
        total_loss = 0.0
        all_char_acc = []
        all_seq_acc = []

        with torch.no_grad():
            for images, targets, lengths in self.val_loader:
                images = images.to(self.device)
                targets = targets.to(self.device)

                outputs = self.model(images)

                # Calculate loss
                loss = self.criterion(
                    outputs.reshape(-1, outputs.size(-1)),
                    targets.reshape(-1)
                )
                total_loss += loss.item()

                # Calculate accuracy
                char_acc, seq_acc = self.calculate_accuracy(outputs, targets, lengths)
                all_char_acc.append(char_acc)
                all_seq_acc.append(seq_acc)

        num_batches = len(all_char_acc)
        if num_batches == 0:
            return 0.0, 0.0, 0.0

        avg_loss = total_loss / num_batches
        avg_char_acc = sum(all_char_acc) / num_batches
        avg_seq_acc = sum(all_seq_acc) / num_batches

        return avg_loss, avg_char_acc, avg_seq_acc

    def train(self):
        """Full training loop.

        Returns:
            The history dict with per-epoch train loss, validation loss and
            validation accuracies. The best weights (by validation character
            accuracy) are kept in ``self.best_model_state``.
        """
        print(f"🚀 Starting training: {self.config['experiment_name']}")

        num_epochs = self.config['training']['num_epochs']
        patience = self.config['early_stopping']['patience']
        min_delta = self.config['early_stopping']['min_delta']

        for epoch in range(num_epochs):
            start_time = time.time()

            # Train
            train_loss = self.train_epoch()

            # Validate
            val_loss, val_char_acc, val_seq_acc = self.validate()

            # Update scheduler
            if self.scheduler:
                self.scheduler.step()

            # Update history
            self.history['train_loss'].append(train_loss)
            self.history['val_loss'].append(val_loss)
            self.history['val_char_accuracy'].append(val_char_acc)
            self.history['val_seq_accuracy'].append(val_seq_acc)

            # Save best model
            if val_char_acc > self.best_val_acc:
                self.best_val_acc = val_char_acc
                self.best_model_state = copy.deepcopy(self.model.state_dict())

            epoch_time = time.time() - start_time

            print(f"Epoch {epoch+1}/{num_epochs} | "
                  f"Train Loss: {train_loss:.4f} | "
                  f"Val Loss: {val_loss:.4f} | "
                  f"Val Char Acc: {val_char_acc:.4f} | "
                  f"Val Seq Acc: {val_seq_acc:.4f} | "
                  f"Time: {epoch_time:.1f}s")

            # Early stopping check: stop once validation character accuracy has
            # moved by less than min_delta over the last `patience` epochs.
            if len(self.history['val_char_accuracy']) >= patience:
                recent_accs = self.history['val_char_accuracy'][-patience:]
                if max(recent_accs) - min(recent_accs) < min_delta:
                    print(f"Early stopping triggered at epoch {epoch+1}")
                    break

        return self.history

print("✅ Trainer class created!")


In [None]:
# Hyperparameter Tuning System
import json
import matplotlib.pyplot as plt
from IPython.display import display, clear_output

class HyperparameterTuner:
    """Comprehensive hyperparameter tuning system for Colab.

    Reads a YAML file containing a ``base_config`` section and a mapping of
    named ``experiments``, trains one model per experiment with
    ``SimpleTrainer``, tracks the best run by validation character accuracy,
    and writes checkpoints, JSON/CSV reports and plots below the notebook-level
    ``project_drive_path`` directory.
    """

    def __init__(self, config_file='config/phase3_colab_configs.yaml'):
        """Load the experiment configuration and the dataset metadata.

        Args:
            config_file: Path to the YAML file holding ``base_config`` and
                ``experiments``.
        """
        self.config_file = config_file
        self.results = []
        self.best_result = None
        self.experiments_completed = 0

        # Load configuration
        with open(config_file, 'r') as f:
            self.config = yaml.safe_load(f)

        # Load metadata
        with open('generated_data/metadata.yaml', 'r') as f:
            self.metadata = yaml.safe_load(f)

        # Device setup
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        print(f"🔥 Using device: {self.device}")

    @staticmethod
    def _deep_merge(base, overrides):
        """Return a deep copy of ``base`` with ``overrides`` merged recursively.

        Nested dictionaries are merged key by key, so an experiment that only
        overrides e.g. ``training.batch_size`` keeps every other ``training``
        setting from the base config. Neither input is modified.
        """
        merged = copy.deepcopy(base)
        for key, value in (overrides or {}).items():
            if isinstance(value, dict) and isinstance(merged.get(key), dict):
                merged[key] = HyperparameterTuner._deep_merge(merged[key], value)
            else:
                merged[key] = copy.deepcopy(value)
        return merged

    @staticmethod
    def _json_default(obj):
        """Fallback encoder for values ``json`` cannot serialize natively.

        Objects exposing ``tolist()`` (NumPy scalars/arrays, torch tensors) are
        converted to plain Python values; anything else is stored as its string
        representation so a long tuning run is never lost to a ``TypeError``.
        """
        to_list = getattr(obj, 'tolist', None)
        if callable(to_list):
            return to_list()
        return str(obj)

    def create_data_loaders(self, batch_size):
        """Create train and validation data loaders.

        Args:
            batch_size: Batch size used for both loaders.

        Returns:
            Tuple ``(train_loader, val_loader)``; only the training loader
            shuffles.
        """
        dataset_info = self.metadata['dataset_info']
        # +1 on the stored maximum length: presumably room for an EOS token - TODO confirm
        max_length = dataset_info['max_sequence_length'] + 1
        pin_memory = self.device.type == 'cuda'

        def build_loader(split, shuffle):
            dataset = KhmerDataset(
                self.metadata['splits'][split],
                dataset_info['char_to_idx'],
                max_length
            )
            return DataLoader(
                dataset,
                batch_size=batch_size,
                shuffle=shuffle,
                num_workers=2,
                pin_memory=pin_memory
            )

        return build_loader('train', True), build_loader('val', False)

    def run_single_experiment(self, experiment_name, experiment_config):
        """Run a single hyperparameter experiment.

        Args:
            experiment_name: Label used in logs and in the checkpoint filename.
            experiment_config: Overrides merged recursively on top of
                ``base_config``.

        Returns:
            A result dict. On success ``status`` is ``'completed'`` and the dict
            holds the metrics, hyperparameters, training history and, when a
            best model state exists, ``model_path``. Any exception is caught and
            reported as ``status == 'failed'`` with the error message.
        """
        print(f"\n{'='*60}")
        print(f"🧪 Starting experiment: {experiment_name}")
        print(f"{'='*60}")

        start_time = time.time()

        try:
            # Merge recursively: a plain dict.update() would replace whole
            # sections and drop base settings the experiment did not repeat.
            merged_config = self._deep_merge(self.config['base_config'], experiment_config)

            # Create data loaders
            train_loader, val_loader = self.create_data_loaders(
                merged_config['training']['batch_size']
            )

            # Create model
            model = create_model(
                model_size=merged_config['model']['name'],
                vocab_size=len(self.metadata['dataset_info']['char_to_idx']),
                max_sequence_length=self.metadata['dataset_info']['max_sequence_length'] + 1
            )

            # Initialize trainer
            trainer = SimpleTrainer(
                model=model,
                train_loader=train_loader,
                val_loader=val_loader,
                config=merged_config,
                device=self.device
            )

            # Run training
            history = trainer.train()

            # Wall-clock time covers loader/model creation as well as training
            training_time = time.time() - start_time

            # Create result
            result = {
                'experiment_name': experiment_name,
                'status': 'completed',
                'training_time': training_time,
                'best_val_char_accuracy': max(history['val_char_accuracy']),
                'best_val_seq_accuracy': max(history['val_seq_accuracy']),
                'final_train_loss': history['train_loss'][-1],
                'final_val_loss': history['val_loss'][-1],
                'epochs_trained': len(history['train_loss']),
                'hyperparameters': {
                    'model_size': merged_config['model']['name'],
                    'batch_size': merged_config['training']['batch_size'],
                    'learning_rate': merged_config['training']['learning_rate'],
                    'weight_decay': merged_config['training']['weight_decay'],
                    'optimizer': merged_config['optimizer']['type'],
                    'scheduler': merged_config['scheduler']['type']
                },
                'history': history
            }

            # Save model to Google Drive (explicit None check: only skip when
            # the trainer never recorded a best state)
            if trainer.best_model_state is not None:
                model_filename = f"{experiment_name}_best_model.pth"
                model_path = f"{project_drive_path}/models/{model_filename}"
                torch.save({
                    'model_state_dict': trainer.best_model_state,
                    'config': merged_config,
                    'metadata': self.metadata,
                    'result': result
                }, model_path)
                result['model_path'] = model_path
                print(f"💾 Model saved to: {model_path}")

            print(f"✅ Experiment {experiment_name} completed successfully!")
            print(f"📊 Best character accuracy: {result['best_val_char_accuracy']:.4f}")
            print(f"📊 Best sequence accuracy: {result['best_val_seq_accuracy']:.4f}")
            print(f"⏱️ Training time: {training_time/60:.1f} minutes")

            return result

        except Exception as e:
            # Deliberate best-effort: one broken experiment must not abort the sweep
            print(f"❌ Experiment {experiment_name} failed: {str(e)}")
            import traceback
            traceback.print_exc()
            return {
                'experiment_name': experiment_name,
                'status': 'failed',
                'error': str(e),
                'training_time': time.time() - start_time
            }

    def run_experiments(self, experiment_names=None):
        """Run all or specified experiments.

        Args:
            experiment_names: Optional experiment name or iterable of names to
                run. ``None`` (or an empty value) runs every configured
                experiment. Names missing from the config are reported and
                skipped.

        Results are appended to ``self.results`` and ``self.best_result`` is
        updated to the completed run with the highest validation character
        accuracy.
        """
        experiments = self.config['experiments']

        if experiment_names:
            # A bare string would otherwise be matched as a substring filter
            if isinstance(experiment_names, str):
                experiment_names = [experiment_names]

            unknown = [name for name in experiment_names if name not in experiments]
            if unknown:
                print(f"⚠️ Unknown experiments skipped: {', '.join(map(str, unknown))}")

            experiments = {name: config for name, config in experiments.items()
                          if name in experiment_names}

        print(f"🎯 Starting hyperparameter tuning with {len(experiments)} experiments")
        print(f"📊 Total dataset size: {self.metadata['dataset_info']['total_samples']}")
        print(f"🏋️ Training samples: {self.metadata['dataset_info']['train_samples']}")
        print(f"🔬 Validation samples: {self.metadata['dataset_info']['val_samples']}")

        for exp_name, exp_config in experiments.items():
            result = self.run_single_experiment(exp_name, exp_config)
            self.results.append(result)

            # Update best result
            if (result.get('status') == 'completed' and
                (self.best_result is None or
                 result['best_val_char_accuracy'] >
                 self.best_result['best_val_char_accuracy'])):
                self.best_result = result

            self.experiments_completed += 1

            # Clear memory
            if torch.cuda.is_available():
                torch.cuda.empty_cache()

    def save_results(self):
        """Save tuning results to Google Drive.

        Writes a timestamped JSON report and, when at least one experiment
        completed, a summary CSV.

        Returns:
            Tuple ``(results_file, summary_file)`` with the target paths. The
            CSV path is returned even if no CSV was written.
        """
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

        # Save detailed results
        results_file = f"{project_drive_path}/results/hyperparameter_tuning_results_{timestamp}.json"
        report = {
            'timestamp': timestamp,
            'device': str(self.device),
            'dataset_info': self.metadata['dataset_info'],
            'best_result': self.best_result,
            'all_results': self.results,
            'summary': self.generate_summary()
        }
        with open(results_file, 'w') as f:
            # default= keeps tensor/NumPy values from aborting the dump midway
            json.dump(report, f, indent=2, default=self._json_default)

        # Save summary CSV
        summary_file = f"{project_drive_path}/results/summary_{timestamp}.csv"
        summary_written = self.save_summary_csv(summary_file)

        print(f"💾 Results saved to: {results_file}")
        if summary_written:
            print(f"📊 Summary saved to: {summary_file}")
        else:
            print("⚠️ No completed experiments - summary CSV was not written")

        return results_file, summary_file

    def generate_summary(self):
        """Generate experiment summary.

        Returns:
            ``{}`` when nothing has run, ``{'message': ...}`` when no experiment
            completed, otherwise a dict of counts, best accuracies, the average
            training time in seconds and the best experiment's name.
        """
        if not self.results:
            return {}

        completed_results = [r for r in self.results if r.get('status') == 'completed']

        if not completed_results:
            return {'message': 'No completed experiments'}

        return {
            'total_experiments': len(self.results),
            'completed_experiments': len(completed_results),
            'failed_experiments': len(self.results) - len(completed_results),
            'best_char_accuracy': max(r['best_val_char_accuracy'] for r in completed_results),
            'best_seq_accuracy': max(r['best_val_seq_accuracy'] for r in completed_results),
            'average_training_time': sum(r['training_time'] for r in completed_results) / len(completed_results),
            'best_experiment': self.best_result['experiment_name'] if self.best_result else None
        }

    def save_summary_csv(self, filename):
        """Save summary as CSV.

        Args:
            filename: Destination path of the CSV file.

        Returns:
            ``True`` if a file was written, ``False`` when there were no
            completed experiments (in which case nothing is written).
        """
        rows = [
            {
                'experiment_name': result['experiment_name'],
                'char_accuracy': result['best_val_char_accuracy'],
                'seq_accuracy': result['best_val_seq_accuracy'],
                'training_time_min': result['training_time'] / 60,
                'epochs_trained': result['epochs_trained'],
                **result['hyperparameters']
            }
            for result in self.results
            if result.get('status') == 'completed'
        ]

        if not rows:
            return False

        import pandas as pd

        pd.DataFrame(rows).to_csv(filename, index=False)
        return True

    def plot_results(self):
        """Plot experiment results.

        Draws a 2x2 figure (character accuracy, sequence accuracy and training
        time per completed experiment, plus the learning curves of the best
        experiment), saves it as a timestamped PNG under the results directory
        and displays it.
        """
        if not self.results:
            print("No results to plot")
            return

        completed_results = [r for r in self.results if r.get('status') == 'completed']

        if not completed_results:
            print("No completed experiments to plot")
            return

        fig, axes = plt.subplots(2, 2, figsize=(15, 10))

        exp_names = [r['experiment_name'] for r in completed_results]

        # One bar chart per metric: (axis, values, title, y-label)
        bar_panels = [
            (axes[0, 0],
             [r['best_val_char_accuracy'] for r in completed_results],
             'Best Character Accuracy by Experiment',
             'Character Accuracy'),
            (axes[0, 1],
             [r['best_val_seq_accuracy'] for r in completed_results],
             'Best Sequence Accuracy by Experiment',
             'Sequence Accuracy'),
            (axes[1, 0],
             [r['training_time'] / 60 for r in completed_results],
             'Training Time by Experiment',
             'Training Time (minutes)'),
        ]
        for ax, values, title, ylabel in bar_panels:
            ax.bar(exp_names, values)
            ax.set_title(title)
            ax.set_ylabel(ylabel)
            ax.tick_params(axis='x', rotation=45)

        # Learning curves for best experiment
        if self.best_result:
            history = self.best_result['history']
            epochs = range(1, len(history['train_loss']) + 1)

            axes[1, 1].plot(epochs, history['train_loss'], label='Train Loss')
            axes[1, 1].plot(epochs, history['val_loss'], label='Val Loss')
            axes[1, 1].plot(epochs, history['val_char_accuracy'], label='Val Char Acc')
            axes[1, 1].set_title(f"Learning Curves - {self.best_result['experiment_name']}")
            axes[1, 1].set_xlabel('Epoch')
            axes[1, 1].legend()
        else:
            # Nothing to draw: hide the empty panel instead of showing blank axes
            axes[1, 1].axis('off')

        plt.tight_layout()
        plt.savefig(f"{project_drive_path}/results/experiment_plots_{datetime.now().strftime('%Y%m%d_%H%M%S')}.png")
        plt.show()
        # Release the figure so repeated calls do not accumulate open figures
        plt.close(fig)

print("✅ Hyperparameter tuning system created!")


In [None]:
# Build the tuner (its constructor reads the experiment config and dataset metadata)
print("🔬 Initializing hyperparameter tuner...")
tuner = HyperparameterTuner()

# List every configured experiment together with its headline settings.
# Each entry is (label shown, config section, key inside that section).
headline_settings = (
    ('Model', 'model', 'name'),
    ('Batch size', 'training', 'batch_size'),
    ('Learning rate', 'training', 'learning_rate'),
    ('Optimizer', 'optimizer', 'type'),
    ('Scheduler', 'scheduler', 'type'),
)

print("\n📋 Available experiments:")
for exp_name, exp_config in tuner.config['experiments'].items():
    print(f"  • {exp_name}")
    for label, section, option in headline_settings:
        print(f"    - {label}: {exp_config[section][option]}")
    print()


In [None]:
# Launch the full experiment sweep.
# To run only a subset, pass the experiment names explicitly, e.g.
# tuner.run_experiments(['baseline_gpu_optimized', 'aggressive_learning_gpu'])

print("🚀 Starting hyperparameter tuning experiments...")
print("⏱️ This may take 30-60 minutes depending on GPU availability...")

# No argument: every experiment in the config is executed
tuner.run_experiments()

print("\n🎉 All experiments completed!")

# Report the winner, or the literal text "None" when nothing completed
if tuner.best_result:
    best_label = tuner.best_result['experiment_name']
else:
    best_label = 'None'
print(f"🏆 Best experiment: {best_label}")

if tuner.best_result:
    print(f"📊 Best character accuracy: {tuner.best_result['best_val_char_accuracy']:.4f}")
    print(f"📊 Best sequence accuracy: {tuner.best_result['best_val_seq_accuracy']:.4f}")


In [None]:
# Save results to Google Drive
print("💾 Saving results to Google Drive...")
results_file, summary_file = tuner.save_results()

# Generate summary report
summary = tuner.generate_summary()
print("\n📊 EXPERIMENT SUMMARY")
print("=" * 50)
for key, value in summary.items():
    # Durations are stored in seconds. They must be matched before the generic
    # float branch; otherwise a float duration such as average_training_time is
    # printed as raw seconds and the minutes formatting is never reached.
    if key.endswith('_time') and isinstance(value, (int, float)):
        print(f"{key}: {value/60:.1f} minutes")
    elif isinstance(value, float):
        print(f"{key}: {value:.4f}")
    else:
        print(f"{key}: {value}")

print(f"\n📁 Results saved to Google Drive:")
print(f"  • Detailed results: {results_file}")
print(f"  • Summary CSV: {summary_file}")

# Plot results
print("\n📈 Generating visualization...")
tuner.plot_results()


In [None]:
# Reload the winning checkpoint and spot-check it on a few validation samples
if not (tuner.best_result and 'model_path' in tuner.best_result):
    print("❌ No best model available to load")
else:
    print("🔄 Loading best model for testing...")

    # Read the checkpoint written by the tuner for the best experiment
    checkpoint = torch.load(tuner.best_result['model_path'], map_location=tuner.device)
    dataset_info = checkpoint['metadata']['dataset_info']

    # Rebuild the architecture from the stored config, then restore the weights
    best_model = create_model(
        model_size=checkpoint['config']['model']['name'],
        vocab_size=len(dataset_info['char_to_idx']),
        max_sequence_length=dataset_info['max_sequence_length'] + 1
    )
    best_model.load_state_dict(checkpoint['model_state_dict'])
    best_model = best_model.to(tuner.device)
    best_model.eval()

    print(f"✅ Best model loaded: {tuner.best_result['experiment_name']}")
    print(f"📊 Character accuracy: {tuner.best_result['best_val_char_accuracy']:.4f}")
    print(f"📊 Sequence accuracy: {tuner.best_result['best_val_seq_accuracy']:.4f}")

    # Index -> character table; keys are looked up as strings
    idx_to_char = dataset_info['idx_to_char']
    special_tokens = ('<PAD>', '<EOS>', '<BLANK>')

    def decode_indices(index_row):
        """Map a 1-D tensor of class indices to text, dropping special tokens."""
        symbols = [idx_to_char[str(idx.item())] for idx in index_row]
        return ''.join(symbol for symbol in symbols if symbol not in special_tokens)

    # Inference only: no gradients needed
    with torch.no_grad():
        val_loader = tuner.create_data_loaders(32)[1]
        images, targets, lengths = next(iter(val_loader))
        images = images[:5].to(tuner.device)  # Test on 5 samples
        targets = targets[:5]
        lengths = lengths[:5]

        outputs = best_model(images)
        predictions = torch.argmax(outputs, dim=-1)

        print("\n🔍 Sample predictions:")
        for sample_no, (target_row, pred_row, length) in enumerate(
                zip(targets, predictions, lengths), start=1):
            # Both sequences are cut at the target length minus one (-1 for EOS)
            actual_text = decode_indices(target_row[:length-1])
            pred_text = decode_indices(pred_row[:length-1])

            print(f"  Sample {sample_no}: Actual='{actual_text}', Predicted='{pred_text}'")
