# HMCAN Training (VS Code + Colab Extension)

Hierarchical Multichannel CNN-based Attention Network for Document Classification

## Phase 1: Foundation Models (HAN, HCAN, HMCAN)

**Compatible with the VS Code Colab extension**

## 1. Environment Check

In [None]:
!nvidia-smi

import torch
print(f"PyTorch: {torch.__version__}")
print(f"CUDA: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"GPU: {torch.cuda.get_device_name(0)}")

## 2. Project Setup

In [None]:
import os

REPO_URL = "https://github.com/sucpark/hmcan.git"
PROJECT_DIR = "/content/hmcan"

if os.path.exists(PROJECT_DIR):
    print("Updating existing project...")
    %cd {PROJECT_DIR}
    !git pull
else:
    print("Cloning project...")
    !git clone {REPO_URL} {PROJECT_DIR}
    %cd {PROJECT_DIR}

print(f"\nCurrent directory: {os.getcwd()}")
!ls -la

In [None]:
# Install dependencies
# `-e .` installs the project in the current directory (the checkout entered by
# the Project Setup cell) in editable mode; `-q` keeps pip's output short.
!pip install -e . -q
# wandb is installed unconditionally, even though logging to it is optional
# (see USE_WANDB in the Configuration cell).
!pip install wandb -q

In [None]:
# Download NLTK data
# Fetches the 'punkt' and 'punkt_tab' NLTK resources (tokenizer data).
# NOTE(review): presumably both are requested so that tokenization works across
# NLTK versions — confirm against the project's tokenizer code.
import nltk
nltk.download('punkt')
nltk.download('punkt_tab')

## 3. Configuration

**WandB Options:**
- Set `USE_WANDB = True` and provide an API key to enable WandB logging
- Set `USE_WANDB = False` to disable WandB (default); TensorBoard logging stays enabled either way

In [None]:
# === Configuration ===
import os

USE_WANDB = False  # Set True to enable WandB logging
# Optional: paste your API key here. Prefer leaving this empty and exporting the
# WANDB_API_KEY environment variable instead, so the key is never saved inside
# the notebook (and cannot be committed by accident).
WANDB_API_KEY = ""

# A pasted key wins; otherwise fall back to the environment. Strip whitespace
# because keys copied from a web page often carry a trailing space or newline.
WANDB_API_KEY = (WANDB_API_KEY or os.environ.get("WANDB_API_KEY", "")).strip()

# WandB setup (programmatic login - no interactive prompt)
if USE_WANDB and WANDB_API_KEY:
    import wandb
    wandb.login(key=WANDB_API_KEY)
    print("WandB logged in successfully!")
elif USE_WANDB:
    # Without a key the login would block on an interactive prompt, so WandB is
    # switched off here; later cells read USE_WANDB when writing the configs.
    print("Warning: USE_WANDB=True but no API key provided. WandB will be disabled.")
    USE_WANDB = False
else:
    print("WandB disabled. Using TensorBoard only.")

## 4. Download Data

In [None]:
# Download Yelp dataset and GloVe embeddings
# --max-samples: Number of samples to use (reduce for faster experiments)
# The script path is relative, so this must run from the repository root
# (the directory entered by the Project Setup cell).
!python scripts/download_data.py --max-samples 10000

## 5. Prepare Config Files

In [None]:
import yaml

# Derive a Colab-specific config for each model from the repository's base
# config. USE_WANDB comes from the Configuration cell (section 3), so that cell
# must have been run first.
for model_name in ['hmcan', 'han', 'hcan']:
    config_path = f'configs/{model_name}.yaml'

    # Explicit encoding so the result does not depend on the runtime's locale.
    with open(config_path, 'r', encoding='utf-8') as f:
        # safe_load returns None for an empty file; fall back to an empty
        # mapping so the key assignments below cannot fail with a TypeError.
        config = yaml.safe_load(f) or {}

    # Set logging options
    config['use_wandb'] = USE_WANDB
    config['use_tensorboard'] = True

    # Save the modified copy next to the original; the base file is untouched.
    colab_config_path = f'configs/{model_name}_colab.yaml'
    with open(colab_config_path, 'w', encoding='utf-8') as f:
        # sort_keys=False keeps the key order of the base config instead of
        # re-sorting alphabetically, so the two files stay easy to compare.
        yaml.dump(config, f, default_flow_style=False, sort_keys=False)

    print(f"Created {colab_config_path}")

## 6. Train Models

### 6.1 Train HMCAN (Main Model)

In [None]:
# Run the hmcan package's `train` command with the HMCAN config written by the
# Prepare Config Files cell (configs/hmcan_colab.yaml).
!python -m hmcan train --config configs/hmcan_colab.yaml

### 6.2 Train HAN (Baseline)

In [None]:
# Run the hmcan package's `train` command with the HAN config written by the
# Prepare Config Files cell (configs/han_colab.yaml).
!python -m hmcan train --config configs/han_colab.yaml

### 6.3 Train HCAN

In [None]:
# Run the hmcan package's `train` command with the HCAN config written by the
# Prepare Config Files cell (configs/hcan_colab.yaml).
!python -m hmcan train --config configs/hcan_colab.yaml

## 7. Evaluate Models

In [None]:
# Evaluate HMCAN
# The checkpoint path is the same file the Results Summary cell (section 8) reads.
# NOTE(review): presumably this file is written by the HMCAN training run above —
# confirm the output directory in the config.
!python -m hmcan evaluate --checkpoint outputs/hmcan_yelp/checkpoints/best_model.pt

In [None]:
# Evaluate HAN
# The checkpoint path is the same file the Results Summary cell (section 8) reads.
# NOTE(review): presumably this file is written by the HAN training run above —
# confirm the output directory in the config.
!python -m hmcan evaluate --checkpoint outputs/han_yelp/checkpoints/best_model.pt

In [None]:
# Evaluate HCAN
# The checkpoint path is the same file the Results Summary cell (section 8) reads.
# NOTE(review): presumably this file is written by the HCAN training run above —
# confirm the output directory in the config.
!python -m hmcan evaluate --checkpoint outputs/hcan_yelp/checkpoints/best_model.pt

## 8. Results Summary

In [None]:
import numbers
import os
import torch

models = ['han', 'hcan', 'hmcan']


def format_accuracy(value):
    """Return *value* as a percentage string when it is numeric, else as-is.

    Accepts Python floats and ints as well as 0-d tensors / NumPy scalars
    (anything with ``ndim == 0`` and an ``item()`` method). Non-numeric values
    such as the ``'N/A'`` placeholder are returned via ``str()``.
    """
    if getattr(value, "ndim", None) == 0 and hasattr(value, "item"):
        # Unwrap a scalar tensor / NumPy scalar into a plain Python number.
        value = value.item()
    # bool is a subclass of int, but True/False is not an accuracy.
    if isinstance(value, numbers.Real) and not isinstance(value, bool):
        return f"{value * 100:.2f}%"
    return str(value)


# Collect the stored metrics of every model that has a best checkpoint.
# The loop variable is deliberately NOT called `model`: that name is the
# notebook-global network object used by the visualization cells, and
# overwriting it with a string breaks them when cells are re-run out of order.
results = {}
missing = []
for model_name in models:
    ckpt_path = f'outputs/{model_name}_yelp/checkpoints/best_model.pt'
    if not os.path.exists(ckpt_path):
        missing.append(model_name)
        continue
    # map_location='cpu' lets the summary run on a runtime without a GPU.
    ckpt = torch.load(ckpt_path, map_location='cpu')
    # Tolerate both a missing 'metrics' entry and one stored as None.
    results[model_name] = ckpt.get('metrics') or {}

print("=" * 50)
print("Results Summary")
print("=" * 50)
for model_name, metrics in results.items():
    acc = format_accuracy(metrics.get('accuracy', 'N/A'))
    print(f"{model_name.upper():8s}: {acc}")
# Report untrained models explicitly instead of silently leaving them out.
for model_name in missing:
    print(f"{model_name.upper():8s}: N/A (no checkpoint found)")
print("=" * 50)

## 9. Save Results (Git Push)

In [None]:
# Check what files have changed
# Run this before the push cell below to review what would be committed.
!git status

In [None]:
# Uncomment to save results to GitHub
# NOTE: `git add outputs/` stages everything under outputs/ that git does not
# ignore, which includes the checkpoint files
# (outputs/<model>_yelp/checkpoints/best_model.pt) read by the cells above.
# !git add outputs/
# !git commit -m "Add training results from Colab"
# !git push

## 10. Attention Visualization (Optional)

In [None]:
import torch
import matplotlib.pyplot as plt
from hmcan.models import HMCAN
from hmcan.data import YelpDataModule

# Prepare the data module first: the model's constructor arguments below are
# read from it.
data_module = YelpDataModule(data_dir='data')
data_module.setup()

# Rebuild the network, then restore the trained weights on the CPU.
# NOTE(review): apart from these two arguments HMCAN is built with its default
# hyperparameters — confirm they match the config the checkpoint was trained with.
hmcan_kwargs = {
    'vocab_size': len(data_module.vocabulary),
    'pretrained_embeddings': data_module.pretrained_embeddings,
}
model = HMCAN(**hmcan_kwargs)

ckpt = torch.load('outputs/hmcan_yelp/checkpoints/best_model.pt', map_location='cpu')
model.load_state_dict(ckpt['model_state_dict'])

# Switch to evaluation mode before running inference in the next cell.
model.eval()

print("Model loaded successfully!")

In [None]:
# Get a sample and visualize attention
# Requires `model`, `data_module`, `torch` and `plt` from the previous cell.
test_loader = data_module.test_dataloader()
batch = next(iter(test_loader))

# Inference only: no gradients are needed for a plot.
with torch.no_grad():
    outputs = model(batch['document'], batch['sentence_lengths'])

# Sentence attention visualization
# squeeze() drops every size-1 dimension, so a document with a single sentence
# would collapse to a 0-d value and len() below would raise a TypeError;
# atleast_1d keeps it a one-element vector. detach().cpu() makes the NumPy
# conversion work wherever the tensor lives.
# NOTE(review): assumes the loader yields one document per batch — with several
# documents the squeezed array stays 2-D and cannot be drawn as one bar series.
sent_attn = torch.atleast_1d(
    outputs['sentence_attention'].detach().cpu().squeeze()
).numpy()

plt.figure(figsize=(10, 4))
plt.bar(range(len(sent_attn)), sent_attn)
plt.xlabel('Sentence Index')
plt.ylabel('Attention Weight')
plt.title('Sentence-level Attention Weights')
plt.tight_layout()
# Save before show(): the PNG is written to the current working directory.
plt.savefig('sentence_attention.png', dpi=150)
plt.show()