# Phase 2: One-Class Classifier Training for Anomaly Detection

This notebook trains the one-class classifiers (OC-SVM, ECOD, EVT) using the trained autoencoder from Phase 1.


## 1. Enable GPU and Install Dependencies


In [None]:
# Phase 2 is dominated by sklearn/pyod models, so a GPU is optional here;
# it only speeds up the autoencoder forward passes.
import torch

has_cuda = torch.cuda.is_available()
print(f"CUDA available: {has_cuda}")

if has_cuda:
    # Query device 0 once and reuse the properties object for the memory line.
    gpu_props = torch.cuda.get_device_properties(0)
    print(f"GPU: {torch.cuda.get_device_name(0)}")
    print(f"CUDA version: {torch.version.cuda}")
    print(f"GPU Memory: {gpu_props.total_memory / 1e9:.2f} GB")


In [None]:
# Install required packages
# `%pip` (rather than `!pip`) installs into the environment of the running kernel.
# First line: PyTorch family, pulled from the PyTorch wheel index for CUDA 11.8 (cu118).
# NOTE(review): no versions are pinned, so results may differ between runs; on a runtime
# that already ships torch this may replace the preinstalled build — confirm that is intended.
%pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
# Second line: numerical / classical-ML / plotting dependencies from the default index.
%pip install numpy scikit-learn pyod scipy matplotlib seaborn joblib tqdm


## 2. Upload Project Files via Google Drive (Recommended)


In [None]:
# Mount Google Drive and lay out the local project tree.
import os
import shutil

from google.colab import drive

drive.mount('/content/drive')

# Directories are created relative to the current working directory;
# exist_ok=True makes re-running this cell harmless.
project_dirs = (
    'ML_Project/src/models',
    'ML_Project/src/utils',
    'ML_Project/saved_models',
    'ML_Project/visualizations',
    'ML_Project/data/processed',
)
for project_dir in project_dirs:
    os.makedirs(project_dir, exist_ok=True)

print("Directory structure created.")
print("\nPlease upload the following files to Google Drive, then update paths below:")

# Checklist of files the later cells expect to find.
upload_checklist = (
    "1. src/models/autoencoder.py",
    "2. src/models/classifier.py (if separate)",
    "3. src/train_classifier.py",
    "4. saved_models/ae_weights.pth (from Phase 1)",
    "5. data/processed/radioml_2018_processed.npz (optional, if using real data)",
)
for checklist_line in upload_checklist:
    print(checklist_line)


In [None]:
# Copy project files from the mounted Google Drive into the local project tree.
# Edit `drive_base` / the mapping below to match where the files live on your Drive.

drive_base = '/content/drive/MyDrive'  # Adjust if your files are in a subfolder

# Maps project-relative destination -> source path on Drive.
files_to_copy = {
    'src/models/autoencoder.py': f'{drive_base}/autoencoder.py',
    'src/train_classifier.py': f'{drive_base}/train_classifier.py',
    'saved_models/ae_weights.pth': f'{drive_base}/ae_weights.pth',
}

print("Copying files from Google Drive...")
for rel_target, drive_source in files_to_copy.items():
    target_path = 'ML_Project/' + rel_target
    # The destination directory is ensured even when the source turns out to be missing.
    os.makedirs(os.path.dirname(target_path), exist_ok=True)

    if not os.path.exists(drive_source):
        # Missing sources are reported, not raised, so the remaining files still get copied.
        print(f"✗ Not found: {drive_source}")
        print("  Please upload to Google Drive and update the path above")
        continue

    shutil.copy(drive_source, target_path)
    print(f"✓ Copied {os.path.basename(drive_source)}")


## 3. Alternative: Upload Files Directly (for smaller files)


In [None]:
# If you prefer direct upload (for smaller files only)
# This cell is self-contained: it imports what it needs and creates the target
# directories itself, so it also works when the Google Drive cells were skipped.
import os
import shutil

from google.colab import files

print("Upload the trained autoencoder weights (ae_weights.pth)")
uploaded = files.upload()  # mapping keyed by uploaded file name

for filename in uploaded:
    # Decide where the file belongs based on its extension and name.
    if filename.endswith('.pth'):
        destination = 'ML_Project/saved_models/ae_weights.pth'
        moved_message = f"✓ Moved {filename} to ML_Project/saved_models/"
    elif filename.endswith('.py') and 'autoencoder' in filename:
        destination = 'ML_Project/src/models/autoencoder.py'
        moved_message = f"✓ Moved {filename}"
    elif filename.endswith('.py') and ('classifier' in filename or 'train' in filename):
        destination = 'ML_Project/src/train_classifier.py'
        moved_message = f"✓ Moved {filename}"
    else:
        # Previously an unrecognized .py file was reported as "Moved" although nothing
        # happened, and other file types were ignored without any message.
        print(f"⚠ Skipped {filename}: not a recognized project file (left in place)")
        continue

    # shutil.move fails if the parent directory is missing, so create it first.
    os.makedirs(os.path.dirname(destination), exist_ok=True)
    shutil.move(filename, destination)
    print(moved_message)


## 4. Verify Setup


In [None]:
# Check required files
# `os` is imported here so the check does not depend on the (optional) Google Drive
# cell having been executed earlier in the session.
import os

required_files = [
    'ML_Project/src/models/autoencoder.py',
    'ML_Project/src/train_classifier.py',
    'ML_Project/saved_models/ae_weights.pth'
]

print("Checking required files...")
missing_files = []
for file_path in required_files:
    if os.path.exists(file_path):
        file_size = os.path.getsize(file_path) / (1024 * 1024)  # Size in MB
        print(f"✓ {file_path} ({file_size:.2f} MB)")
    else:
        print(f"✗ {file_path} - MISSING!")
        missing_files.append(file_path)

# `all_ok` is kept as a module-level flag for any later cell that wants to consult it.
all_ok = not missing_files

if all_ok:
    print("\n✓ All files are ready!")
else:
    print("\n✗ Please upload the missing files!")
    print(f"  Missing: {', '.join(missing_files)}")


## 5. Train One-Class Classifiers


In [None]:
# Change to project directory and import the training components.
# Written to be idempotent: re-running the cell neither tries to enter
# ML_Project/ML_Project nor keeps prepending duplicate entries to sys.path.
import os
import sys

import numpy as np
import torch
from torch.utils.data import DataLoader, TensorDataset

PROJECT_DIR = 'ML_Project'
if os.path.basename(os.getcwd()) != PROJECT_DIR:
    os.chdir(PROJECT_DIR)
print(os.getcwd())

# Add src to Python path (absolute, so it keeps working if the cwd changes later)
src_dir = os.path.abspath('src')
if src_dir not in sys.path:
    sys.path.insert(0, src_dir)

# Import training components
from train_classifier import OneClassTrainer, CONFIG

# Import autoencoder model
try:
    from models.autoencoder import RFAutoencoder
    print("✓ Imported RFAutoencoder from models.autoencoder")
except ImportError as import_error:
    # Fallback: use mock from train_classifier. The reason is shown because an
    # ImportError can also come from a broken dependency inside models.autoencoder.
    print(f"⚠ Could not import models.autoencoder: {import_error}")
    from train_classifier import RFAutoencoder
    print("✓ Using RFAutoencoder from train_classifier (mock)")

print("Setup complete. Starting Phase 2 training...")


In [None]:
# Load trained autoencoder from Phase 1
print("Loading trained autoencoder...")
device = 'cuda' if torch.cuda.is_available() else 'cpu'
weights_path = 'saved_models/ae_weights.pth'

# Initialize model - the constructor signature tells us which RFAutoencoder was imported:
# the Phase 1 class accepts (input_channels, seq_len, latent_dim); if that raises
# TypeError we fall back to the mock signature (input_len, latent_dim).
try:
    model = RFAutoencoder(input_channels=2, seq_len=128, latent_dim=CONFIG['latent_dim']).to(device)
    is_phase1_arch = True
    print("Using Phase 1 autoencoder architecture (Conv1D)")
except TypeError:
    model = RFAutoencoder(input_len=256, latent_dim=CONFIG['latent_dim']).to(device)
    is_phase1_arch = False
    print("Using mock autoencoder architecture (Linear layers)")

# Load weights. Failure is reported but deliberately not fatal: the notebook
# continues with an untrained model, exactly as announced in the messages below.
try:
    # weights_only=True restricts unpickling to tensors/containers, which is all a
    # state_dict needs and avoids executing arbitrary pickled code from the file.
    state_dict = torch.load(weights_path, map_location=device, weights_only=True)
    # Phase 1 architecture must match exactly; the mock tolerates missing/unexpected keys.
    model.load_state_dict(state_dict, strict=is_phase1_arch)
    if is_phase1_arch:
        print("✓ Autoencoder loaded successfully from Phase 1!")
    else:
        print("✓ Loaded compatible weights (some layers may not match)")
except Exception as e:
    if is_phase1_arch:
        print(f"⚠ Warning: Could not load autoencoder weights: {e}")
        print("Will use untrained model")
    else:
        print(f"⚠ Using untrained mock autoencoder: {e}")

# Always switch to inference mode. Previously eval() was only reached when the
# weights loaded, leaving the fallback model in training mode.
model.eval()


In [None]:
# Prepare data based on which autoencoder we're using
# The Phase 1 Conv1D model expects (N, 2, 128); the mock Linear model expects (N, 256).
# The presence of an `enc_conv1` attribute is used to tell the two apart.

data_path = 'data/processed/radioml_2018_processed.npz'
synthetic_seed = 42  # fixed seed so the synthetic fallback is reproducible
uses_conv_input = hasattr(model, 'enc_conv1')

try:
    # Try to load processed data; the context manager closes the .npz file handle.
    with np.load(data_path) as data:
        X_train = torch.as_tensor(data['X_train'], dtype=torch.float32)
    print(f"✓ Loaded processed data: {X_train.shape}")

    # reshape (rather than view) also works when the tensor is not contiguous.
    if uses_conv_input:
        # Phase 1 Conv1D model - keep as (N, 2, 128)
        if X_train.dim() == 2:
            # Reshape from (N, 256) to (N, 2, 128)
            X_train = X_train.reshape(-1, 2, 128)
        print(f"Data shape for Conv1D model: {X_train.shape}")
    else:
        # Mock Linear model - flatten to (N, 256)
        if X_train.dim() == 3:
            X_train = X_train.reshape(X_train.size(0), -1)
        print(f"Data shape for Linear model: {X_train.shape}")

except FileNotFoundError:
    # Generate synthetic data for demonstration
    print("Processed data not found. Generating synthetic data...")
    samples = 5000

    # A dedicated generator keeps the global torch RNG state untouched.
    synthetic_rng = torch.Generator().manual_seed(synthetic_seed)
    synthetic_shape = (samples, 2, 128) if uses_conv_input else (samples, 256)
    X_train = torch.randn(*synthetic_shape, generator=synthetic_rng)

    print(f"Generated synthetic data: {X_train.shape}")

# Create DataLoader
dataset = TensorDataset(X_train)
train_loader = DataLoader(dataset, batch_size=CONFIG['batch_size'], shuffle=True)
print(f"DataLoader created with batch_size={CONFIG['batch_size']}")


In [None]:
# Build the trainer around the autoencoder and fit the one-class classifiers.
banner = "=" * 50

print(f"\n{banner}")
print("Starting One-Class Classifier Training")
print(banner)

trainer = OneClassTrainer(model)
trainer.train(train_loader)

print(f"\n{banner}")
print("Training Complete!")
print(banner)


In [None]:
# Save trained models
# Persists the trainer's state via OneClassTrainer.save_state(); requires the
# `trainer` object created by the training cell.
print("\nSaving models...")
trainer.save_state()
# NOTE(review): the path in the message below is hard-coded and not checked against
# what save_state() actually wrote — confirm it matches the location used in train_classifier.
print("✓ Models saved to saved_models/classifier_model.joblib")


## 6. Download Trained Models


In [None]:
# Download trained classifier models (and the visualization, if one was produced).
# `os` is imported here so the cell does not rely on an earlier optional cell.
import os

from google.colab import files

# (path relative to the project directory, message printed after the download starts)
artifacts = [
    ('saved_models/classifier_model.joblib', "✓ Classifier model downloaded!"),
    ('visualizations/evt_distribution.png', "✓ Visualization downloaded!"),
]

for artifact_path, done_message in artifacts:
    if os.path.exists(artifact_path):
        files.download(artifact_path)
        print(done_message)
    else:
        # Previously a missing artifact was skipped without any indication.
        print(f"⚠ Not found, skipping download: {artifact_path}")


## 7. Verify Model Loading (Optional Test)


In [None]:
# Test loading the saved model
# Optional smoke test: reloads the persisted classifier state and lists its contents.
from train_classifier import OneClassTrainer

print("Testing model loading...")
try:
    # load_state() is called on the class with no arguments, so the file location is
    # whatever train_classifier defines — presumably the file written by save_state(); TODO confirm.
    classifier_model = OneClassTrainer.load_state()
    print("✓ Model loaded successfully!")
    # Assumes the loaded object is dict-like (exposes .keys()); if it is not, the
    # resulting error is caught below and reported as a load error.
    print(f"Model contains: {list(classifier_model.keys())}")
except Exception as e:
    # Any failure is reported rather than raised, so the notebook keeps running.
    print(f"✗ Error loading model: {e}")


## Tips for Phase 2 Training

1. **GPU Usage**: Phase 2 is less GPU-intensive than Phase 1, but GPU still helps with autoencoder inference
2. **Training Time**: Classifier training is typically much faster than the Phase 1 autoencoder training (minutes rather than hours)
3. **Models Saved**: All classifiers (OC-SVM, ECOD, EVT) are saved in one `classifier_model.joblib` file
4. **Visualizations**: Check `visualizations/evt_distribution.png` for error distribution plot
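5. **Data**: The notebook uses the real processed data (`data/processed/radioml_2018_processed.npz`) when it is present and otherwise falls back to synthetic data, which is suitable only for testing the pipeline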
6. **Memory**: Phase 2 uses less memory than Phase 1, so batch_size can be larger if needed
