# Model Experiment

This notebook contains experiments for training and evaluating models.


In [None]:
# Import standard libraries
import sys
from pathlib import Path
import numpy as np
import torch
import torch.nn as nn
import matplotlib.pyplot as plt
{% if cookiecutter.enable_mlflow_tracking == 'y' or cookiecutter.enable_mlflow_tracking == 'yes' %}
import mlflow
import mlflow.pytorch
{% endif %}

# Set up paths
project_root = Path().resolve().parent.parent.parent
sys.path.insert(0, str(project_root))

# Data paths
data_dir = project_root / "science" / "data"
raw_data_dir = data_dir / "raw"
processed_data_dir = data_dir / "processed"
interim_data_dir = data_dir / "interim"
external_data_dir = data_dir / "external"
output_data_dir = data_dir / "output"

# Models path
models_dir = project_root / "science" / "models"
models_dir.mkdir(parents=True, exist_ok=True)

# Import project package
from {{ cookiecutter.python_package }}.vision import load_image, preprocess_image, predict_simple
from {{ cookiecutter.python_package }}.utils import load_state, save_state

{% if cookiecutter.enable_mlflow_tracking == 'y' or cookiecutter.enable_mlflow_tracking == 'yes' %}
# MLflow setup
mlflow.set_tracking_uri("{{ cookiecutter.mlflow_backend_store }}")
mlflow.set_experiment("{{ cookiecutter.project_name }}")
{% endif %}

print(f"Project root: {project_root}")
print(f"Processed data directory: {processed_data_dir}")
print(f"Models directory: {models_dir}")
print(f"Device: {'cuda' if torch.cuda.is_available() else 'cpu'}")
{% if cookiecutter.enable_mlflow_tracking == 'y' or cookiecutter.enable_mlflow_tracking == 'yes' %}
print(f"MLflow tracking URI: {mlflow.get_tracking_uri()}")
{% endif %}


## Load Processed Data


In [None]:
# Example: Load processed data
# Modify based on your data format

# if processed_data_dir.exists():
#     # Load your processed data here
#     # For example, if you have numpy arrays:
#     # data = np.load(processed_data_dir / "train_data.npy")
#     # labels = np.load(processed_data_dir / "train_labels.npy")
#     print("Load your processed data here")
# else:
#     print(f"Processed data directory does not exist: {processed_data_dir}")
#     print("Please process your raw data first")


## Define Model


In [None]:
# Example: Define a simple model
# Replace with your actual model architecture

# class SimpleModel(nn.Module):
#     def __init__(self, num_classes=10):
#         super().__init__()
#         self.conv1 = nn.Conv2d(3, 32, kernel_size=3, padding=1)
#         self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
#         self.pool = nn.AdaptiveAvgPool2d(1)
#         self.fc = nn.Linear(64, num_classes)
#     
#     def forward(self, x):
#         x = torch.relu(self.conv1(x))
#         x = torch.relu(self.conv2(x))
#         x = self.pool(x)
#         x = x.view(x.size(0), -1)
#         x = self.fc(x)
#         return x

# model = SimpleModel(num_classes=10)
# device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# model = model.to(device)
# print(f"Model created and moved to {device}")


## Training Loop (with Optional MLflow Tracking)


In [None]:
# Example: Training loop with MLflow tracking
# Uncomment and modify based on your needs

{% if cookiecutter.enable_mlflow_tracking == 'y' or cookiecutter.enable_mlflow_tracking == 'yes' %}
# Start MLflow run
with mlflow.start_run(run_name="model_experiment") as run:
    # Log hyperparameters
    num_epochs = 10
    learning_rate = 0.001
    batch_size = 32
    
    mlflow.log_params({
        "num_epochs": num_epochs,
        "learning_rate": learning_rate,
        "batch_size": batch_size,
        "optimizer": "Adam",
        "device": str(device),
    })
    
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    
    train_losses = []
    val_losses = []
    train_accs = []
    val_accs = []
    
    for epoch in range(num_epochs):
        model.train()
        epoch_loss = 0.0
        correct = 0
        total = 0
        
        # Your training loop here
        # for batch_idx, (data, target) in enumerate(train_loader):
        #     data, target = data.to(device), target.to(device)
        #     optimizer.zero_grad()
        #     output = model(data)
        #     loss = criterion(output, target)
        #     loss.backward()
        #     optimizer.step()
        #     epoch_loss += loss.item()
        #     
        #     # Calculate accuracy
        #     _, predicted = torch.max(output.data, 1)
        #     total += target.size(0)
        #     correct += (predicted == target).sum().item()
        
        avg_loss = epoch_loss / len(train_loader) if len(train_loader) > 0 else 0.0
        accuracy = 100 * correct / total if total > 0 else 0.0
        train_losses.append(avg_loss)
        train_accs.append(accuracy)
        
        # Log metrics for each epoch
        mlflow.log_metrics({
            "train_loss": avg_loss,
            "train_accuracy": accuracy,
        }, step=epoch)
        
        print(f"Epoch {epoch+1}/{num_epochs}, Loss: {avg_loss:.4f}, Accuracy: {accuracy:.2f}%")
        
        # Validation loop (if you have validation data)
        # model.eval()
        # val_loss = 0.0
        # val_correct = 0
        # val_total = 0
        # with torch.no_grad():
        #     for data, target in val_loader:
        #         data, target = data.to(device), target.to(device)
        #         output = model(data)
        #         val_loss += criterion(output, target).item()
        #         _, predicted = torch.max(output.data, 1)
        #         val_total += target.size(0)
        #         val_correct += (predicted == target).sum().item()
        # 
        # val_avg_loss = val_loss / len(val_loader)
        # val_accuracy = 100 * val_correct / val_total
        # val_losses.append(val_avg_loss)
        # val_accs.append(val_accuracy)
        # 
        # mlflow.log_metrics({
        #     "val_loss": val_avg_loss,
        #     "val_accuracy": val_accuracy,
        # }, step=epoch)
    
    # Log final metrics
    mlflow.log_metrics({
        "final_train_loss": train_losses[-1] if train_losses else 0.0,
        "final_train_accuracy": train_accs[-1] if train_accs else 0.0,
    })
    
    # Log model
    mlflow.pytorch.log_model(model, "model")
    
    print(f"MLflow run ID: {run.info.run_id}")
    print(f"View run at: {mlflow.get_tracking_uri()}")
{% else %}
# Training loop without MLflow
# num_epochs = 10
# learning_rate = 0.001
# 
# criterion = nn.CrossEntropyLoss()
# optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
# 
# train_losses = []
# 
# for epoch in range(num_epochs):
#     model.train()
#     epoch_loss = 0.0
#     
#     # Your training loop here
#     # for batch_idx, (data, target) in enumerate(train_loader):
#     #     data, target = data.to(device), target.to(device)
#     #     optimizer.zero_grad()
#     #     output = model(data)
#     #     loss = criterion(output, target)
#     #     loss.backward()
#     #     optimizer.step()
#     #     epoch_loss += loss.item()
#     
#     avg_loss = epoch_loss / len(train_loader) if len(train_loader) > 0 else 0.0
#     train_losses.append(avg_loss)
#     print(f"Epoch {epoch+1}/{num_epochs}, Loss: {avg_loss:.4f}")
{% endif %}


## Save Model (and Optionally Log to MLflow)


In [None]:
# Example: Save model checkpoint
# Uncomment when ready to save

# checkpoint = {
#     'model_state_dict': model.state_dict(),
#     'optimizer_state_dict': optimizer.state_dict(),
#     'epoch': num_epochs,
#     'loss': train_losses[-1] if train_losses else 0.0,
# }
# 
# checkpoint_path = save_state(
#     checkpoint,
#     models_dir,
#     prefix="model_experiment"
# )
# print(f"Model saved to: {checkpoint_path}")
# 
{% if cookiecutter.enable_mlflow_tracking == 'y' or cookiecutter.enable_mlflow_tracking == 'yes' %}
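# Log checkpoint as artifact to MLflow
# NOTE: the training run was closed when its `with mlflow.start_run(...)` block ended,
# so re-open it by ID here; a bare mlflow.log_artifact() call with no active run
# would start a new, unrelated run.
# with mlflow.start_run(run_id=run.info.run_id):
#     mlflow.log_artifact(str(checkpoint_path), "checkpoints")
# print(f"Checkpoint logged to MLflow: {checkpoint_path.name}")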
{% endif %}


## Load Saved Model


In [None]:
# Example: Load a saved model checkpoint
# Uncomment when you have a saved model

# if models_dir.exists():
#     checkpoint_files = list(models_dir.glob("*.pt"))
#     if checkpoint_files:
#         # Load the most recent checkpoint
#         latest_checkpoint = max(checkpoint_files, key=lambda p: p.stat().st_mtime)
#         print(f"Loading checkpoint: {latest_checkpoint}")
#         
#         checkpoint = load_state(latest_checkpoint)
#         model.load_state_dict(checkpoint['model_state_dict'])
#         print(f"Model loaded from epoch {checkpoint.get('epoch', 'unknown')}")
#     else:
#         print("No checkpoint files found in models directory")
# else:
#     print("Models directory does not exist")
