In [1]:
import sys
import os

import numpy as np
import pandas as pd

import torch
import torch.nn as nn
import torch.optim as optim

GRID_SEARCH = False



In [2]:
import torchinfo
import pytorchcv
import torchvision
from torch.nn.functional import one_hot

In [3]:
import json
import pickle
import itertools
import tqdm

%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns

plt.style.use('seaborn-v0_8') # pretty matplotlib plots
sns.set('notebook', style='whitegrid', font_scale=1.25)

In [4]:
# print(torch.cuda.is_available())
# print(torch.cuda.device_count())
# print(torch.cuda.get_device_name(0))

In [5]:
# device = 'cuda' # TODO change to GPU if you have one (e.g. on Colab)
device = 'cuda'
# if torch.cuda.is_available():
#     device = 'cuda'
# else:
#     device = 'cpu'

host = 'hpc'

if host == 'hpc':
    DATA_DIR = os.environ.get('DATA_DIR', os.path.abspath('../l3d_pn_dataset1000'))
else:
    if os.name == 'nt':
        # DATA_DIR = os.environ.get('DATA_DIR', os.path.abspath("C:\\Users\\arman\\Downloads\\L3D_Project\\l3d_pn_dataset1000"))
        DATA_DIR = os.environ.get('DATA_DIR', os.path.abspath("C:\\Users\\arman\\Downloads\\L3D_Project\\l3d_pn_dataset500"))
        # DATA_DIR = os.environ.get('DATA_DIR', os.path.abspath("C:\\Users\\arman\\Downloads\\L3D_Project\\Quotient-train\\"))
    else:
        DATA_DIR = os.environ.get('DATA_DIR', os.path.abspath('./l3d_pn_dataset1000'))

print(DATA_DIR)


/Users/joseph280996/Code/School/L3D/Project/l3d_pn_dataset1000


In [6]:
!ls $DATA_DIR/train/

[34mFalse_PN[m[m [34mTrue_PN[m[m


In [7]:
%load_ext autoreload
%autoreload 2

In [8]:
# Import utils from provided local starter code files
import data_utils
import data_utils_pseudo
import data_utils_pseudo_2
import models
import train

In [9]:
def eval_acc(model, device, test_loader):
    """
    Evaluate accuracy of a model on the test loader.
    Args:
        model: PyTorch model
        device: 'cuda' or 'cpu'
        test_loader: DataLoader for test data

    Returns:
        Accuracy as a float
    """
    model.to(device)
    model.eval()
    correct = 0
    with torch.no_grad():
        for x, y in test_loader:
            x, y = x.to(device), y.to(device)
            outputs = model(x)
            _, predicted = torch.max(outputs, 1)  # Get predicted class
            correct += (predicted == y).sum().item()  # Count correct predictions
    return correct / len(test_loader.dataset)

In [10]:
def plot_training_progress(best_info):
    """
    Plot training and validation progress.
    Args:
        best_info: Dictionary containing 'epochs', 'tr' (training), and 'va' (validation) metrics.
    """
    plt.figure(figsize=(10, 6))

    # Training metrics
    plt.plot(best_info['epochs'], best_info['tr']['loss'], '--', color='b', label='Training Loss')
    plt.plot(best_info['epochs'], best_info['tr']['err'], '-', color='b', label='Training Error')

    # Validation metrics
    plt.plot(best_info['epochs'], best_info['va']['xent'], '--', color='r', label='Validation Loss')
    plt.plot(best_info['epochs'], best_info['va']['err'], '-', color='r', label='Validation Error')

    plt.xlabel('Epochs')
    plt.ylabel('Metrics')
    plt.title('Training and Validation Progress')
    plt.legend()
    plt.grid(True)
    plt.show()

In [None]:
if not GRID_SEARCH:
    import torch
    import torch.nn as nn
    import torch.optim as optim
    import torchvision.models as models
    import torchvision.transforms as transforms
    import matplotlib.pyplot as plt
    import torch.nn.functional as F
    
    # MixMatch hyperparameters
    mixup_alpha = 0.75
    temperature = 0.5
    lambda_u = 10
    num_classes = 2  # Adjust to your dataset
    
    # Define sharpening function
    def sharpen(probabilities, T):
        return torch.pow(probabilities, 1 / T) / torch.sum(torch.pow(probabilities, 1 / T), dim=1, keepdim=True)
    
    # Define mixup function
    def mixup(x1, y1, x2, y2, alpha):
        lam = torch.distributions.Beta(alpha, alpha).sample().item()
        lam = max(lam, 1 - lam)
        x_mix = lam * x1 + (1 - lam) * x2
        y_mix = lam * y1 + (1 - lam) * y2
        return x_mix, y_mix
    
    class ResNetFeatureExtractor(nn.Module):
        def __init__(self, num_classes, dropout_rate=0.3):
            super(ResNetFeatureExtractor, self).__init__()
            # Use a deeper ResNet architecture
            resnet = models.resnet50(weights=models.ResNet50_Weights.DEFAULT)
            
            # Remove final layers
            self.feature_extractor = nn.Sequential(*list(resnet.children())[:-2])
            
            # Add custom classification head
            self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
            self.dropout = nn.Dropout(dropout_rate)
            
            # Two fully connected layers with batch norm
            self.fc1 = nn.Linear(2048, 512)
            self.bn1 = nn.BatchNorm1d(512)
            self.fc2 = nn.Linear(512, num_classes)
            
        def forward(self, x):
            features = self.feature_extractor(x)
            features = self.avgpool(features)
            features = features.view(features.size(0), -1)
            
            features = self.dropout(features)
            features = F.relu(self.bn1(self.fc1(features)))
            features = self.dropout(features)
            out = self.fc2(features)
            return out
    
    
    root_path = DATA_DIR
    tr_loader, unloader1, unloader2, va_loader, test_loader  = data_utils_pseudo.make_PN_data_loaders_with_unlabeled(
        root=root_path,
        batch_size=32,
        frac_valid=50/850
    )
    
    # Model, optimizer, and criterion
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    model = ResNetFeatureExtractor(num_classes=num_classes).to(device)
    
    
    optimizer = optim.Adam(model.parameters(), lr=0.0001)

    criterion = nn.CrossEntropyLoss()
    
    # Data collection for training visualization
    best_info = {
        'epochs': [],
        'tr': {'loss': [], 'err': []},
        'va': {'xent': [], 'err': []},
    }
    
    # Training loop
    num_epochs = 50
    best_val_error = float('inf')
    patience = 10
    patience_counter = 0
    results = []
    
    for epoch in range(1, num_epochs + 1):
        model.train()
        total_loss = 0
        correct = 0
        total_samples = 0
    
        for (inputs_x, targets_x), inputs_u1, inputs_u2 in zip(tr_loader, unloader1, unloader2):
            # Transfer to device
            inputs_x, targets_x = inputs_x.to(device), targets_x.to(device)
            inputs_u1 = inputs_u1[0].to(device)  # Unpack inputs from unloader's batch
            inputs_u2 = inputs_u2[0].to(device)  # Unpack inputs from unloader's batch
    
            # Forward pass for unlabeled data
            with torch.no_grad():
                outputs_u1 = model(inputs_u1)
                outputs_u2 = model(inputs_u2)
                pseudo_labels1 = torch.softmax(outputs_u1, dim=1)
                pseudo_labels2 = torch.softmax(outputs_u2, dim=1)
                p = (pseudo_labels1 + pseudo_labels2) / 2
                pseudo_labels= sharpen(p, temperature)
    
            # Combine labeled and pseudo-labeled data
            all_inputs = torch.cat([inputs_x, inputs_u1, inputs_u2], dim=0)
            all_labels = torch.cat([
                torch.nn.functional.one_hot(targets_x, num_classes).float(),
                pseudo_labels,
                pseudo_labels,
            ], dim=0)
            inputs_mixed, labels_mixed = mixup(all_inputs, all_labels, all_inputs, all_labels, mixup_alpha)

            
            # Forward pass
            outputs = model(inputs_mixed)
            supervised_loss = criterion(outputs[:len(targets_x)], targets_x)
            # unsupervised_loss = -(labels_mixed[len(targets_x):] * torch.log_softmax(outputs[len(targets_x):], dim=1)).sum(dim=1).mean()
            unsupervised_loss = F.mse_loss(outputs[len(targets_x):], labels_mixed[len(targets_x):])
            loss = supervised_loss + lambda_u * unsupervised_loss
    
            # Backward pass and optimization
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
    
            # Track training metrics
            total_loss += loss.item() * inputs_x.size(0)
            _, predicted = outputs[:len(targets_x)].max(1)
            correct += predicted.eq(targets_x).sum().item()
            total_samples += inputs_x.size(0)
    
        # Training metrics
        train_loss = total_loss / total_samples
        train_err = 1 - correct / total_samples
        best_info['tr']['loss'].append(train_loss)
        best_info['tr']['err'].append(train_err)
    
        # Validation phase
        model.eval()
        val_loss = 0
        val_correct = 0
        val_samples = 0
        with torch.no_grad():
            for inputs_val, targets_val in va_loader:
                inputs_val, targets_val = inputs_val.to(device), targets_val.to(device)
                outputs_val = model(inputs_val)
                loss_val = criterion(outputs_val, targets_val)
    
                val_loss += loss_val.item() * inputs_val.size(0)
                _, predicted_val = outputs_val.max(1)
                val_correct += predicted_val.eq(targets_val).sum().item()
                val_samples += inputs_val.size(0)
    
    
        
        # Validation metrics
        val_xent = val_loss / val_samples
        val_err = 1 - val_correct / val_samples
        val_acc = val_correct / val_samples
        best_info['va']['xent'].append(val_xent)
        best_info['va']['err'].append(val_err)
    
        # Save results for this epoch
        results.append({
            "epoch": epoch,
            "val_acc": val_acc,
            "val_err": val_err,
            "config": {"learning_rate": optimizer.param_groups[0]['lr'], "mixup_alpha": mixup_alpha, "lambda_u": lambda_u},
            "model": model.state_dict()  # Save model state dict
        })
        
        
        # Track epochs
        best_info['epochs'].append(epoch)
    
            # Early stopping logic
        if val_err < best_val_error:
            best_val_error = val_err
            patience_counter = 0
            # Save best model
            torch.save(model.state_dict(), 'best_model.pth')
        else:
            patience_counter += 1
            if patience_counter >= patience:
                print(f"Early stopping triggered after epoch {epoch}")
                break
    
        # Logging
        print(f"Epoch {epoch}/{num_epochs}")
        print(f"  Train Loss: {train_loss:.4f}, Train Error: {train_err:.4f}")
        print(f"  Val Loss: {val_xent:.4f}, Val Error: {val_err:.4f}")
    
    # Plotting
    plt.plot(best_info['epochs'], best_info['tr']['loss'], '--', color='b', label='Train Loss')
    plt.plot(best_info['epochs'], best_info['tr']['err'], '-', color='b', label='Train Error')
    plt.plot(best_info['epochs'], best_info['va']['xent'], '--', color='r', label='Validation Xent')
    plt.plot(best_info['epochs'], best_info['va']['err'], '-', color='r', label='Validation Error')
    plt.legend()
    plt.show()


       splitname   0   1
   train_labeled 223 177
train_unlabeled1 225 175
train_unlabeled2 225 175
           valid  25  25
            test  25  25
Epoch 1/50
  Train Loss: 3.6410, Train Error: 0.4475
  Val Loss: 0.7258, Val Error: 0.5400


KeyboardInterrupt: 

In [None]:
if not GRID_SEARCH:
    # Sort results by validation accuracy
    results = sorted(results, key=lambda x: x["val_acc"], reverse=True)
    
    # Display top results
    print("\nTop 5 Results:")
    for res in results[:5]:
        print(f"Epoch: {res['epoch']}, Config: {res['config']}, Val Accuracy: {res['val_acc']:.4f}")
    
    # Retrieve the best configuration and model
    best_result = results[0]
    best_model_state = best_result["model"]
    
    # Load the best model if needed
    model.load_state_dict(best_model_state)

In [None]:
if not GRID_SEARCH:
    # plt.figure(figsize=(4, 4))  # Set the dimensions of the figure to 4x4 inches
    plt.plot(best_info['epochs'], best_info['tr']['loss'], '--', color='b', label='Train Loss')
    plt.plot(best_info['epochs'], best_info['tr']['err'], '-', color='b', label='Train Error')
    plt.plot(best_info['epochs'], best_info['va']['xent'], '--', color='r', label='Validation Xent')
    plt.plot(best_info['epochs'], best_info['va']['err'], '-', color='r', label='Validation Error')
    plt.title('MixMatch SSL method')
    # Add axis labels
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    
    # Add legend and display the plot
    plt.legend()
    plt.show()

In [None]:
if not GRID_SEARCH:
    tar_acc = {}
    tar_acc[('ResNet50', 'ImageNet1k')] = eval_acc(model, device, test_loader)
    print(tar_acc)

    test_accuracy = eval_acc(model, device, test_loader)
    print(f"Test Accuracy: {test_accuracy:.4f}")

In [None]:
if GRID_SEARCH:
    import itertools
    import torch
    import torch.nn as nn
    import torch.optim as optim
    import torchvision.models as models
    import torchvision.transforms as transforms
    import matplotlib.pyplot as plt
    import torch.nn.functional as F
    
    # MixMatch hyperparameters
    mixup_alpha = 0.75
    temperature = 0.5
    lambda_u = 10
    num_classes = 2  # Adjust to your dataset

    # Data collection for training visualization
    best_info = {
        'epochs': [],
        'tr': {'loss': [], 'err': []},
        'va': {'xent': [], 'err': []},
    }
    
    # Training loop
    num_epochs = 50
    best_val_error = float('inf')
    patience = 10
    patience_counter = 0
    results = []
    
    # Define sharpening function
    def sharpen(probabilities, T):
        return torch.pow(probabilities, 1 / T) / torch.sum(torch.pow(probabilities, 1 / T), dim=1, keepdim=True)
    
    # Define mixup function
    def mixup(x1, y1, x2, y2, alpha):
        lam = torch.distributions.Beta(alpha, alpha).sample().item()
        lam = max(lam, 1 - lam)
        x_mix = lam * x1 + (1 - lam) * x2
        y_mix = lam * y1 + (1 - lam) * y2
        return x_mix, y_mix
    
    class ResNetFeatureExtractor(nn.Module):
        def __init__(self, num_classes, dropout_rate=0.3):
            super(ResNetFeatureExtractor, self).__init__()
            # Use a deeper ResNet architecture
            resnet = models.resnet50(weights=models.ResNet50_Weights.DEFAULT)
            
            # Remove final layers
            self.feature_extractor = nn.Sequential(*list(resnet.children())[:-2])
            
            # Add custom classification head
            self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
            self.dropout = nn.Dropout(dropout_rate)
            
            # Two fully connected layers with batch norm
            self.fc1 = nn.Linear(2048, 512)
            self.bn1 = nn.BatchNorm1d(512)
            self.fc2 = nn.Linear(512, num_classes)
            
        def forward(self, x):
            features = self.feature_extractor(x)
            features = self.avgpool(features)
            features = features.view(features.size(0), -1)
            
            features = self.dropout(features)
            features = F.relu(self.bn1(self.fc1(features)))
            features = self.dropout(features)
            out = self.fc2(features)
            return out
    
    
    root_path = DATA_DIR
    tr_loader, unloader, va_loader, test_loader  = data_utils_pseudo.make_PN_data_loaders_with_unlabeled(
        root=root_path,
        batch_size=32,
        n_samples_per_class_trainandvalid=500
    )
    
    # Model, optimizer, and criterion
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    model = ResNetFeatureExtractor(num_classes=num_classes).to(device)

    # Define the grid search function
    def train_and_evaluate(lr, lambda_u, temperature, mixup_alpha, num_epochs=50):
        # Model, optimizer, and criterion
        model = ResNetFeatureExtractor(num_classes=num_classes).to(device)
        optimizer = optim.SGD(
            model.parameters(),
            lr=lr,
            momentum=0.9,
            weight_decay=5e-4
        )
        criterion = nn.CrossEntropyLoss()
    
        # Training loop
        best_val_error = float('inf')
        patience_counter = 0
        results = []
    
        for epoch in range(1, num_epochs + 1):
            model.train()
            total_loss = 0
            correct = 0
            total_samples = 0
    
            for (inputs_x, targets_x), inputs_u in zip(tr_loader, unloader):
                # Transfer to device
                inputs_x, targets_x = inputs_x.to(device), targets_x.to(device)
                inputs_u = inputs_u[0].to(device)
    
                # Forward pass for unlabeled data
                with torch.no_grad():
                    outputs_u = model(inputs_u)
                    pseudo_labels = torch.softmax(outputs_u, dim=1)
                    pseudo_labels = sharpen(pseudo_labels, temperature)
    
                # Combine labeled and pseudo-labeled data
                all_inputs = torch.cat([inputs_x, inputs_u], dim=0)
                all_labels = torch.cat([
                    torch.nn.functional.one_hot(targets_x, num_classes).float(),
                    pseudo_labels
                ], dim=0)
                inputs_mixed, labels_mixed = mixup(all_inputs, all_labels, all_inputs, all_labels, mixup_alpha)
    
                # Forward pass
                outputs = model(inputs_mixed)

                supervised_loss = criterion(outputs[:len(targets_x)], targets_x)
    
                # Compute unsupervised loss (set reduction='sum' and normalize manually for per-sample loss)
                unsupervised_loss = F.mse_loss(
                    outputs[len(targets_x):], labels_mixed[len(targets_x):])
                
                # Combine supervised and unsupervised losses
                loss = supervised_loss + lambda_u * unsupervised_loss
                
                # Backward pass and optimization
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                
                # Track training metrics
                total_loss += loss.item() * len(targets_x)  # Accumulate total loss, scaled by batch size
                total_samples += inputs_x.size(0) + inputs_u.size(0)  # Track total number of samples
                _, predicted = outputs[:len(targets_x)].max(1)
                correct += predicted.eq(targets_x).sum().item()
        
            # Training metrics
            train_loss = total_loss / total_samples
            train_err = 1 - correct / total_samples
            best_info['tr']['loss'].append(train_loss)
            best_info['tr']['err'].append(train_err)
        
            # Validation phase
            model.eval()
            val_loss = 0
            val_correct = 0
            val_samples = 0
            
            with torch.no_grad():
                for inputs_val, targets_val in va_loader:
                    inputs_val, targets_val = inputs_val.to(device), targets_val.to(device)
            
                    # Forward pass
                    outputs_val = model(inputs_val)
            
                    # Compute validation loss (per-sample average)
                    loss_val = criterion(outputs_val, targets_val)  # CrossEntropyLoss defaults to mean
                    val_loss += loss_val.item() * inputs_val.size(0)  # Scale by batch size to get total loss for this batch
            
                    # Compute accuracy
                    _, predicted_val = outputs_val.max(1)
                    val_correct += predicted_val.eq(targets_val).sum().item()
                    val_samples += inputs_val.size(0)
            
            # Compute per-sample average loss across the entire validation set
            val_xent = val_loss / val_samples
            val_err = 1 - val_correct / val_samples
            val_acc = val_correct / val_samples
    
            # Early stopping logic
            if val_err < best_val_error:
                best_val_error = val_err
                patience_counter = 0
                # Save best model
                torch.save(model.state_dict(), 'best_model.pth')
            else:
                patience_counter += 1
                if patience_counter >= patience:
                    print(f"Early stopping triggered after epoch {epoch}")
                    break
    
            # Logging
            print(f"Epoch {epoch}/{num_epochs}")
            print(f"  Train Loss: {total_loss / total_samples:.4f}, Train Error: {1 - correct / total_samples:.4f}")
            print(f"  Val Loss: {val_xent:.4f}, Val Error: {val_err:.4f}")
    
        return val_err, model.state_dict()
    
    # Define hyperparameter grid
    param_grid = {
        "lr": [0.0001, 0.00001],
        "lambda_u": [10, 30, 50, 80],
        "temperature": [0.5, 1.0],
        "mixup_alpha": [0.5, 0.75, 1.0]
    }
    
    # Perform grid search
    best_model_state = None
    best_hyperparams = None
    best_val_error = float('inf')
    
    for lr, lambda_u, temperature, mixup_alpha in itertools.product(
            param_grid["lr"], param_grid["lambda_u"], param_grid["temperature"], param_grid["mixup_alpha"]):
        print(f"Testing configuration: lr={lr}, lambda_u={lambda_u}, temperature={temperature}, mixup_alpha={mixup_alpha}")
        val_err, model_state = train_and_evaluate(lr, lambda_u, temperature, mixup_alpha)
    
        if val_err < best_val_error:
            best_val_error = val_err
            best_model_state = model_state
            best_hyperparams = {"lr": lr, "lambda_u": lambda_u, "temperature": temperature, "mixup_alpha": mixup_alpha}
    
    # Save the best model and hyperparameters
    torch.save(best_model_state, "best_model_grid_search.pth")
    print(f"Best validation error: {best_val_error}")
    print(f"Best hyperparameters: {best_hyperparams}")