In [1]:
import random

import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import classification_report
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.datasets import cifar10
from torch.utils.data import TensorDataset, DataLoader


In [2]:
# Seed every random number generator the notebook draws from, so that a fresh
# "Restart & Run All" reproduces the same run.
SEED = 42
random.seed(SEED)      # Python's `random` module (used for hyperparameter sampling)
np.random.seed(SEED)   # NumPy's legacy global generator
torch.manual_seed(SEED)  # PyTorch (weight initialisation, DataLoader shuffling)


<torch._C.Generator at 0x20e212f8630>

In [3]:
import torch
# Diagnostic only: show the CUDA version reported by the installed PyTorch.
# A version string is expected when CUDA is supported; the run recorded in
# this notebook printed None.
print(torch.version.cuda)  # Should print a version number if CUDA is supported


None


In [4]:
# Pull the full CIFAR-10 train/test splits
(x_train_full, y_train_full), (x_test_full, y_test_full) = cifar10.load_data()

# Collapse the label arrays to one dimension
y_train_full, y_test_full = y_train_full.flatten(), y_test_full.flatten()

# CIFAR-10 class ids kept for this binary task (cats and dogs) ...
class_map = {5: 'dog', 3: 'cat'}
# ... and the signed targets they are mapped to: dog -> +1, cat -> -1
label_map = {5: 1, 3: -1}

# Boolean masks that select only the cat/dog samples of each split
train_filter = np.isin(y_train_full, list(class_map))
test_filter = np.isin(y_test_full, list(class_map))

# Training split: keep the selected images, flatten each one
# (32x32x3 -> 3072) and cast to float32
x_train = x_train_full[train_filter]
x_train = x_train.reshape(len(x_train), -1).astype(np.float32)

# Test split: same treatment
x_test = x_test_full[test_filter]
x_test = x_test.reshape(len(x_test), -1).astype(np.float32)

# Translate the retained class ids into +1 / -1 labels
y_train = np.vectorize(label_map.get)(y_train_full[train_filter])
y_test = np.vectorize(label_map.get)(y_test_full[test_filter])

# Standardise the features; the statistics come from the training split only
# and are then applied unchanged to the test split
scaler = StandardScaler().fit(x_train)
x_train = scaler.transform(x_train)
x_test = scaler.transform(x_test)


In [5]:
# Copy the prepared NumPy arrays into PyTorch tensors.
# Features keep their array dtype; labels become float32 column vectors (N, 1).
x_train_tensor, x_test_tensor = torch.tensor(x_train), torch.tensor(x_test)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32).reshape(-1, 1)
y_test_tensor = torch.tensor(y_test, dtype=torch.float32).reshape(-1, 1)


In [6]:
class MLP(nn.Module):
    """Perceptron with one ReLU hidden layer and a single raw output score.

    Args:
        input_size: number of features per input sample.
        hidden_size: number of units in the hidden layer.
    """

    def __init__(self, input_size, hidden_size):
        super().__init__()
        self.hidden_layer = nn.Linear(input_size, hidden_size)
        self.output_layer = nn.Linear(hidden_size, 1)

    def forward(self, x):
        """Return one unbounded score per row of `x` (no output activation)."""
        hidden = nn.functional.relu(self.hidden_layer(x))
        return self.output_layer(hidden)

In [7]:
class HingeLoss(nn.Module):
    """Mean hinge loss, `mean(max(0, 1 - output * label))`."""

    def __init__(self):
        super().__init__()

    def forward(self, outputs, labels):
        """Flatten both tensors and return the scalar mean hinge loss."""
        margins = outputs.view(-1) * labels.view(-1)
        # Only samples whose margin is below 1 contribute to the loss
        return (1 - margins).clamp(min=0).mean()

In [None]:
def _evaluate(model, criterion, loader, device):
    """Score `model` on every batch of `loader` without updating its weights.

    Returns:
        (mean per-sample loss, accuracy), where accuracy is the fraction of
        samples whose `sign(output)` equals the label.

    Raises:
        ValueError: if `loader` yields no samples.
    """
    model.eval()
    loss_sum = 0.0
    n_correct = 0
    n_seen = 0
    with torch.no_grad():
        for inputs, labels in loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            batch_size = labels.size(0)
            # criterion returns a batch mean; re-weight so that a smaller last
            # batch does not skew the overall average
            loss_sum += criterion(outputs, labels).item() * batch_size
            n_correct += (torch.sign(outputs).view(-1) == labels.view(-1)).sum().item()
            n_seen += batch_size
    if n_seen == 0:
        raise ValueError("Cannot evaluate: the data loader yielded no samples")
    return loss_sum / n_seen, n_correct / n_seen


def train_model(model, criterion, optimizer, train_loader, test_loader, num_epochs=30, device='cpu'):
    """Train `model` and record loss/accuracy on both loaders after every epoch.

    A prediction is `sign(output)` and counts as correct when it equals the
    label, so accuracy is only meaningful for labels in {-1, +1}; an output of
    exactly 0 never matches either label.

    Args:
        model: network to train; it is moved to `device` in place.
        criterion: callable `(outputs, labels) -> scalar loss tensor`.
        optimizer: optimizer already bound to `model`'s parameters.
        train_loader: iterable of `(inputs, labels)` batches used for training.
        test_loader: iterable of `(inputs, labels)` batches used for evaluation.
        num_epochs: number of passes over `train_loader`.
        device: device on which the model and batches are placed.

    Returns:
        dict with keys 'model', 'train_losses', 'test_losses',
        'train_accuracies', 'test_accuracies' (one entry per epoch) and
        'final_train_acc', 'final_test_acc' (the finished model evaluated over
        `train_loader` / `test_loader`).

    Raises:
        ValueError: if either loader yields no samples.
    """
    model.to(device)
    train_losses = []
    test_losses = []
    train_accuracies = []
    test_accuracies = []

    for epoch in range(num_epochs):
        model.train()
        total_loss = 0
        correct = 0
        total = 0

        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # Running statistics, measured while the weights are still changing
            total_loss += loss.item() * inputs.size(0)
            predictions = torch.sign(outputs).view(-1)
            correct += (predictions == labels.view(-1)).sum().item()
            total += labels.size(0)

        if total == 0:
            raise ValueError("Cannot train: train_loader yielded no samples")
        avg_loss = total_loss / total
        accuracy = correct / total
        train_losses.append(avg_loss)
        train_accuracies.append(accuracy)

        # Evaluate on test data
        avg_loss_test, accuracy_test = _evaluate(model, criterion, test_loader, device)
        test_losses.append(avg_loss_test)
        test_accuracies.append(accuracy_test)

        print(f"Epoch {epoch+1}/{num_epochs}, Train Loss: {avg_loss:.4f}, Train Acc: {accuracy*100:.2f}%, Test Loss: {avg_loss_test:.4f}, Test Acc: {accuracy_test*100:.2f}%")

    # Final evaluation of the finished model. It runs batch-wise over the
    # loaders that were passed in, so the numbers always describe the data this
    # call trained on, and no full split is pushed through the model at once.
    _, train_accuracy = _evaluate(model, criterion, train_loader, device)
    _, test_accuracy = _evaluate(model, criterion, test_loader, device)

    print(f"\nFinal Training Accuracy: {train_accuracy * 100:.2f}%")
    print(f"Final Test Accuracy: {test_accuracy * 100:.2f}%")

    return {
        'model': model,
        'train_losses': train_losses,
        'test_losses': test_losses,
        'train_accuracies': train_accuracies,
        'test_accuracies': test_accuracies,
        'final_train_acc': train_accuracy,
        'final_test_acc': test_accuracy
    }

# Candidate values for each hyperparameter; the random search draws one value
# per key on every iteration
hyperparameter_space = dict(
    learning_rate=[1e-2, 1e-3, 1e-4],
    hidden_size=[128, 256, 512],
    batch_size=[32, 64, 128],
    optimizer=['SGD', 'Adam'],
    num_epochs=[30],
)


In [None]:
import random
def random_search(hyperparameter_space, num_iterations=20, device='cpu', seed=None):
    """Train one MLP per randomly sampled hyperparameter combination.

    Every iteration draws one value per key of `hyperparameter_space`
    (independently, so a combination may be drawn more than once), trains a
    fresh `MLP` with `HingeLoss` on the notebook's train/test tensors and
    records the final accuracies.

    Args:
        hyperparameter_space: dict mapping 'learning_rate', 'hidden_size',
            'batch_size', 'optimizer' ('SGD' or 'Adam') and 'num_epochs' to a
            sequence of candidate values.
        num_iterations: number of combinations to sample and train.
        device: device passed through to `train_model`.
        seed: optional seed for the sampling. With the default `None` the
            module-level `random` generator is used, as before; with a value,
            a private generator makes the sampled combinations repeatable.

    Returns:
        list of dicts with keys 'params', 'final_train_acc' and
        'final_test_acc', sorted by 'final_test_acc' in descending order.

    Raises:
        ValueError: if a sampled optimizer name is neither 'SGD' nor 'Adam'.

    NOTE(review): configurations are ranked on the test split, so the best
    reported test accuracy is an optimistic estimate; consider ranking on a
    held-out validation split instead.
    """
    # A private generator keeps seeded runs independent of other `random` users
    rng = random.Random(seed) if seed is not None else random

    # The datasets and the input width do not depend on the sampled
    # hyperparameters, so build them once rather than on every iteration
    train_dataset = TensorDataset(x_train_tensor, y_train_tensor)
    test_dataset = TensorDataset(x_test_tensor, y_test_tensor)
    input_size = x_train_tensor.shape[1]

    results = []
    for i in range(num_iterations):
        # Sample a random hyperparameter combination
        sampled_params = {k: rng.choice(v) for k, v in hyperparameter_space.items()}
        print(f"\nRunning iteration {i+1}/{num_iterations} with parameters: {sampled_params}")

        # Unpack hyperparameters
        learning_rate = sampled_params['learning_rate']
        hidden_size = sampled_params['hidden_size']
        batch_size = sampled_params['batch_size']
        optimizer_type = sampled_params['optimizer']
        num_epochs = sampled_params['num_epochs']

        # Create DataLoaders with the current batch size
        train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
        test_loader = DataLoader(test_dataset, batch_size=batch_size)

        # Initialize a fresh model for this combination
        model = MLP(input_size, hidden_size)

        # Define loss and optimizer
        criterion = HingeLoss()
        if optimizer_type == 'SGD':
            optimizer = optim.SGD(model.parameters(), lr=learning_rate)
        elif optimizer_type == 'Adam':
            optimizer = optim.Adam(model.parameters(), lr=learning_rate)
        else:
            raise ValueError(f"Unsupported optimizer type: {optimizer_type}")

        # Train the model
        metrics = train_model(model, criterion, optimizer, train_loader, test_loader, num_epochs=num_epochs, device=device)

        # Save results
        results.append({
            'params': sampled_params,
            'final_train_acc': metrics['final_train_acc'],
            'final_test_acc': metrics['final_test_acc']
        })

    # Sort results by test accuracy, best first
    results_sorted = sorted(results, key=lambda x: x['final_test_acc'], reverse=True)
    return results_sorted

In [None]:
# Prefer the GPU when PyTorch can see one; otherwise stay on the CPU
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(f"Using device: {device}")

# Run the random hyperparameter search; results come back sorted by test
# accuracy, best first
best_results = random_search(hyperparameter_space, num_iterations=20, device=device)

# Report the five best-scoring configurations
print("\nTop 5 Hyperparameter Combinations:")
for i, res in enumerate(best_results[:5], start=1):
    print(f"Rank {i}: Test Accuracy: {res['final_test_acc']*100:.2f}% with parameters: {res['params']}")

Using device: cpu

Running iteration 1/20 with parameters: {'learning_rate': 0.01, 'hidden_size': 256, 'batch_size': 128, 'optimizer': 'SGD', 'num_epochs': 30}
Epoch 1/30, Train Loss: 0.8837, Train Acc: 59.15%, Test Loss: 0.8558, Test Acc: 60.65%
Epoch 2/30, Train Loss: 0.8286, Train Acc: 62.21%, Test Loss: 0.8451, Test Acc: 61.55%
Epoch 3/30, Train Loss: 0.8051, Train Acc: 63.32%, Test Loss: 0.8169, Test Acc: 63.70%
Epoch 4/30, Train Loss: 0.7812, Train Acc: 65.12%, Test Loss: 0.8076, Test Acc: 64.40%
Epoch 5/30, Train Loss: 0.7607, Train Acc: 66.35%, Test Loss: 0.8387, Test Acc: 62.00%
Epoch 6/30, Train Loss: 0.7439, Train Acc: 67.23%, Test Loss: 0.7982, Test Acc: 64.95%
Epoch 7/30, Train Loss: 0.7216, Train Acc: 68.95%, Test Loss: 0.8417, Test Acc: 61.75%
Epoch 8/30, Train Loss: 0.7051, Train Acc: 69.59%, Test Loss: 0.8135, Test Acc: 63.60%
Epoch 9/30, Train Loss: 0.6858, Train Acc: 70.34%, Test Loss: 0.8120, Test Acc: 63.90%
Epoch 10/30, Train Loss: 0.6666, Train Acc: 71.43%, Test 