Imports


In [68]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import pandas as pd
import numpy as np
from sklearn.cluster import KMeans
import time

Dataset loading

In [69]:
def load_or_create_dataset(filepath='dataset.csv', create_new=False, n_samples=1000, n_features=20):
    if create_new:
        data = np.random.rand(n_samples, n_features)
        labels = np.random.randint(0, 2, n_samples)
        dataset = pd.DataFrame(data)
        dataset['label'] = labels
        dataset.to_csv(filepath, index=False)
    else:
        dataset = pd.read_csv(filepath)
    
    return dataset

# Load or create the dataset
dataset = load_or_create_dataset(create_new=False)
dataset = dataset.apply(pd.to_numeric, errors='coerce')
dataset = dataset.fillna(0)

#change the column name of the 'tip' column to 'label'
label_name = 'tip'
dataset.rename(columns={label_name:'label'}, inplace=True)

dataset.head()

Unnamed: 0,total_bill,label,sex,smoker,day,time,size
0,16.99,1.01,0.0,0.0,0.0,0.0,2
1,10.34,1.66,0.0,0.0,0.0,0.0,3
2,21.01,3.5,0.0,0.0,0.0,0.0,3
3,23.68,3.31,0.0,0.0,0.0,0.0,2
4,24.59,3.61,0.0,0.0,0.0,0.0,4


Clustering Phase:

Cluster the dataset into different clusters
TODO: identify 3-5 different clustering methods to use
TODO: identify visualization and statistics relevant to the method to display (clustering index, cluster weight etc)

In [None]:
def cluster_dataset(dataset, n_clusters=5):
    kmeans = KMeans(n_clusters=n_clusters)
    features = dataset.drop('tip', axis=1).values
    clusters = kmeans.fit_predict(features)
    dataset['cluster'] = clusters
    
    return dataset, clusters

# Cluster the dataset
n_clusters = 5
dataset, clusters = cluster_dataset(dataset, n_clusters=n_clusters)
dataset.head()

#graph the clusters
import matplotlib.pyplot as plt
import seaborn as sns

sns.countplot(x='cluster', data=dataset)
plt.show()



Model and Optimizer Definitions

In [None]:
#Define the model architecture
class CustomModel(nn.Module):
    def __init__(self, input_size, hidden_layers, output_size):
        super(CustomModel, self).__init__()
        layers = []
        in_size = input_size
        for hidden_size in hidden_layers:
            layers.append(nn.Linear(in_size, hidden_size))
            layers.append(nn.ReLU())
            in_size = hidden_size
        layers.append(nn.Linear(in_size, output_size))
        self.model = nn.Sequential(*layers)
    
    def forward(self, x):
        return self.model(x)
    
#Define the custom optimizer
class COSGD(optim.Optimizer):
    def __init__(self, params, lr=1e-3):
        defaults = dict(lr=lr)
        super(COSGD, self).__init__(params, defaults)

    def step(self, closure=None):
        loss = None
        if closure is not None:
            loss = closure()

        for group in self.param_groups:
            for p in group['params']:
                if p.grad is None:
                    continue
                d_p = p.grad.data
                p.data.add_(-group['lr'], d_p)
        
        return loss

# Orthogonalize gradients using Gram-Schmidt process
def gram_schmidt(gradients):
    orthogonalized = []
    for g in gradients:
        w = g.clone()
        for og in orthogonalized:
            w -= torch.dot(w, og) * og
        w /= torch.norm(w)
        orthogonalized.append(w)
    return orthogonalized

Control Model

In [None]:
# Define the training loop for the control model (standard)
def train_model(model, optimizer, dataloader, loss_function, epochs=10):
    criterion = loss_function
    model.train()
    
    for epoch in range(epochs):
        for inputs, labels in dataloader:
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            
    return model


#prepare data loader for the control model
features = dataset.drop(['label', 'cluster'], axis=1).values
labels = dataset['label'].values
tensor_data = TensorDataset(torch.tensor(features, dtype=torch.float32), torch.tensor(labels, dtype=torch.float32))
dataloader_control = DataLoader(tensor_data, batch_size=32, shuffle=True)


COSGD Model

In [None]:
# Prepare data loaders for each cluster for the COSGD optimizer and name them as such
dataloaders_cosgd = {}
for cluster in np.unique(clusters):
    cluster_data = dataset[dataset['cluster'] == cluster]
    features = cluster_data.drop(['tip', 'cluster'], axis=1).values
    labels = cluster_data['tip'].values
    tensor_data = TensorDataset(torch.tensor(features, dtype=torch.float32), torch.tensor(labels, dtype=torch.float32))
    dataloaders_cosgd[cluster] = DataLoader(tensor_data, batch_size=32, shuffle=True)

#create a function to train the model using COSGD
def train_model_COSGD(model, dataloaders, loss_function, epochs=10):
    criterion = loss_function
    model.train()
    
    for epoch in range(epochs):
        gradient_updates = []
        for cluster, dataloader in dataloaders.items():
            for inputs, labels in dataloader:
                optimizer = COSGD(model.parameters())
                optimizer.zero_grad()
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                loss.backward()
                gradients = [p.grad.data for p in model.parameters()]
                gradient_updates.append(gradients)
            
        #we have our gradients
        #todo sort the gradients on average absolute value descending
        orthogonalized_gradients = gram_schmidt(gradient_updates)
        #now we update the model with the orthogonalized gradients
        #Todo write this code
                
    return model


Define the models

In [None]:
# Define model architectures
input_size = dataset.shape[1] - 2  # excluding label and cluster columns
hidden_layers = [6]
output_size = 1
model_control = CustomModel(input_size, hidden_layers, output_size)
model_cosgd = CustomModel(input_size, hidden_layers, output_size)

# Define optimizers
optimizer_control = optim.SGD(model_control.parameters(), lr=0.01)
optimizer_cosgd = COSGD(model_cosgd.parameters(), lr=0.01)

Train the models

In [None]:
# Train models
start_time_control = time.time()
model_control = train_model(model_control, optimizer_control, dataloader_control, epochs=10)
end_time_control = time.time()

start_time_cosgd = time.time()
model_cosgd = train_model_COSGD(model_cosgd, optimizer_cosgd, dataloader_cosgd, epochs=10)
end_time_cosgd = time.time()

# Print results
print(f"SGD Training Time: {end_time_sgd - start_time_sgd:.2f} seconds")
print(f"COSGD Training Time: {end_time_cosgd - start_time_cosgd:.2f} seconds")

Analyse the models performance
TODO - decide on metrics to use 
