# Data Preparation
***
## Data Collection
***

In [None]:
import pandas as pd
import numpy as np

# Read the raw almond measurements from the CSV file
ALMOND_CSV_PATH = "../Data/Almond.csv"
rawData = pd.read_csv(ALMOND_CSV_PATH)

***
## Data Visualization
***

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

def plot_boxplot(column, data):
    """Display a boxplot of the values found in one column of ``data``.

    Args:
        column: Label of the column to plot; also used in the figure title.
        data: Object indexable by ``column`` (e.g. a pandas DataFrame).
    """
    _, ax = plt.subplots(figsize=(8, 6))
    sns.boxplot(x=data[column], ax=ax)
    ax.set_title(f'Boxplot of {column}')
    plt.show()


***
## Imputation
***

In [None]:
# Import Sklearn libraries
from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle

# Columns taking part in the imputation: the three axis measurements plus Area
LENGTH_COL = 'Length (major axis)'
WIDTH_COL = 'Width (minor axis)'
THICKNESS_COL = 'Thickness (depth)'
axis_cols = [LENGTH_COL, WIDTH_COL, THICKNESS_COL]

p_LWTA = rawData[axis_cols + ['Area']].copy()

# Blank out Area on rows without a length so it is imputed together with the axes
has_length = p_LWTA[LENGTH_COL].notna()
p_LWTA['Area'] = np.where(has_length, p_LWTA['Area'], np.nan)

# Iterative imputation (scikit-learn)
imputer = IterativeImputer(max_iter=10, random_state=0)
imputed_values = imputer.fit_transform(p_LWTA)
d_LWTA_imputed = pd.DataFrame(imputed_values, columns=p_LWTA.columns)

# Roundness recomputed from the imputed Area and length
d_LWTA_imputed['Roundness'] = (
    4 * d_LWTA_imputed['Area'] / (np.pi * d_LWTA_imputed[LENGTH_COL] ** 2)
)

# Drop the irrelevant 'Id' feature and substitute the imputed columns
p_proc = rawData.drop(columns=['Id']).copy()
replaced_cols = axis_cols + ['Roundness']
p_proc[replaced_cols] = d_LWTA_imputed[replaced_cols]

# Shape features derived from the (now complete) length and width
p_proc['Aspect Ratio'] = p_proc[LENGTH_COL] / p_proc[WIDTH_COL]
width_over_length = p_proc[WIDTH_COL] / p_proc[LENGTH_COL]
p_proc['Eccentricity'] = (1 - width_over_length ** 2) ** 0.5

***
## Handling Potential Bias
***

In [None]:
from imblearn.under_sampling import RandomUnderSampler

In [None]:
# Feature columns fed to the network; 'Type' is the class label
feature_columns = [
    'Length (major axis)',
    'Width (minor axis)',
    'Thickness (depth)',
    'Area',
    'Perimeter',
    'Roundness',
    'Solidity',
    'Compactness',
    'Aspect Ratio',
    'Eccentricity',
    'Extent',
    'Convex hull(convex area)',
]
X = p_proc[feature_columns]
Y = p_proc['Type']

# Random under-sampling with a fixed seed for reproducibility
rus = RandomUnderSampler(random_state=42)
resampled_features, resampled_labels = rus.fit_resample(X, Y)

X_resampled = pd.DataFrame(resampled_features, columns=X.columns)
y_resampled = pd.Series(resampled_labels, name='Type')

# Report the size of the resampled feature matrix and label vector
for resampled in (X_resampled, y_resampled):
    print(resampled.shape)

***
## Data Split
***

In [None]:
# Import PyTorch and the label encoder used to build the network inputs
import torch
from sklearn.preprocessing import LabelEncoder

# Renumber the resampled rows 0..n-1
X_resampled = X_resampled.reset_index(drop=True)
y_resampled = y_resampled.reset_index(drop=True)

# Encode the class names as integer ids
label_encoder = LabelEncoder()

# X,Y -> X_tensor,Y_tensor
X_tensor = torch.tensor(X_resampled.values, dtype=torch.float32)
encoded_labels = label_encoder.fit_transform(y_resampled)
y_tensor = torch.tensor(encoded_labels, dtype=torch.long)

# Stratified split, step 1: 70% training vs. 30% held out
X_train, X_temp, y_train, y_temp = train_test_split(
    X_tensor, y_tensor, test_size=0.3, stratify=y_tensor, random_state=21
)

# Step 2: the held-out 30% becomes 20% validation and 10% test
X_val, X_test, y_val, y_test = train_test_split(
    X_temp, y_temp, test_size=1/3, stratify=y_temp, random_state=69
)

# Check the distribution of classes in each set
for split_name, split_labels in (
    ("Training", y_train),
    ("Validation", y_val),
    ("Testing", y_test),
):
    print(f"{split_name} set class distribution:\n", pd.Series(split_labels).value_counts(normalize=True))

# Standardise every set with statistics taken from the training set only
mean = X_train.mean(dim=0)
std = X_train.std(dim=0)

X_train_norm, X_val_norm, X_test_norm = (
    (split - mean) / std for split in (X_train, X_val, X_test)
)


***
## Neural Network Definitions
***

In [None]:
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as Func
import torch.nn.init as init

class FirstLayer(nn.Module):
    """Input layer: a linear projection followed by a selectable activation.

    Args:
        output_size: Number of units produced by the layer.
        activation: One of 'relu', 'sigmoid' or 'tanh'.
        init_method: 'he' (Kaiming-normal with ReLU gain) or 'xavier'
            (Xavier-normal) initialisation of the linear weights.
        input_size: Number of input features. Defaults to 12, the value that
            was previously hard-coded, so existing callers are unaffected.

    Raises:
        ValueError: If ``activation`` or ``init_method`` is not supported.
    """
    def __init__(self, output_size, activation='relu', init_method='he', input_size=12):
        super(FirstLayer, self).__init__()
        self.fc = nn.Linear(input_size, output_size)
        if activation == 'relu':
            self.activation = nn.ReLU()
        elif activation == 'sigmoid':
            self.activation = nn.Sigmoid()
        elif activation == 'tanh':
            self.activation = nn.Tanh()
        else:
            raise ValueError(f"Unsupported activation function: {activation}")

        if init_method == 'he':
            init.kaiming_normal_(self.fc.weight, nonlinearity='relu')
        elif init_method == 'xavier':
            init.xavier_normal_(self.fc.weight)
        else:
            raise ValueError(f"Unsupported initialization method: {init_method}")

    def forward(self, x):
        """Apply the linear projection and then the configured activation."""
        x = self.fc(x)
        x = self.activation(x)
        return x

class OutputLayer(nn.Module):
    """Final linear layer producing one raw score (logit) per class.

    Args:
        input_size: Number of features entering the layer.
        num_classes: Number of output scores. Defaults to 3, the value that
            was previously hard-coded, so existing callers are unaffected.
    """
    def __init__(self, input_size, num_classes=3):
        super(OutputLayer, self).__init__()
        self.fc = nn.Linear(input_size, num_classes)

    def forward(self, x):
        """Return the un-activated linear output."""
        x = self.fc(x)
        return x

class ReluLayer(nn.Module):
    """Linear layer with Kaiming-normal weights followed by ReLU.

    The output width is also exposed as ``out_features``.
    """
    def __init__(self, input_size, output_size):
        super().__init__()
        linear = nn.Linear(input_size, output_size)
        init.kaiming_normal_(linear.weight, nonlinearity='relu')
        self.fc = linear
        self.activation = nn.ReLU()
        self.out_features = output_size

    def forward(self, x):
        """Return ``relu(fc(x))``."""
        return self.activation(self.fc(x))

class LeakyReluLayer(nn.Module):
    """Linear layer followed by LeakyReLU, with matching Kaiming-normal init.

    Args:
        input_size: Number of input features.
        output_size: Number of output units (also exposed as ``out_features``,
            mirroring ``ReluLayer``).
        neg_slope: Negative slope of the LeakyReLU activation.
    """
    def __init__(self, input_size, output_size, neg_slope=0.01):
        super(LeakyReluLayer, self).__init__()
        self.fc = nn.Linear(input_size, output_size)
        self.activation = nn.LeakyReLU(negative_slope=neg_slope)
        # Pass the slope as `a`; otherwise the gain is computed for a slope of 0
        # and ignores the activation actually used.
        init.kaiming_normal_(self.fc.weight, a=neg_slope, nonlinearity='leaky_relu')
        self.out_features = output_size

    def forward(self, x):
        """Apply the linear projection and then LeakyReLU."""
        x = self.fc(x)
        x = self.activation(x)
        return x

class PReLULayer(nn.Module):
    """Linear layer with Kaiming-normal weights followed by a PReLU activation."""
    def __init__(self, input_size, output_size):
        super().__init__()
        linear = nn.Linear(input_size, output_size)
        init.kaiming_normal_(linear.weight, nonlinearity='leaky_relu')
        self.fc = linear
        self.activation = nn.PReLU()

    def forward(self, x):
        """Return ``prelu(fc(x))``."""
        return self.activation(self.fc(x))

class SigmoidLayer(nn.Module):
    """Linear layer with Xavier-normal weights followed by a sigmoid."""
    def __init__(self, input_size, output_size):
        super().__init__()
        linear = nn.Linear(input_size, output_size)
        init.xavier_normal_(linear.weight)
        self.fc = linear
        self.activation = nn.Sigmoid()

    def forward(self, x):
        """Return ``sigmoid(fc(x))``."""
        return self.activation(self.fc(x))

class SwishLayer(nn.Module):
    """Linear layer followed by the Swish activation, ``z * sigmoid(z)``."""
    def __init__(self, input_size, output_size):
        super().__init__()
        self.fc = nn.Linear(input_size, output_size)

    def forward(self, x):
        """Project ``x`` linearly, then gate the result by its own sigmoid."""
        pre_activation = self.fc(x)
        gate = torch.sigmoid(pre_activation)
        return pre_activation * gate

class DynamicNN(nn.Module):
    """Feed-forward network: input layer, a stack of ReLU layers, output layer.

    Args:
        no_hidden_layers: Number of ``ReluLayer`` blocks placed after the
            input layer.
        first_layer_neurons: Width of the input layer's output and of the
            first hidden layer.
        alpha: Shrink factor. Hidden layer ``i`` (for ``i > 0``) has
            ``max(int(first_layer_neurons * (1 - alpha * i)), 3)`` units.
    """
    def __init__(self, no_hidden_layers, first_layer_neurons, alpha):
        super().__init__()
        self.in_layer = FirstLayer(output_size=first_layer_neurons)

        blocks = []
        width = first_layer_neurons  # width of the most recently built layer
        for depth in range(no_hidden_layers):
            fan_in = width
            if depth > 0:
                # Shrink linearly with depth, but never below 3 units
                width = max(int(first_layer_neurons * (1 - alpha * depth)), 3)
            blocks.append(ReluLayer(input_size=fan_in, output_size=width))

        self.out = OutputLayer(input_size=width)
        self.model = nn.Sequential(*blocks)

    def forward(self, x):
        """Run ``x`` through the input layer, the hidden stack and the output layer."""
        hidden = self.model(self.in_layer(x))
        return self.out(hidden)

    def print_structure(self):
        """Print the input layer, every hidden layer and the output layer."""
        print("Input Layer: ", self.in_layer)
        for position, layer in enumerate(self.model, start=1):
            print(f"Hidden Layer {position}: {layer}")
        print("Output Layer: ", self.out)

***
## Helpers
***

In [None]:
from enum import Enum

class TrainingAlgo(Enum):
    """Identifiers for the training algorithms that ``learningAlgo`` maps to optimizers."""
    ADAM = 0        # Adam
    RPROP = 1       # Resilient Backpropagation
    SGD = 2         # Stochastic Gradient Descent

class ObjectiveFunc(Enum):
    """Identifiers for the loss functions that ``objectiveFunc`` can build."""
    CEL = 0         # CrossEntropyLoss
    BCEWLL = 1      # BCEWithLogitsLoss

def learningAlgo(opt: TrainingAlgo):
    """Return the torch optimizer class (not an instance) selected by ``opt``.

    ADAM maps to ``optim.Adam`` and RPROP to ``optim.Rprop``; every other
    value falls back to ``optim.SGD``.
    """
    if opt == TrainingAlgo.ADAM:
        return optim.Adam
    if opt == TrainingAlgo.RPROP:
        return optim.Rprop
    return optim.SGD

def objectiveFunc(opt: ObjectiveFunc):
    """Build the loss module selected by ``opt``.

    Args:
        opt: ``ObjectiveFunc.CEL`` for ``nn.CrossEntropyLoss`` or
            ``ObjectiveFunc.BCEWLL`` for ``nn.BCEWithLogitsLoss``.

    Returns:
        A freshly constructed loss module.

    Raises:
        ValueError: If ``opt`` is not a supported objective. Previously the
            function fell through and returned ``None``, which only failed
            later when the criterion was called.
    """
    if opt == ObjectiveFunc.CEL:
        return nn.CrossEntropyLoss()
    if opt == ObjectiveFunc.BCEWLL:
        return nn.BCEWithLogitsLoss()
    raise ValueError(f"Unsupported objective function: {opt}")

def convert_tensor(X,Y,batch,shuffle):
    """Pair ``X`` with ``Y`` in a TensorDataset and wrap it in a DataLoader.

    Args:
        X: Feature tensor.
        Y: Target tensor with the same first dimension as ``X``.
        batch: Batch size passed to the DataLoader.
        shuffle: Whether the DataLoader shuffles the samples.
    """
    data_utils = torch.utils.data
    paired = data_utils.TensorDataset(X, Y)
    return data_utils.DataLoader(paired, batch_size=batch, shuffle=shuffle)

def set_random_seed(seed):
    """Seed the torch and NumPy random generators and configure cuDNN.

    Also seeds the CUDA generators when CUDA is available. The cuDNN
    ``deterministic`` flag is switched on and ``benchmark`` is switched off.
    """
    for seed_fn in (torch.manual_seed, np.random.seed):
        seed_fn(seed)

    if torch.cuda.is_available():
        for cuda_seed_fn in (torch.cuda.manual_seed, torch.cuda.manual_seed_all):
            cuda_seed_fn(seed)

    cudnn = torch.backends.cudnn
    cudnn.deterministic = True
    cudnn.benchmark = False

def train_model(model, train_loader, criterion, optimizer):
    """Train ``model`` for one pass over ``train_loader``.

    For every batch the gradients are cleared, the loss of the model output
    against the labels is back-propagated, and the optimizer takes one step.
    The model is left in training mode; nothing is returned.
    """
    model.train()
    for batch_inputs, batch_labels in train_loader:
        optimizer.zero_grad()
        batch_loss = criterion(model(batch_inputs), batch_labels)
        batch_loss.backward()
        optimizer.step()

def evaluate_model(model, val_loader, criterion):
    """Evaluate ``model`` on ``val_loader`` without tracking gradients.

    Returns:
        A ``(loss, accuracy)`` tuple. ``loss`` is the sum of the per-batch
        criterion values divided by the number of batches. ``accuracy`` is
        the fraction of samples whose highest-scoring output index equals
        the label.
    """
    model.eval()
    batch_losses = []
    n_correct = 0
    n_seen = 0
    with torch.no_grad():
        for inputs, labels in val_loader:
            scores = model(inputs)
            batch_losses.append(criterion(scores, labels).item())
            # Predicted class = index of the largest score in each row
            predicted = scores.argmax(dim=1)
            n_correct += (predicted == labels).sum().item()
            n_seen += labels.size(0)

    accuracy = n_correct / n_seen
    mean_batch_loss = sum(batch_losses) / len(val_loader)
    return mean_batch_loss, accuracy

***
## NN Architecture Grid Search Using K-Fold Cross-Validation
***

In [None]:
from sklearn.model_selection import KFold
from sklearn.model_selection import StratifiedKFold

def grid_search_k_fold(X, y, hidden_layers_options, neurons_options, alpha, k=5,
                       learning_opt=None, learning_rate=0.005, objective_opt=None,
                       num_epochs=100, batch_size=32):
    """Grid-search DynamicNN architectures with stratified k-fold cross-validation.

    For every (hidden layers, first-layer neurons) combination a new network
    is trained and validated on each of the ``k`` folds. Features are
    standardised per fold using the statistics of that fold's training part.

    Args:
        X: Feature tensor, indexed along its first dimension by the fold indices.
        y: Label tensor aligned with ``X``; also used for stratification.
        hidden_layers_options: Iterable of hidden-layer counts to try.
        neurons_options: Iterable of first-layer widths to try.
        alpha: Shrink factor forwarded to ``DynamicNN``.
        k: Number of folds.
        learning_opt: ``TrainingAlgo`` member; ``None`` selects ``TrainingAlgo.ADAM``.
        learning_rate: Learning rate handed to the optimizer.
        objective_opt: ``ObjectiveFunc`` member; ``None`` selects ``ObjectiveFunc.CEL``.
        num_epochs: Training epochs per fold.
        batch_size: Mini-batch size for training and validation.

    Returns:
        A list with one dict per configuration, holding the keys
        'hidden_layers', 'first_layer_neurons', 'mean_accuracy', 'mean_loss',
        'std_accuracy' and 'std_loss'.
    """
    # Hyperparameters are explicit arguments so the search does not depend on
    # notebook globals that may not be defined yet when this runs.
    if learning_opt is None:
        learning_opt = TrainingAlgo.ADAM
    if objective_opt is None:
        objective_opt = ObjectiveFunc.CEL

    skf = StratifiedKFold(n_splits=k, shuffle=True, random_state=42)
    results = []

    for hidden_layers in hidden_layers_options:
        for first_layer_neurons in neurons_options:
            fold_accuracies = []
            fold_losses = []

            for fold, (train_index, val_index) in enumerate(skf.split(X, y)):
                # A fresh model and optimizer per fold: reusing one model across
                # folds would let it validate on samples it already trained on.
                model = DynamicNN(no_hidden_layers=hidden_layers, first_layer_neurons=first_layer_neurons, alpha=alpha)
                optimizer = learningAlgo(learning_opt)(model.parameters(), lr=learning_rate)
                criterion = objectiveFunc(objective_opt)

                if fold == 0:
                    # The architecture is the same for every fold; print it once
                    model.print_structure()

                print(f'Fold {fold + 1}/{k}')
                X_train, X_val = X[train_index], X[val_index]
                y_train, y_val = y[train_index], y[val_index]

                # Standardise with training-fold statistics only
                mean = X_train.mean(dim=0)
                std = X_train.std(dim=0)

                X_train_norm = (X_train - mean)/std
                X_val_norm = (X_val - mean)/std

                train_loader = convert_tensor(X_train_norm, y_train, batch_size, True)
                val_loader = convert_tensor(X_val_norm, y_val, batch_size, False)

                for epoch in range(num_epochs):
                    train_model(model, train_loader, criterion, optimizer)

                loss, accuracy = evaluate_model(model, val_loader, criterion)
                fold_losses.append(loss)
                fold_accuracies.append(accuracy)

            results.append({
                'hidden_layers': hidden_layers,
                'first_layer_neurons': first_layer_neurons,
                'mean_accuracy': np.mean(fold_accuracies),
                'mean_loss': np.mean(fold_losses),
                'std_accuracy': np.std(fold_accuracies),
                'std_loss': np.std(fold_losses),
            })

    return results

In [None]:
# Search space: number of hidden layers x width of the first layer
hidden_layers_options = list(range(1, 7))       # 1 .. 6
neurons_options = list(range(12, 7, -1))        # 12 .. 8
alpha = 0.25    # shrink factor forwarded to DynamicNN

results = grid_search_k_fold(
    X=X_tensor,
    y=y_tensor,
    hidden_layers_options=hidden_layers_options,
    neurons_options=neurons_options,
    alpha=alpha,
)

In [None]:
# Arrange the mean accuracies as a (hidden layers x first-layer neurons) matrix
results_df = pd.DataFrame(results)
heatmap_data = results_df.pivot(
    index='hidden_layers',
    columns='first_layer_neurons',
    values='mean_accuracy',
)

# Draw the annotated heatmap
fig, ax = plt.subplots(figsize=(10, 6))
sns.heatmap(
    heatmap_data,
    annot=True,
    cmap='viridis',
    cbar_kws={'label': 'Performance Metric'},
    ax=ax,
)
ax.set_title('Grid Search Results Heatmap')
ax.set_xlabel('Number of neurons in the first Layer')
ax.set_ylabel('Number of Hidden Layers')
plt.show()

***
## Training Algorithms
***
### Shared Hyperparameters
***

In [None]:
# Hyperparameters shared by the training algorithms
learning_rate = 0.005
num_epochs = 300
batch_size = 34
objective_opt = ObjectiveFunc.CEL    # cross-entropy loss
random_seed = 42

***
## Adam
***

In [None]:
# Select Adam as the training algorithm (learningAlgo maps it to optim.Adam)
learning_opt = TrainingAlgo.ADAM

***
## RProp
***

In [None]:
# Select resilient backpropagation (learningAlgo maps it to optim.Rprop)
learning_opt = TrainingAlgo.RPROP

***
## SGD
***

In [None]:
# Select stochastic gradient descent (learningAlgo maps it to optim.SGD)
learning_opt = TrainingAlgo.SGD

***
## Hybrid
***