In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader,TensorDataset
import matplotlib.pyplot as plt
import torch.nn.functional as F
import numpy as np
from sklearn.model_selection import KFold, train_test_split
import pandas as pd 
import functions as func
import importlib
importlib.reload(func)



<module 'functions' from 'c:\\Applied AI\\Applied_AI\\Applied AI project\\applied_ai_project\\functions.py'>

In [2]:
class ClassificationNN(nn.Module):
    def __init__(self,num_features,num_targets):
        super(ClassificationNN, self).__init__()

        self.num_featurs = num_features
        self.num_targets = num_targets
        
        self.fc1 = nn.Linear(num_features,128)
        self.fc2 = nn.Linear(128,128)

        if num_targets == 2:
            self.output = nn.Linear(128,1)
        else:
            self.output = nn.Linear(128,num_targets)

        self.dropout = nn.Dropout(0.2)

    def forward(self,x):
        x = F.relu(self.fc1(x))

        x = self.dropout(x)

        x = F.relu(self.fc2(x))
        x = self.dropout(x)
        x = self.output(x)
            
        if self.num_targets == 2:
            x = torch.sigmoid(x)
        #No need if num_targets is not binary (cross entropy loss applied from the outside)
        
        return x 







In [3]:

def train_sv_classification_model(dataset,epochs,n_splits):

    data,target,num_targets,num_features = func.read_data(dataset)

    model = ClassificationNN(num_features, num_targets=num_targets)
    learning_rate=0.001

    model.train()
    running_loss=0

    X_train_val, X_test, y_train_val, y_test = train_test_split(data.drop(target,axis=1), data[target], test_size=0.33, random_state=42)

    skf = KFold(n_splits=n_splits)

    fold = 0
    fold_accuracies = []

    for train_index, val_index in skf.split(X_train_val,y_train_val):
        X_train,X_val = func.convert_df_to_tensor([X_train_val.iloc[train_index],X_train_val.iloc[val_index]])
        y_train,y_val = func.convert_df_to_tensor([y_train_val.iloc[train_index],y_train_val.iloc[val_index]])

        train_loader = func.convert_to_dataloader(X_train,y_train)
        val_loader = func.convert_to_dataloader(X_val,y_val)

        
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        model.to(device)
        
        if num_targets == 2:
            criterion = nn.BCELoss() 
        else:
            criterion = nn.CrossEntropyLoss()
        optimizer = optim.Adam(model.parameters(), lr=learning_rate)

        for epoch in range(1,epochs+1):
            model.train()
            running_loss = 0.0
            for batch_X, batch_y in train_loader:
                batch_X, batch_y = batch_X.to(device), batch_y.to(device)
                
                optimizer.zero_grad()
                outputs = model(batch_X)
                if num_targets == 2:
                    loss = criterion(outputs, batch_y.unsqueeze(1).float())
                else:
                    loss = criterion(outputs, batch_y.long())


                loss.backward()
                optimizer.step()
                
                running_loss += loss.item()
            
            avg_loss = running_loss / len(train_loader)
            
            if epoch % 10 == 0 or epoch == 1:
                print(f'Epoch [{epoch}/{epochs}], Loss: {avg_loss:.4f}')            
        
        #Test model

        model.eval()
        correct = 0
        total =0 

        with torch.no_grad():
            for batch_x,batch_y in val_loader:
                batch_X, batch_y = batch_X.to(device), batch_y.to(device)
                outputs = model(batch_X)

                if num_targets == 2:
                    
                    predicted = (outputs > 0.5).float()
                    correct += (predicted.squeeze(1) == batch_y.float()).sum().item()
                else:
                    
                    _, predicted = torch.max(outputs, dim=1)
                    correct += (predicted == batch_y.long()).sum().item()
                total += batch_y.size(0)
        
        accuracy = 100 * correct / total
        print(f'Fold {fold} Accuracy: {accuracy:.2f}% for dataset {dataset}')
        fold_accuracies.append(accuracy)
        fold += 1

    return model




        





In [4]:

def evaluate_sv_classification_model(dataset,model):

    data,target,num_targets,num_features = func.read_data(dataset)
    
    X_train_val, X_test, y_train_val, y_test = train_test_split(data.drop(target,axis=1), data[target], test_size=0.33, random_state=42)

    X_test = func.convert_df_to_tensor(X_test)
    y_test = func.convert_df_to_tensor(y_test)

    eval_loader = func.convert_to_dataloader(X_test,y_test)
        
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
                   
    #Test model
    model.eval()
    correct = 0
    total =0 

    with torch.no_grad():
        for batch_x,batch_y in eval_loader:
            batch_X, batch_y = batch_x.to(device), batch_y.to(device)
            outputs = model(batch_X)

            if num_targets == 2:
                
                predicted = (outputs > 0.5).float()
                correct += (predicted.squeeze(1) == batch_y.float()).sum().item()
            else:
                _, predicted = torch.max(outputs, dim=1)
                correct += (predicted == batch_y.long()).sum().item()
            total += batch_y.size(0)
    
    accuracy = 100 * correct / total
    print(f'Accuracy: {accuracy:.2f}% for dataset {dataset}')
    
    return accuracy




        





In [5]:
datasets = {'ecoli':[30,5],'mammographic':[30,5],'seeds':[30,5],'yeast':[10,2],'ozone':[30,5]}
model_dict = {}
for dataset_name,param_list in datasets.items():
    model_dict[dataset_name] = train_sv_classification_model(dataset_name,*param_list)
    

Epoch [1/30], Loss: 1.7377
Epoch [10/30], Loss: 0.2651
Epoch [20/30], Loss: 0.3144
Epoch [30/30], Loss: 0.1966
Fold 0 Accuracy: 35.33% for dataset ecoli
Epoch [1/30], Loss: 0.3445
Epoch [10/30], Loss: 0.2783
Epoch [20/30], Loss: 0.2186
Epoch [30/30], Loss: 0.2561
Fold 1 Accuracy: 59.00% for dataset ecoli
Epoch [1/30], Loss: 0.2791
Epoch [10/30], Loss: 0.2640
Epoch [20/30], Loss: 0.2934
Epoch [30/30], Loss: 0.3038
Fold 2 Accuracy: 40.42% for dataset ecoli
Epoch [1/30], Loss: 0.3806
Epoch [10/30], Loss: 0.3383
Epoch [20/30], Loss: 0.2884
Epoch [30/30], Loss: 0.2277
Fold 3 Accuracy: 36.85% for dataset ecoli
Epoch [1/30], Loss: 0.4639
Epoch [10/30], Loss: 0.2584
Epoch [20/30], Loss: 0.3508
Epoch [30/30], Loss: 0.3314
Fold 4 Accuracy: 62.34% for dataset ecoli
Epoch [1/30], Loss: 0.7042
Epoch [10/30], Loss: 0.4320
Epoch [20/30], Loss: 0.4192
Epoch [30/30], Loss: 0.4080
Fold 0 Accuracy: 48.65% for dataset mammographic
Epoch [1/30], Loss: 0.4359
Epoch [10/30], Loss: 0.4022
Epoch [20/30], Loss:

In [6]:
accuracy_dict = {}
for dataset,model in model_dict.items():
    accuracy_dict[dataset] = evaluate_sv_classification_model(dataset,model)
    

Accuracy: 92.56% for dataset ecoli
Accuracy: 85.40% for dataset mammographic
Accuracy: 85.71% for dataset seeds
Accuracy: 55.33% for dataset yeast
Accuracy: 91.31% for dataset ozone
