## Imports ##

In [2]:
import pandas as pd
import torch
import torch.optim as optim
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader
from torch.utils.data import TensorDataset
import torch.nn as nn
from src.utils import OurModel

import wandb

In [3]:
#Loading Dataset
DATASET_PATH = "../Datasets/Covid_Cleaned.csv"
COVID_data = pd.read_csv(DATASET_PATH)

In [4]:
corr = COVID_data.corr(numeric_only=True)
threshold = 0.0

# Identify columns with correlation with predicted value below the threshold
col_to_drop = corr['CLASIFFICATION_FINAL'][abs(corr['CLASIFFICATION_FINAL']) < threshold].index

print(col_to_drop)

# COVID_data.drop(col_to_drop, axis=1, inplace=True)

# COVID_data.describe()

Index([], dtype='object')


## Batch preperation ##

In [5]:
X = COVID_data.drop(columns=['CLASIFFICATION_FINAL'])  # features (20)
y = COVID_data['CLASIFFICATION_FINAL']                 # target (low / high risk of COVID-19)

X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.1, random_state=32) # 70% train, 30% test
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=32) # test -> 50% validation, 50% final test

In [6]:
# Creating Pytorch Tensors
X_train_tensor = torch.tensor(X_train.to_numpy(), dtype=torch.float32)
y_train_tensor = torch.tensor(y_train.to_numpy(), dtype=torch.float32)

X_val_tensor = torch.tensor(X_val.to_numpy(), dtype=torch.float32)
y_val_tensor = torch.tensor(y_val.to_numpy(), dtype=torch.float32)

X_test_tensor = torch.tensor(X_test.to_numpy(), dtype=torch.float32)
y_test_tensor = torch.tensor(y_test.to_numpy(), dtype=torch.float32)

In [7]:
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
val_dataset = TensorDataset(X_val_tensor, y_val_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

In [8]:
batch_size = 1024 # bigger batch size because of 193000 samples (x^2)

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=4)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=4)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=4)

## Model Training ##

In [9]:
ml = OurModel()

In [10]:
# model configuration
config = nn.Sequential(
    nn.Linear(COVID_data.shape[1]-1, 8),
    nn.ReLU(),
    nn.Linear(8, 32),
    nn.ReLU(),
    nn.Linear(32, 16),
    nn.ReLU(),
    nn.Linear(16, 1),
)
ml.add_configuration(config)

In [11]:
# model configuration
config = nn.Sequential(
            nn.Linear(19, 128),      # expand feature space
            nn.ReLU(),
            nn.Linear(128, 64),      # compress to mid-level representation
            nn.ReLU(),
            nn.Linear(64, 32),      # smaller latent space
            nn.ReLU(),
            nn.Linear(32, 1)        # single output (logit)
        )
ml.add_configuration(config)

In [12]:
#GPU optimalization TODO: Doesnt work
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)
ml.to(device)

cuda


OurModel(
  (net): Sequential(
    (0): Linear(in_features=19, out_features=128, bias=True)
    (1): ReLU()
    (2): Linear(in_features=128, out_features=64, bias=True)
    (3): ReLU()
    (4): Linear(in_features=64, out_features=32, bias=True)
    (5): ReLU()
    (6): Linear(in_features=32, out_features=1, bias=True)
  )
)

In [13]:
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.RMSprop(ml.parameters(), lr=0.001)

In [14]:
# # Model Training

epochs = 20
for epoch in range(epochs):
    ml.train()  # set model to training mode
    total_loss = 0
    correct = 0
    total = 0

    for inputs, true in train_loader:
        inputs, true = inputs.to(device), true.to(device)
        true = true.unsqueeze(1)
        y_pred = ml(inputs)
        loss = criterion(y_pred, true)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
        preds = torch.sigmoid(y_pred)              # convert logits → probabilities
        predicted = (preds > 0.5).float()          # threshold to 0/1
        correct += (predicted == true).sum().item()
        total += true.size(0)
    # Epoch summary
    avg_loss = total_loss / len(train_loader)
    accuracy = correct / total

    print(f"Epoch [{epoch+1}/{epochs}] | Loss: {avg_loss:.4f} | Accuracy: {accuracy:.4f}")


Epoch [1/20] | Loss: 0.6735 | Accuracy: 0.5874
Epoch [2/20] | Loss: 0.6475 | Accuracy: 0.6203
Epoch [3/20] | Loss: 0.6431 | Accuracy: 0.6252
Epoch [4/20] | Loss: 0.6419 | Accuracy: 0.6259
Epoch [5/20] | Loss: 0.6403 | Accuracy: 0.6287
Epoch [6/20] | Loss: 0.6393 | Accuracy: 0.6295
Epoch [7/20] | Loss: 0.6387 | Accuracy: 0.6306
Epoch [8/20] | Loss: 0.6378 | Accuracy: 0.6307
Epoch [9/20] | Loss: 0.6376 | Accuracy: 0.6314
Epoch [10/20] | Loss: 0.6372 | Accuracy: 0.6319
Epoch [11/20] | Loss: 0.6370 | Accuracy: 0.6327
Epoch [12/20] | Loss: 0.6368 | Accuracy: 0.6313
Epoch [13/20] | Loss: 0.6367 | Accuracy: 0.6312
Epoch [14/20] | Loss: 0.6359 | Accuracy: 0.6325
Epoch [15/20] | Loss: 0.6359 | Accuracy: 0.6337
Epoch [16/20] | Loss: 0.6358 | Accuracy: 0.6330
Epoch [17/20] | Loss: 0.6355 | Accuracy: 0.6342
Epoch [18/20] | Loss: 0.6352 | Accuracy: 0.6341
Epoch [19/20] | Loss: 0.6352 | Accuracy: 0.6341
Epoch [20/20] | Loss: 0.6351 | Accuracy: 0.6340


## Sweep ##

In [15]:
wandb.login()

wandb: Currently logged in as: xbukovinam (xbukovinam-faculty-of-informatics-and-infromation-techno) to https://api.wandb.ai. Use `wandb login --relogin` to force relogin


True

In [21]:
#Hyperparameter configuration
#Random search
sweep_config = {
    "method": "random",
    "metric": {"name": "loss", "goal": "minimize"},
    "parameters": {
        "learning_rate": {"min": 0.0001, "max": 0.01},
        # "epochs": {"min": 2, "max": 20},
        "hidden_size_1": {"values": [32, 64, 128, 248]},
        "hidden_size_2": {"values": [32, 64, 128]},
        "hidden_size_3": {"values": [32, 64, 128]},
        "hidden_size_4": {"values": [32, 64, 128]},
        "hidden_size_5": {"values": [32, 64, 128]},
        "hidden_size_6": {"values": [32, 64, 128]},
        "n_layers": {"values": [2, 4, 6]},
        # "dropout": {"min": 0.0, "max": 0.5},
        "dropout": {"values": [0]},
        "optimizer": {"values": ["Adam"]},
        "activation": {"values": ["relu", "tanh"]},
        "batch_size": {"values": [2048]},
    }
}

In [22]:
#Grid search
# sweep_config = {
#     "method": "grid",
#     "metric": {"name": "loss", "goal": "minimize"},
#     "parameters": {
#         "learning_rate": {"values": [0.001, 0.005]},  # changed from range to discrete values
#         "hidden_size_1": {"values": [64, 128]},
#         "hidden_size_2": {"values": [64, 128]},
#         "hidden_size_3": {"values": [64, 128]},
#         "hidden_size_4": {"values": [64, 128]},
#         # "hidden_size_5": {"values": [64, 128]},
#         # "hidden_size_6": {"values": [64, 128]},
#         "n_layers": {"values": [2, 4]},
#         "dropout": {"values": [0.0, 0.2, 0.4, 0.5]},  # converted range to discrete values
#         "optimizer": {"values": ["Adam"]},
#         "activation": {"values": ["relu", "tanh"]},
#         "batch_size": {"values": [4096]},
#     }
# }

In [23]:
def train():
    wandb.init()
    CONFIG = wandb.config
    train_loader = DataLoader(train_dataset, batch_size=CONFIG.batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=CONFIG.batch_size, shuffle=False)
    # test_loader = DataLoader(test_dataset, batch_size=CONFIG.batch_size, shuffle=False)


    hl_sizes = [
        CONFIG.hidden_size_1, CONFIG.hidden_size_2, CONFIG.hidden_size_3,
        CONFIG.hidden_size_4,
        CONFIG.hidden_size_5, CONFIG.hidden_size_6,
        # CONFIG.hidden_size_7, CONFIG.hidden_size_8
                ]
    #Building the neural network from CONFIG
    sw_nn = OurModel()
    sw_nn.sweeping_build(COVID_data.shape[1]-1, 1, n_layers=CONFIG.n_layers, hidden_size=hl_sizes, activation_f=CONFIG.activation, dropout=CONFIG.dropout)
    optimizer = getattr(torch.optim, CONFIG.optimizer)(sw_nn.parameters(), lr=CONFIG.learning_rate)
    criterion = nn.BCEWithLogitsLoss()

    #GPU computing
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    sw_nn.to(device)

    for epoch in range(10):
        sw_nn.train()
        total_loss = 0
        correct = 0
        total = 0
        for inputs, true in train_loader:
            inputs, true = inputs.to(device), true.to(device)
            true = true.unsqueeze(1)
            optimizer.zero_grad()
            y_pred = sw_nn(inputs)
            loss = criterion(y_pred, true)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
            preds = torch.sigmoid(y_pred)              # convert logits → probabilities
            predicted = (preds > 0.5).float()          # threshold to 0/1
            correct += (predicted == true).sum().item()
            total += true.size(0)

        avg_train_loss = total_loss / len(train_loader)
        accuracy = correct / total

        sw_nn.eval()
        val_loss = 0.0
        correct = 0
        total = 0
        with torch.no_grad():
            for inputs, true in val_loader:
                inputs, true = inputs.to(device), true.to(device)
                y_preds = sw_nn(inputs)
                true = true.unsqueeze(1)
                loss = criterion(y_preds, true)
                val_loss += loss.item()
                preds = torch.sigmoid(y_preds)
                predicted = (preds > 0.5).float()
                correct += (predicted == true).sum().item()
                total += true.size(0)
        avg_val_loss = val_loss / len(val_loader)
        accuracy = correct / total

        wandb.log({
            "epoch": epoch + 1,
            "train_loss": avg_train_loss,
            "val_loss": avg_val_loss,
            "val_accuracy": accuracy
        })


In [None]:
sweep_id = wandb.sweep(sweep=sweep_config, project="xbukovinam-faculty-of-informatics-and-infromation-techno")
wandb.agent(sweep_id, function=train)

Create sweep with ID: 92kwxi1x
Sweep URL: https://wandb.ai/xbukovinam-faculty-of-informatics-and-infromation-techno/xbukovinam-faculty-of-informatics-and-infromation-techno/sweeps/92kwxi1x


wandb: Agent Starting Run: pz3mtx72 with config:
wandb: 	activation: relu
wandb: 	batch_size: 2048
wandb: 	dropout: 0
wandb: 	hidden_size_1: 128
wandb: 	hidden_size_2: 64
wandb: 	hidden_size_3: 64
wandb: 	hidden_size_4: 32
wandb: 	hidden_size_5: 32
wandb: 	hidden_size_6: 64
wandb: 	learning_rate: 0.005331463579371653
wandb: 	n_layers: 2
wandb: 	optimizer: Adam


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_loss,█▂▁▁▁▁▁▁▁▁
val_accuracy,▁▅▄▄▃▆▇▇█▆
val_loss,█▃▄▃▄▂▁▂▁▂

0,1
epoch,10.0
train_loss,0.63685
val_accuracy,0.63521
val_loss,0.63547


wandb: Agent Starting Run: lx2tbfrs with config:
wandb: 	activation: tanh
wandb: 	batch_size: 2048
wandb: 	dropout: 0
wandb: 	hidden_size_1: 32
wandb: 	hidden_size_2: 128
wandb: 	hidden_size_3: 128
wandb: 	hidden_size_4: 32
wandb: 	hidden_size_5: 128
wandb: 	hidden_size_6: 128
wandb: 	learning_rate: 0.005896178173657722
wandb: 	n_layers: 2
wandb: 	optimizer: Adam


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_loss,█▂▁▂▁▁▁▁▁▁
val_accuracy,▄▅▃▅██▁▅█▆
val_loss,█▅▃▃▁▁▄▃▁▁

0,1
epoch,10.0
train_loss,0.63615
val_accuracy,0.63726
val_loss,0.6325


wandb: Agent Starting Run: ikynk4c8 with config:
wandb: 	activation: tanh
wandb: 	batch_size: 2048
wandb: 	dropout: 0
wandb: 	hidden_size_1: 32
wandb: 	hidden_size_2: 128
wandb: 	hidden_size_3: 64
wandb: 	hidden_size_4: 64
wandb: 	hidden_size_5: 128
wandb: 	hidden_size_6: 32
wandb: 	learning_rate: 0.008582010101467257
wandb: 	n_layers: 2
wandb: 	optimizer: Adam


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_loss,█▂▂▂▁▁▁▁▁▁
val_accuracy,▁▆▆▆▁██▇█▅
val_loss,█▃▃▂▅▁▁▁▁▅

0,1
epoch,10.0
train_loss,0.63682
val_accuracy,0.63347
val_loss,0.63802


wandb: Agent Starting Run: e5q8kkz1 with config:
wandb: 	activation: relu
wandb: 	batch_size: 2048
wandb: 	dropout: 0
wandb: 	hidden_size_1: 64
wandb: 	hidden_size_2: 32
wandb: 	hidden_size_3: 32
wandb: 	hidden_size_4: 32
wandb: 	hidden_size_5: 128
wandb: 	hidden_size_6: 32
wandb: 	learning_rate: 0.0015754941374713277
wandb: 	n_layers: 2
wandb: 	optimizer: Adam


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_loss,█▂▂▁▁▁▁▁▁▁
val_accuracy,▁▇██▇▇████
val_loss,█▂▂▂▂▃▁▁▁▁

0,1
epoch,10.0
train_loss,0.63706
val_accuracy,0.63818
val_loss,0.6337


wandb: Agent Starting Run: urswgiie with config:
wandb: 	activation: tanh
wandb: 	batch_size: 2048
wandb: 	dropout: 0
wandb: 	hidden_size_1: 248
wandb: 	hidden_size_2: 128
wandb: 	hidden_size_3: 32
wandb: 	hidden_size_4: 32
wandb: 	hidden_size_5: 32
wandb: 	hidden_size_6: 32
wandb: 	learning_rate: 0.000743070654203068
wandb: 	n_layers: 2
wandb: 	optimizer: Adam


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_loss,█▂▂▂▁▂▁▁▁▁
val_accuracy,▁▇▇▇▇█▆▇▆█
val_loss,█▃▃▂▂▁▃▂▃▁

0,1
epoch,10.0
train_loss,0.63673
val_accuracy,0.6392
val_loss,0.63282


wandb: Agent Starting Run: 5v5obfek with config:
wandb: 	activation: relu
wandb: 	batch_size: 2048
wandb: 	dropout: 0
wandb: 	hidden_size_1: 32
wandb: 	hidden_size_2: 32
wandb: 	hidden_size_3: 32
wandb: 	hidden_size_4: 128
wandb: 	hidden_size_5: 32
wandb: 	hidden_size_6: 128
wandb: 	learning_rate: 0.008828288736825823
wandb: 	n_layers: 2
wandb: 	optimizer: Adam


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_loss,█▃▂▂▁▂▁▁▁▁
val_accuracy,▁▁▅▁▅▄▅█▄▄
val_loss,█▆▄▇▂▅▅▁▄▅

0,1
epoch,10.0
train_loss,0.63587
val_accuracy,0.63408
val_loss,0.63508


wandb: Agent Starting Run: m35nabx0 with config:
wandb: 	activation: tanh
wandb: 	batch_size: 2048
wandb: 	dropout: 0
wandb: 	hidden_size_1: 64
wandb: 	hidden_size_2: 32
wandb: 	hidden_size_3: 64
wandb: 	hidden_size_4: 32
wandb: 	hidden_size_5: 128
wandb: 	hidden_size_6: 64
wandb: 	learning_rate: 0.00828722030193725
wandb: 	n_layers: 4
wandb: 	optimizer: Adam
