## Imports ##

In [37]:
import pandas as pd
import torch
import torch.optim as optim
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader
from torch.utils.data import TensorDataset
import torch.nn as nn
from src.utils import OurModel

import wandb

In [38]:
#Loading Dataset
DATASET_PATH = "../Datasets/Covid_Cleaned.csv"
COVID_data = pd.read_csv(DATASET_PATH)

In [20]:
corr = COVID_data.corr(numeric_only=True)
threshold = 0.0

# Identify columns with correlation with predicted value below the threshold
col_to_drop = corr['CLASIFFICATION_FINAL'][abs(corr['CLASIFFICATION_FINAL']) < threshold].index

print(col_to_drop)

# COVID_data.drop(col_to_drop, axis=1, inplace=True)

# COVID_data.describe()

Index([], dtype='object')


## Batch preperation ##

In [39]:
X = COVID_data.drop(columns=['CLASIFFICATION_FINAL'])  # features (20)
y = COVID_data['CLASIFFICATION_FINAL']                 # target (low / high risk of COVID-19)

X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.1, random_state=32) # 70% train, 30% test
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=32) # test -> 50% validation, 50% final test

In [40]:
# Creating Pytorch Tensors
X_train_tensor = torch.tensor(X_train.to_numpy(), dtype=torch.float32)
y_train_tensor = torch.tensor(y_train.to_numpy(), dtype=torch.float32)

X_val_tensor = torch.tensor(X_val.to_numpy(), dtype=torch.float32)
y_val_tensor = torch.tensor(y_val.to_numpy(), dtype=torch.float32)

X_test_tensor = torch.tensor(X_test.to_numpy(), dtype=torch.float32)
y_test_tensor = torch.tensor(y_test.to_numpy(), dtype=torch.float32)

In [41]:
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
val_dataset = TensorDataset(X_val_tensor, y_val_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

In [42]:
batch_size = 1024 # bigger batch size because of 193000 samples (x^2)

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=4)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=4)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=4)

## Model Training ##

In [93]:
ml = OurModel()

In [107]:
# model configuration
config = nn.Sequential(
    nn.Linear(COVID_data.shape[1]-1, 8),
    nn.ReLU(),
    nn.Linear(8, 32),
    nn.ReLU(),
    nn.Linear(32, 16),
    nn.ReLU(),
    nn.Linear(16, 1),
)
ml.add_configuration(config)

In [94]:
# model configuration
config = nn.Sequential(
            nn.Linear(19, 64),      # expand feature space
            nn.ReLU(),
            nn.BatchNorm1d(64),
            nn.Linear(64, 32),      # compress to mid-level representation
            nn.ReLU(),
            nn.BatchNorm1d(32),
            nn.Dropout(0.3),        # prevent overfitting
            nn.Linear(32, 16),      # smaller latent space
            nn.ReLU(),
            nn.Linear(16, 1)        # single output (logit)
        )
ml.add_configuration(config)

In [104]:
#GPU optimalization TODO: Doesnt work
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)
ml.to(device)

cuda


OurModel(
  (net): Sequential(
    (0): Linear(in_features=19, out_features=64, bias=True)
    (1): ReLU()
    (2): Linear(in_features=64, out_features=32, bias=True)
    (3): ReLU()
    (4): Linear(in_features=32, out_features=16, bias=True)
    (5): ReLU()
    (6): Dropout(p=0.3, inplace=False)
    (7): Linear(in_features=16, out_features=1, bias=True)
  )
)

In [105]:
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.RMSprop(ml.parameters(), lr=0.001)

In [106]:
# # Model Training

epochs = 20
for epoch in range(epochs):
    ml.train()  # set model to training mode
    total_loss = 0
    correct = 0
    total = 0

    for inputs, true in train_loader:
        inputs, true = inputs.to(device), true.to(device)
        true = true.unsqueeze(1)
        y_pred = ml(inputs)
        loss = criterion(y_pred, true)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
        preds = torch.sigmoid(y_pred)              # convert logits → probabilities
        predicted = (preds > 0.5).float()          # threshold to 0/1
        correct += (predicted == true).sum().item()
        total += true.size(0)
    # Epoch summary
    avg_loss = total_loss / len(train_loader)
    accuracy = correct / total

    print(f"Epoch [{epoch+1}/{epochs}] | Loss: {avg_loss:.4f} | Accuracy: {accuracy:.4f}")


Epoch [1/20] | Loss: 0.6738 | Accuracy: 0.5811
Epoch [2/20] | Loss: 0.6505 | Accuracy: 0.6195
Epoch [3/20] | Loss: 0.6465 | Accuracy: 0.6241
Epoch [4/20] | Loss: 0.6451 | Accuracy: 0.6257
Epoch [5/20] | Loss: 0.6442 | Accuracy: 0.6258
Epoch [6/20] | Loss: 0.6436 | Accuracy: 0.6262
Epoch [7/20] | Loss: 0.6428 | Accuracy: 0.6280
Epoch [8/20] | Loss: 0.6429 | Accuracy: 0.6278
Epoch [9/20] | Loss: 0.6417 | Accuracy: 0.6289
Epoch [10/20] | Loss: 0.6411 | Accuracy: 0.6296
Epoch [11/20] | Loss: 0.6407 | Accuracy: 0.6295
Epoch [12/20] | Loss: 0.6407 | Accuracy: 0.6291
Epoch [13/20] | Loss: 0.6401 | Accuracy: 0.6307
Epoch [14/20] | Loss: 0.6401 | Accuracy: 0.6302
Epoch [15/20] | Loss: 0.6397 | Accuracy: 0.6313
Epoch [16/20] | Loss: 0.6398 | Accuracy: 0.6322
Epoch [17/20] | Loss: 0.6393 | Accuracy: 0.6315
Epoch [18/20] | Loss: 0.6393 | Accuracy: 0.6318
Epoch [19/20] | Loss: 0.6390 | Accuracy: 0.6325
Epoch [20/20] | Loss: 0.6391 | Accuracy: 0.6331


## Sweep ##

In [112]:
wandb.login()

True

In [113]:
sweep_config = {
    "method": "random",  # or "grid", "bayes"
    "metric": {"name": "loss", "goal": "minimize"},
    "parameters": {
        "learning_rate": {"min": 0.0001, "max": 0.01},
        # "epochs": {"min": 2, "max": 20},
        "hidden_size_1": {"values": [32, 64, 128]},
        "hidden_size_2": {"values": [8, 32, 64]},
        "hidden_size_3": {"values": [8, 32, 64]},
        "hidden_size_4": {"values": [8, 32, 64]},
        "n_layers": {"values": [2, 4]},
        # "dropout": {"min": 0.0, "max": 0.5},
        "optimizer": {"values": ["Adam", "SGD"]},
        "activation": {"values": ["relu", "tanh"]},
        "batch_size": {"values": [1024, 2048, 4096]},
    }
}

In [115]:
def train():
    wandb.init()
    CONFIG = wandb.config
    train_loader = DataLoader(train_dataset, batch_size=CONFIG.batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=CONFIG.batch_size, shuffle=False)
    # test_loader = DataLoader(test_dataset, batch_size=CONFIG.batch_size, shuffle=False)


    hl_sizes = [
        CONFIG.hidden_size_1, CONFIG.hidden_size_2, CONFIG.hidden_size_3,
        CONFIG.hidden_size_4,
        # CONFIG.hidden_size_5, CONFIG.hidden_size_6,
        # CONFIG.hidden_size_7, CONFIG.hidden_size_8
                ]
    #Building the neural network from CONFIG
    sw_nn = OurModel()
    sw_nn.sweeping_build(COVID_data.shape[1]-1, 1, n_layers=CONFIG.n_layers, hidden_size=hl_sizes, activation_f=CONFIG.activation, dropout=None)
    optimizer = getattr(torch.optim, CONFIG.optimizer)(sw_nn.parameters(), lr=CONFIG.learning_rate)
    criterion = nn.BCEWithLogitsLoss()

    #GPU computing
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    sw_nn.to(device)

    for epoch in range(10):
        sw_nn.train()
        total_loss = 0
        correct = 0
        total = 0
        for inputs, true in train_loader:
            inputs, true = inputs.to(device), true.to(device)
            true = true.unsqueeze(1)
            optimizer.zero_grad()
            y_pred = sw_nn(inputs)
            loss = criterion(y_pred, true)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
            preds = torch.sigmoid(y_pred)              # convert logits → probabilities
            predicted = (preds > 0.5).float()          # threshold to 0/1
            correct += (predicted == true).sum().item()
            total += true.size(0)

        avg_train_loss = total_loss / len(train_loader)
        accuracy = correct / total

        sw_nn.eval()
        val_loss = 0.0
        correct = 0
        total = 0
        with torch.no_grad():
            for inputs, true in val_loader:
                inputs, true = inputs.to(device), true.to(device)
                preds = sw_nn(inputs)
                true = true.unsqueeze(1)
                loss = criterion(preds, true)
                val_loss += loss.item()
                predicted = (preds > 0.5).float()
                correct += (predicted == true).sum().item()
                total += true.size(0)
        avg_val_loss = val_loss / len(val_loader)
        accuracy = correct / total

        wandb.log({
            "epoch": epoch + 1,
            "train_loss": avg_train_loss,
            "val_loss": avg_val_loss,
            "val_accuracy": accuracy
        })


In [116]:
sweep_id = wandb.sweep(sweep=sweep_config, project="xbukovinam-faculty-of-informatics-and-infromation-techno")
wandb.agent(sweep_id, function=train, count=10)

Create sweep with ID: 5w9hxs4y
Sweep URL: https://wandb.ai/xkvietokm-fiit-stu/xbukovinam-faculty-of-informatics-and-infromation-techno/sweeps/5w9hxs4y


[34m[1mwandb[0m: Agent Starting Run: dv8uwwaw with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 1024
[34m[1mwandb[0m: 	hidden_size_1: 128
[34m[1mwandb[0m: 	hidden_size_2: 64
[34m[1mwandb[0m: 	hidden_size_3: 8
[34m[1mwandb[0m: 	hidden_size_4: 8
[34m[1mwandb[0m: 	learning_rate: 0.00829223677975459
[34m[1mwandb[0m: 	n_layers: 2
[34m[1mwandb[0m: 	optimizer: SGD


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_loss,█▇▆▅▅▄▃▃▂▁
val_accuracy,▁▁▁▁▁▁▁▅▃█
val_loss,█▇▆▆▅▄▃▃▂▁

0,1
epoch,10.0
train_loss,0.66768
val_accuracy,0.54545
val_loss,0.66781


[34m[1mwandb[0m: Agent Starting Run: fpu30pcv with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 4096
[34m[1mwandb[0m: 	hidden_size_1: 32
[34m[1mwandb[0m: 	hidden_size_2: 8
[34m[1mwandb[0m: 	hidden_size_3: 64
[34m[1mwandb[0m: 	hidden_size_4: 64
[34m[1mwandb[0m: 	learning_rate: 0.0071595161611884214
[34m[1mwandb[0m: 	n_layers: 2
[34m[1mwandb[0m: 	optimizer: SGD


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_loss,█▃▂▂▂▂▂▁▁▁
val_accuracy,▁▁▁█▅▂▄▇▆▄
val_loss,█▇▆▇▅▄▃▃▂▁

0,1
epoch,10.0
train_loss,0.68269
val_accuracy,0.45927
val_loss,0.68182


[34m[1mwandb[0m: Agent Starting Run: zplb1bqx with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 4096
[34m[1mwandb[0m: 	hidden_size_1: 128
[34m[1mwandb[0m: 	hidden_size_2: 32
[34m[1mwandb[0m: 	hidden_size_3: 8
[34m[1mwandb[0m: 	hidden_size_4: 32
[34m[1mwandb[0m: 	learning_rate: 0.008503512380391048
[34m[1mwandb[0m: 	n_layers: 2
[34m[1mwandb[0m: 	optimizer: SGD


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_loss,█▇▆▅▅▄▃▂▂▁
val_accuracy,▂▂▂▁▂▂▂▂▁█
val_loss,█▇▆▅▄▄▃▂▁▁

0,1
epoch,10.0
train_loss,0.68249
val_accuracy,0.46285
val_loss,0.68185


[34m[1mwandb[0m: Agent Starting Run: f0q0j086 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 2048
[34m[1mwandb[0m: 	hidden_size_1: 32
[34m[1mwandb[0m: 	hidden_size_2: 8
[34m[1mwandb[0m: 	hidden_size_3: 32
[34m[1mwandb[0m: 	hidden_size_4: 32
[34m[1mwandb[0m: 	learning_rate: 0.0009652749819651516
[34m[1mwandb[0m: 	n_layers: 4
[34m[1mwandb[0m: 	optimizer: Adam


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_loss,█▃▂▁▁▁▁▁▁▁
val_accuracy,▁▇█▇██▇▇▇▇
val_loss,█▂▂▂▂▂▁▁▁▁

0,1
epoch,10.0
train_loss,0.63621
val_accuracy,0.62086
val_loss,0.6323


[34m[1mwandb[0m: Agent Starting Run: d3ovxsu5 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 1024
[34m[1mwandb[0m: 	hidden_size_1: 32
[34m[1mwandb[0m: 	hidden_size_2: 32
[34m[1mwandb[0m: 	hidden_size_3: 64
[34m[1mwandb[0m: 	hidden_size_4: 64
[34m[1mwandb[0m: 	learning_rate: 0.008995307717620088
[34m[1mwandb[0m: 	n_layers: 4
[34m[1mwandb[0m: 	optimizer: Adam


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_loss,█▂▂▂▂▁▁▁▁▁
val_accuracy,█▂█▄▁▆▇▄▄█
val_loss,█▃▇▃▃▂▃▂▁▂

0,1
epoch,10.0
train_loss,0.63796
val_accuracy,0.62824
val_loss,0.63335


[34m[1mwandb[0m: Agent Starting Run: ewzzxnhc with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 4096
[34m[1mwandb[0m: 	hidden_size_1: 32
[34m[1mwandb[0m: 	hidden_size_2: 8
[34m[1mwandb[0m: 	hidden_size_3: 8
[34m[1mwandb[0m: 	hidden_size_4: 8
[34m[1mwandb[0m: 	learning_rate: 0.001956461600579146
[34m[1mwandb[0m: 	n_layers: 4
[34m[1mwandb[0m: 	optimizer: Adam


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_loss,█▆▃▁▁▁▁▁▁▁
val_accuracy,▁▃█▇██████
val_loss,█▅▂▂▁▁▁▁▁▁

0,1
epoch,10.0
train_loss,0.63691
val_accuracy,0.62312
val_loss,0.63276


[34m[1mwandb[0m: Agent Starting Run: 1hqx7adq with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 2048
[34m[1mwandb[0m: 	hidden_size_1: 32
[34m[1mwandb[0m: 	hidden_size_2: 32
[34m[1mwandb[0m: 	hidden_size_3: 32
[34m[1mwandb[0m: 	hidden_size_4: 64
[34m[1mwandb[0m: 	learning_rate: 0.003334825062180368
[34m[1mwandb[0m: 	n_layers: 4
[34m[1mwandb[0m: 	optimizer: SGD


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_loss,█▄▃▂▁▁▁▁▁▁
val_accuracy,▁▁▁▁▁▁▁▁▁▁
val_loss,█▄▃▂▁▁▁▁▁▁

0,1
epoch,10.0
train_loss,0.68839
val_accuracy,0.45978
val_loss,0.68874


[34m[1mwandb[0m: Agent Starting Run: uxkmizi6 with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	batch_size: 4096
[34m[1mwandb[0m: 	hidden_size_1: 64
[34m[1mwandb[0m: 	hidden_size_2: 64
[34m[1mwandb[0m: 	hidden_size_3: 64
[34m[1mwandb[0m: 	hidden_size_4: 8
[34m[1mwandb[0m: 	learning_rate: 0.009306088407557018
[34m[1mwandb[0m: 	n_layers: 2
[34m[1mwandb[0m: 	optimizer: SGD


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_loss,█▃▃▂▂▂▂▁▁▁
val_accuracy,▁▁▁▁▁▁▁▁▁▁
val_loss,█▆▅▅▄▃▃▂▂▁

0,1
epoch,10.0
train_loss,0.68153
val_accuracy,0.45978
val_loss,0.68153


[34m[1mwandb[0m: Agent Starting Run: b040jgao with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 1024
[34m[1mwandb[0m: 	hidden_size_1: 32
[34m[1mwandb[0m: 	hidden_size_2: 64
[34m[1mwandb[0m: 	hidden_size_3: 8
[34m[1mwandb[0m: 	hidden_size_4: 8
[34m[1mwandb[0m: 	learning_rate: 0.004550234006480559
[34m[1mwandb[0m: 	n_layers: 2
[34m[1mwandb[0m: 	optimizer: Adam


0,1
epoch,▁▂▃▃▄▅▆▆▇█
train_loss,█▂▂▂▁▁▁▁▁▁
val_accuracy,▅▄█▁▆▄▂▅▆▂
val_loss,▆▇█▆▂▁▃▁▄▄

0,1
epoch,10.0
train_loss,0.63649
val_accuracy,0.60641
val_loss,0.63532


[34m[1mwandb[0m: Agent Starting Run: 6kax42lu with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	batch_size: 1024
[34m[1mwandb[0m: 	hidden_size_1: 32
[34m[1mwandb[0m: 	hidden_size_2: 64
[34m[1mwandb[0m: 	hidden_size_3: 8
[34m[1mwandb[0m: 	hidden_size_4: 32
[34m[1mwandb[0m: 	learning_rate: 0.003117957760587081
[34m[1mwandb[0m: 	n_layers: 4
[34m[1mwandb[0m: 	optimizer: Adam


[34m[1mwandb[0m: Ctrl + C detected. Stopping sweep.
