## Neural Network Approach

##### Imports

In [9]:
from torch.utils.data import DataLoader, WeightedRandomSampler
from torchmetrics import Accuracy, Precision, F1Score, Recall
import torch.nn.functional as F
from tqdm.auto import tqdm
import pandas as pd
import torch.nn as nn
import numpy as np
import itertools
import argparse
import torch

###### Note: if the notebook raises a file-not-found error, change the dataset path to an absolute file path. The line to change is `resp_dataset = RespDataset("data/covidfeaturesnorm.csv")` under "Training Code".

##### Utils

In [10]:
def get_configurations(param_grid):
    """
    Expands a hyperparameter grid into every combination of its values
    :param param_grid: dict mapping each hyperparameter name to an iterable of candidate values
    :return: list of dicts, one per element of the Cartesian product of the candidate
             values, in the order produced by itertools.product (last key varies fastest)
    """
    keys = list(param_grid)
    # itertools.product() with no iterables yields a single empty tuple, so an
    # empty grid produces one empty configuration instead of raising on unpacking.
    return [dict(zip(keys, combo)) for combo in itertools.product(*param_grid.values())]

##### Dataset Definition

In [11]:
class RespDataset(torch.utils.data.Dataset):
    """
    Dataset backed by a CSV file, yielding (features, binary label) pairs.

    Each row is read positionally: columns 1..25 hold the 25 numeric features
    and column 26 holds the diagnosis string. Column 0 is not used here
    (presumably a file name / identifier -- verify against the CSV).
    """

    # Maps the diagnosis string found in column 26 to the binary class index:
    # every 'positive_*' diagnosis is class 1, everything else is class 0.
    # Built once at class level instead of on every __getitem__ call.
    DIAGNOSIS_TO_LABEL = {
        'healthy': 0,
        'positive_mild': 1,
        'no_resp_illness_exposed': 0,
        'resp_illness_not_identified': 0,
        'positive_moderate': 1,
        'recovered_full': 0,
        'positive_asymp': 1,
    }

    def __init__(self, csv_file):
        """
        :param csv_file: path (or anything accepted by pandas.read_csv) of the feature CSV
        """
        self.data = pd.read_csv(csv_file)

    def __len__(self):
        """
        :return: number of rows in the loaded CSV
        """
        return len(self.data)

    def __getitem__(self, idx):
        """
        :param idx: positional row index
        :return: tuple (audio_fts, target) where audio_fts is a float32 tensor holding
                 columns 1..25 of the row and target is the int class index (0 or 1)
        :raises KeyError: if the diagnosis string in column 26 is not a known label
        """
        # Positional slice 1:26 selects the same 25 feature columns as an explicit index list.
        audio_fts = torch.tensor(self.data.iloc[idx, 1:26].tolist(), dtype=torch.float32)

        target_str = self.data.iloc[idx, 26]
        target = self.DIAGNOSIS_TO_LABEL[target_str]

        return audio_fts, target

##### Model Definition

In [12]:
class RespPredictionModel(nn.Module):
    """
    Fully connected classifier: 25 input features -> 64 -> 128 -> 256 -> 64 -> 2 classes,
    with ReLU after every hidden layer.

    forward() returns raw, unnormalized class scores (logits). nn.CrossEntropyLoss
    applies log-softmax itself, so squashing the scores with a sigmoid first confines
    them to (0, 1) and caps the confidence the loss can ever reach. Use
    torch.softmax(logits, dim=-1) when probabilities are needed; argmax is unaffected.
    """

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(25, 64)
        self.fc2 = nn.Linear(64, 128)
        self.fc3 = nn.Linear(128, 256)
        self.fc4 = nn.Linear(256, 64)
        self.fc5 = nn.Linear(64, 2)
        # Parameter-free; no longer applied in forward(). Kept so the attribute and the
        # module structure stay available to existing code.
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """
        :param x: float tensor whose last dimension has size 25
        :return: tensor of logits whose last dimension has size 2
        """
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc3(x))
        x = F.relu(self.fc4(x))
        return self.fc5(x)

##### Training Code

###### To train the model, set the hyperparameter values to search over in `param_grid` and, if necessary, update the path passed to the Dataset.

In [None]:
def train_one_epoch(resp_model, train_data, optimizer, criterion) -> None:
    """
    Trains the NN for one epoch, for respiratory diagnosis classification.
    Prints the device in use and the loss of every batch.
    :param resp_model: NN model
    :param train_data: iterable of (features, targets) training batches
    :param optimizer: optimizer
    :param criterion: loss criterion taking (class scores, integer class targets)
    """
    # Run on the device the model already lives on, so that batches, loss weights
    # and parameters can never end up on different devices.
    first_param = next(resp_model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")
    print("Using device:", device)
    if isinstance(criterion, nn.Module):
        # Moves tensors held by the loss (e.g. class weights) next to the model; in-place.
        criterion.to(device)

    resp_model.train()

    for resp_fts, targets in tqdm(train_data):
        resp_fts = resp_fts.to(device)
        # The loss needs int64 class indices.
        targets = targets.to(device=device, dtype=torch.long)

        optimizer.zero_grad()
        outputs = resp_model(resp_fts).to(torch.float32)
        # No squeeze(): it would turn a final batch of size 1 from (1, C) into (C,),
        # which no longer matches the (1,) targets.
        ce_loss = criterion(outputs, targets)
        ce_loss.backward()
        optimizer.step()
        print(ce_loss.item())

 
def evaluate_one_epoch(resp_model, val_data, epoch, criterion):
    """
    Evaluates the NN on the validation data for one epoch, for respiratory
    diagnosis classification. Prints the device in use and the loss of every batch.
    :param resp_model: NN model
    :param val_data: iterable of (features, targets) validation batches
    :param epoch: index of the current epoch (currently unused; kept for the callers)
    :param criterion: loss criterion taking (class scores, integer class targets)
    """
    # Evaluate on the device the model already lives on.
    first_param = next(resp_model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")
    print("device: ", device)
    if isinstance(criterion, nn.Module):
        # Moves tensors held by the loss (e.g. class weights) next to the model; in-place.
        criterion.to(device)

    resp_model.eval()

    with torch.no_grad():
        for resp_fts, targets in tqdm(val_data):
            resp_fts = resp_fts.to(device)
            # The loss needs int64 class indices.
            targets = targets.to(device=device, dtype=torch.long)

            preds = resp_model(resp_fts).to(torch.float32)
            # No squeeze(): it would break a final batch of size 1.
            ce_loss = criterion(preds, targets)
            print(ce_loss.item())

 
 
def test_resp_model(resp_model, test_data, criterion):
    """
    Tests the NN on the test data, for respiratory diagnosis classification.
    Prints the loss of every batch, then macro F1 / precision / recall and the
    accuracy accumulated over ALL test batches (not only the last one).
    :param resp_model: NN model
    :param test_data: iterable of (features, targets) test batches
    :param criterion: loss criterion taking (class scores, integer class targets)
    """
    # Test on the device the model already lives on; batches, loss weights and
    # metric states are all moved there to avoid device mismatches.
    first_param = next(resp_model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")
    if isinstance(criterion, nn.Module):
        criterion.to(device)

    resp_model.eval()

    accuracy = Accuracy(task="multiclass", num_classes=2).to(device)
    f1 = F1Score(task="multiclass", num_classes=2, average='macro').to(device)
    precision = Precision(task="multiclass", num_classes=2, average='macro').to(device)
    recall = Recall(task="multiclass", num_classes=2, average='macro').to(device)
    metrics = (accuracy, f1, precision, recall)

    num_batches = 0
    with torch.no_grad():
        for resp_fts, targets in tqdm(test_data):
            resp_fts = resp_fts.to(device)
            # The loss needs int64 class indices.
            targets = targets.to(device=device, dtype=torch.long)

            preds = resp_model(resp_fts).to(torch.float32)
            # No squeeze(): it would break a final batch of size 1.
            ce_loss = criterion(preds, targets)
            print(ce_loss.item())

            # Accumulate per-batch statistics; the final scores are computed once below.
            predicted_labels = torch.argmax(preds, dim=1)
            for metric in metrics:
                metric.update(predicted_labels, targets)
            num_batches += 1

    if num_batches == 0:
        # Nothing to score; avoids reporting metrics for data that was never seen.
        print("test_data is empty; no metrics computed")
        return

    acc = accuracy.compute()
    f1score = f1.compute()
    precision_final = precision.compute()
    recall_final = recall.compute()

    print("f1", f1score)
    print("precision", precision_final)
    print("recall", recall_final)
    print("Final Accuracy: ", acc)
 


# Grid search: trains, saves, reloads and tests one model per hyperparameter combination.

# NOTE: the data loaders below yield CPU tensors, so the models are trained and tested
# on CPU; `device` is kept as a module-level name for any later cell that relies on it.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

param_grid = {
    'learning_rate': [0.0001, 0.0005, 0.008],
    'batch_size': [32, 64],
    'num_epochs': [100, 150, 200],
}
combinations = get_configurations(param_grid)

# Data: read and split once. The split generator is re-seeded with the same value,
# so every configuration sees exactly the same train/val/test partition.
resp_dataset = RespDataset("data/covidfeaturesnorm.csv")
train_data_tmp, val_data_tmp, test_data_tmp = torch.utils.data.random_split(
    resp_dataset, [0.8, 0.1, 0.1], generator=torch.Generator().manual_seed(42)
)

# Per-class loss weights for classes (0, 1).
class_weights = [0.27, 0.73]

for i, combo in enumerate(combinations):
    print(i, combo)
    batch_size = combo['batch_size']
    learning_rate = combo['learning_rate']
    num_epochs = combo['num_epochs']

    criterion = nn.CrossEntropyLoss(weight=torch.tensor(class_weights))

    train_data = DataLoader(
        train_data_tmp,
        batch_size=batch_size,
        shuffle=True
    )
    val_data = DataLoader(
        val_data_tmp,
        batch_size=batch_size,
        shuffle=True
    )
    test_data = DataLoader(
        test_data_tmp,
        batch_size=batch_size,
        shuffle=True
    )
    print("train_data:", train_data)
    print("val_data:", val_data)
    print("test_data:", test_data)

    resp_model = RespPredictionModel()

    params = resp_model.parameters()

    optimizer = torch.optim.Adam(
        params, lr=learning_rate, weight_decay=0.0005
    )

    # Training Loop
    for epoch in range(num_epochs):
        print("training epoch:", epoch)
        train_one_epoch(resp_model, train_data, optimizer, criterion)
        print("evaluating epoch:", epoch)
        evaluate_one_epoch(resp_model, val_data, epoch, criterion)

    model_path = f"covid_model_binary_{i}.pt"
    torch.save(resp_model, model_path)

    # Testing Loop
    # The checkpoint is a fully pickled nn.Module, which torch.load only accepts with
    # weights_only=False (the default became True in PyTorch 2.6). That is acceptable
    # here because the file was written by this very script a moment ago; never load
    # an untrusted checkpoint this way.
    resp_model_trained = torch.load(
        model_path, map_location=torch.device('cpu'), weights_only=False
    )
    # The model stays on CPU: test_data yields CPU tensors and the loss weights are on
    # CPU, so moving only the model to another device would raise a device mismatch.
    print("testing model: ", f"covid_model_binary_{i}.pt")
    test_resp_model(resp_model_trained, test_data, criterion)