# k-Fold Cross Validation
Reference: https://www.machinecurve.com/index.php/2021/02/03/how-to-use-k-fold-cross-validation-with-pytorch/

In [1]:
# system imports
import os
from datetime import datetime

# additional imports
import pandas as pd
import numpy as np
from tqdm.auto import tqdm

from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import KFold

import torch

# internal imports
from utils import CoughNet

# device config: use CUDA when torch reports it as available, else the CPU
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

## Hyperparameters

In [2]:
# Run configuration: dataset path, optimisation settings and the names of the
# feature columns that are read from the dataset.
hparams = {
    'dataset': 'data/prepared_data_balanced.csv',
    'epochs': 20,
    'batch_size': 16,
    'lr': 1e-3,
    'features': [
        'chroma_stft',
        'rmse',
        'spectral_centroid',
        'spectral_bandwidth',
        'rolloff',
        'zero_crossing_rate',
        # 'mfcc1' ... 'mfcc20'
        *(f'mfcc{n}' for n in range(1, 21)),
    ],
}

## Prepare Data

In [3]:
# Read the feature table and take the configured feature columns as a
# float32 matrix.
df_features = pd.read_csv(hparams['dataset'])
X = df_features[hparams['features']].to_numpy(dtype=np.float32)

# Encode the values of the 'label' column as integer class ids.
encoder = LabelEncoder()
y = encoder.fit_transform(df_features['label'])

## K-fold Cross Validation model evaluation

In [4]:
# Cross-validation setup: a shuffled, fixed-seed split into `k_folds` parts
# over the sample positions, plus the lists that collect one accuracy per fold.
k_folds = 8
results_train, results_test = [], []
indices = np.arange(len(y))
kfold = KFold(shuffle=True, random_state=42, n_splits=k_folds)

def train(loader_train, model, optimizer, epoch):
    """Train ``model`` for one pass over ``loader_train`` and return the accuracy.

    Relies on two module-level names that must exist before the first call:
    ``device`` (where each batch is moved) and ``criterion`` (the loss function).

    Args:
        loader_train: Iterable of batches; element 0 of each batch holds the
            features and element 1 the labels.
        model: Network to train; it is switched to training mode.
        optimizer: Optimizer that is stepped once per batch.
        epoch: Index of the current epoch. Unused; kept for call compatibility.

    Returns:
        float: Number of correct predictions divided by the number of samples
        seen. Predictions come from the forward pass made *before* each
        weight update.

    Raises:
        ZeroDivisionError: If ``loader_train`` yields no samples.
    """
    model.train()
    running_correct = 0
    total = 0
    for sample in loader_train:
        features, labels = sample[0].to(device), sample[1].to(device)

        # clear gradients first so anything left over from an earlier backward
        # pass cannot leak into this update
        optimizer.zero_grad()

        # forward pass and loss calculation
        outputs = model(features)
        loss = criterion(outputs, labels)

        # backward pass
        loss.backward()

        # update weights
        optimizer.step()

        # calculate metrics (detached: the argmax needs no gradient tracking)
        predictions = torch.argmax(outputs.detach(), 1)
        running_correct += (predictions == labels).sum().item()
        total += labels.shape[0]

    return running_correct / total

def evaluate(loader_test, model, epoch):
    """Measure the accuracy of ``model`` on ``loader_test`` without training.

    Batches are moved to the module-level ``device``. No loss is computed:
    only the accuracy is returned, so the function does not need a loss
    function to be defined.

    Args:
        loader_test: Iterable of batches; element 0 of each batch holds the
            features and element 1 the labels.
        model: Network to evaluate; it is switched to evaluation mode.
        epoch: Index of the current epoch. Unused; kept for call compatibility.

    Returns:
        float: Number of correct predictions divided by the number of samples
        seen.

    Raises:
        ZeroDivisionError: If ``loader_test`` yields no samples.
    """
    model.eval()
    running_correct = 0
    total = 0
    # gradient tracking is switched off for the whole evaluation pass
    with torch.no_grad():
        for sample in loader_test:
            features, labels = sample[0].to(device), sample[1].to(device)

            # forward pass
            outputs = model(features)

            # calculate metrics: predicted class = index of the largest output
            predictions = torch.argmax(outputs, 1)
            running_correct += (predictions == labels).sum().item()
            total += labels.shape[0]

    return running_correct / total

# Header of the results table; one row per fold is printed by the loop below.
print(f'K-FOLD CROSS VALIDATION RESULTS FOR {k_folds} FOLDS')
print('--------------------------------------------')
print('|         | Train Accuracy | Test Accuracy |')
print('--------------------------------------------')

# One iteration per fold: kfold.split yields the positions of the training and
# test samples, which are used to slice X and y.
for fold, (train_ids, test_ids) in enumerate(kfold.split(indices)):
    X_train = X[train_ids]
    y_train = y[train_ids]
    X_test = X[test_ids]
    y_test = y[test_ids]
    
    # scale data: the scaler is fitted on the training fold only and then
    # applied unchanged to the test fold
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)
    
    # create pytorch dataloader
    # torch is re-seeded with the same value at the start of every fold
    # (presumably so weight initialisation and batch shuffling are repeatable
    # per fold -- TODO confirm)
    torch.manual_seed(42)
    # labels are cast to long (integer) tensors; only the training loader shuffles
    train_dataset = torch.utils.data.TensorDataset(torch.Tensor(X_train), torch.Tensor(y_train).long())
    test_dataset = torch.utils.data.TensorDataset(torch.Tensor(X_test), torch.Tensor(y_test).long())
    train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=hparams['batch_size'], shuffle=True)
    test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=hparams['batch_size'], shuffle=False)
    
    # create model: a new CoughNet per fold, constructed with the number of features
    model = CoughNet(len(hparams['features'])).to(device)

    # Construct loss and optimizer
    # NOTE: `criterion` is not passed to train(); train() reads it as a
    # module-level global, so it must be assigned before the epoch loop runs.
    optimizer = torch.optim.Adam(model.parameters(), lr=hparams['lr'])
    criterion = torch.nn.CrossEntropyLoss()

    # training loop: train for one epoch, then score the test fold
    for epoch in range(hparams['epochs']):
        train_accuracy = train(train_loader, model, optimizer, epoch)
        eval_accuracy = evaluate(test_loader, model, epoch)
    # Only the accuracies of the final epoch are recorded for the fold.
    # NOTE(review): with hparams['epochs'] == 0 these names would be undefined
    # in the first fold (or stale from the previous fold).
    results_train.append(train_accuracy) 
    results_test.append(eval_accuracy) 
    print(f'| Fold {fold}  |       {train_accuracy*100:.2f} % |       {eval_accuracy*100:.2f} % |')

# Footer row: unweighted mean of the per-fold accuracies.
print('--------------------------------------------')
print(f'| Average |       {np.mean(results_train)*100:.2f} % |       {np.mean(results_test)*100:.2f} % |')

K-FOLD CROSS VALIDATION RESULTS FOR 8 FOLDS
--------------------------------------------
|         | Train Accuracy | Test Accuracy |
--------------------------------------------
| Fold 0  |       100.00 % |       94.12 % |
| Fold 1  |       100.00 % |       88.24 % |
| Fold 2  |       100.00 % |       94.12 % |
| Fold 3  |       100.00 % |       76.47 % |
| Fold 4  |       100.00 % |       88.24 % |
| Fold 5  |       100.00 % |       94.12 % |
| Fold 6  |       100.00 % |       100.00 % |
| Fold 7  |       96.61 % |       87.50 % |
--------------------------------------------
| Average |       99.58 % |       90.35 % |
