In [None]:
import copy
import glob
import os
import random # to set the python random seed
import sys
import time
# This should map to the root directory of the GitHub repo (here: the parent
# of the current working directory). If it doesn't, set it manually.
ROOT_DIR = os.path.dirname(os.getcwd())
print(ROOT_DIR)
# Only register the repo root once, so re-running this cell does not keep
# appending duplicate entries to sys.path.
if ROOT_DIR not in sys.path:
    sys.path.append(ROOT_DIR)

# This should map to the data directory of the GitHub repo
DATA_DIR = os.path.join(ROOT_DIR, 'data', 'sta_chunks')
print(DATA_DIR)

import numpy as np # to set the numpy random seed
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torchvision.transforms import ToTensor
import torch.nn.functional as TF
import torchvision.transforms.functional as TVF
from torchmetrics.functional import accuracy
import torch.optim as optim
from torch.optim import lr_scheduler
from torchvision import datasets, transforms
# Ignore excessive warnings: raise the root logger threshold so that only
# records at ERROR level or above are emitted.
import logging
# Note: `propagate` is a per-Logger attribute; assigning it on the `logging`
# module itself has no effect, so only the root logger level is configured here.
logging.getLogger().setLevel(logging.ERROR)

# WandB – Import the wandb library
# import wandb
from pytorch_lightning import LightningModule, seed_everything, Trainer
from pytorch_lightning.callbacks import LearningRateMonitor, ModelCheckpoint
from pytorch_lightning.loggers import WandbLogger

from tqdm.notebook import tqdm


# Local imports
from utils.icme_dataset import ICMEDataset
from utils.icme_net import IcmeNet

In [None]:
# Fix the random seed up front (via pytorch_lightning's seed_everything) so
# that repeated Restart-and-Run-All executions of the notebook are comparable.
seed_everything(42)

Function used to train the model

In [None]:
def train_model(
    model,
    criterion,
    optimizer,
    scheduler,
    num_epochs=25,
    dataloaders=None,
    device=None
):
    """Train ``model`` and keep the weights with the best validation accuracy.

    Every epoch runs a ``'train'`` phase followed by a ``'val'`` phase. The
    per-epoch loss and accuracy of both phases are recorded, and the model
    state with the highest validation accuracy is restored before returning.

    Parameters
    ----------
    model : torch.nn.Module
        Network to optimize; called as ``model(inputs.float())``.
    criterion : callable
        Loss called as ``criterion(outputs, labels)``. It is assumed to return
        the *mean* loss over the batch, because the value is re-weighted by
        the batch size before being accumulated.
    optimizer : torch.optim.Optimizer
        Optimizer stepped once per training batch.
    scheduler : learning-rate scheduler
        Stepped once per epoch, after the training phase.
    num_epochs : int, optional
        Number of epochs to run.
    dataloaders : dict
        Mapping with the keys ``'train'`` and ``'val'``, each an iterable of
        ``(inputs, labels)`` batches that also supports ``len()``.
    device : torch.device, optional
        Device every batch is moved to before the forward pass.

    Returns
    -------
    model
        The input model, loaded with the best-validation-accuracy weights.
    training_data : dict
        Lists (one entry per epoch) under the keys ``'train_loss'``,
        ``'val_loss'`` (Python floats), ``'train_acc'`` and ``'val_acc'``
        (0-dim double tensors).

    Raises
    ------
    ValueError
        If ``dataloaders`` is ``None`` or a phase yields no samples.
    """
    if dataloaders is None:
        raise ValueError(
            "dataloaders must be a dict providing 'train' and 'val' loaders"
        )

    since = time.time()

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    training_data = {'train_loss': [], 'val_loss': [], 'train_acc': [], 'val_acc': []}

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()  # Set model to training mode
            else:
                model.eval()   # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0
            # Count the samples actually seen. len(dataloader) is the number
            # of *batches*, which only equals the number of samples when
            # batch_size == 1, so it must not be used as the denominator.
            num_samples = 0

            # Iterate over data.
            for inputs, labels in tqdm(dataloaders[phase], desc=phase, total=len(dataloaders[phase])):
                inputs = inputs.to(device)
                labels = labels.to(device)
                batch_size = inputs.size(0)

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward
                # track gradient history only in the training phase
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(inputs.float())
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)

                    # backward + optimize only if in training phase
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                # statistics: undo the batch-mean so sums are per sample
                running_loss += loss.item() * batch_size
                running_corrects += torch.sum(preds == labels)
                num_samples += batch_size

            if num_samples == 0:
                raise ValueError(
                    "dataloader for phase '{}' yielded no samples".format(phase)
                )

            if phase == 'train':
                scheduler.step()

            epoch_loss = running_loss / num_samples
            epoch_acc = running_corrects.double() / num_samples

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(
                phase, epoch_loss, epoch_acc))

            training_data[f'{phase}_loss'].append(epoch_loss)
            training_data[f'{phase}_acc'].append(epoch_acc)

            # deep copy the model whenever validation accuracy improves
            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())

        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:4f}'.format(best_acc))

    # load best model weights
    model.load_state_dict(best_model_wts)
    return model, training_data

In [None]:
def _build_split(split_file):
    """Create the ICMEDataset for one split and wrap it in a DataLoader.

    ``split_file`` is passed as the first argument to ``ICMEDataset``
    (presumably the file listing that split's samples -- confirm against
    ``utils.icme_dataset``). Every split uses the same loader settings:
    one sample per batch, shuffled, loaded in the main process.

    Returns the ``(dataset, loader)`` pair.
    """
    dataset = ICMEDataset(split_file, rootdir=ROOT_DIR, datadir=DATA_DIR)
    loader = DataLoader(dataset, batch_size=1, shuffle=True, num_workers=0)
    return dataset, loader


# Train dataset and loader
icme_train_dataset, train_loader = _build_split('sta_train_set.txt')

# Test dataset and loader
icme_test_dataset, test_loader = _build_split('sta_test_set.txt')

# Validation dataset and loader
icme_val_dataset, val_loader = _build_split('sta_validation_set.txt')

Collect the training and validation loaders to use during the training loop

In [None]:
# The training loop looks loaders up by phase name ('train' / 'val').
dataloaders = dict(train=train_loader, val=val_loader)

In [None]:
# Build the network with two output classes and hand it all three loaders.
model = IcmeNet(
    num_classes=2,
    kernel_size=9,
    train_loader=train_loader,
    test_loader=test_loader,
    val_loader=val_loader,
)

Use GPU if it is available

In [None]:
# Prefer the first CUDA device when one is available; otherwise use the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

# Move the model's parameters onto the selected device.
model = model.to(device)

Define the optimizer, learning rate scheduler, and loss function

In [None]:
# Optimization routine: Adam with an initial learning rate of 1e-3
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)

# Learning-rate schedule: start with larger steps and halve the learning
# rate (gamma=0.5) after every 5 scheduler steps; the training loop steps
# the scheduler once per epoch.
scheduler = lr_scheduler.StepLR(optimizer=optimizer, step_size=5, gamma=0.5)

# Loss function: functional cross-entropy (TF is torch.nn.functional here)
loss_fn = TF.cross_entropy

Run the training loop

In [None]:
# Train for 20 epochs; `model` is rebound to the network carrying the
# best-validation-accuracy weights, `training_data` holds the per-epoch history.
model, training_data = train_model(
    model=model,
    criterion=loss_fn,
    optimizer=optimizer,
    scheduler=scheduler,
    dataloaders=dataloaders,
    device=device,
    num_epochs=20,
)

Proceed with the following cells if you are content with the model performance

In [None]:
# One row per epoch, one column per recorded metric.
training_data_df = pd.DataFrame(data=training_data)

In [None]:
# The accuracy entries come back from train_model as 0-dim torch tensors.
# Convert them to plain Python floats so the columns get a numeric dtype.
# float() also accepts values that were already converted, so this cell can be
# re-run safely (calling .cpu() on an already-converted value would fail).
for acc_column in ('train_acc', 'val_acc'):
    training_data_df[acc_column] = [float(acc) for acc in training_data_df[acc_column]]

In [None]:
# Persist the per-epoch metrics as CSV (header row and index column included).
# NOTE(review): this path is relative to the working directory and looks like a
# mounted Google Drive folder -- confirm it exists before running.
results_path = './gdrive/MyDrive/aos_c205_final_project_data/training_results.txt'
training_data_df.to_csv(results_path, header=True, index=True)

In [None]:
# torch.save(model.state_dict(), '../adam_optim_neural_network.pth')