<a href="https://colab.research.google.com/github/littlekpinrens/MLearning/blob/main/HW1_COVID19.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Homework 1: COVID-19 Cases Prediction (Regression)**

Objectives:
* Solve a regression problem with deep neural networks (DNN).
* Understand basic DNN training tips.
* Familiarize yourself with PyTorch.

If you have any questions, please contact the TAs during TA hours, via NTU COOL, or by email at mlta-2022-spring@googlegroups.com.

In [None]:
# Notebook shell commands: fetch the training and test CSV files with gdown,
# saving them as covid.train.csv and covid.test.csv in the working directory.
!gdown --id '1kLSW_-cW2Huj7bh84YTdimGBOJaODiOS' --output covid.train.csv
!gdown --id '1iiI5qROrAhZn-o4FPqsE97bMzDEFvIdg' --output covid.test.csv

# Import packages

In [None]:
import math
import numpy as np

#reading/writing data
import pandas as pd
import os
import csv

from tqdm import tqdm

import torch 
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader, random_split

from torch.utils.tensorboard import SummaryWriter



# Some Utility Functions

You do not need to modify this part.

In [None]:
def same_seed(seed):
    '''Seed the NumPy and PyTorch RNGs and make cuDNN deterministic.

    seed: Integer used for every random number generator touched here.
    '''
    # Seed the generators first; the CUDA ones only when a GPU is present.
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)

    # Disable cuDNN autotuning and request deterministic kernels so that
    # repeated runs with the same seed behave the same way.
    cudnn = torch.backends.cudnn
    cudnn.benchmark = False
    cudnn.deterministic = True


def train_valid_split(data_set, valid_ratio, seed):
    '''Randomly partition `data_set` into a training part and a validation part.

    data_set:    Indexable collection of samples (it must support len()).
    valid_ratio: Fraction of the samples assigned to the validation part;
                 the count is truncated to an integer.
    seed:        Seed for the generator that drives the random split.

    Returns a (train, valid) pair of NumPy arrays.
    '''
    n_valid = int(valid_ratio * len(data_set))
    n_train = len(data_set) - n_valid

    # A dedicated generator keeps the split reproducible for a given seed.
    rng = torch.Generator().manual_seed(seed)
    train_part, valid_part = random_split(data_set, [n_train, n_valid], generator=rng)

    return np.array(train_part), np.array(valid_part)


def predict(test_loader, model, device):
    '''Run `model` over every batch of `test_loader` and collect the outputs.

    test_loader: Iterable yielding input batches (tensors).
    model:       Module to evaluate; it is switched to eval mode here.
    device:      Device each batch is moved to before the forward pass.

    Returns the per-batch predictions concatenated along dim 0 as a NumPy array.
    '''
    model.eval()
    batch_outputs = []
    for batch in tqdm(test_loader):
        batch = batch.to(device)
        # No gradients are needed for inference.
        with torch.no_grad():
            output = model(batch)
        # Bring the result back to the CPU so it can be converted to NumPy.
        batch_outputs.append(output.detach().cpu())
    return torch.cat(batch_outputs, dim=0).numpy()


# Dataset

In [None]:
class COVID19Dataset(Dataset):
    '''
    Dataset wrapping feature rows and, optionally, their targets.

    x: Features.
    y: Targets, if none, do prediction.
    '''
    def __init__(self, x, y=None):
        # Features are always stored; targets stay None in prediction mode.
        self.x = torch.FloatTensor(x)
        self.y = None if y is None else torch.FloatTensor(y)

    def __getitem__(self, idx):
        '''Return x[idx] alone when there are no targets, else (x[idx], y[idx]).'''
        if self.y is None:
            return self.x[idx]
        return self.x[idx], self.y[idx]

    def __len__(self):
        '''Return the number of samples.'''
        return len(self.x)

# Neural Network Model
Try out different model architectures by modifying the class below.

In [None]:
class My_model(nn.Module):
    '''Small fully connected regression network: input_dim -> 16 -> 8 -> 1.'''

    def __init__(self, input_dim):
        '''
        input_dim: Number of features in each input row.
        '''
        super().__init__()
        # TODO: modify model's structure, be aware of dimensions.
        self.layers = nn.Sequential(
            nn.Linear(input_dim, 16),
            nn.ReLU(),
            nn.Linear(16, 8),
            nn.ReLU(),
            nn.Linear(8, 1),
        )

    def forward(self, x):
        '''Map a (B, input_dim) batch to a (B,) tensor of predictions.'''
        x = self.layers(x)
        x = x.squeeze(1)  # (B,1) -> (B)
        return x


# The original constructor referred to this class as `My_Model`; keep that
# spelling available as an alias so either name resolves.
My_Model = My_model

# Feature Selection
Choose features you deem useful by modifying the function below.

In [None]:
def select_feat(train_data, valid_data, test_data,select_all=True):
    '''Select useful features to perform regression.

    train_data, valid_data: 2-D arrays whose last column is the target.
    test_data:              2-D array of features only (used as-is).
    select_all:             When True keep every feature column, otherwise
                            keep only the hard-coded column list below.

    Returns (x_train, x_valid, x_test, y_train, y_valid).
    '''
    # Split each labelled array into its feature columns and target column.
    x_train, y_train = train_data[:, :-1], train_data[:, -1]
    x_valid, y_valid = valid_data[:, :-1], valid_data[:, -1]
    x_test = test_data

    if not select_all:
        columns = [0, 1, 2, 3, 4]  # TODO: Select suitable feature columns.
    else:
        columns = list(range(x_train.shape[1]))

    return (
        x_train[:, columns],
        x_valid[:, columns],
        x_test[:, columns],
        y_train,
        y_valid,
    )
    

# Training Loop

In [None]:
def trainer(train_loader, valid_loader, model, config, device):
    '''Train `model` with SGD, validate after every epoch, and keep the best weights.

    train_loader: Iterable of (x, y) training batches.
    valid_loader: Iterable of (x, y) validation batches.
    model:        Module to optimise; its state dict is saved whenever the
                  mean validation loss improves.
    config:       Dict read for 'learning_rate', 'n_epochs', 'early_stop'
                  and 'save_path'.
    device:       Device every batch is moved to.

    Mean train/validation losses are logged to TensorBoard each epoch.
    Training stops early once the validation loss has failed to improve for
    config['early_stop'] consecutive epochs. Returns None.
    '''
    criterion = nn.MSELoss(reduction='mean')  # Define your loss function, do not modify this.

    # Define your optimization algorithm.
    # TODO: Please check https://pytorch.org/docs/stable/optim.html to get more available algorithms.
    # TODO: L2 regularization (optimizer(weight decay...) or implement by your self).
    optimizer = torch.optim.SGD(model.parameters(), lr=config['learning_rate'], momentum=0.9)

    # Create the directory for saved models (no error if it already exists).
    os.makedirs('./models', exist_ok=True)

    n_epochs, best_loss, step, early_stop_count = config['n_epochs'], math.inf, 0, 0

    writer = SummaryWriter()
    try:
        for epoch in range(n_epochs):
            model.train()  # Set your model to train mode.
            loss_record = []

            # tqdm is a package to visualize your training progress.
            train_pbar = tqdm(train_loader, position=0, leave=True)

            for x, y in train_pbar:
                optimizer.zero_grad()              # Set gradient to zero.
                x, y = x.to(device), y.to(device)  # Move your data to device.
                pred = model(x)
                loss = criterion(pred, y)
                loss.backward()                    # Compute gradient (backpropagation).
                optimizer.step()                   # Update parameters.
                step += 1
                loss_value = loss.detach().item()
                loss_record.append(loss_value)

                # Display current epoch number and loss on tqdm progress bar.
                train_pbar.set_description(f'Epoch [{epoch+1}/{n_epochs}]')
                train_pbar.set_postfix({'loss': loss_value})

            mean_train_loss = sum(loss_record) / len(loss_record)
            # Same 'Loss/' prefix as the validation tag so both curves are grouped together.
            writer.add_scalar('Loss/train', mean_train_loss, step)

            model.eval()  # Set your model to evaluation mode.
            loss_record = []
            for x, y in valid_loader:
                x, y = x.to(device), y.to(device)
                with torch.no_grad():
                    pred = model(x)
                    loss = criterion(pred, y)

                loss_record.append(loss.item())

            mean_valid_loss = sum(loss_record) / len(loss_record)
            print(f'Epoch [{epoch+1}/{n_epochs}]: Train loss:{mean_train_loss:.4f},Valid loss: {mean_valid_loss:.4f}')
            writer.add_scalar('Loss/valid', mean_valid_loss, step)

            if mean_valid_loss < best_loss:
                best_loss = mean_valid_loss
                torch.save(model.state_dict(), config['save_path'])  # Save your best model.
                print('Save model with loss {:.3f}...'.format(best_loss))
                early_stop_count = 0
            else:
                early_stop_count += 1

            if early_stop_count >= config['early_stop']:
                print('\nModel is not improving, so we halt the training session.')
                return
    finally:
        # Flush and close the TensorBoard writer on every exit path.
        writer.close()



# Configurations
`config` contains hyper-parameters for training and the path to save your model.

In [None]:
# Use the GPU when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

# Hyper-parameters for training plus the checkpoint location.
config = dict(
    seed=5201314,            # Random seed; pick any number you like.
    select_all=True,         # Whether to use all features.
    valid_ratio=0.2,         # Fraction of the training data held out for validation.
    n_epochs=100,            # Number of epochs.
    batch_size=256,
    learning_rate=1e-5,
    # Stop after this many consecutive epochs without improvement.
    # NOTE(review): this is larger than n_epochs, so early stopping cannot
    # trigger with these values; confirm that is intended.
    early_stop=400,
    save_path='./models/model.ckpt',  # Your model will be saved here.
)