In [1]:
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import itertools

from torch.utils.data import Dataset, DataLoader
from torch.utils.data.sampler import SubsetRandomSampler
from sklearn.preprocessing import StandardScaler

In [2]:
class CustomDataset(Dataset):
    
    def __init__(self, filepath) -> None:
        
        # load csv data
        data = pd.read_csv(filepath, header=None)
        X = data.iloc[:, :-1].values
        y = data.iloc[:, -1].values
        
        # feature scaling
        sc = StandardScaler()
        X = sc.fit_transform(X)
        
        # convert to tensors
        self.X = torch.tensor(X, dtype=torch.float32)
        self.y = torch.tensor(y)
    
    def __len__(self):
        return len(self.y)
    
    def __getitem__(self, index):
        return self.X[index], self.y[index]

In [3]:
dataset = CustomDataset('train_all_0.csv')

# create data indices for train val split
data_size = len(dataset)
indices = list(range(data_size))
split = int(np.floor(0.2 * data_size))
np.random.shuffle(indices)
train_indices, val_indices = indices[split:], indices[:split]

train_sampler = SubsetRandomSampler(train_indices)
val_sampler = SubsetRandomSampler(val_indices)

# create data loader
train_loader = DataLoader(dataset, batch_size=16, sampler=train_sampler)
val_loader = DataLoader(dataset, batch_size=16, sampler=val_sampler)

In [60]:
class BinaryClassification(nn.Module):
    def __init__(self, dropout=False, batch_norm=False):
        '''Once at a time'''
        super(BinaryClassification, self).__init__()
        self.layer_1 = nn.Linear(12, 128) 
        self.layer_out = nn.Linear(128, 2) 
        self.relu = nn.ReLU()
        
        if dropout:
            self.dropout_1 = nn.Dropout(0.2)
        
        if batch_norm:
            self.batch_norm_1 = nn.BatchNorm1d(128)
        
        self.dropout = dropout
        self.batch_norm = batch_norm
        
    def forward(self, inputs):
        if self.dropout:
            x = self.relu(self.layer_1(inputs))
            x = self.dropout_1(x)
            x = self.layer_out(x)
        
        elif self.dropout:
            x = self.relu(self.layer_1(inputs))
            x = self.batch_norm_1(x)
            x = self.layer_out(x)
            
        else:
            x = self.relu(self.layer_1(inputs))
            x = self.layer_out(x)
            
        return x

# Homework 3-1

In [43]:
# create training pipeline based on page 47
def LG_UA_regularization(train_loader=train_loader, val_loader=val_loader, model=None, criterion=None, optimizer=None, loss_threshold=0.5, eta_threshold=0.008, l2_lambda=0.001):
    '''
    Based on page 47, it should be L2 regularization and I can actually use "weight_decay" in pytorch optimizer.
    But it's not fun so I will still implement L2 regularization by myself.
    '''
    previous_train_loss = 10000    

    for epoch in itertools.count():
        
        model.train()
        
        previous_model_params = model.state_dict()
        stop_training = False
        
        while optimizer.param_groups[0]['lr'] > eta_threshold:
            
            train_loss = []
            train_accs = []
            
            for batch in train_loader:
                
                x, y = batch
                
                logits = model(x.to(device))
                loss = criterion(logits, y.to(device))
                
                # L2 regularization with normalized l2
                L2_regularization = sum(p.pow(2.0).sum() for p in model.parameters())
                param_num = sum(p.numel() for p in model.parameters())
                loss += (l2_lambda / param_num) * L2_regularization
                
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                
                acc = (logits.argmax(dim=-1) == y.to(device)).float().mean()
                train_loss.append(loss.item())
                train_accs.append(acc)
            
            max_train_loss = max(train_loss)
            train_loss = sum(train_loss) / len(train_loss)
            train_acc = sum(train_accs) / len(train_accs)
            
            
            if train_loss < previous_train_loss:
                if max_train_loss < loss_threshold:
                    optimizer.param_groups[0]['lr'] *= 1.2
                    previous_train_loss = train_loss
                    # print(f'The previous training loss is: {previous_train_loss}')
                    break
                
                else:
                    model.load_state_dict(previous_model_params)
                    stop_training = True
                    # print(f'max loss: {max_train_loss} | loss_threshold: {loss_threshold}')
                    break
            
            optimizer.param_groups[0]['lr'] *= 0.7
            model.load_state_dict(previous_model_params)
            # current_lr = optimizer.param_groups[0]['lr']
            # print(f'lr shrinking!, now the lr is: {current_lr}')
            
        else:
            stop_training = True
            # print('learning rate < eta_threshold')
        
        # Use try and except to detect whether the eta_threshold is set too high initially
        try:        
            model.eval()
            valid_loss = []
            valid_accs = []
            
            for batch in val_loader:
                imgs, labels = batch
                
                with torch.no_grad():
                    logits = model(imgs.to(device))
                    
                    acc = (logits.argmax(dim=-1) == labels.to(device)).float().mean()
                    valid_loss.append(loss.item())
                    valid_accs.append(acc)
            
            valid_loss = sum(valid_loss) / len(valid_loss)
            valid_acc = sum(valid_accs) / len(valid_accs)
        
            print(f'[ {epoch+1} ] | train_loss = {train_loss:.5f}, train_acc = {train_acc:.5f}, val_loss = {valid_loss:.5f}, val_acc = {valid_acc:.5f}')
            
        except UnboundLocalError:
            print('Your eta_threshold is setting higher than your learning rate. Reset it with lower one!')
        
        # stopping criterion
        if stop_training:
            print('Restore previous model weights, stop training.')
            break

In [44]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = BinaryClassification().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr = 1e-3)
LG_UA_regularization(train_loader, val_loader, model, criterion, optimizer, loss_threshold=0.9, eta_threshold=0.0001)

[ 1 ] | train_loss = 0.56748, train_acc = 0.72656, val_loss = 0.33394, val_acc = 0.76042
[ 2 ] | train_loss = 0.44250, train_acc = 0.82031, val_loss = 0.31409, val_acc = 0.79167
[ 3 ] | train_loss = 0.37139, train_acc = 0.86198, val_loss = 0.19311, val_acc = 0.80208
[ 4 ] | train_loss = 0.33307, train_acc = 0.86458, val_loss = 0.30488, val_acc = 0.83333
[ 5 ] | train_loss = 0.30860, train_acc = 0.88802, val_loss = 0.16566, val_acc = 0.82292
[ 6 ] | train_loss = 0.29882, train_acc = 0.88281, val_loss = 0.28512, val_acc = 0.86458
[ 7 ] | train_loss = 0.27554, train_acc = 0.89062, val_loss = 0.22133, val_acc = 0.85417
[ 8 ] | train_loss = 0.26412, train_acc = 0.90625, val_loss = 0.24282, val_acc = 0.84375
[ 9 ] | train_loss = 0.24601, train_acc = 0.90885, val_loss = 0.19254, val_acc = 0.89583
[ 10 ] | train_loss = 0.23799, train_acc = 0.91406, val_loss = 0.06659, val_acc = 0.89583
[ 11 ] | train_loss = 0.23598, train_acc = 0.90625, val_loss = 0.25251, val_acc = 0.87500
[ 12 ] | train_loss

In [50]:
# create training pipeline based on page 48
def EU_LG_UA_regularization(train_loader=train_loader, val_loader=val_loader, model=None, epochs=1, criterion=None, optimizer=None, loss_threshold=0.5, eta_threshold=0.008, l2_lambda=0.001):
    '''
    Based on page 47, it should be L2 regularization and I can actually use "weight_decay" in pytorch optimizer.
    But it's not fun so I will still implement L2 regularization by myself.
    '''
    previous_train_loss = 10000    

    for epoch in range(epochs):
        
        model.train()
        
        previous_model_params = model.state_dict()
        stop_training = False
        
        while optimizer.param_groups[0]['lr'] > eta_threshold:
            
            train_loss = []
            train_accs = []
            
            for batch in train_loader:
                
                x, y = batch
                
                logits = model(x.to(device))
                loss = criterion(logits, y.to(device))
                
                # L2 regularization with normalized l2
                L2_regularization = sum(p.pow(2.0).sum() for p in model.parameters())
                param_num = sum(p.numel() for p in model.parameters())
                loss += (l2_lambda / param_num) * L2_regularization
                
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                
                acc = (logits.argmax(dim=-1) == y.to(device)).float().mean()
                train_loss.append(loss.item())
                train_accs.append(acc)
            
            max_train_loss = max(train_loss)
            train_loss = sum(train_loss) / len(train_loss)
            train_acc = sum(train_accs) / len(train_accs)
            
            
            if train_loss < previous_train_loss:
                if max_train_loss < loss_threshold:
                    optimizer.param_groups[0]['lr'] *= 1.2
                    previous_train_loss = train_loss
                    # print(f'The previous training loss is: {previous_train_loss}')
                    break
                
                else:
                    model.load_state_dict(previous_model_params)
                    stop_training = True
                    # print(f'max loss: {max_train_loss} | loss_threshold: {loss_threshold}')
                    break
            
            optimizer.param_groups[0]['lr'] *= 0.7
            model.load_state_dict(previous_model_params)
            # current_lr = optimizer.param_groups[0]['lr']
            # print(f'lr shrinking!, now the lr is: {current_lr}')
            
        else:
            stop_training = True
            # print('learning rate < eta_threshold')
        
        # Use try and except to detect whether the eta_threshold is set too high initially
        try:        
            model.eval()
            valid_loss = []
            valid_accs = []
            
            for batch in val_loader:
                imgs, labels = batch
                
                with torch.no_grad():
                    logits = model(imgs.to(device))
                    
                    acc = (logits.argmax(dim=-1) == labels.to(device)).float().mean()
                    valid_loss.append(loss.item())
                    valid_accs.append(acc)
            
            valid_loss = sum(valid_loss) / len(valid_loss)
            valid_acc = sum(valid_accs) / len(valid_accs)
        
            print(f'[ {epoch+1}/{epochs} ] | train_loss = {train_loss:.5f}, train_acc = {train_acc:.5f}, val_loss = {valid_loss:.5f}, val_acc = {valid_acc:.5f}')
            
        except UnboundLocalError:
            print('Your eta_threshold is setting higher than your learning rate. Reset it with lower one!')
        
        # stopping criterion
        if stop_training:
            print('Restore previous model weights, stop training.')
            break
        
        if epoch+1 >= 50:
            print("It's over 50 epochs, stop training")    
            break

In [51]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = BinaryClassification().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr = 1e-3)
EU_LG_UA_regularization(train_loader, val_loader, model, epochs=100, criterion=criterion, optimizer=optimizer, loss_threshold=0.9, eta_threshold=0.0001)

[ 1/100 ] | train_loss = 0.56600, train_acc = 0.79167, val_loss = 0.49526, val_acc = 0.76042
[ 2/100 ] | train_loss = 0.43306, train_acc = 0.82812, val_loss = 0.42078, val_acc = 0.79167
[ 3/100 ] | train_loss = 0.36971, train_acc = 0.86198, val_loss = 0.19177, val_acc = 0.80208
[ 4/100 ] | train_loss = 0.33529, train_acc = 0.86198, val_loss = 0.39725, val_acc = 0.81250
[ 5/100 ] | train_loss = 0.31347, train_acc = 0.88021, val_loss = 0.22493, val_acc = 0.84375
[ 6/100 ] | train_loss = 0.29172, train_acc = 0.88281, val_loss = 0.14767, val_acc = 0.86458
[ 7/100 ] | train_loss = 0.27862, train_acc = 0.90104, val_loss = 0.59157, val_acc = 0.86458
[ 8/100 ] | train_loss = 0.26701, train_acc = 0.89323, val_loss = 0.36960, val_acc = 0.88542
[ 9/100 ] | train_loss = 0.25352, train_acc = 0.89844, val_loss = 0.20255, val_acc = 0.87500
[ 10/100 ] | train_loss = 0.23497, train_acc = 0.91146, val_loss = 0.24970, val_acc = 0.89583
[ 11/100 ] | train_loss = 0.22420, train_acc = 0.92188, val_loss = 0.

In [58]:
# create training pipeline
def regularization(train_loader=train_loader, val_loader=val_loader, model=None, epochs=None, criterion=None, optimizer=None, l2_lambda=0.001):
    for epoch in range(epochs):
        
        model.train()
        train_loss = []
        train_accs = []
        
        for batch in train_loader:
            
            x, y = batch
            
            logits = model(x.to(device))
            loss = criterion(logits, y.to(device))
            
            # L2 regularization with normalized l2
            L2_regularization = sum(p.pow(2.0).sum() for p in model.parameters())
            param_num = sum(p.numel() for p in model.parameters())
            loss += (l2_lambda / param_num) * L2_regularization
            
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            
            acc = (logits.argmax(dim=-1) == y.to(device)).float().mean()
            train_loss.append(loss.item())
            train_accs.append(acc)
        
        train_loss = sum(train_loss) / len(train_loss)
        train_acc = sum(train_accs) / len(train_accs)
        
        model.eval()

        valid_loss = []
        valid_accs = []
        
        for batch in val_loader:
            imgs, labels = batch
            
            with torch.no_grad():
                logits = model(imgs.to(device))
                
                acc = (logits.argmax(dim=-1) == labels.to(device)).float().mean()
                
                valid_loss.append(loss.item())
                valid_accs.append(acc)
        
        valid_loss = sum(valid_loss) / len(valid_loss)
        valid_acc = sum(valid_accs) / len(valid_accs)
        
        print(f'[ {epoch+1}/{epochs} ] | train_loss = {train_loss:.5f}, train_acc = {train_acc:.5f}, val_loss = {valid_loss:.5f}, val_acc = {valid_acc:.5f}')

In [61]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = BinaryClassification(dropout=True).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)
regularization(train_loader, val_loader, model, epochs=100, criterion=criterion, optimizer=optimizer)

[ 1/100 ] | train_loss = 0.66423, train_acc = 0.55208, val_loss = 0.61592, val_acc = 0.76042
[ 2/100 ] | train_loss = 0.48022, train_acc = 0.83333, val_loss = 0.61011, val_acc = 0.78125
[ 3/100 ] | train_loss = 0.40645, train_acc = 0.86198, val_loss = 0.43490, val_acc = 0.80208
[ 4/100 ] | train_loss = 0.36450, train_acc = 0.85417, val_loss = 0.34818, val_acc = 0.81250
[ 5/100 ] | train_loss = 0.34894, train_acc = 0.86198, val_loss = 0.21656, val_acc = 0.80208
[ 6/100 ] | train_loss = 0.32327, train_acc = 0.86719, val_loss = 0.43926, val_acc = 0.81250
[ 7/100 ] | train_loss = 0.32068, train_acc = 0.87240, val_loss = 0.32941, val_acc = 0.84375
[ 8/100 ] | train_loss = 0.31098, train_acc = 0.87500, val_loss = 0.18890, val_acc = 0.84375
[ 9/100 ] | train_loss = 0.30062, train_acc = 0.87760, val_loss = 0.33761, val_acc = 0.85417
[ 10/100 ] | train_loss = 0.29693, train_acc = 0.89062, val_loss = 0.43440, val_acc = 0.86458
[ 11/100 ] | train_loss = 0.28962, train_acc = 0.89583, val_loss = 0.

In [62]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = BinaryClassification(batch_norm=True).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)
regularization(train_loader, val_loader, model, epochs=100, criterion=criterion, optimizer=optimizer)

[ 1/100 ] | train_loss = 0.66753, train_acc = 0.58333, val_loss = 0.59166, val_acc = 0.76042
[ 2/100 ] | train_loss = 0.47878, train_acc = 0.82292, val_loss = 0.24175, val_acc = 0.79167
[ 3/100 ] | train_loss = 0.41122, train_acc = 0.85677, val_loss = 0.47515, val_acc = 0.80208
[ 4/100 ] | train_loss = 0.37385, train_acc = 0.86458, val_loss = 0.61439, val_acc = 0.81250
[ 5/100 ] | train_loss = 0.34675, train_acc = 0.86719, val_loss = 0.41991, val_acc = 0.82292
[ 6/100 ] | train_loss = 0.32787, train_acc = 0.86458, val_loss = 0.25247, val_acc = 0.82292
[ 7/100 ] | train_loss = 0.31786, train_acc = 0.86719, val_loss = 0.19771, val_acc = 0.83333
[ 8/100 ] | train_loss = 0.30486, train_acc = 0.88021, val_loss = 0.12140, val_acc = 0.84375
[ 9/100 ] | train_loss = 0.29751, train_acc = 0.88802, val_loss = 0.17287, val_acc = 0.85417
[ 10/100 ] | train_loss = 0.29003, train_acc = 0.88802, val_loss = 0.27099, val_acc = 0.84375
[ 11/100 ] | train_loss = 0.28256, train_acc = 0.89062, val_loss = 0.

# Homework 3