In [1]:
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import itertools

from torch.utils.data import Dataset, DataLoader
from torch.utils.data.sampler import SubsetRandomSampler
from sklearn.preprocessing import StandardScaler

In [2]:
class CustomDataset(Dataset):
    
    def __init__(self, filepath) -> None:
        
        # load csv data
        data = pd.read_csv(filepath, header=None)
        X = data.iloc[:, :-1].values
        y = data.iloc[:, -1].values
        
        # feature scaling
        sc = StandardScaler()
        X = sc.fit_transform(X)
        
        # convert to tensors
        self.X = torch.tensor(X, dtype=torch.float32)
        self.y = torch.tensor(y)
    
    def __len__(self):
        return len(self.y)
    
    def __getitem__(self, index):
        return self.X[index], self.y[index]

In [4]:
dataset = CustomDataset('../data/train_all_0.csv')

# create data indices for train val split
data_size = len(dataset)
indices = list(range(data_size))
split = int(np.floor(0.2 * data_size))
np.random.shuffle(indices)
train_indices, val_indices = indices[split:], indices[:split]

train_sampler = SubsetRandomSampler(train_indices)
val_sampler = SubsetRandomSampler(val_indices)

# create data loader
train_loader = DataLoader(dataset, batch_size=16, sampler=train_sampler)
val_loader = DataLoader(dataset, batch_size=16, sampler=val_sampler)

In [5]:
class BinaryClassification(nn.Module):
    def __init__(self):
        super(BinaryClassification, self).__init__()
        self.layer_1 = nn.Linear(12, 128) 
        self.layer_out = nn.Linear(128, 2) 
        self.relu = nn.ReLU()
        
    def forward(self, inputs):
        x = self.relu(self.layer_1(inputs))
        x = self.layer_out(x)
        
        return x

In [6]:
# create training pipeline based on page 49
def P49_train(train_loader=train_loader, val_loader=val_loader, model=None, epochs=None, criterion=None, optimizer=None, loss_threshold=0.5):
    '''When iterating over 50 or validation loss smaller than specific number, the function will stop'''
    for epoch in range(epochs):
        
        model.train()
        train_loss = []
        train_accs = []
        
        for batch in train_loader:
            
            x, y = batch
            
            logits = model(x.to(device))
            loss = criterion(logits, y.to(device))
            
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            
            acc = (logits.argmax(dim=-1) == y.to(device)).float().mean()
            train_loss.append(loss.item())
            train_accs.append(acc)
        
        train_loss = sum(train_loss) / len(train_loss)
        train_acc = sum(train_accs) / len(train_accs)
        
        model.eval()

        valid_loss = []
        valid_accs = []
        
        for batch in val_loader:
            imgs, labels = batch
            
            with torch.no_grad():
                logits = model(imgs.to(device))
                
                acc = (logits.argmax(dim=-1) == labels.to(device)).float().mean()
                
                valid_loss.append(loss.item())
                valid_accs.append(acc)
        
        valid_loss = sum(valid_loss) / len(valid_loss)
        valid_acc = sum(valid_accs) / len(valid_accs)
        
        print(f'[ {epoch+1}/{epochs} ] | train_loss = {train_loss:.5f}, train_acc = {train_acc:.5f}, val_loss = {valid_loss:.5f}, val_acc = {valid_acc:.5f}')
        
        if epoch+1 >= 50:
            print("It's over 50 epochs, stop training")    
            break
        
        if train_loss < loss_threshold:
            print('The training loss is smaller than what you want, stop training')
            break
            

In [7]:
# create training pipeline based on page 50
def P50_train(train_loader=train_loader, val_loader=val_loader, model=None, epochs=None, criterion=None, optimizer=None, loss_threshold=0.5, eta_threshold=0.008):
    '''
    When iterating over 50 or validation loss smaller than specific number, the function will stop.
    Also, if the new loss is larger than the previous one, it will compare the learning rate and the threshold.
    if the learning rate is larger than the threshold, use the old weight and the learning rate will multiply 0.7 and do bp one more time.
    if not, then stop training.
    However if the new loss is smaller than the previous one, update w to new weight and multiply learning rate by 1.2, and going through new loop
    '''
    
    previous_train_loss = 10000    

    for epoch in range(epochs):
        
        model.train()
        
        previous_model_params = model.state_dict()
        stop_training = False
        
        while optimizer.param_groups[0]['lr'] > eta_threshold:
            
            train_loss = []
            train_accs = []
            
            for batch in train_loader:
                
                x, y = batch
                
                logits = model(x.to(device))
                loss = criterion(logits, y.to(device))
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                
                acc = (logits.argmax(dim=-1) == y.to(device)).float().mean()
                train_loss.append(loss.item())
                train_accs.append(acc)
            
            train_loss = sum(train_loss) / len(train_loss)
            train_acc = sum(train_accs) / len(train_accs)
            
            if train_loss < previous_train_loss:
                optimizer.param_groups[0]['lr'] *= 1.2
                previous_train_loss = train_loss
                print(f'The previous training loss is: {previous_train_loss}')
                break
            
            optimizer.param_groups[0]['lr'] *= 0.7
            model.load_state_dict(previous_model_params)
            current_lr = optimizer.param_groups[0]['lr']
            print(f'lr shrinking!, now the lr is: {current_lr}')
            
        else:
            stop_training = True
        
        # Use try and except to detect whether the eta_threshold is set too high initially
        try:        
            model.eval()
            valid_loss = []
            valid_accs = []
            
            for batch in val_loader:
                imgs, labels = batch
                
                with torch.no_grad():
                    logits = model(imgs.to(device))
                    
                    acc = (logits.argmax(dim=-1) == labels.to(device)).float().mean()
                    valid_loss.append(loss.item())
                    valid_accs.append(acc)
            
            valid_loss = sum(valid_loss) / len(valid_loss)
            valid_acc = sum(valid_accs) / len(valid_accs)
        
            print(f'[ {epoch+1}/{epochs} ] | train_loss = {train_loss:.5f}, train_acc = {train_acc:.5f}, val_loss = {valid_loss:.5f}, val_acc = {valid_acc:.5f}')
            
        except UnboundLocalError:
            print('Your eta_threshold is setting higher than your learning rate. Reset it with lower one!')
        
        # stopping criterion
        if stop_training:
            print('Learning rate is smaller than the threshold, stop training.')
            break
        
        if epoch+1 >= 50:
            print('It over 50 epochs, stop training.')
            break
        
        if train_loss < loss_threshold:
            print('The training loss is smaller than what you want, stop training.')
            break

In [8]:
# create training pipeline based on page 58
def P58_train(train_loader=train_loader, val_loader=val_loader, model=None, criterion=None, optimizer=None, loss_threshold=0.5, eta_threshold=0.008):
    '''
    Almost the same as P50, but without training epochs.
    '''
    
    previous_train_loss = 10000    

    for epoch in itertools.count():
        
        model.train()
        
        previous_model_params = model.state_dict()
        stop_training = False
        
        while optimizer.param_groups[0]['lr'] > eta_threshold:
            
            train_loss = []
            train_accs = []
            
            for batch in train_loader:
                
                x, y = batch
                
                logits = model(x.to(device))
                loss = criterion(logits, y.to(device))
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                
                acc = (logits.argmax(dim=-1) == y.to(device)).float().mean()
                train_loss.append(loss.item())
                train_accs.append(acc)
            
            train_loss = sum(train_loss) / len(train_loss)
            train_acc = sum(train_accs) / len(train_accs)
            
            if train_loss < previous_train_loss:
                optimizer.param_groups[0]['lr'] *= 1.2
                previous_train_loss = train_loss
                print(f'The previous training loss is: {previous_train_loss}')
                break
            
            optimizer.param_groups[0]['lr'] *= 0.7
            model.load_state_dict(previous_model_params)
            current_lr = optimizer.param_groups[0]['lr']
            print(f'lr shrinking!, now the lr is: {current_lr}')
            
        else:
            stop_training = True
        
        # Use try and except to detect whether the eta_threshold is set too high initially
        try:        
            model.eval()
            valid_loss = []
            valid_accs = []
            
            for batch in val_loader:
                imgs, labels = batch
                
                with torch.no_grad():
                    logits = model(imgs.to(device))
                    
                    acc = (logits.argmax(dim=-1) == labels.to(device)).float().mean()
                    valid_loss.append(loss.item())
                    valid_accs.append(acc)
            
            valid_loss = sum(valid_loss) / len(valid_loss)
            valid_acc = sum(valid_accs) / len(valid_accs)
        
            print(f'[ {epoch+1}/{epochs} ] | train_loss = {train_loss:.5f}, train_acc = {train_acc:.5f}, val_loss = {valid_loss:.5f}, val_acc = {valid_acc:.5f}')
            
        except UnboundLocalError:
            print('Your eta_threshold is setting higher than your learning rate. Reset it with lower one!')
        
        # stopping criterion
        if stop_training:
            print('Learning rate is smaller than the threshold, stop training.')
            break
        
        if train_loss < loss_threshold:
            print('The training loss is smaller than what you want, stop training.')
            break

In [9]:
epochs = 100
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = BinaryClassification().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr = 1e-3)
P49_train(train_loader, val_loader, model, epochs, criterion, optimizer, loss_threshold=0.2)

[ 1/100 ] | train_loss = 0.57666, train_acc = 0.70573, val_loss = 0.37518, val_acc = 0.83333
[ 2/100 ] | train_loss = 0.42514, train_acc = 0.82552, val_loss = 0.54065, val_acc = 0.86458
[ 3/100 ] | train_loss = 0.37309, train_acc = 0.86458, val_loss = 0.30738, val_acc = 0.84375
[ 4/100 ] | train_loss = 0.34173, train_acc = 0.87760, val_loss = 0.24397, val_acc = 0.87500
[ 5/100 ] | train_loss = 0.32258, train_acc = 0.87240, val_loss = 0.25656, val_acc = 0.88542
[ 6/100 ] | train_loss = 0.30682, train_acc = 0.88802, val_loss = 0.77175, val_acc = 0.89583
[ 7/100 ] | train_loss = 0.29765, train_acc = 0.89844, val_loss = 0.26842, val_acc = 0.88542
[ 8/100 ] | train_loss = 0.28747, train_acc = 0.89583, val_loss = 0.22019, val_acc = 0.88542
[ 9/100 ] | train_loss = 0.27871, train_acc = 0.89844, val_loss = 0.21673, val_acc = 0.88542
[ 10/100 ] | train_loss = 0.27159, train_acc = 0.90625, val_loss = 0.11994, val_acc = 0.88542
[ 11/100 ] | train_loss = 0.26526, train_acc = 0.91406, val_loss = 0.

In [10]:
epochs = 100
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = BinaryClassification().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr = 1e-3)
P50_train(train_loader, val_loader, model, epochs, criterion, optimizer, loss_threshold=0.05, eta_threshold=9e-4)

The previous training loss is: 0.5601096078753471
[ 1/100 ] | train_loss = 0.56011, train_acc = 0.78646, val_loss = 0.50863, val_acc = 0.83333
The previous training loss is: 0.44109678640961647
[ 2/100 ] | train_loss = 0.44110, train_acc = 0.81510, val_loss = 0.52197, val_acc = 0.85417
The previous training loss is: 0.37545802630484104
[ 3/100 ] | train_loss = 0.37546, train_acc = 0.84896, val_loss = 0.25889, val_acc = 0.84375
The previous training loss is: 0.33619561046361923
[ 4/100 ] | train_loss = 0.33620, train_acc = 0.86198, val_loss = 0.29675, val_acc = 0.87500
The previous training loss is: 0.3092994410544634
[ 5/100 ] | train_loss = 0.30930, train_acc = 0.87500, val_loss = 0.13201, val_acc = 0.84375
The previous training loss is: 0.2889921034996708
[ 6/100 ] | train_loss = 0.28899, train_acc = 0.89844, val_loss = 0.37172, val_acc = 0.87500
The previous training loss is: 0.27357610656569403
[ 7/100 ] | train_loss = 0.27358, train_acc = 0.89323, val_loss = 0.12459, val_acc = 0.8

In [11]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = BinaryClassification().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr = 1e-3)
P58_train(train_loader, val_loader, model, criterion, optimizer, loss_threshold=0.05, eta_threshold=9e-4)

The previous training loss is: 0.6519167224566141
[ 1/100 ] | train_loss = 0.65192, train_acc = 0.63281, val_loss = 0.59518, val_acc = 0.83333
The previous training loss is: 0.47752015478909016
[ 2/100 ] | train_loss = 0.47752, train_acc = 0.81250, val_loss = 0.59318, val_acc = 0.83333
The previous training loss is: 0.3995066260298093
[ 3/100 ] | train_loss = 0.39951, train_acc = 0.84635, val_loss = 0.31210, val_acc = 0.83333
The previous training loss is: 0.35225291550159454
[ 4/100 ] | train_loss = 0.35225, train_acc = 0.86198, val_loss = 0.29602, val_acc = 0.84375
The previous training loss is: 0.3276235417773326
[ 5/100 ] | train_loss = 0.32762, train_acc = 0.85417, val_loss = 0.30539, val_acc = 0.85417
The previous training loss is: 0.30202035419642925
[ 6/100 ] | train_loss = 0.30202, train_acc = 0.88281, val_loss = 0.32821, val_acc = 0.87500
The previous training loss is: 0.28464382079740363
[ 7/100 ] | train_loss = 0.28464, train_acc = 0.89583, val_loss = 0.54565, val_acc = 0.8