In [144]:
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.utils.prune as prune
import torch.optim as optim
import numpy as np
import itertools
import copy

from torch.utils.data import Dataset, DataLoader
from torch.utils.data.sampler import SubsetRandomSampler
from sklearn.preprocessing import StandardScaler

In [145]:
class CustomDataset(Dataset):
    
    def __init__(self, filepath) -> None:
        
        # load csv data
        data = pd.read_csv(filepath, header=None)
        X = data.iloc[:, :-1].values
        y = data.iloc[:, -1].values
        
        # feature scaling
        sc = StandardScaler()
        X = sc.fit_transform(X)
        
        # convert to tensors
        self.X = torch.tensor(X, dtype=torch.float32)
        self.y = torch.tensor(y)
    
    def __len__(self):
        return len(self.y)
    
    def __getitem__(self, index):
        return self.X[index], self.y[index]

In [146]:
dataset = CustomDataset('train_all_0.csv')

# create data indices for train val split
data_size = len(dataset)
indices = list(range(data_size))
split = int(np.floor(0.2 * data_size))
np.random.shuffle(indices)
train_indices, val_indices = indices[split:], indices[:split]

train_sampler = SubsetRandomSampler(train_indices)
val_sampler = SubsetRandomSampler(val_indices)

# create data loader
train_loader = DataLoader(dataset, batch_size=16, sampler=train_sampler)
val_loader = DataLoader(dataset, batch_size=16, sampler=val_sampler)

In [147]:
class BinaryClassification(nn.Module):
    def __init__(self, dropout=False, batch_norm=False):
        '''Once at a time'''
        super(BinaryClassification, self).__init__()
        self.layer_1 = nn.Linear(12, 1024) 
        self.layer_out = nn.Linear(1024, 2) 
        self.relu = nn.ReLU()
        
        if dropout:
            self.dropout_1 = nn.Dropout(0.2)
        
        if batch_norm:
            self.batch_norm_1 = nn.BatchNorm1d(128)
        
        self.dropout = dropout
        self.batch_norm = batch_norm
        
    def forward(self, inputs):
        if self.dropout:
            x = self.relu(self.layer_1(inputs))
            x = self.dropout_1(x)
            x = self.layer_out(x)
        
        elif self.dropout:
            x = self.relu(self.layer_1(inputs))
            x = self.batch_norm_1(x)
            x = self.layer_out(x)
            
        else:
            x = self.relu(self.layer_1(inputs))
            x = self.layer_out(x)
            
        return x

In [214]:
# create training pipeline based on page 58
def LG_UA_weight_tune(train_loader=train_loader, val_loader=val_loader, model=None, criterion=None, optimizer=None, device=None, loss_threshold=0.5, eta_threshold=0.0008):
    print('Initialize weight-tuning LG UA model')
    
    previous_train_loss = 10000    

    for epoch in itertools.count():
        
        model.train()
        
        previous_model_params = model.state_dict()
        stop_training = False
        
        while optimizer.param_groups[0]['lr'] > eta_threshold:
            
            train_loss = []
            train_accs = []
            
            for batch in train_loader:
                
                x, y = batch
                
                logits = model(x.to(device))
                loss = criterion(logits, y.to(device))
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                
                acc = (logits.argmax(dim=-1) == y.to(device)).float().mean()
                train_loss.append(loss.item())
                train_accs.append(acc)
            
            train_loss = sum(train_loss) / len(train_loss)
            train_acc = sum(train_accs) / len(train_accs)
            
            if train_loss < previous_train_loss:
                optimizer.param_groups[0]['lr'] *= 1.2
                previous_train_loss = train_loss
                print(f'The previous training loss is: {previous_train_loss}')
                break
            
            optimizer.param_groups[0]['lr'] *= 0.7
            model.load_state_dict(previous_model_params)
            current_lr = optimizer.param_groups[0]['lr']
            print(f'lr shrinking!, now the lr is: {current_lr}')
            
        else:
            stop_training = True
        
        # Use try and except to detect whether the eta_threshold is set too high initially
        try:        
            model.eval()
            valid_loss = []
            valid_accs = []
            
            for batch in val_loader:
                imgs, labels = batch
                
                with torch.no_grad():
                    logits = model(imgs.to(device))
                    
                    acc = (logits.argmax(dim=-1) == labels.to(device)).float().mean()
                    valid_loss.append(loss.item())
                    valid_accs.append(acc)
            
            valid_loss = sum(valid_loss) / len(valid_loss)
            valid_acc = sum(valid_accs) / len(valid_accs)
        
            print(f'[ {epoch+1} ] | train_loss = {train_loss:.5f}, train_acc = {train_acc:.5f}, val_loss = {valid_loss:.5f}, val_acc = {valid_acc:.5f}')
            
        except UnboundLocalError:
            print('Your eta_threshold is setting higher than your learning rate. Reset it with lower one!')
        
        # stopping criterion
        if stop_training:
            print('LG_UA_weight: Learning rate is smaller than the threshold, stop training. Unacceptable')
            return True
        
        if train_loss < loss_threshold:
            print('LG_UA_weight: The training loss is smaller than what you want, stop training. Acceptable')
            return False

In [215]:
def LG_UA_regularization(train_loader=train_loader, val_loader=val_loader, model=None, criterion=None, optimizer=None, device=None, loss_threshold=0.5, eta_threshold=0.0008, l2_lambda=0.001):
    '''
    Based on page 47, it should be L2 regularization and I can actually use "weight_decay" in pytorch optimizer.
    But it's not fun so I will still implement L2 regularization by myself.
    '''
    previous_train_loss = 10000    

    for epoch in itertools.count():
        
        model.train()
        
        previous_model_params = model.state_dict()
        stop_training = False
        
        while optimizer.param_groups[0]['lr'] > eta_threshold:
            
            train_loss = []
            train_accs = []
            
            for batch in train_loader:
                
                x, y = batch
                
                logits = model(x.to(device))
                loss = criterion(logits, y.to(device))
                
                # L2 regularization with normalized l2
                L2_regularization = sum(p.pow(2.0).sum() for p in model.parameters())
                param_num = sum(p.numel() for p in model.parameters())
                loss += (l2_lambda / param_num) * L2_regularization
                
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                
                acc = (logits.argmax(dim=-1) == y.to(device)).float().mean()
                train_loss.append(loss.item())
                train_accs.append(acc)
            
            max_train_loss = max(train_loss)
            train_loss = sum(train_loss) / len(train_loss)
            train_acc = sum(train_accs) / len(train_accs)
            
            
            if train_loss <= previous_train_loss:
                if max_train_loss < loss_threshold:
                    optimizer.param_groups[0]['lr'] *= 1.2
                    previous_train_loss = train_loss
                    # print(f'The previous training loss is: {previous_train_loss}')
                    break
                
                else:
                    model.load_state_dict(previous_model_params)
                    stop_training = True
                    # print(f'max loss: {max_train_loss} | loss_threshold: {loss_threshold}')
                    break
            
            optimizer.param_groups[0]['lr'] *= 0.7
            model.load_state_dict(previous_model_params)
            # current_lr = optimizer.param_groups[0]['lr']
            # print(f'lr shrinking!, now the lr is: {current_lr}')
            
        else:
            stop_training = True
            model.load_state_dict(previous_model_params)
            # print('learning rate < eta_threshold')
        
        # Use try and except to detect whether the eta_threshold is set too high initially
        try:        
            model.eval()
            valid_loss = []
            valid_accs = []
            
            for batch in val_loader:
                imgs, labels = batch
                
                with torch.no_grad():
                    logits = model(imgs.to(device))
                    
                    acc = (logits.argmax(dim=-1) == labels.to(device)).float().mean()
                    valid_loss.append(loss.item())
                    valid_accs.append(acc)
            
            valid_loss = sum(valid_loss) / len(valid_loss)
            valid_acc = sum(valid_accs) / len(valid_accs)
        
            print(f'[ {epoch+1} ] | train_loss = {train_loss:.5f}, train_acc = {train_acc:.5f}, val_loss = {valid_loss:.5f}, val_acc = {valid_acc:.5f}')
            
        except UnboundLocalError:
            print('LG_UA_reg: Your eta_threshold is setting higher than your learning rate. Reset it with lower one!')
        
        # stopping criterion
        if stop_training:
            print('LG_UA_reg: Restore previous model weights, stop training.')
            break

In [216]:
class OrderUnstructured(prune.RandomUnstructured):
    def __init__(self, amount=1, index=0):
        super(OrderUnstructured, self).__init__(amount)
        self.index = index
        
    def compute_mask(self, t, default_mask):
        nparams_toprune = self.amount
        mask = default_mask.clone(memory_format=torch.contiguous_format)
        
        if nparams_toprune != 0:  # k=0 not supported by torch.kthvalue
            mask.view(-1)[self.index:self.amount+self.index] = 0
        
        return mask

In [217]:
def prune_model(model, index=0, amount=1):
    new_model = copy.copy(model)
    module_list = []
    for name, module in new_model.layer_1.named_modules():
        module_list += [(module, 'weight'), (module, 'bias')]
    
    prune.global_unstructured(
    module_list,
    pruning_method=OrderUnstructured,
    amount=amount,
    index=index)
    
    return new_model

In [225]:
def All_r_LG_UA_w_LG_UA(train_loader=train_loader,
                        val_loader=val_loader,
                        model=None,
                        criterion=None,
                        optimizer=None,
                        device=None,
                        loss_threshold=0.5,
                        eta_threshold=0.2,
                        l2_lambda=0.001,
                        k=1,
                        p=50):
    
    prune_index = 0
    while k<p:
        LG_UA_regularization(train_loader, val_loader, model, criterion, optimizer, device, loss_threshold, eta_threshold, l2_lambda)
        saved_model = copy.deepcopy(model)
        
        new_model = prune_model(model, index=prune_index)
        
        unacceptable = LG_UA_weight_tune(train_loader, val_loader, new_model, criterion, optimizer, device, loss_threshold, eta_threshold*0.1)
        
        if unacceptable:
            print('All : Restore Network')
            model = saved_model
            prune_index += 1
            k += 1
        
        else:
            print('Pruning Works!')
            model = new_model
            p -= 1
    
    print('finish training')

In [226]:
device = torch.device('cpu')
model = BinaryClassification().to(device)
optimizer = optim.Adam(model.parameters(), lr=1e-3)
criterion = nn.CrossEntropyLoss()
All_r_LG_UA_w_LG_UA(train_loader, val_loader, model, criterion, optimizer, device, loss_threshold=0.9, eta_threshold=0.008)

LG_UA_reg: Your eta_threshold is setting higher than your learning rate. Reset it with lower one!
LG_UA_reg: Restore previous model weights, stop training.
Initialize weight-tuning LG UA model
The previous training loss is: 0.4830825924873352
[ 1 ] | train_loss = 0.48308, train_acc = 0.79688, val_loss = 0.26112, val_acc = 0.84375
LG_UA_weight: The training loss is smaller than what you want, stop training. Acceptable
Pruning Works!
LG_UA_reg: Your eta_threshold is setting higher than your learning rate. Reset it with lower one!
LG_UA_reg: Restore previous model weights, stop training.
Initialize weight-tuning LG UA model
The previous training loss is: 0.33521613851189613
[ 1 ] | train_loss = 0.33522, train_acc = 0.85938, val_loss = 0.39031, val_acc = 0.84375
LG_UA_weight: The training loss is smaller than what you want, stop training. Acceptable
Pruning Works!
LG_UA_reg: Your eta_threshold is setting higher than your learning rate. Reset it with lower one!
LG_UA_reg: Restore previous m