In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import copy
import random

from TrainingAlgorithm import TrainingAlgo, train
from DataPreprocess import create_dataloader
from model import TwoLayerNN

In [2]:
def All_r_LG_UA_w_LG_UA(model=None,
                        reg_params=None,
                        weight_params=None,
                        l2_lambda=0.001,
                        l1_lambda=0.001,
                        k=1,
                        p=50,
                        num_neurons=128,
                        trainer=None):
    """Alternate regularization with random single-neuron pruning.

    Every round performs, in order:

    1. draw a random neuron index in ``[0, num_neurons - 1]``;
    2. run ``trainer.multiclass_regularization`` on the model;
    3. deep-copy the regularized model as a fallback;
    4. call ``model.del_neuron(index=...)`` with the drawn index;
    5. run ``trainer.multiclass_weight_tuning`` on the pruned model.

    If weight tuning reports ``'Unacceptable'`` the fallback copy is restored
    and ``k`` is incremented; if it reports ``'Acceptable'`` the pruned model
    is kept and ``p`` is decremented.  The loop stops as soon as ``k > p``.

    Args:
        model: network exposing ``del_neuron(index=...)``.
        reg_params: mapping with the keys ``'epochs'``, ``'optimizer'``,
            ``'loss_threshold'`` and ``'eta_threshold'`` forwarded to the
            regularization step.
        weight_params: mapping with the same four keys, forwarded to the
            weight-tuning step.
        l2_lambda: forwarded to the regularization step.
        l1_lambda: forwarded to the regularization step.
        k: starting value of the counter bumped on every rejected prune.
        p: starting value of the bound lowered on every accepted prune.
        num_neurons: number of candidate indices to draw from.  The default
            of 128 reproduces the previously hard-coded ``randint(0, 127)``.
        trainer: object providing ``multiclass_regularization`` and
            ``multiclass_weight_tuning``.  When omitted, the notebook-level
            ``train_algo`` global is used, as before.

    Returns:
        The model held when the loop terminates.

    Raises:
        ValueError: if ``num_neurons`` is smaller than 1.
    """
    if num_neurons < 1:
        raise ValueError("num_neurons must be at least 1, got {}".format(num_neurons))

    if trainer is None:
        # Backward-compatible fallback: earlier callers relied on a global
        # `train_algo` being defined before this function was invoked.
        trainer = train_algo

    # NOTE(review): if `del_neuron` physically removes a unit, the valid index
    # range shrinks after every accepted prune while `num_neurons` stays
    # fixed -- confirm against the model implementation.
    # NOTE(review): the optimizers in `reg_params` / `weight_params` were built
    # by the caller; if `del_neuron` or the deep-copy restore replaces
    # parameter tensors they may no longer track this model -- TODO confirm.
    while k <= p:
        # Drawn before regularization so the order in which the `random`
        # module is consumed matches the earlier implementation.
        prune_index = random.randint(0, num_neurons - 1)

        model = trainer.multiclass_regularization(epochs=reg_params['epochs'],
                                                  model=model,
                                                  optimizer=reg_params['optimizer'],
                                                  loss_threshold=reg_params['loss_threshold'],
                                                  eta_threshold=reg_params['eta_threshold'],
                                                  l2_lambda=l2_lambda,
                                                  l1_lambda=l1_lambda)

        # Snapshot taken after regularization and before pruning, so a
        # rejected prune rolls back only the neuron removal.
        saved_model = copy.deepcopy(model)

        model.del_neuron(index=prune_index)

        situation, model = trainer.multiclass_weight_tuning(epochs=weight_params['epochs'],
                                                            model=model,
                                                            optimizer=weight_params['optimizer'],
                                                            loss_threshold=weight_params['loss_threshold'],
                                                            eta_threshold=weight_params['eta_threshold'])

        if situation == 'Unacceptable':
            # Pruning was rejected: restore the snapshot and count the failure.
            model = saved_model
            k += 1

        elif situation == 'Acceptable':
            # Pruning was accepted: keep the smaller model and tighten the bound.
            p -= 1

    return model

In [3]:
# Use CUDA when it is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Build the network first and move it to the chosen device, then create the
# train/validation loaders (same construction order as before).
model = TwoLayerNN(12, 128, 2, batch_norm=True, dropout=True)
model = model.to(device)
train_loader, val_loader = create_dataloader()

# Initial training: SGD with cross-entropy loss for 20 epochs.
optimizer = optim.SGD(model.parameters(), lr=1e-3)
criterion = nn.CrossEntropyLoss()
epochs = 20

model = train(
    train_loader,
    val_loader,
    model=model,
    epochs=epochs,
    optimizer=optimizer,
    criterion=criterion,
    device=device,
)

[ 1/20 ] | train_loss = 0.87786, train_acc = 0.37500, val_loss = 0.95426, val_acc = 0.39583
[ 2/20 ] | train_loss = 0.79859, train_acc = 0.40104, val_loss = 0.80146, val_acc = 0.42708
[ 3/20 ] | train_loss = 0.75756, train_acc = 0.46354, val_loss = 0.90577, val_acc = 0.51042
[ 4/20 ] | train_loss = 0.72998, train_acc = 0.51562, val_loss = 0.75581, val_acc = 0.55208
[ 5/20 ] | train_loss = 0.68712, train_acc = 0.55729, val_loss = 0.60223, val_acc = 0.62500
[ 6/20 ] | train_loss = 0.66683, train_acc = 0.59375, val_loss = 0.54475, val_acc = 0.64583
[ 7/20 ] | train_loss = 0.67208, train_acc = 0.62240, val_loss = 0.63489, val_acc = 0.67708
[ 8/20 ] | train_loss = 0.64133, train_acc = 0.62240, val_loss = 0.65184, val_acc = 0.75000
[ 9/20 ] | train_loss = 0.61269, train_acc = 0.68229, val_loss = 0.66688, val_acc = 0.72917
[ 10/20 ] | train_loss = 0.61304, train_acc = 0.67708, val_loss = 0.59823, val_acc = 0.75000
[ 11/20 ] | train_loss = 0.61566, train_acc = 0.67188, val_loss = 0.66677, val_

In [4]:
# Settings for the two phases used by the prune loop.

# Regularization phase: RMSprop optimizer; `epochs` is left as None.
reg_epochs = None
reg_eta_threshold = 1e-7
reg_loss_threshold = 0.7
reg_optimizer = optim.RMSprop(model.parameters(), lr=1e-3)
reg = dict(optimizer=reg_optimizer,
           loss_threshold=reg_loss_threshold,
           eta_threshold=reg_eta_threshold,
           epochs=reg_epochs)

# Weight-tuning phase: Adam optimizer, 10 epochs.
weight_epochs = 10
weight_eta_threshold = 1e-7
weight_loss_threshold = 0.32
weight_optimizer = optim.Adam(model.parameters(), lr=1e-3)
weight = dict(optimizer=weight_optimizer,
              loss_threshold=weight_loss_threshold,
              eta_threshold=weight_eta_threshold,
              epochs=weight_epochs)

# The trainer must exist before the prune loop runs, because the loop reads
# the notebook-level `train_algo`.
criterion = nn.CrossEntropyLoss()
train_algo = TrainingAlgo(train_loader, val_loader, criterion, device)

model = All_r_LG_UA_w_LG_UA(
    model=model,
    reg_params=reg,
    weight_params=weight,
    l2_lambda=0.001,
    p=50,
)

--------initializing regularization--------
max loss:0.7474372386932373 > threshold0.7, stop training.
[ 1 ] | train_loss = 0.46733, train_acc = 0.78906, val_loss = 0.35653, val_acc = 0.82292
--------initializing weight tuning--------
[ 1/10 ] | train_loss = 0.64033, train_acc = 0.64323, val_loss = 0.75937, val_acc = 0.70833
[ 2/10 ] | train_loss = 0.62716, train_acc = 0.65365, val_loss = 0.79698, val_acc = 0.70833
[ 3/10 ] | train_loss = 0.60881, train_acc = 0.68229, val_loss = 0.54627, val_acc = 0.72917
[ 4/10 ] | train_loss = 0.59524, train_acc = 0.68490, val_loss = 0.55156, val_acc = 0.70833
[ 5/10 ] | train_loss = 0.57795, train_acc = 0.69010, val_loss = 0.41097, val_acc = 0.76042
[ 6/10 ] | train_loss = 0.57630, train_acc = 0.69271, val_loss = 0.54695, val_acc = 0.70833
learning rate < threshold
[ 7/10 ] | train_loss = 0.60349, train_acc = 0.68490, val_loss = 0.58844, val_acc = 0.72917
--------initializing regularization--------
[ 1 ] | train_loss = 0.40611, train_acc = 0.82292, 

In [6]:
# Train whatever `model` currently holds for another 20 epochs, this time
# with a freshly constructed Adam optimizer and cross-entropy loss.
optimizer = optim.Adam(model.parameters(), lr=1e-3)
criterion = nn.CrossEntropyLoss()
epochs = 20

model = train(
    train_loader,
    val_loader,
    model=model,
    epochs=epochs,
    optimizer=optimizer,
    criterion=criterion,
    device=device,
)

[ 1/20 ] | train_loss = 0.29214, train_acc = 0.88542, val_loss = 0.54926, val_acc = 0.85417
[ 2/20 ] | train_loss = 0.30416, train_acc = 0.86198, val_loss = 0.48569, val_acc = 0.85417
[ 3/20 ] | train_loss = 0.28998, train_acc = 0.87500, val_loss = 0.42711, val_acc = 0.85417
[ 4/20 ] | train_loss = 0.27950, train_acc = 0.89062, val_loss = 0.21312, val_acc = 0.85417
[ 5/20 ] | train_loss = 0.30202, train_acc = 0.87500, val_loss = 0.08801, val_acc = 0.85417
[ 6/20 ] | train_loss = 0.27085, train_acc = 0.88021, val_loss = 0.30946, val_acc = 0.85417
[ 7/20 ] | train_loss = 0.30973, train_acc = 0.86458, val_loss = 0.43956, val_acc = 0.86458
[ 8/20 ] | train_loss = 0.27525, train_acc = 0.90365, val_loss = 0.19218, val_acc = 0.86458
[ 9/20 ] | train_loss = 0.29933, train_acc = 0.87500, val_loss = 0.18441, val_acc = 0.84375
[ 10/20 ] | train_loss = 0.30144, train_acc = 0.86198, val_loss = 0.57029, val_acc = 0.84375
[ 11/20 ] | train_loss = 0.29127, train_acc = 0.88021, val_loss = 0.31907, val_