In [31]:
import torch
import torch.nn as nn
import torch.optim as optim
import copy
import random

from TrainingAlgorithm import TrainingAlgo, train
from DataPreprocess import create_dataloader
from model import TwoLayerNN

In [32]:
def _current_hidden_width(model, default=128):
    """Return ``model.layer_1.out_features`` when available, else ``default``.

    ``default`` keeps the historical hard-coded width (indices 0..127) for
    models that do not expose a ``layer_1`` with an ``out_features`` attribute.
    """
    hidden_layer = getattr(model, 'layer_1', None)
    width = getattr(hidden_layer, 'out_features', None)
    return default if width is None else width


def All_r_LG_UA_w_LG_UA(model=None,
                        reg_params=None,
                        weight_params=None,
                        l2_lambda=0.001,
                        l1_lambda=0.001,
                        k=1,
                        p=50,
                        trainer=None):
    """Alternate regularization and random single-neuron pruning until ``k > p``.

    Each round:
      1. runs ``multiclass_regularization`` on the current model,
      2. deep-copies the regularized model as a rollback point,
      3. removes one randomly chosen hidden neuron with ``model.del_neuron``,
      4. runs ``multiclass_weight_tuning`` on the pruned model.

    If weight tuning reports ``'Unacceptable'`` the rollback copy replaces the
    pruned model and ``k`` is incremented; if it reports ``'Acceptable'`` the
    pruned model is kept and ``p`` is decremented. Any other value leaves both
    counters unchanged and the loop continues with the pruned model.

    Args:
        model: Network to prune. Must provide ``del_neuron(index=...)``.
        reg_params: Mapping with keys ``'epochs'``, ``'optimizer'``,
            ``'loss_threshold'`` and ``'eta_threshold'`` for the
            regularization phase.
        weight_params: Mapping with the same four keys for weight tuning.
        l2_lambda: Forwarded to ``multiclass_regularization``.
        l1_lambda: Forwarded to ``multiclass_regularization``.
        k: Starting value of the counter incremented on rejected prunes.
        p: Starting value of the bound decremented on accepted prunes.
        trainer: Object providing ``multiclass_regularization`` and
            ``multiclass_weight_tuning``. When omitted, the notebook-level
            ``train_algo`` global is used, as before.

    Returns:
        The model held when the loop stops.

    NOTE(review): the optimizers inside ``reg_params`` / ``weight_params`` are
    created by the caller from one specific model's parameters. After
    ``del_neuron`` or a rollback to the deep copy they may point at stale
    tensors unless the trainer rebinds them -- confirm in TrainingAlgo.
    """
    # Prefer an explicitly supplied trainer; fall back to the kernel global so
    # existing calls keep working.
    algo = trainer if trainer is not None else train_algo

    while k <= p:
        model = algo.multiclass_regularization(epochs=reg_params['epochs'],
                                               model=model,
                                               optimizer=reg_params['optimizer'],
                                               loss_threshold=reg_params['loss_threshold'],
                                               eta_threshold=reg_params['eta_threshold'],
                                               l2_lambda=l2_lambda,
                                               l1_lambda=l1_lambda)

        hidden_width = _current_hidden_width(model)
        if hidden_width <= 1:
            # Removing the last hidden unit would leave an empty layer.
            break

        # Rollback point in case the pruned network cannot be tuned back.
        snapshot = copy.deepcopy(model)

        # Draw the index from the *current* width: the layer shrinks after
        # every accepted prune, so a fixed 0..127 range goes out of bounds.
        prune_index = random.randrange(hidden_width)
        model.del_neuron(index=prune_index)

        situation, model = algo.multiclass_weight_tuning(epochs=weight_params['epochs'],
                                                         model=model,
                                                         optimizer=weight_params['optimizer'],
                                                         loss_threshold=weight_params['loss_threshold'],
                                                         eta_threshold=weight_params['eta_threshold'])

        if situation == 'Unacceptable':
            model = snapshot
            k += 1
        elif situation == 'Acceptable':
            p -= 1

    return model

In [33]:
# Seed the RNGs this notebook draws from (Python's `random` picks the neuron
# to prune; torch drives weight init and dropout) so that a
# Restart & Run All reproduces the same run.
SEED = 42
random.seed(SEED)
torch.manual_seed(SEED)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Positional sizes are presumably (input, hidden, output) -- verify against
# model.TwoLayerNN.
model = TwoLayerNN(12, 128, 2, batch_norm=True, dropout=True).to(device)
train_loader, val_loader = create_dataloader()

# Initial training with plain SGD, before any pruning.
epochs = 20
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=1e-3)

model = train(train_loader, val_loader, model=model, epochs=epochs, optimizer=optimizer, criterion=criterion, device=device)

[ 1/20 ] | train_loss = 0.71637, train_acc = 0.53906, val_loss = 0.60543, val_acc = 0.53125
[ 2/20 ] | train_loss = 0.69476, train_acc = 0.59896, val_loss = 0.76493, val_acc = 0.56250
[ 3/20 ] | train_loss = 0.68617, train_acc = 0.58073, val_loss = 0.57557, val_acc = 0.54167
[ 4/20 ] | train_loss = 0.64447, train_acc = 0.61719, val_loss = 0.62720, val_acc = 0.48958
[ 5/20 ] | train_loss = 0.63200, train_acc = 0.64583, val_loss = 0.60540, val_acc = 0.51042
[ 6/20 ] | train_loss = 0.63473, train_acc = 0.60938, val_loss = 0.85535, val_acc = 0.58333
[ 7/20 ] | train_loss = 0.63659, train_acc = 0.65104, val_loss = 0.61837, val_acc = 0.63542
[ 8/20 ] | train_loss = 0.62294, train_acc = 0.65625, val_loss = 0.59291, val_acc = 0.62500
[ 9/20 ] | train_loss = 0.61268, train_acc = 0.67188, val_loss = 0.62639, val_acc = 0.63542
[ 10/20 ] | train_loss = 0.60073, train_acc = 0.68750, val_loss = 0.66769, val_acc = 0.62500
[ 11/20 ] | train_loss = 0.60198, train_acc = 0.70573, val_loss = 0.66845, val_

In [34]:
# Hyperparameters for the two phases that alternate inside the pruning loop.

# Regularization phase.
reg_epochs = None
reg_loss_threshold = 0.7
reg_eta_threshold = 1e-7
reg_optimizer = optim.RMSprop(model.parameters(), lr=1e-3)

# Weight-tuning phase (runs after each neuron removal).
weight_epochs = 10
weight_loss_threshold = 0.8
weight_eta_threshold = 1e-10
weight_optimizer = optim.RMSprop(model.parameters(), lr=1e-4)

# Bundle each phase's settings under the keys the pruning routine reads.
reg = dict(optimizer=reg_optimizer,
           loss_threshold=reg_loss_threshold,
           eta_threshold=reg_eta_threshold,
           epochs=reg_epochs)

weight = dict(optimizer=weight_optimizer,
              loss_threshold=weight_loss_threshold,
              eta_threshold=weight_eta_threshold,
              epochs=weight_epochs)

# The pruning routine looks up `train_algo` as a global, so it must exist
# before the call below.
train_algo = TrainingAlgo(train_loader, val_loader, criterion, device)
model = All_r_LG_UA_w_LG_UA(model=model,
                            reg_params=reg,
                            weight_params=weight,
                            l2_lambda=0.001,
                            p=50)

--------initializing regularization--------
max loss:0.9194713830947876 > threshold0.7, stop training.
[ 1 ] | train_loss = 0.48026, train_acc = 0.79427, val_loss = 0.47539, val_acc = 0.85417
--------initializing weight tuning--------
[ 1/10 ] | train_loss = 0.58894, train_acc = 0.71615, val_loss = 0.49311, val_acc = 0.66667
[ 2/10 ] | train_loss = 0.57623, train_acc = 0.71094, val_loss = 0.35288, val_acc = 0.64583
[ 3/10 ] | train_loss = 0.55913, train_acc = 0.70573, val_loss = 0.39135, val_acc = 0.64583
learning rate < threshold
[ 4/10 ] | train_loss = 0.60420, train_acc = 0.68750, val_loss = 0.60055, val_acc = 0.62500
--------initializing regularization--------
[ 1 ] | train_loss = 0.39328, train_acc = 0.82292, val_loss = 0.49877, val_acc = 0.86458
[ 2 ] | train_loss = 0.36809, train_acc = 0.86979, val_loss = 0.43057, val_acc = 0.86458
[ 3 ] | train_loss = 0.36730, train_acc = 0.83854, val_loss = 0.30619, val_acc = 0.85417
learning <= threshold, stop training.
[ 4 ] | train_loss = 0

In [35]:
# Print the module structure of the model returned by the pruning loop to see
# how many hidden units are left.
print(model)

TwoLayerNN(
  (layer_1): Linear(in_features=12, out_features=110, bias=True)
  (layer_out): Linear(in_features=110, out_features=2, bias=True)
  (dropout): Dropout(p=0.2, inplace=False)
  (bn): BatchNorm1d(110, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)


In [36]:
# Train the model returned by the pruning loop for another 20 epochs, this
# time with Adam and a freshly created cross-entropy loss.
criterion = nn.CrossEntropyLoss()
epochs = 20
optimizer = optim.Adam(model.parameters(), lr=1e-3)

model = train(
    train_loader,
    val_loader,
    model=model,
    epochs=epochs,
    optimizer=optimizer,
    criterion=criterion,
    device=device,
)

[ 1/20 ] | train_loss = 0.52987, train_acc = 0.76562, val_loss = 0.44886, val_acc = 0.81250
[ 2/20 ] | train_loss = 0.47808, train_acc = 0.78125, val_loss = 0.44220, val_acc = 0.83333
[ 3/20 ] | train_loss = 0.42750, train_acc = 0.82552, val_loss = 0.46887, val_acc = 0.83333
[ 4/20 ] | train_loss = 0.41740, train_acc = 0.81771, val_loss = 0.33475, val_acc = 0.83333
[ 5/20 ] | train_loss = 0.38667, train_acc = 0.83594, val_loss = 0.42429, val_acc = 0.85417
[ 6/20 ] | train_loss = 0.37968, train_acc = 0.83854, val_loss = 0.31412, val_acc = 0.87500
[ 7/20 ] | train_loss = 0.35147, train_acc = 0.84635, val_loss = 0.45228, val_acc = 0.89583
[ 8/20 ] | train_loss = 0.33316, train_acc = 0.86719, val_loss = 0.26436, val_acc = 0.91667
[ 9/20 ] | train_loss = 0.32480, train_acc = 0.87240, val_loss = 0.27872, val_acc = 0.91667
[ 10/20 ] | train_loss = 0.32091, train_acc = 0.84896, val_loss = 0.37936, val_acc = 0.89583
[ 11/20 ] | train_loss = 0.32304, train_acc = 0.87500, val_loss = 0.50618, val_