In [8]:
import torch
import torch.nn as nn
import torch.optim as optim
import copy
import random

from TrainingAlgorithm import TrainingAlgo, train
from DataPreprocess import create_dataloader
from model import TwoLayerNN

In [9]:
def All_r_LG_UA_w_LG_UA(model=None,
                        reg_params=None,
                        weight_params=None,
                        l2_lambda=0.001,
                        l1_lambda=0.001,
                        k=1,
                        p=50,
                        algo=None):
    """Alternate regularization with random single-neuron pruning.

    While ``k <= p`` each pass does the following:

    1. run ``multiclass_regularization`` on the current model;
    2. snapshot the regularized model with ``copy.deepcopy``;
    3. delete one randomly chosen hidden neuron with ``model.del_neuron``;
    4. run ``multiclass_weight_tuning`` on the pruned model.

    When weight tuning reports ``'Unacceptable'`` the snapshot is restored and
    ``k`` is incremented; when it reports ``'Acceptable'`` the pruned model is
    kept and ``p`` is decremented. Any other reported value keeps the pruned
    model and changes neither counter.

    Args:
        model: network exposing ``del_neuron(index=...)`` and a ``layer_1``
            attribute whose ``out_features`` is the current hidden width.
        reg_params: mapping with the keys ``'epochs'``, ``'optimizer'``,
            ``'loss_threshold'`` and ``'eta_threshold'``, forwarded to the
            regularization step.
        weight_params: mapping with the same four keys, forwarded to the
            weight-tuning step.
        l2_lambda: forwarded to the regularization step.
        l1_lambda: forwarded to the regularization step.
        k: starting value of the counter that grows on every rejected prune.
        p: starting value of the counter that shrinks on every accepted prune.
        algo: optional object providing ``multiclass_regularization`` and
            ``multiclass_weight_tuning``. When omitted, the notebook-level
            ``train_algo`` global is used, exactly as before.

    Returns:
        The model left after the loop ends.
    """
    # Fall back to the notebook global so existing calls keep working.
    trainer = algo if algo is not None else train_algo

    # NOTE(review): the optimizers inside reg_params / weight_params are built
    # by the caller. Whether they still track the right tensors after
    # del_neuron or after restoring the deep copy depends on TrainingAlgo and
    # del_neuron, which are not visible here -- confirm.
    while k <= p:
        # The hidden layer shrinks with every accepted prune, so the index has
        # to be drawn from the current width, not from the initial one.
        hidden_width = model.layer_1.out_features
        if hidden_width <= 1:
            # Deleting the only remaining neuron would leave an empty layer.
            break
        prune_index = random.randint(0, hidden_width - 1)

        model = trainer.multiclass_regularization(epochs=reg_params['epochs'],
                                                  model=model,
                                                  optimizer=reg_params['optimizer'],
                                                  loss_threshold=reg_params['loss_threshold'],
                                                  eta_threshold=reg_params['eta_threshold'],
                                                  l2_lambda=l2_lambda,
                                                  l1_lambda=l1_lambda)

        # Snapshot taken after regularization: a rejected prune rolls back to
        # the regularized model, not to the model from before this pass.
        saved_model = copy.deepcopy(model)

        model.del_neuron(index=prune_index)

        situation, model = trainer.multiclass_weight_tuning(epochs=weight_params['epochs'],
                                                            model=model,
                                                            optimizer=weight_params['optimizer'],
                                                            loss_threshold=weight_params['loss_threshold'],
                                                            eta_threshold=weight_params['eta_threshold'])

        if situation == 'Unacceptable':
            model = saved_model
            k += 1

        elif situation == 'Acceptable':
            p -= 1

    return model

In [10]:
# Seed the RNGs used by this notebook so that Restart & Run All repeats the
# same run instead of producing a different model every time.
SEED = 42
random.seed(SEED)        # random.randint picks the neuron to prune
torch.manual_seed(SEED)  # torch-side randomness such as weight initialisation
# NOTE(review): if create_dataloader draws from another RNG (e.g. NumPy),
# seed that one as well -- its implementation is not visible here.

# Layer sizes of the starting network.
INPUT_DIM, HIDDEN_DIM, OUTPUT_DIM = 12, 128, 1

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = TwoLayerNN(INPUT_DIM, HIDDEN_DIM, OUTPUT_DIM, batch_norm=True, dropout=True).to(device)
train_loader, val_loader = create_dataloader()

# initial training
epochs = 20
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.SGD(model.parameters(), lr=1e-3)

model = train(train_loader, val_loader, model=model, epochs=epochs, optimizer=optimizer, criterion=criterion, device=device)

[ 1/20 ] | train_loss = 0.80638, train_acc = 0.42708, val_loss = 0.77759, val_acc = 0.39583
[ 2/20 ] | train_loss = 0.78011, train_acc = 0.44271, val_loss = 0.74927, val_acc = 0.41667
[ 3/20 ] | train_loss = 0.73371, train_acc = 0.47917, val_loss = 0.72049, val_acc = 0.43750
[ 4/20 ] | train_loss = 0.74466, train_acc = 0.48438, val_loss = 0.68351, val_acc = 0.51042
[ 5/20 ] | train_loss = 0.68992, train_acc = 0.53385, val_loss = 0.63668, val_acc = 0.48958
[ 6/20 ] | train_loss = 0.69573, train_acc = 0.53385, val_loss = 0.64471, val_acc = 0.52083
[ 7/20 ] | train_loss = 0.66619, train_acc = 0.57812, val_loss = 0.68900, val_acc = 0.52083
[ 8/20 ] | train_loss = 0.64704, train_acc = 0.60156, val_loss = 0.65994, val_acc = 0.51042
[ 9/20 ] | train_loss = 0.62945, train_acc = 0.64583, val_loss = 0.69853, val_acc = 0.54167
[ 10/20 ] | train_loss = 0.61951, train_acc = 0.65104, val_loss = 0.66752, val_acc = 0.57292
[ 11/20 ] | train_loss = 0.63160, train_acc = 0.60417, val_loss = 0.57356, val_

In [11]:
# Settings forwarded to the regularization step (passed as reg_params).
reg_optimizer = optim.RMSprop(model.parameters(), lr=1e-3)
reg_epochs = None
reg_loss_threshold = 0.7
reg_eta_threshold = 1e-7

# Settings forwarded to the weight-tuning step (passed as weight_params).
weight_optimizer = optim.RMSprop(model.parameters(), lr=2e-3)
weight_epochs = 10
weight_loss_threshold = 0.8
weight_eta_threshold = 1e-10

reg = dict(
    optimizer=reg_optimizer,
    loss_threshold=reg_loss_threshold,
    eta_threshold=reg_eta_threshold,
    epochs=reg_epochs,
)

weight = dict(
    optimizer=weight_optimizer,
    loss_threshold=weight_loss_threshold,
    eta_threshold=weight_eta_threshold,
    epochs=weight_epochs,
)

# The pruning routine reads `train_algo` from the notebook namespace, so it
# has to exist before the call below.
train_algo = TrainingAlgo(train_loader, val_loader, criterion, device)
model = All_r_LG_UA_w_LG_UA(
    model,
    reg,
    weight,
    l2_lambda=0.001,
    p=50,
)

--------initializing regularization--------
max loss:0.8717529773712158 > threshold0.7, stop training.
[ 1 ] | train_loss = 0.49213, train_acc = 0.77604, val_loss = 0.51689, val_acc = 0.81250
--------initializing weight tuning--------
[ 1/10 ] | train_loss = 0.57818, train_acc = 0.72917, val_loss = 0.44364, val_acc = 0.71875
[ 2/10 ] | train_loss = 0.57758, train_acc = 0.72917, val_loss = 0.77102, val_acc = 0.71875
[ 3/10 ] | train_loss = 0.57097, train_acc = 0.73438, val_loss = 0.49953, val_acc = 0.70833
--------initializing regularization--------
max loss:0.7812615633010864 > threshold0.7, stop training.
[ 1 ] | train_loss = 0.58325, train_acc = 0.69271, val_loss = 0.75738, val_acc = 0.71875
--------initializing weight tuning--------
[ 1/10 ] | train_loss = 0.59740, train_acc = 0.69792, val_loss = 0.44062, val_acc = 0.71875
[ 2/10 ] | train_loss = 0.57585, train_acc = 0.72656, val_loss = 0.79063, val_acc = 0.69792
[ 3/10 ] | train_loss = 0.57375, train_acc = 0.74479, val_loss = 0.571

In [12]:
# Print the module summary of the model returned by the pruning cell above.
print(model)

TwoLayerNN(
  (layer_1): Linear(in_features=12, out_features=91, bias=True)
  (layer_out): Linear(in_features=91, out_features=1, bias=True)
  (dropout): Dropout(p=0.2, inplace=False)
  (bn): BatchNorm1d(91, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)


In [13]:
# Run the plain training loop once more on the current model, using a freshly
# built SGD optimizer over its parameters.
criterion = nn.BCEWithLogitsLoss()
epochs = 20
optimizer = optim.SGD(model.parameters(), lr=1e-3)

model = train(
    train_loader,
    val_loader,
    model=model,
    epochs=epochs,
    optimizer=optimizer,
    criterion=criterion,
    device=device,
)

[ 1/20 ] | train_loss = 0.60084, train_acc = 0.71615, val_loss = 0.62785, val_acc = 0.64583
[ 2/20 ] | train_loss = 0.59332, train_acc = 0.68229, val_loss = 0.60762, val_acc = 0.64583
[ 3/20 ] | train_loss = 0.59883, train_acc = 0.69792, val_loss = 0.43327, val_acc = 0.65625
[ 4/20 ] | train_loss = 0.59171, train_acc = 0.71875, val_loss = 0.65076, val_acc = 0.67708
[ 5/20 ] | train_loss = 0.58432, train_acc = 0.70312, val_loss = 0.59891, val_acc = 0.69792
[ 6/20 ] | train_loss = 0.56788, train_acc = 0.71875, val_loss = 0.52334, val_acc = 0.69792
[ 7/20 ] | train_loss = 0.56887, train_acc = 0.74479, val_loss = 0.56552, val_acc = 0.69792
[ 8/20 ] | train_loss = 0.57621, train_acc = 0.70833, val_loss = 0.54646, val_acc = 0.70833
[ 9/20 ] | train_loss = 0.55905, train_acc = 0.75260, val_loss = 0.71248, val_acc = 0.68750
[ 10/20 ] | train_loss = 0.56056, train_acc = 0.75000, val_loss = 0.64607, val_acc = 0.69792
[ 11/20 ] | train_loss = 0.56949, train_acc = 0.72396, val_loss = 0.55362, val_