In [1]:
!pip install adabelief-pytorch==0.1.0



In [2]:
import time
import torch
import timeit
import numpy as np
import torch.nn as nn
import torch.optim as optim
from adabelief_pytorch import AdaBelief
from torch.utils import data
import matplotlib.pyplot as plt
import torch.nn.functional as F
from collections import namedtuple
from torch.autograd import Variable
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, datasets, models
from torch.optim.lr_scheduler import StepLR, ReduceLROnPlateau

%matplotlib inline
np.random.seed(2018)

In [3]:
train = np.load('C:/Users/malji/Google Drive/Colab Notebooks/Hw3/ntrain.npy',allow_pickle=True)
train_labels = np.load('C:/Users/malji/Google Drive/Colab Notebooks/Hw3/ntrain_labels.npy',allow_pickle=True)
val = np.load('C:/Users/malji/Google Drive/Colab Notebooks/Hw3/nval.npy',allow_pickle=True)
val_labels = np.load('C:/Users/malji/Google Drive/Colab Notebooks/Hw3/nval_labels.npy',allow_pickle=True)
ntest = np.load('C:/Users/malji/Google Drive/Colab Notebooks/Hw3/ntest.npy',allow_pickle=True)
phones = np.loadtxt("C:/Users/malji/Google Drive/Colab Notebooks/Hw3/phones.txt", dtype=str)

In [4]:
class PhonesDataset(Dataset):
  def __init__(self, x,y,k):             
    self.x = x
    self.y = y
    self.k = k
    self.inputs = []
    self.lab = np.array([])
    self.idx = []
    self._init_dataset()
      
  def __len__(self):
    return len(self.lab)

  def __getitem__(self,index):
    i = self.idx[index]
    return torch.from_numpy(np.concatenate(self.inputs[i-self.k:i+self.k+1],axis=0)).float(),torch.tensor(self.lab[index]).long()
  
  def _init_dataset(self):
    idx=0
    for i in range(len(self.x)):
      start_time = time.time()#######
      s = len(self.x[i])
      x=np.pad(self.x[i],((self.k, self.k), (0, 0)), 'constant', constant_values=0)

      idx += 2*self.k
      for j in range(s):           
        self.idx+=[idx-self.k]
        idx += 1

      self.inputs+=list(x)  
      self.lab = np.append(self.lab,self.y[i])
      end_time = time.time()#######  
    return np.array(self.inputs), self.lab

In [5]:
class TestDataset(Dataset):
  def __init__(self, x,k):       
    self.x = x
    self.k = k
    self.inputs = []
    self.idx = []
    self._init_dataset()
      
  def __len__(self):
    return len(self.idx)

  def __getitem__(self,index):
    idx = self.idx[index]
    return torch.from_numpy(np.concatenate(self.inputs[idx-self.k:idx+self.k+1],axis=0)).float()
  
  def _init_dataset(self):
    idx=0
    for i in range(len(self.x)):
      start_time = time.time()#######
      s = len(self.x[i])
      x=np.pad(self.x[i],((self.k, self.k), (0, 0)), 'constant', constant_values=0)

      idx += 2*self.k
      for j in range(s):           
        self.idx+=[idx-self.k]
        idx += 1

      self.inputs+=list(x)  
      end_time = time.time()#######
      if i==1000:##########
        print('Time: ',end_time - start_time, 's') #######    
    return np.array(self.inputs)

In [6]:
# def save_data(loader):
#     print('saving data...')
    
#     for i, (x, y) in enumerate(loader):
        
#         x=x.view(-1, 13).numpy()
#         y = y.numpy()
#         if i==0:
#             data = np.array(x)
#             label = np.array(y)
#         else:     
#             data = np.concatenate((data, x))
#             label = np.concatenate((label, y))
#     return data, label


In [7]:
# #saving datas
# start_time = time.time()
# print("saving data...")
# data, labels = save_data(train_loader)
# np.save('C:/Users/malji/Google Drive/Colab Notebooks/Hw3/train_data_new.npy', data)
# np.save('C:/Users/malji/Google Drive/Colab Notebooks/Hw3/train_labels_new.npy', labels)
# print("train data savied in:")
# print("--- %s seconds ---" % (time.time() - start_time))
# print("\n===================================\n")


In [8]:
cuda = torch.cuda.is_available()
num_workers = 0 #8 if cuda else 0 
    
# Training
start_time = time.time()
train_dataset = PhonesDataset(train, train_labels,100)
train_loader_args = dict(shuffle=True, batch_size=256, num_workers=num_workers, pin_memory=True)
train_loader = data.DataLoader(train_dataset, **train_loader_args)
print("taken time: %s seconds ---" % (time.time() - start_time))

taken time: 805.5183613300323 seconds ---


In [9]:
# Validation
start_time = time.time()
num_workers = 0 #8 
val_dataset = PhonesDataset(val, val_labels,100)
val_loader_args = dict(shuffle=False, batch_size=256, num_workers=num_workers, pin_memory=True)
val_loader = data.DataLoader(val_dataset, **val_loader_args)
print("taken time: %s seconds ---" % (time.time() - start_time))

taken time: 7.28446364402771 seconds ---


In [10]:
# Testing
start_time = time.time()
test_dataset = TestDataset(ntest,100)
test_loader_args = dict(shuffle=False, batch_size=1, num_workers=num_workers, pin_memory=True)
test_loader = data.DataLoader(test_dataset, **test_loader_args)
print("taken time: %s seconds ---" % (time.time() - start_time))

Time:  0.0009958744049072266 s
taken time: 2.190014362335205 seconds ---


In [11]:
def init_xavier(m):
  if type(m) == nn.Linear:
    fan_in = m.weight.size()[1]
    fan_out = m.weight.size()[0]
    std = np.sqrt(1.0/(fan_in + fan_out))
    m.weight.data.normal_(0,std)


In [12]:
def init_hey(m):
  if type(m) == nn.Linear:
    fan_in = m.weight.size()[1]
    fan_out = m.weight.size()[0]
    std = np.sqrt(2.0/(fan_in + fan_out))
    m.weight.data.normal_(0,std)


In [13]:
class PhonesModel(nn.Module):
  # try changing 32 to 128
  def __init__(self):
    super(PhonesModel, self).__init__()
    self.fc1 = nn.Linear(2613, 2048 )
    self.bnorm1 = nn.BatchNorm1d(2048 )
    self.dp1 = nn.Dropout(p=0.1)
    self.fc2 = nn.Linear(2048 , 1024 )
    self.bnorm2 = nn.BatchNorm1d(1024 )
    self.dp2 = nn.Dropout(p=0.1)
    self.fc3 = nn.Linear(1024 , 1024 )
    self.bnorm3 = nn.BatchNorm1d(1024 )
    self.dp3 = nn.Dropout(p=0.1)
    self.fc4 = nn.Linear(1024 , 1024 )
    self.bnorm4 = nn.BatchNorm1d(1024 )
    self.dp4 = nn.Dropout(p=0.1)
    self.fc5 = nn.Linear(1024 , 1024 )
    self.bnorm5 = nn.BatchNorm1d(1024 )
    self.dp5 = nn.Dropout(p=0.1)
    self.fc6 = nn.Linear(1024, 512 )
    self.bnorm6 = nn.BatchNorm1d(512 )
    self.dp6 = nn.Dropout(p=0.1)
    self.fc7 = nn.Linear(512, 256 )
    self.bnorm7 = nn.BatchNorm1d(256 )
    self.dp7 = nn.Dropout(p=0.1)
    self.fc8 = nn.Linear(256 , 346)
  
  def forward(self, x):
    x = F.gelu(self.fc1(x))
    x = self.dp1(self.bnorm1(x))
    x = F.gelu(self.fc2(x))
    x = self.dp2(self.bnorm2(x))
    x = F.gelu(self.fc3(x))
    x = self.dp3(self.bnorm3(x))
    x = F.gelu(self.fc4(x))
    x = self.dp4(self.bnorm4(x))
    x = F.gelu(self.fc5(x))
    x = self.dp5(self.bnorm5(x))
    x = F.gelu(self.fc6(x))
    x = self.dp6(self.bnorm6(x))
    x = F.gelu(self.fc7(x))
    x = self.dp7(self.bnorm7(x))
    x = F.log_softmax(self.fc8(x))
    return x

In [14]:
model = PhonesModel()
model.apply(init_xavier)
criterion = nn.CrossEntropyLoss()
# optimizer = optim.Adam(model.parameters())
optimizer = AdaBelief(model.parameters(), lr=1e-3, eps=1e-16, betas=(0.9,0.999), weight_decouple = True, rectify = False)
scheduler = StepLR(optimizer, step_size=5, gamma=0.5)
#scheduler = ReduceLROnPlateau(optimizer, mode='max', factor=0.1, patience=2, verbose=True)
device = torch.device("cuda" if cuda else "cpu")
model.to(device)
print(model)


[31mPlease check your arguments if you have upgraded adabelief-pytorch from version 0.0.5.
[31mModifications to default arguments:
[31m                           eps  weight_decouple    rectify
-----------------------  -----  -----------------  ---------
adabelief-pytorch=0.0.5  1e-08  False              False
Current version (0.1.0)  1e-16  True               True
[31mFor a complete table of recommended hyperparameters, see
[31mhttps://github.com/juntang-zhuang/Adabelief-Optimizer
[0m
Weight decoupling enabled in AdaBelief
PhonesModel(
  (fc1): Linear(in_features=2613, out_features=2048, bias=True)
  (bnorm1): BatchNorm1d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (dp1): Dropout(p=0.1, inplace=False)
  (fc2): Linear(in_features=2048, out_features=1024, bias=True)
  (bnorm2): BatchNorm1d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (dp2): Dropout(p=0.1, inplace=False)
  (fc3): Linear(in_features=1024, out_features=1024, bias

In [15]:
def train_epoch(model, train_loader, criterion, optimizer):
  model.train()

  running_loss = 0.0
  total_predictions = 0.0
  correct_predictions = 0.0
  model.to(device)
  
  start_time = time.time()
  
  # Print Learning Rate
  
  for batch_idx, (data, target) in enumerate(train_loader):   
    optimizer.zero_grad()   # .backward() accumulates gradients
    data = data.to(device)
    target = target.to(device) # all data & model on same device

    outputs = model(data)
    _, predicted = torch.max(outputs.data, 1)
    
    total_predictions += target.size(0)
    correct_predictions += (predicted == target).sum().item()
    
    loss = criterion(outputs, target)
    running_loss += loss.item()

    loss.backward()
    optimizer.step()
  scheduler.step()
  end_time = time.time()
  
  running_loss /= len(train_loader)
  acc = (correct_predictions/total_predictions)*100.0
  print('Training Loss: ', running_loss, 'Time: ',end_time - start_time, 's')
  print('Training Accuracy: ', acc, '%')
  return running_loss,acc


In [16]:
def val_model(model, val_loader, criterion):
  with torch.no_grad():
    model.eval()
    model.to(device)

    running_loss = 0.0
    total_predictions = 0.0
    correct_predictions = 0.0

    for batch_idx, (data, target) in enumerate(val_loader):   
      data = data.to(device)
      target = target.to(device)

      outputs = model(data)

      _, predicted = torch.max(outputs.data, 1)
      total_predictions += target.size(0)
      correct_predictions += (predicted == target).sum().item()

      loss = criterion(outputs, target).detach()
      running_loss += loss.item()


    running_loss /= len(val_loader)
    acc = (correct_predictions/total_predictions)*100.0
    print('Validation Loss: ', running_loss)
    print('Validation Accuracy: ', acc, '%')
    return running_loss, acc


In [17]:
def test_model(model, test_loader):
  with torch.no_grad():
    model.eval()
    pred = []

    for batch_idx, (data) in enumerate(test_loader):   
      data = data.to(device)
      outputs = model(data)

      _, predicted = torch.max(outputs.data, 1)
      pred.append(predicted.cpu().numpy()[0])

    return np.array(pred)


In [18]:
n_epochs = 5
Train_acc = []
Train_loss = []
Val_loss = []
Val_acc = []

load_path_model = 'C:/Users/malji/Google Drive/Colab Notebooks/trained_model/trained_model_xyz_new_more_layers_k.pt'
model.load_state_dict(torch.load(load_path_model))

for i in range(n_epochs):
  print('Epoch: ',i+1)
  print('LR: ', scheduler.get_lr())
  train_loss,acc = train_epoch(model, train_loader, criterion, optimizer)
  test_loss, test_acc = val_model(model, val_loader, criterion)
  Train_loss.append(train_loss)
  Train_acc.append(acc)
  Val_loss.append(test_loss)
  Val_acc.append(test_acc)
  print('='*20)
  #scheduler.step(test_acc)
  torch.save(model.state_dict(), 'C:/Users/malji/Google Drive/Colab Notebooks/trained_model/trained_model_xyz_new_more_layers_k_2.pt')


Epoch:  1
LR:  [0.001]


  x = F.log_softmax(self.fc8(x))


Training Loss:  0.2284896719197631 Time:  2262.349710702896 s
Training Accuracy:  92.74726607644308 %
Validation Loss:  2.28089728064878
Validation Accuracy:  56.6676509818544 %
Epoch:  2
LR:  [0.001]
Training Loss:  0.2138244508950272 Time:  2243.129941225052 s
Training Accuracy:  93.19401998144507 %
Validation Loss:  2.274886760858295
Validation Accuracy:  56.91658679532834 %
Epoch:  3
LR:  [0.001]
Training Loss:  0.2022376772597574 Time:  2236.6696088314056 s
Training Accuracy:  93.54987411028533 %
Validation Loss:  2.3116280755468432
Validation Accuracy:  56.89863138480634 %
Epoch:  4
LR:  [0.001]
Training Loss:  0.1924726500647175 Time:  2240.690627336502 s
Training Accuracy:  93.85983819941417 %
Validation Loss:  2.294323387057601
Validation Accuracy:  56.96056816674633 %
Epoch:  5
LR:  [0.001]
Training Loss:  0.18445722248180244 Time:  2241.3544569015503 s
Training Accuracy:  94.10555284716526 %
Validation Loss:  2.365221174096141
Validation Accuracy:  56.931914584798335 %


In [19]:
!nvidia-smi

Sat Nov 07 11:50:49 2020       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 451.67       Driver Version: 451.67       CUDA Version: 11.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name            TCC/WDDM | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|   0  GeForce RTX 2060   WDDM  | 00000000:01:00.0  On |                  N/A |
| 60%   50C    P2    34W / 170W |   3688MiB /  6144MiB |     13%      Default |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Processes:                                                                  |
|  GPU   GI   CI        PID   Type   Process name                  GPU Memory |
|       