In [1]:
import time
import torch
import timeit
import numpy as np
import torch.nn as nn
import torch.optim as optim
from torch.utils import data
import matplotlib.pyplot as plt
import torch.nn.functional as F
from collections import namedtuple
from torch.autograd import Variable
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, datasets, models
from torch.optim.lr_scheduler import StepLR, ReduceLROnPlateau

%matplotlib inline
np.random.seed(2018)

Mounted at /content/drive


In [2]:
train = np.load('C:/Users/malji/Google Drive/Colab Notebooks/Hw3/ntrain.npy',allow_pickle=True)
train_labels = np.load('C:/Users/malji/Google Drive/Colab Notebooks/Hw3/ntrain_labels.npy',allow_pickle=True)
val = np.load('C:/Users/malji/Google Drive/Colab Notebooks/Hw3/nval.npy',allow_pickle=True)
val_labels = np.load('C:/Users/malji/Google Drive/Colab Notebooks/Hw3/nval_labels.npy',allow_pickle=True)
ntest = np.load('C:/Users/malji/Google Drive/Colab Notebooks/Hw3/ntest.npy',allow_pickle=True)
phones = np.loadtxt("C:/Users/malji/Google Drive/Colab Notebooks/Hw3/phones.txt", dtype=str)


In [3]:
class PhonesModel(nn.Module):
    # try changing 32 to 128
    def __init__(self):
        super(PhonesModel, self).__init__()
        self.fc1 = nn.Linear(13, 256 )
        self.bnorm1 = nn.BatchNorm1d(256 )
        self.dp1 = nn.Dropout(p=0.2)
        self.fc2 = nn.Linear(256 , 512 )
        self.bnorm2 = nn.BatchNorm1d(512 )
        self.dp2 = nn.Dropout(p=0.2)
        self.fc3 = nn.Linear(512, 256 )
        self.bnorm3 = nn.BatchNorm1d(256 )
        self.dp3 = nn.Dropout(p=0.2)
        self.fc4 = nn.Linear(256, 128 )
        self.bnorm4 = nn.BatchNorm1d(128 )
        self.dp4 = nn.Dropout(p=0.1)
        self.fc5 = nn.Linear(128 , 346)
    
    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = self.dp1(self.bnorm1(x))
        x = F.relu(self.fc2(x))
        x = self.dp2(self.bnorm2(x))
        x = F.relu(self.fc3(x))
        x = self.dp3(self.bnorm3(x))
        x = F.relu(self.fc4(x))
        x = self.dp4(self.bnorm4(x))
        x = F.log_softmax(self.fc5(x))
        return x

In [4]:
# Dataset
class PhonesDataset(data.Dataset):
  def __init__(self, X,Y,k): 
    self.X = X
    self.Y = Y
    self.k = k
    self.samples = []
    self.labels = []
    self.length = []
    self._init_dataset()
    self.ind = np.arange(self.length[-1])
    km = [self.k*(2*i+1) for i in range(len(self.length))]
        
    b = 0
    for i in range(self.length[-1]):
        if i == self.length[b]:
            b = b+1
            self.ind[i] = self.ind[i] + km[b]
        else:
            self.ind[i] = self.ind[i] + km[b]

  def __len__(self):
    print(len(self.samples),len(self.labels))
    return len(self.labels)

  def __getitem__(self,index):
    X = np.concatenate((self.samples[self.ind[index]-self.k:self.ind[index]+ self.k+1]),axis=0)
    labels = self.labels[index]
    return torch.from_numpy(X).float(),torch.tensor(labels).long()
    
  def _init_dataset(self):
    s = 0
    for i in range(len(self.X)):
        p = np.pad(self.X[i], ((self.k, self.k), (0, 0)), 'constant', constant_values=0)
        s = s + len(self.X[i])
        self.length.append(s)
        self.samples = self.samples + list(p)
        self.labels = self.labels + list(self.Y[i]) 

    return np.array(self.samples), np.array(self.labels)

In [5]:
class TestDataset(data.Dataset):
    def __init__(self, X,k): 
      self.X = X
      self.k = k
      self.samples = []
      self.length = []
      self._init_dataset()
      self.ind = np.arange(self.length[-1])
      km = [self.k*(2*i+1) for i in range(len(self.length))]
      
      b = 0
      for i in range(self.length[-1]):
          if i == self.length[b]:
              b = b+1
              self.ind[i] = self.ind[i] + km[b]
          else:
              self.ind[i] = self.ind[i] + km[b]
        

    def __len__(self):
      print(len(self.samples),self.length[-1])
      return self.length[-1]

    def __getitem__(self,index):
      X = np.concatenate((self.samples[self.ind[index]-self.k:self.ind[index]+ self.k+1]),axis=0)
      return torch.from_numpy(X).float()
    
    def _init_dataset(self):
      s = 0
      for i in range(len(self.X)):
          p = np.pad(self.X[i], ((self.k, self.k), (0, 0)), 'constant', constant_values=0)
          s = s + len(self.X[i])
          self.length.append(s)
          self.samples = self.samples + list(p)
         
      return np.array(self.samples)

In [6]:
def save_data(loader):
    print('saving data...')
    
    for i, (x, y) in enumerate(loader):
        
        x=x.view(-1, 13).numpy()
        y = y.numpy()
        if i==0:
            data = np.array(x)
            label = np.array(y)
        else:     
            data = np.concatenate((data, x))
            label = np.concatenate((label, y))
    return data, label


In [7]:
cuda = torch.cuda.is_available()
num_workers = 8 if cuda else 0 
    
# Training
start_time = time.time()
train_dataset = PhonesDataset(train, train_labels,13)
train_loader_args = dict(shuffle=True, batch_size=256, num_workers=num_workers, pin_memory=True)
train_loader = data.DataLoader(train_dataset, **train_loader_args)
print("taken time: %s seconds ---" % (time.time() - start_time))

24139641 23628221
24139641 23628221
taken time: 5047.538684606552 seconds ---


In [None]:
start_time = time.time()
print("saving data...")
data, labels = save_data(train_loader)
np.save('C:/Users/malji/Google Drive/Colab Notebooks/Hw3/train_data_new.npy', data)
np.save('C:/Users/malji/Google Drive/Colab Notebooks/Hw3/train_labels_new.npy', labels)
print("train data savied in:")
print("--- %s seconds ---" % (time.time() - start_time))
print("\n===================================\n")


saving data...
saving data...


In [None]:
# Validation
start_time = time.time()
num_workers = 8 
val_dataset = MyDataset(dev, dev_labels,13)
val_loader_args = dict(shuffle=False, batch_size=256, num_workers=num_workers, pin_memory=True)
val_loader = data.DataLoader(val_dataset, **val_loader_args)
print("taken time: %s seconds ---" % (time.time() - start_time))

In [None]:
print("saving data...")
start_time = time.time()
data, labels = save_data(val_loader)
np.save('C:/Users/malji/Google Drive/Colab Notebooks/Hw3/val_new.npy', data)
np.save('C:/Users/malji/Google Drive/Colab Notebooks/Hw3/val_labels_new.npy', labels)
print("validation data savied in:")
print("--- %s seconds ---" % (time.time() - start_time))
print("\n===================================\n")


In [None]:
# Testing
start_time = time.time()
test_dataset = TestDataset(test,13)
test_loader_args = dict(shuffle=False, batch_size=1, num_workers=num_workers, pin_memory=True)
test_loader = data.DataLoader(test_dataset, **test_loader_args)
print("taken time: %s seconds ---" % (time.time() - start_time))

In [None]:
print("saving data...")
start_time = time.time()
data, labels = save_data(test_loader)
np.save('C:/Users/malji/Google Drive/Colab Notebooks/Hw3/test_new.npy', data)
np.save('C:/Users/malji/Google Drive/Colab Notebooks/Hw3/test_labels_new.npy', labels)
print("test  data savied in:")
print("--- %s seconds ---" % (time.time() - start_time))
print("\n===================================\n")

In [None]:
#skip these codes

In [None]:
# loading data
train = np.load('C:/Users/malji/Google Drive/Colab Notebooks/Hw3/ntrain.npy',allow_pickle=True)
train_labels = np.load('C:/Users/malji/Google Drive/Colab Notebooks/Hw3/ntrain_labels.npy',allow_pickle=True)
val = np.load('C:/Users/malji/Google Drive/Colab Notebooks/Hw3/nval.npy',allow_pickle=True)
val_labels = np.load('C:/Users/malji/Google Drive/Colab Notebooks/Hw3/nval_labels.npy',allow_pickle=True)
ntest = np.load('C:/Users/malji/Google Drive/Colab Notebooks/Hw3/ntest.npy',allow_pickle=True)
phones = np.loadtxt("C:/Users/malji/Google Drive/Colab Notebooks/Hw3/phones.txt", dtype=str)
train_size = train.shape[0]



val_size = val.shape[0]

test_size = test.shape[0]

batch_size = 32

start_time = time.time()
train_data = PhonesDataset(np.load('/content/drive/My Drive/Colab Notebooks/Hw3/ntrain.npy',allow_pickle=True),
                           np.load('/content/drive/My Drive/Colab Notebooks/Hw3/ntrain_labels.npy',allow_pickle=True))

train_loader = torch.utils.data.DataLoader(train_data, 
                                           batch_size=batch_size,
                                           shuffle=True
                                           )
print("Train data loaded in %s seconds ---" % (time.time() - start_time))

start_time = time.time()
val_data = PhonesDataset(np.load('/content/drive/My Drive/Colab Notebooks/Hw3/nval.npy',allow_pickle=True),
                         np.load('/content/drive/My Drive/Colab Notebooks/Hw3/nval_labels.npy',allow_pickle=True))
val_loader = torch.utils.data.DataLoader(val_data, 
                                         batch_size=batch_size,
                                         shuffle=False
                                           )
print("Validation data loaded in %s seconds ---" % (time.time() - start_time))

start_time = time.time()
test_loader = torch.utils.data.DataLoader(test, batch_size=batch_size, shuffle=False)
print("Test data loaded in %s seconds ---" % (time.time() - start_time))

def save_data(loader):
    print('saving data...')
    
    for i, (x, y) in enumerate(loader):
        
        x=x.view(-1, 13).numpy()
        y = y.numpy()
        if i==0:
            data = np.array(x)
            label = np.array(y)
        else:     
            data = np.concatenate((data, x))
            label = np.concatenate((label, y))
    return data, label

start_time = time.time()
print("saving data...")
data, labels = save_data(train_loader)
np.save('C:/Users/malji/Google Drive/Colab Notebooks/Hw3/train_data_new.npy', data)
np.save('C:/Users/malji/Google Drive/Colab Notebooks/Hw3/train_labels_new.npy', labels)
print("train data savied in:")
print("--- %s seconds ---" % (time.time() - start_time))
print("\n===================================\n")
print("saving data...")
start_time = time.time()
data, labels = save_data(val_loader)
np.save('C:/Users/malji/Google Drive/Colab Notebooks/Hw3/val_new.npy', data)
np.save('C:/Users/malji/Google Drive/Colab Notebooks/Hw3/val_labels_new.npy', labels)
print("validation data savied in:")
print("--- %s seconds ---" % (time.time() - start_time))
print("\n===================================\n")
print("saving data...")
start_time = time.time()
data, labels = save_data(test_loader)
np.save('C:/Users/malji/Google Drive/Colab Notebooks/Hw3/test_new.npy', data)
np.save('C:/Users/malji/Google Drive/Colab Notebooks/Hw3/test_labels_new.npy', labels)
print("test  data savied in:")
print("--- %s seconds ---" % (time.time() - start_time))
print("\n===================================\n")






Metric = namedtuple('Metric', ['loss', 'train_error', 'val_error'])

In [None]:
def inference(model, loader,device, n_members):
    correct = 0
    for data, label in loader:
        X = Variable(data.view(-1, 13))
        Y = Variable(label.view(-1))
        X = X.to(device)
        Y = Y.to(device)
        model= model.to(device)
        out = model(X)
        pred = out.data.max(1, keepdim=True)[1]
        predicted = pred.eq(Y.data.view_as(pred))
        correct += predicted.sum()
    correct = correct.cpu()
    return correct.numpy() / n_members

class Trainer():
    """ 
    A simple training cradle
    """
    
    def __init__(self, model, optimizer,device, load_path=None):
        self.model = model
        if load_path is not None:
            self.model = torch.load(load_path)
        self.optimizer = optimizer
        self.device = device
            
    def save_model(self, path):
        torch.save(self.model.state_dict(), path)

    def run(self, n_epochs):
        print("Start Training...")
        self.metrics = []
        for e in range(n_epochs):
            start_time = time.time()
            epoch_loss = 0
            correct = 0
            for batch_idx, (data, label) in enumerate(train_loader):
                self.optimizer.zero_grad()
                X = Variable(data.view(-1, 13))
                Y = Variable(label.view(-1))
                X = X.to(device)
                Y = Y.to(device)
                self.model= self.model.to(device)
                out = self.model(X)
                pred = out.data.max(1, keepdim=True)[1]
                predicted = pred.eq(Y.data.view_as(pred))
                correct += predicted.sum()
                #loss = F.nll_loss(out, Y)
                criterion = nn.CrossEntropyLoss()
                loss = criterion(out, Y)
                loss.backward()
                self.optimizer.step()
                epoch_loss += loss.data.item()

            
            total_loss = epoch_loss/train_size
            correct = correct.cpu()
            
            train_error = 1.0 - correct.numpy()/train_size
            train_acc = correct.numpy()/train_size
            val_acc = inference(self.model, val_loader,device, val_size)
            val_error = 1.0 - val_acc
            print("epoch: {0}, loss: {1:.8f}".format(e+1, total_loss))
            print('Train accuracy = ', train_acc)
            print('Validation accuracy = ', val_acc)
            print("--- %s seconds ---" % (time.time() - start_time))
            print("\n===================================\n")
            if e%10 ==0:              
              self.save_model("C:/Users/malji/Google Drive/Colab Notebooks/trained_model/trained_modelXXX.pt")
              print("model saved...")
            self.metrics.append(Metric(loss=total_loss, 
                                  train_error=train_error,
                                  val_error=val_error))
         

In [None]:
##a = b +dafd

In [None]:
# Training

def init_xavier(m):
    if type(m) == nn.Linear:
        fan_in = m.weight.size()[1]
        fan_out = m.weight.size()[0]
        std = np.sqrt(2.0 / (fan_in + fan_out))
        m.weight.data.normal_(0,std)
#to run on gpu        
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device
n_epochs = 10

model = PhonesModel()
#load_path_model = "/content/drive/My Drive/Colab Notebooks/trained_model/trained_model2.pt"
# model.load_state_dict(torch.load(load_path_model))
optimizer = torch.optim.SGD(model.parameters(), lr = 0.001, momentum = 0.9, weight_decay = 0.001)
btrainer = Trainer(model.double(), optimizer,device)
btrainer.run(n_epochs)
#btrainer.save_model('./dropout-batchnorm_optimized_model.pt')

Start Training...




epoch: 1, loss: 0.00009103
Train accuracy =  0.18647527315168613
Validation accuracy =  0.4162325346367095
--- 89.91339826583862 seconds ---


model saved...
epoch: 2, loss: 0.00007793
Train accuracy =  0.24949212854634886
Validation accuracy =  0.45831958761859104
--- 81.15086507797241 seconds ---


epoch: 3, loss: 0.00007114
Train accuracy =  0.2629163593245553
Validation accuracy =  0.48042992276818597
--- 81.11348295211792 seconds ---


epoch: 4, loss: 0.00006665
Train accuracy =  0.2733956298684246
Validation accuracy =  0.4979670461646715
--- 81.0838348865509 seconds ---


epoch: 5, loss: 0.00006372
Train accuracy =  0.27989942712513594
Validation accuracy =  0.5083104681051246
--- 81.17587113380432 seconds ---


epoch: 6, loss: 0.00006164
Train accuracy =  0.2843093199322818
Validation accuracy =  0.5165431960954529
--- 81.17606520652771 seconds ---


epoch: 7, loss: 0.00006012
Train accuracy =  0.2876601342251744
Validation accuracy =  0.5222224603151664
--- 81.15521144866943 s

In [None]:
#t = get_newData(test)
#test_loader = torch.utils.data.DataLoader(t, batch_size=batch_size, shuffle=False)


In [None]:
#test_acc = inference(model, test_loader,device, test_size)
#print("Test accuracy of model optimizer with: {0:.2f}".format(test_acc * 100))


In [None]:
### VISUALIZATION ###
def training_plot(metrics):
    plt.figure(1)
    plt.plot([m.loss for m in metrics], 'b')
    plt.title('Training Loss')
    plt.show()

training_plot(btrainer.metrics)

In [None]:
#btrainer.metrics

In [None]:
# test.shape

In [None]:
# out = model(t)

In [None]:
phones = np.loadtxt("/content/drive/My Drive/Colab Notebooks/Hw3/phones.txt", dtype=str)
phones_labels = np.zeros(346,)
i = 1

while i <346:
  phones_labels[i] = int(phones[i][1])
  i = i +1

phones_labels.shape
phones_labels.size

In [None]:
# Datasets
class PhonesDatasetTest(Dataset):

  """ Phones dataset."""
  # Initialize your data, etc.
  def __init__(self, x,y):

    self.train = get_newData(x)
    self.train_labels = get_newData(y)
    self.len = len(self.train)
    print(self.train.shape)

    
  def __getitem__(self, index):
    return self.train[index], self.train_labels[index]

  def __len__(self):
    return len(self.train)

  def len():
    return len(self.train)

In [None]:
#test = np.load('C:/Users/malji/Google Drive/Colab Notebooks/Hw3/ntest.npy',allow_pickle=True)
#test = PhonesDatasetTest(test, phones_labels)
#test_loader = torch.utils.data.DataLoader(test, batch_size=batch_size, shuffle=False)

In [None]:
def inferenceTest(model, loader,device, n_members):
    correct = 0
    for data, label in loader:
        X = Variable(data.view(-1, 13))
        Y = phones_labels
        Y = torch.from_numpy(Y)
        print(X.numpy().shape)
        print(Y.numpy().shape)
        X = X.to(device)
        Y = Y.to(device)
        model= model.to(device)
        out = model(X)
        pred = out.data.max(1, keepdim=True)[1]
        print(pred)
        predicted = pred.eq(Y.data.view_as(pred))
        correct += predicted.sum()
    correct = correct.cpu()
    return correct.numpy() / n_members


In [None]:
#inferenceTest(model, test_loader, device,test_size)

In [None]:
model = PhonesModel()
load_path_model = "/content/drive/My Drive/Colab Notebooks/trained_model/trained_model3.pt"
model.load_state_dict(torch.load(load_path_model))


In [None]:
test[0][0]

In [None]:
modelx = model.double()
out = modelx(torch.from_numpy(test[0]))

In [None]:
out[1000]

In [None]:
val_labels[1000]