In [1]:
%env CUBLAS_WORKSPACE_CONFIG=:4096:8

env: CUBLAS_WORKSPACE_CONFIG=:4096:8


In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
import torch
import numpy as np
import matplotlib.pyplot as plt
import time

In [4]:
from sklearn.model_selection import train_test_split
from project_utilities import Loss
from project_utilities import ValueSet
from project_utilities import efficiency
train_set_idx, val_set_idx = train_test_split(list(range(1,80)), test_size=20)

In [5]:
random_seed=132
# num_classes=4000
learning_rate=0.001
num_epochs=53
b_size=128

In [6]:
class MyDataset(torch.utils.data.Dataset): 
  def __init__(self, setID): 
        'Initialization' 
        npz_files_content = np.load("./drive/MyDrive/data/Set_"+str(setID)+".npz")  
         
        self.X_set = torch.tensor(npz_files_content['X'])    
        self.y_set = torch.tensor(npz_files_content['y']) 
  def __len__(self): 
        'Denotes the total number of samples' 
        return len(self.y_set) 
  def __getitem__(self, index): 
        'Generates one sample of data' 
        # Select sample 
        X = self.X_set[index] 
        y = self.y_set[index] 
        return X, y

In [7]:
# Train loader 
for setID in train_set_idx: 
  train_set = MyDataset(setID) 
  train_loader = torch.utils.data.DataLoader(train_set,  
                                             batch_size = 128,  
                                             shuffle = True, drop_last=True) 
  print(setID) 
  for X_train, y_train in train_loader: 
    print(X_train.shape) 
    print(y_train.shape) 
# Validation loader 
for setID in val_set_idx:
  val_set = MyDataset(setID) 
  val_loader = torch.utils.data.DataLoader(val_set,  
                                           batch_size = 128,  
                                           shuffle = True,  drop_last=True) 
  print(setID) 
  for X_val, y_val in val_loader: 
    print(X_val.shape) 
    print(y_val.shape) 

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
torch.Size([128, 4000])
torch.Size([128, 4, 4000])
torch.Size([128, 4000])
torch.Size([128, 4, 4000])
torch.Size([128, 4000])
torch.Size([128, 4, 4000])
torch.Size([128, 4000])
torch.Size([128, 4, 4000])
torch.Size([128, 4000])
torch.Size([128, 4, 4000])
torch.Size([128, 4000])
torch.Size([128, 4, 4000])
torch.Size([128, 4000])
torch.Size([128, 4, 4000])
torch.Size([128, 4000])
torch.Size([128, 4, 4000])
torch.Size([128, 4000])
torch.Size([128, 4, 4000])
torch.Size([128, 4000])
torch.Size([128, 4, 4000])
torch.Size([128, 4000])
torch.Size([128, 4, 4000])
torch.Size([128, 4000])
61
torch.Size([128, 4, 4000])
torch.Size([128, 4000])
torch.Size([128, 4, 4000])
torch.Size([128, 4000])
torch.Size([128, 4, 4000])
torch.Size([128, 4000])
torch.Size([128, 4, 4000])
torch.Size([128, 4000])
torch.Size([128, 4, 4000])
torch.Size([128, 4000])
torch.Size([128, 4, 4000])
torch.Size([128, 4000])
torch.Size([128, 4, 4000])
torch.Size([12

In [8]:
##########################
### MODEL
##########################


class ConvNet(torch.nn.Module):

    def __init__(self):
        super(ConvNet, self).__init__()
        
        #########################
        ### 1st residual block
        #########################
        
        self.block_1 = torch.nn.Sequential(
                torch.nn.Conv2d(in_channels=4,
                                out_channels=6,
                                kernel_size=1,
                                stride=1,
                                padding=0),
                # torch.nn.LeakyReLU(0.01),
                torch.nn.BatchNorm2d(6),
                torch.nn.ReLU(inplace=True),
                torch.nn.Dropout2d(0.3),
                torch.nn.Conv2d(in_channels=6,
                                out_channels=4,
                                kernel_size=3,
                                stride=1,
                                padding=1),
                torch.nn.BatchNorm2d(4)
        )
        
        self.block_2 = torch.nn.Sequential(
                torch.nn.Conv2d(in_channels=4,
                                out_channels=6,
                                kernel_size=1,
                                stride=1,
                                padding=0),
                # torch.nn.LeakyReLU(0.01),
                torch.nn.BatchNorm2d(6),
                torch.nn.ReLU(inplace=True),
                torch.nn.Dropout2d(0.3),
                torch.nn.Conv2d(in_channels=6,
                                out_channels=1,
                                kernel_size=3,
                                stride=1,
                                padding=1),
                torch.nn.BatchNorm2d(1)
        )

        #########################
        ### Fully connected
        #########################        
        self.linear_1 = torch.nn.Linear(1*4000, 1*4000)

        
    def forward(self, x):
        # print(x.shape)
        x=x.view(b_size,4,1,4000)
        # x = x.permute(0, 3, 1, 2)
        # print(x.shape)
        #########################
        ### 1st residual block
        #########################
        shortcut = x
        x = self.block_1(x)
        # print(" 1st residual block done ",x.shape)
        x = torch.nn.functional.relu(x + shortcut)
        # print(" 1st relu residual block done ",x.shape)
        #########################
        ### 2nd residual block
        #########################
        # x=x.view(128,1,1,4000)
        shortcut = x
        x = self.block_2(x)
        # x = torch.nn.functional.relu(x + shortcut)
        logits = torch.nn.functional.relu(x)
        # print(" 2nd residual block done",x.shape)
        # #########################
        # ### Fully connected
        # #########################
        # print(" 2nd ",x.shape)
        logits=logits.view(b_size,4000)
        # logits = self.linear_1(x.view(128, 4000))
        return logits

    
torch.manual_seed(random_seed)
model = ConvNet()
# model = model.to(device)
    
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)  

In [9]:
loss_model = Loss(0.00001)

In [10]:
# def compute_accuracy(model, data_loader):
#     correct_pred, num_examples = 0, 0
#     for i, (features, targets) in enumerate(data_loader):            
#         # features = features.to(device)
#         # targets = targets.to(device)
#         logits = model(features)
#         _, predicted_labels = torch.max(logits, 1)
#         num_examples += targets.size(0)
#         correct_pred += (predicted_labels == targets).sum()
#     return correct_pred.float()/num_examples * 100
print("after compute_accuracy func")
def validate(model): 
    loss_val = [] 
    eff = ValueSet(0, 0, 0, 0) 
    # switch to evaluate mode 
    model.eval() 
    with torch.no_grad():
        for setID in val_set_idx: 
          val_set = MyDataset(setID+1) 
          val_generator = torch.utils.data.DataLoader(val_set,  
                                                      batch_size=5000,  
                                                      shuffle=True,  drop_last=True) 
          print(setID) 
          for X_val, y_val in val_generator: 
            # Forward pass 
            val_outputs = model(X_val) 
            loss_output = loss_model.forward(val_outputs, y_val) 
            loss_val.append(loss_output) 
            for label, output in zip(y_val.cpu().numpy(), val_outputs.cpu().numpy()):
                eff += efficiency(label, output, difference = 5.0,  
                                  threshold = 1e-2, integral_threshold = 0.2,  
                                  min_width = 3) 
    return sum(loss_val)/len(loss_val), eff.eff_rate, eff.fp_rate
start_time = time.time()
# for epoch in range(num_epochs):
#     model = model.train()
#     for batch_idx, (features, targets) in enumerate(train_loader):
        
#         # features = features.to(device)
#         # targets = targets.to(device)
        
#         ### FORWARD AND BACK PROP
#         yhat = model(features)
#         loss_model = Loss(0.00001)
#         loss = loss_model.forward(yhat, targets)
#         optimizer.zero_grad()
        
#         loss.backward()
        
#         ### UPDATE MODEL PARAMETERS
#         optimizer.step()
        
#         ### LOGGING
#         if not batch_idx % 250:
#             print ('Epoch: %03d/%03d | Batch %03d/%03d | Cost: %.4f' 
#                    %(epoch+1, num_epochs, batch_idx, 
#                      len(train_loader), loss))

#     model = model.eval() # eval mode to prevent upd. batchnorm params during inference
#     with torch.set_grad_enabled(False): # save memory during inference
#         print('Epoch: %03d/%03d training accuracy:%%' % (
#               epoch+1, num_epochs))
#         print(validate(model))
#     print('Time elapsed: %.2f min' % ((time.time() - start_time)/60))
    
# print('Total Training Time: %.2f min' % ((time.time() - start_time)/60))

after compute_accuracy func


In [11]:
def train(model, num_epochs, 
          learning_rate=0.01, seed=123, batch_size=128): 
    cost = [] 
     
    torch.manual_seed(seed) 
     
    optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate) 
     
    for e in range(1,num_epochs): 
      batch_num = 0 
      for setID in train_set_idx: 
          train_set = MyDataset(setID+1) 
          train_generator = torch.utils.data.DataLoader(train_set,  
                                                        batch_size=batch_size,  
                                                        shuffle=True,  drop_last=True) 
          print(setID) 
          for X_train, y_train in train_generator: 
            batch_num = batch_num + 1 
            #### Compute outputs #### 
            yhat = model(X_train) 
            loss = loss_model.forward(yhat, y_train) 
            #### Reset gradients from previous iteration #### 
            optimizer.zero_grad() 
             
            #### Compute gradients #### 
            loss.backward() 
             
            #### Update weights #### 
            optimizer.step() 
            #### Logging #### 
            with torch.no_grad(): 
                yhat = model.forward(X_train) 
                curr_loss = loss_model.forward(yhat, y_train) 
                print('Epoch ID: %d ' % e, end="") 
                print('  Set ID: %d' % setID, end="") 
                print('  Batch ID: %d' % batch_num, end="") 
                print(' | Loss: %.5f' % curr_loss) 
                cost.append(curr_loss) 
    return cost

In [None]:
train(model, num_epochs, seed=random_seed, batch_size=128)

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Epoch ID: 5   Set ID: 72  Batch ID: 1059 | Loss: 0.07744
Epoch ID: 5   Set ID: 72  Batch ID: 1060 | Loss: 0.08091
Epoch ID: 5   Set ID: 72  Batch ID: 1061 | Loss: 0.07231
Epoch ID: 5   Set ID: 72  Batch ID: 1062 | Loss: 0.07743
Epoch ID: 5   Set ID: 72  Batch ID: 1063 | Loss: 0.07275
Epoch ID: 5   Set ID: 72  Batch ID: 1064 | Loss: 0.07207
Epoch ID: 5   Set ID: 72  Batch ID: 1065 | Loss: 0.07277
Epoch ID: 5   Set ID: 72  Batch ID: 1066 | Loss: 0.07458
Epoch ID: 5   Set ID: 72  Batch ID: 1067 | Loss: 0.07196
Epoch ID: 5   Set ID: 72  Batch ID: 1068 | Loss: 0.07828
Epoch ID: 5   Set ID: 72  Batch ID: 1069 | Loss: 0.07456
Epoch ID: 5   Set ID: 72  Batch ID: 1070 | Loss: 0.07611
Epoch ID: 5   Set ID: 72  Batch ID: 1071 | Loss: 0.07532
Epoch ID: 5   Set ID: 72  Batch ID: 1072 | Loss: 0.07743
Epoch ID: 5   Set ID: 72  Batch ID: 1073 | Loss: 0.07515
Epoch ID: 5   Set ID: 72  Batch ID: 1074 | Loss: 0.07709
Epoch ID: 5   Set ID: 7

In [None]:
def validate(model): 
    b_size=5000
    loss_val = [] 
    eff = ValueSet(0, 0, 0, 0) 
    # switch to evaluate mode 
    model.eval() 
    with torch.no_grad():
        for setID in val_set_idx: 
          val_set = MyDataset(setID+1) 
          val_generator = torch.utils.data.DataLoader(val_set,  
                                                      batch_size=5000,  
                                                      shuffle=True,  drop_last=True) 
          print(setID) 
          for X_val, y_val in val_generator: 
            # Forward pass 
            val_outputs = model(X_val) 
            loss_output = loss_model.forward(val_outputs, y_val) 
            loss_val.append(loss_output) 
            for label, output in zip(y_val.cpu().numpy(), val_outputs.cpu().numpy()):
                eff += efficiency(label, output, difference = 5.0,  
                                  threshold = 1e-2, integral_threshold = 0.2,  
                                  min_width = 3) 
    return sum(loss_val)/len(loss_val), eff.eff_rate, eff.fp_rate

In [None]:
b_size=5000
loss_val, eff_rate, fp_rate = validate(model) 

print('Loss: %0.3f ' % loss_val, end="")
print('  Efficiency: %0.3f' % eff_rate, end="") 
print('  False positive rate: %0.3f' % fp_rate) 
