In [1]:
import torch
import torch.nn as nn
import torchvision
from torchvision.io import read_image
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, Dataset, random_split
import numpy as np
import os
import time
import pickle
BATCH_SIZE = 256

In [2]:
def get_device():
    """Return the device string to run on: 'cuda' when CUDA is available, otherwise 'cpu'."""
    if torch.cuda.is_available():
        return 'cuda'
    return 'cpu'

# Show whether a CUDA device is visible to this kernel.
print(torch.cuda.is_available())

True


In [3]:
class convBlock(nn.Module):
  """A square 2-D convolution followed by 2x2 max pooling.

  Args:
    input_node: number of input channels.
    output_node: number of output channels.
    filter_size: side length of the square convolution kernel.
    padding: zero padding passed to nn.Conv2d. Defaults to 5, the value that
      was previously hard-coded for every kernel size, so existing callers
      build exactly the same layer.
  """
  def __init__(self, input_node, output_node, filter_size, padding=5):
    super(convBlock, self).__init__()
    # An int padding is expanded by Conv2d to the same amount on both spatial axes.
    self.conv = nn.Conv2d(input_node, output_node, (filter_size, filter_size), padding=padding)
    self.maxpool = nn.MaxPool2d((2, 2))

  def forward(self, input):
    """Apply the convolution, then halve each spatial dimension with max pooling."""
    x = self.conv(input)
    x = self.maxpool(x)
    return x

# class flatBlock(nn.Module):
#   def __init__(self):
#     self.flat = nn.Flatten()
#     self.dense = nn.Linear(128, 64) # not sure about input size 128

class mymodel(nn.Module):
  """CNN classifier with 35 output classes.

  Pipeline: four convBlocks (1->16->32->64->64 channels, kernel sizes
  22/16/8/4), then BatchNorm2d -> ReLU -> Dropout(0.4) -> Flatten -> Linear.

  The classifier expects 2304 flattened features. With convBlock's padding of
  5 and 2x2 pooling, a 1x64x64 input reaches the classifier as
  64 channels x 6 x 6 = 2304 values; other input sizes need a different
  Linear input width.

  Earlier experiments that are not part of the model: resizing/grayscaling
  inside the network, an extra 4x4 max pool, and a deeper head
  (2304 -> 1000 -> Dropout -> 500 -> 100 -> 35).
  """
  def __init__(self):
    super().__init__()
    # Feature extractor: every stage is convolution + 2x2 max pooling.
    self.conv1 = convBlock(1, 16, 22)
    self.conv2 = convBlock(16, 32, 16)
    self.conv3 = convBlock(32, 64, 8)
    self.conv4 = convBlock(64, 64, 4)
    # Normalisation, non-linearity and regularisation applied once, after the last conv stage.
    self.batchnorm = nn.BatchNorm2d(64)
    self.relu = nn.ReLU()
    self.drop = nn.Dropout(0.4)
    # Classification head.
    self.flat = nn.Flatten()
    self.classifier = nn.Linear(2304, 35)
    # The same module objects are also chained in a Sequential, which is what forward() runs.
    pipeline = [
      self.conv1, self.conv2, self.conv3, self.conv4,
      self.batchnorm, self.relu, self.drop,
      self.flat, self.classifier,
    ]
    self.layers = nn.Sequential(*pipeline)

  def forward(self, input):
    """Run the input batch through the full pipeline and return the 35 class scores."""
    return self.layers(input)

class kaggle_1(nn.Module):
  """Fully connected classifier: Flatten -> 1280 -> 128 -> 128 -> 64 -> 35 -> Softmax.

  The input must flatten to 1280 features per sample. The output is a
  (batch, 35) tensor whose rows sum to 1.

  NOTE(review): the single `bn` (and `relu`) instance is placed at two
  positions in the Sequential, so both positions share one set of BatchNorm
  parameters and running statistics. This is kept as-is so the parameter set
  and state_dict keys do not change; confirm whether two separate BatchNorm
  layers were intended.

  NOTE(review): this network ends in Softmax, while train_model in this
  notebook uses nn.CrossEntropyLoss, which is documented to expect raw
  logits. Confirm which loss this model is meant to be trained with.
  """
  def __init__(self):
    super(kaggle_1, self).__init__()
    self.flat = nn.Flatten()
    self.l1 = nn.Linear(1280, 128)
    self.relu = nn.ReLU()
    self.bn = nn.BatchNorm1d(128)
    self.l2 = nn.Linear(128, 128)
    self.l3 = nn.Linear(128, 64)
    self.l4 = nn.Linear(64, 35)
    # Explicit class dimension: nn.Softmax() without `dim` relies on the
    # deprecated implicit choice and warns on every forward pass. For the 2-D
    # (batch, 35) tensor produced by l4 the implicit choice is also dim=1, so
    # the outputs are unchanged.
    self.softmax = nn.Softmax(dim=1)
    self.layers = nn.Sequential(self.flat, self.l1, self.relu, self.bn, self.l2, self.relu, self.bn, self.l3, self.relu, self.l4, self.softmax)

  def forward(self, input):
    """Return per-class probabilities for the input batch."""
    output = self.layers(input)
    return output

In [4]:
def evaluate_model(model, device, val_dl, criterion):
  """Evaluate `model` on every batch of `val_dl` without tracking gradients.

  Args:
    model: network to evaluate; it is switched to eval mode and left there.
    device: device the batches are moved to before the forward pass.
    val_dl: iterable of (inputs, targets) batches that supports len() and
      exposes `.dataset`.
    criterion: loss callable taking (output, targets).

  Returns:
    (mean_loss, accuracy) as Python numbers. mean_loss is the average of the
    per-batch losses (sum / number of batches); accuracy is the number of
    correct argmax predictions divided by len(val_dl.dataset).
  """
  model.eval()
  total_loss = 0.0
  n_correct = 0
  with torch.no_grad():
    for x_val, y_val in val_dl:
      x_val, y_val = x_val.to(device), y_val.to(device)
      output = model(x_val)
      pred = torch.argmax(output, dim=1)
      # .item() turns the 0-dim loss tensor into a Python float, so the
      # returned loss is a plain number (like train_model's train_loss)
      # instead of a tensor living on `device`.
      total_loss += criterion(output, y_val).item()
      n_correct += (pred == y_val).sum().item()

  return total_loss / len(val_dl), n_correct / len(val_dl.dataset)

def train_model(model, train_dl, val_dl, device, path, epochs=200, lr=0.0001,
                weight_decay=0.0001, model_name='mymodel'):
  """Train `model` with Adam and cross-entropy loss, checkpointing the best epoch.

  After every epoch the model is evaluated with evaluate_model on `val_dl`.
  Whenever the validation accuracy exceeds the best value seen so far, the
  model's state_dict is saved into `path` (overwriting the previous
  checkpoint, since the file name does not depend on the epoch).

  Args:
    model: network to train; must already be on `device`.
    train_dl: DataLoader of (inputs, targets) training batches.
    val_dl: DataLoader of (inputs, targets) validation batches.
    device: device string or torch.device the batches are moved to.
    path: directory for the checkpoint; created if it does not exist.
    epochs: number of passes over `train_dl` (default 200).
    lr: Adam learning rate (default 0.0001).
    weight_decay: Adam weight decay (default 0.0001).
    model_name: prefix of the checkpoint file name (default 'mymodel').

  Returns:
    dict with keys 'train_loss', 'train_accuracy', 'val_loss' and
    'val_accuracy', each a list holding one Python number per epoch.
  """
  # torch.save does not create missing directories.
  os.makedirs(path, exist_ok=True)
  optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)
  criterion = nn.CrossEntropyLoss()
  record = {'train_loss':[], 'train_accuracy':[], 'val_loss':[], 'val_accuracy':[]}
  # The file name records the hyper-parameters; BATCH_SIZE is the notebook-level constant.
  checkpoint_path = os.path.join(path, f'{model_name}_combinedThree_reduced_bs{BATCH_SIZE}_e{epochs}_lr{lr}_wd{weight_decay}')

  max_acc = 0
  print("Training start...")
  # Only synchronize when actually running on CUDA; the call raises on a
  # CPU-only machine, which get_device() explicitly allows.
  if torch.device(device).type == 'cuda':
    torch.cuda.synchronize()
  for epoch in range(epochs):
    model.train()  # evaluate_model leaves the model in eval mode
    train_loss = 0
    train_accuracy = 0

    for x_train, y_train in train_dl:
      x_train, y_train = x_train.to(device), y_train.to(device)
      optimizer.zero_grad()
      output = model(x_train)
      pred = torch.argmax(output, dim=1)
      loss = criterion(output, y_train)
      train_loss += loss.item()
      train_accuracy += (pred == y_train).sum().item()
      loss.backward()
      optimizer.step()

    # Loss is averaged over batches, accuracy over individual samples.
    train_loss = train_loss / len(train_dl)
    train_accuracy = train_accuracy / len(train_dl.dataset)
    record['train_loss'].append(train_loss)
    record['train_accuracy'].append(train_accuracy)

    # evaluate model
    val_loss, val_accuracy = evaluate_model(model, device, val_dl, criterion)
    # Store a plain float even if the evaluator hands back a 0-dim tensor.
    val_loss = float(val_loss)
    if val_accuracy > max_acc:
      max_acc = val_accuracy
      torch.save(model.state_dict(), checkpoint_path)

    record['val_loss'].append(val_loss)
    record['val_accuracy'].append(val_accuracy)

    print(f'epoch: {epoch}, train_loss: {train_loss}, train_acc: {train_accuracy}, val_loss: {val_loss}, val_acc: {val_accuracy}')

  return record


In [5]:
def loadData(filepath):
    """Load an image-folder dataset and split it randomly into train and validation sets.

    Every image is resized to 64x64, converted to grayscale and turned into a
    tensor before being returned by the dataset.

    Args:
        filepath: root directory handed to torchvision's ImageFolder.

    Returns:
        (train_set, val_set): about 90% / 10% of the samples. The validation
        set receives whatever the 90% truncation leaves over, so the two
        lengths always add up to len(dataset).
    """
    trans = transforms.Compose([transforms.Resize((64, 64)), transforms.Grayscale(), transforms.ToTensor()])
    dataset = datasets.ImageFolder(filepath, transform=trans)

    # randomly split train and validate
    # Truncating both 0.9*n and 0.1*n independently can sum to less than n
    # (e.g. n = 101 -> 90 + 10), which makes random_split raise ValueError.
    # Deriving the validation size as the remainder always sums to n.
    n_total = len(dataset)
    n_train = int(0.9 * n_total)
    n_val = n_total - n_train
    train_set, val_set = random_split(dataset, [n_train, n_val])
    # Optional caching of the split (disabled):
    # torch.save(train_set, 'tensorData/train_set_64_64.pt')
    # torch.save(val_set, 'tensorData/val_set_64_64.pt')
    # train_set = torch.load('tensorData/train_set_64_64.pt')
    # val_set = torch.load('tensorData/val_set_64_64.pt')
    return train_set, val_set

In [6]:
def main():
    """Load the images under 'data', build the loaders and train mymodel into 'savedModel'."""
    device = get_device()

    train_set, val_set = loadData('data')
    worker_args = {'num_workers': 2}
    train_dl = DataLoader(train_set, batch_size=BATCH_SIZE, shuffle=True, **worker_args)
    # The whole validation set is served as a single batch.
    val_dl = DataLoader(val_set, batch_size=len(val_set), **worker_args)

    net = mymodel().to(device)
    train_model(net, train_dl, val_dl, device, 'savedModel')

Training start...
epoch: 0, train_loss: 1.4604169795679491, train_acc: 0.70441400304414, val_loss: 0.5593900084495544, val_acc: 0.9246575342465754
epoch: 1, train_loss: 0.3838237563306971, train_acc: 0.9397564687975647, val_loss: 0.2749934494495392, val_acc: 0.9671232876712329
epoch: 2, train_loss: 0.20166345599085786, train_acc: 0.9698325722983258, val_loss: 0.15188176929950714, val_acc: 0.9802739726027397
epoch: 3, train_loss: 0.12303495476412218, train_acc: 0.9837747336377474, val_loss: 0.09558787941932678, val_acc: 0.9887671232876712
epoch: 4, train_loss: 0.08017865074581879, train_acc: 0.9913850837138508, val_loss: 0.060854729264974594, val_acc: 0.9953424657534247
epoch: 5, train_loss: 0.05784897272323453, train_acc: 0.9952511415525114, val_loss: 0.0415707528591156, val_acc: 0.9980821917808219
epoch: 6, train_loss: 0.041233400258329486, train_acc: 0.9969254185692542, val_loss: 0.03645249456167221, val_acc: 0.9980821917808219
epoch: 7, train_loss: 0.032164520432436186, train_acc: 0

In [None]:
# target_file = open("targets.pkl", "rb")
# tmp = pickle.load(target_file)
# target_file.close()