In [1]:
import os
from PIL import Image
import matplotlib.pyplot as plt

import torch
import torchvision
from torch.utils.data import DataLoader, Dataset, random_split
import torchvision.transforms as transforms

#For converting the dataset to torchvision dataset format
class VowelConsonantDataset(Dataset):
    """Image dataset whose labels are encoded in the file names.

    Every file found under ``file_path`` (sub-directories included) is one
    sample. In training mode the first two ``_``-separated tokens of the file
    name are treated as class names and returned as a pair of one-hot vectors;
    otherwise the file name itself is returned next to the image.

    Args:
        file_path: Root directory that is walked for image files.
        train: When True, build ``classes_mapping`` and return label tensors.
        transform: Optional callable applied to the loaded RGB PIL image.

    Attributes:
        file_names: Base name of every sample, index-aligned with ``file_paths``.
        file_paths: Full path of every sample.
        classes_mapping: Class name -> integer id (only set when ``train``).
    """

    def __init__(self, file_path,train=True,transform=None):
        self.transform = transform
        self.file_path = file_path
        self.train = train
        # os.walk descends into sub-directories, so remember where each file
        # actually lives instead of assuming it sits directly in file_path.
        # Sorting makes the index -> file assignment independent of the
        # order in which the OS lists directory entries.
        found = sorted(
            (os.path.join(folder, name), name)
            for folder, _, names in os.walk(self.file_path)
            for name in names
        )
        self.file_paths = [path for path, _ in found]
        self.file_names = [name for _, name in found]
        self.len = len(self.file_names)
        if self.train:
            self.classes_mapping = self.get_classes()

    def __len__(self):
        """Return the number of files found under ``file_path``."""
        return len(self.file_names)

    def __getitem__(self, index):
        """Load one sample.

        Returns:
            ``(image, label)`` in training mode, where ``label`` is a
            ``(2, 10)`` float tensor holding two one-hot rows, or
            ``(image, file_name)`` otherwise.
        """
        file_name = self.file_names[index]
        image_data = self.pil_loader(self.file_paths[index])
        if self.transform:
            image_data = self.transform(image_data)
        if not self.train:
            return image_data, file_name

        tokens = file_name.split("_")
        Y1 = self.classes_mapping[tokens[0]]
        Y2 = self.classes_mapping[tokens[1]]
        z1, z2 = torch.zeros(10), torch.zeros(10)
        # classes_mapping numbers all class names in sorted order; the "- 10"
        # assumes the first-token classes received ids 10-19, i.e. that ten
        # second-token classes sort before them.
        # TODO confirm against the dataset's file naming scheme.
        z1[Y1 - 10], z2[Y2] = 1, 1
        label = torch.stack([z1, z2])
        return image_data, label

    def pil_loader(self,path):
        """Open the image at ``path`` and return it converted to RGB."""
        # Convert inside the ``with`` block: Image.open is lazy and needs the
        # file handle to still be open when the pixel data is read.
        with open(path, 'rb') as f:
            img = Image.open(f)
            return img.convert('RGB')

    def get_classes(self):
        """Map every class name found in the file names to an integer id.

        Class names are the first two ``_``-separated tokens of each file
        name; ids follow the sorted order of the distinct names.
        """
        names = set()
        for file_name in self.file_names:
            tokens = file_name.split("_")
            names.update((tokens[0], tokens[1]))
        return {cl: i for i, cl in enumerate(sorted(names))}
    

In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import torchvision
import matplotlib.pyplot as plt
from torchvision import datasets

import torchvision.transforms as transforms

import numpy as np
import pandas as pd

# True when PyTorch can see a CUDA device.
train_on_gpu = torch.cuda.is_available()

In [4]:
import importlib
if importlib.util.find_spec('mlflow') is None:
  !pip install mlflow
import mlflow
import mlflow.pytorch

Collecting mlflow
[?25l  Downloading https://files.pythonhosted.org/packages/2d/bb/c79f745214c39dd70b8596b8341c4a6f93ec96f6ed7c7a769c6a826d215f/mlflow-1.9.1-py3-none-any.whl (11.9MB)
[K     |████████████████████████████████| 12.0MB 255kB/s 
Collecting azure-storage-blob>=12.0
[?25l  Downloading https://files.pythonhosted.org/packages/6d/3d/31614573e8a197db12d8ab47a7fd813f15bd4a4b5c64e85d23b865de5b9b/azure_storage_blob-12.3.2-py2.py3-none-any.whl (280kB)
[K     |████████████████████████████████| 286kB 35.6MB/s 
Collecting sqlalchemy<=1.3.13
[?25l  Downloading https://files.pythonhosted.org/packages/af/47/35edeb0f86c0b44934c05d961c893e223ef27e79e1f53b5e6f14820ff553/SQLAlchemy-1.3.13.tar.gz (6.0MB)
[K     |████████████████████████████████| 6.0MB 41.6MB/s 
[?25hCollecting gunicorn; platform_system != "Windows"
[?25l  Downloading https://files.pythonhosted.org/packages/69/ca/926f7cd3a2014b16870086b2d0fdc84a9e49473c68a8dff8b57f7c156f43/gunicorn-20.0.4-py2.py3-none-any.whl (77kB)
[K 

In [5]:
# Mount Google Drive at /content/drive so files stored there can be read from
# this runtime (the call prompts for an authorization code).
from google.colab import drive
drive.mount('/content/drive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/drive


In [6]:
# Unpack the train and test archives from Drive into ./hin_classifier.
# unzip flags: -n never overwrites files that already exist, -q suppresses the
# per-file listing.
!mkdir -p hin_classifier
!unzip -nq "/content/drive/My Drive/hin_classifier/train.zip" -d hin_classifier
!unzip -nq "/content/drive/My Drive/hin_classifier/test.zip" -d hin_classifier

In [7]:
# Use the first CUDA device when one is visible, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print(device)

cuda:0


In [8]:
# The only preprocessing step: turn each loaded PIL image into a tensor.
transform = transforms.Compose([transforms.ToTensor()])

In [9]:
# Labelled images: 90% are used for training, the remainder for validation.
full_data = VowelConsonantDataset("hin_classifier/train",train=True,transform=transform)
train_size = int(0.9 * len(full_data))
# Size of the validation split (variable name kept for compatibility).
test_size = len(full_data) - train_size

# Seed the split so the same index partition is produced on every run;
# an unseeded random_split gives a different validation set each time.
SPLIT_SEED = 0
train_data, validation_data = random_split(
    full_data,
    [train_size, test_size],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)

train_loader = torch.utils.data.DataLoader(train_data, batch_size=60, shuffle=True)
# Validation results do not depend on sample order, so this loader is not
# shuffled; that also keeps it from consuming the global RNG on each pass.
validation_loader = torch.utils.data.DataLoader(validation_data, batch_size=60, shuffle=False)

In [10]:
# Unlabelled test images: with train=False the dataset yields
# (image, file_name) pairs, and the loader keeps them in dataset order.
test_data = VowelConsonantDataset(
    "hin_classifier/test",
    train=False,
    transform=transform,
)
test_loader = DataLoader(test_data, batch_size=60, shuffle=False)

In [11]:
def label_tensor(actual_labels):
    """Convert a sequence of label vectors into a LongTensor of class indices.

    For every row in ``actual_labels`` the position of its largest entry along
    the last dimension is taken, so one-hot rows become their class index.
    """
    indices = []
    for row in actual_labels:
        _, position = torch.max(row, dim=-1)
        indices.append(position.item())
    return torch.LongTensor(indices)

In [13]:
class Params:
    """Plain container for the run settings.

    Attributes:
        batch_size: Batch size setting.
        epochs: Epoch count setting.
        seed: Random seed setting.
        log_interval: Number of batches between two log lines.
    """

    def __init__(self, batch_size, epochs, seed, log_interval):
        self.batch_size, self.epochs = batch_size, epochs
        self.seed, self.log_interval = seed, log_interval

# Run settings: batch_size=256, epochs=4, seed=0, log_interval=20.
# NOTE(review): in the cells shown here only log_interval is read (by train());
# the data loaders are built with batch_size=60 and training runs for
# max_epochs — confirm whether batch_size/epochs/seed should drive those.
args = Params(256, 4, 0, 20)

In [12]:
class FeedForwardNetwork(nn.Module):
    """Fully connected classifier mapping a flattened image to 10 logits.

    Layer widths are 12288 -> 2 -> 2 -> 10 with a ReLU after each of the first
    two linear layers.
    """

    def __init__(self):
        super().__init__()
        in_features = 12288  # 64 x 64 x 3 = 12288
        hidden = 2
        layers = [
            nn.Linear(in_features, hidden),
            nn.ReLU(),
            nn.Linear(hidden, hidden),
            nn.ReLU(),
            nn.Linear(hidden, 10),
        ]
        self.classifier = nn.Sequential(*layers)

    def forward(self, x):
        """Flatten every sample in the batch and return the classifier output."""
        batch = x.size(0)
        flat = x.view(batch, -1)
        return self.classifier(flat)

In [20]:
def train(network, optimise, epochs, compute_loss):
    """Train ``network`` on the notebook-level ``train_loader``.

    Relies on the globals ``train_loader``, ``device``, ``args`` (only
    ``args.log_interval`` is read), ``label_tensor`` and ``mlflow``.

    Args:
        network: Module to optimise; it is switched to training mode.
        optimise: Optimizer already bound to ``network``'s parameters.
        epochs: Number of full passes over ``train_loader``.
        compute_loss: Callable ``(outputs, targets) -> loss tensor``.

    Every ``args.log_interval`` batches the scaled batch loss is logged to
    MLflow as ``train_loss`` (with the global batch index as step) and a
    progress line is printed.
    """
    network.train()
    batches_per_epoch = len(train_loader)
    for epoch in range(epochs):
        for batch_id, (inputs, labels) in enumerate(train_loader):
            optimise.zero_grad()
            inputs = inputs.to(device)
            outputs = network(inputs)

            # Swap the first two label axes so that index 0 / 1 selects the
            # first / second set of label rows for the whole batch.
            labels_reshaped = labels.permute(1, 0, 2)
            vow_labels = label_tensor(labels_reshaped[0]).to(device)
            con_labels = label_tensor(labels_reshaped[1]).to(device)

            # NOTE(review): both targets are scored against the same logits;
            # confirm a single shared output head is intended.
            loss = compute_loss(outputs, vow_labels)
            loss += compute_loss(outputs, con_labels)

            loss.backward()
            optimise.step()

            if batch_id % args.log_interval == 0:
                batch_loss = loss.item()
                # Global batch index: gives the logged metric a meaningful,
                # monotonically increasing x-axis across epochs.
                pos = epoch * batches_per_epoch + batch_id
                mlflow.log_metric('train_loss', batch_loss/len(inputs)*1000, step=pos)

                print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.3f}'.format(
                        epoch, batch_id * len(inputs), len(train_loader.dataset),
                        100. * batch_id / batches_per_epoch, batch_loss))

In [16]:
# Number of training epochs: 16 when CUDA is available, 5 otherwise.
max_epochs = 16 if torch.cuda.is_available() else 5

In [21]:
%%time
# Build the network on the selected device and train it for max_epochs epochs
# with Adam (default settings) and cross-entropy loss.
ffn = FeedForwardNetwork().to(device)
# NOTE(review): train() never writes to loss_epoch_arr in the cells shown
# here — confirm whether a later cell still needs it.
loss_epoch_arr = []
train(ffn, optim.Adam(ffn.parameters()), max_epochs,nn.CrossEntropyLoss())

CPU times: user 1min 40s, sys: 5.3 s, total: 1min 45s
Wall time: 1min 45s
