In [1]:
import os
from PIL import Image
import matplotlib.pyplot as plt

import torch
import torchvision
from torch.utils.data import DataLoader, Dataset, random_split
import torchvision.transforms as transforms

#For converting the dataset to torchvision dataset format
class VowelConsonantDataset(Dataset):
    def __init__(self, file_path,train=True,transform=None):
        self.transform = transform
        self.file_path=file_path
        self.train=train
        self.file_names=[file for _,_,files in os.walk(self.file_path) for file in files]
        self.len = len(self.file_names)
        if self.train:
            self.classes_mapping=self.get_classes()
    def __len__(self):
        return len(self.file_names)
    
    def __getitem__(self, index):
        file_name=self.file_names[index]
        image_data=self.pil_loader(self.file_path+"/"+file_name)
        if self.transform:
            image_data = self.transform(image_data)
        if self.train:
            file_name_splitted=file_name.split("_")
            Y1 = self.classes_mapping[file_name_splitted[0]]
            Y2 = self.classes_mapping[file_name_splitted[1]]
            z1,z2=torch.zeros(10),torch.zeros(10)
            z1[Y1-10],z2[Y2]=1,1
            label=torch.stack([z1,z2])

            return image_data, label

        else:
            vow_test_tensor, con_test_tensor = torch.zeros(10,dtype=torch.int64), torch.zeros(10,dtype=torch.int64)
            numeric = file_name.split('.')[0]
            if len(numeric) < 4:
              numeric = '0'*(4-len(numeric))+numeric
            if numeric == '10000':
              numeric = '9999'
            vow_test_tensor[int(numeric[0])] = 1
            con_test_tensor[int(numeric[1])] = 1
            test_label = torch.stack([vow_test_tensor,con_test_tensor])
            return image_data, test_label
          
    def pil_loader(self,path):
        with open(path, 'rb') as f:
            img = Image.open(f)
            return img.convert('RGB')

      
    def get_classes(self):
        classes=[]
        for name in self.file_names:
            name_splitted=name.split("_")
            classes.extend([name_splitted[0],name_splitted[1]])
        classes=list(set(classes))
        classes_mapping={}
        for i,cl in enumerate(sorted(classes)):
            classes_mapping[cl]=i
        return classes_mapping
    

In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import torchvision
import matplotlib.pyplot as plt
from torchvision import datasets

import torchvision.transforms as transforms

import numpy as np
import pandas as pd

train_on_gpu = torch.cuda.is_available()

In [3]:
import importlib
if importlib.util.find_spec('mlflow') is None:
  !pip install mlflow
import mlflow
import mlflow.pytorch

In [4]:
if importlib.util.find_spec('google.colab'):
  from google.colab import drive
  drive.mount('/content/drive')

In [5]:
!mkdir -p hin_classifier
!unzip -nq "/content/drive/My Drive/hin_classifier/train.zip" -d hin_classifier
!unzip -nq "/content/drive/My Drive/hin_classifier/test.zip" -d hin_classifier

/bin/sh: 1: unzip: Exec format error
/bin/sh: 1: unzip: Exec format error


In [6]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

cpu


In [7]:
transform = transforms.Compose([
    transforms.ToTensor()])

In [8]:
full_data = VowelConsonantDataset("hin_classifier/train",train=True,transform=transform)
train_size = int(0.9 * len(full_data))
test_size = len(full_data) - train_size

train_data, validation_data = random_split(full_data, [train_size, test_size])

train_loader = torch.utils.data.DataLoader(train_data, batch_size=60, shuffle=True)
validation_loader = torch.utils.data.DataLoader(validation_data, batch_size=60, shuffle=True)

In [9]:
test_data = VowelConsonantDataset("hin_classifier/test",train=False,transform=transform)
test_loader = torch.utils.data.DataLoader(test_data, batch_size=60,shuffle=False)

In [10]:
def label_tensor(actual_labels):
    return torch.LongTensor([torch.max(labels, dim = -1)[1].item() for labels in actual_labels])

In [11]:
class Params(object):
    def __init__(self, batch_size, epochs, seed, log_interval):
        self.batch_size = batch_size
        self.epochs = epochs
        self.seed = seed
        self.log_interval = log_interval

max_epochs = 16 if torch.cuda.is_available() else 4
args = Params(256, max_epochs, 0, 20)

In [12]:
class FeedForwardNetwork(nn.Module):
    def __init__(self): 
        super(FeedForwardNetwork, self).__init__()
        self.classifier = nn.Sequential(
            nn.Linear(12288, 2),  # 64 x 64 x 3 = 12288
            nn.ReLU(),
            nn.Linear(2, 2),
            nn.ReLU(),
            nn.Linear(2, 10)
        )
             
    def forward(self, x):
        x = x.view(x.size(0), -1)
        x = self.classifier(x)
        return x

In [13]:
def train(network, optimise, epochs, compute_loss):
    
  network.train()
  for epoch in range(epochs):   
    for batch_id, data in enumerate(train_loader):
      inputs, labels = data
              
      optimise.zero_grad()
      inputs = inputs.to(device)
      outputs = network(inputs)
                    
      labels_reshaped = labels.permute(1,0,2)
      vow_labels = label_tensor(labels_reshaped[0])
      con_labels = label_tensor(labels_reshaped[1])
      vow_labels = vow_labels.to(device)
      con_labels = con_labels.to(device)

      loss = compute_loss(outputs, vow_labels)
      loss += compute_loss(outputs, con_labels)


      loss.backward()
      optimise.step()
        
      if batch_id % args.log_interval == 0:
        pos = epoch * len(train_loader) + batch_id
        mlflow.log_metric('train_loss', loss.data.item()/len(inputs)*1000)
            
        print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.3f}'.format(
                epoch, batch_id * len(inputs), len(train_loader.dataset),
                100. * batch_id / len(train_loader), loss.data.item()))

In [14]:
%%time
ffn = FeedForwardNetwork().to(device)
loss_epoch_arr = []
train(ffn, optim.Adam(ffn.parameters()), max_epochs,nn.CrossEntropyLoss())

CPU times: user 48.2 s, sys: 12.4 s, total: 1min
Wall time: 45.4 s
