In [1]:
import os
from PIL import Image
import matplotlib.pyplot as plt

import torch
import torchvision
from torch.utils.data import DataLoader, Dataset, random_split
import torchvision.transforms as transforms

#For converting the dataset to torchvision dataset format
class VowelConsonantDataset(Dataset):
    def __init__(self, file_path,train=True,transform=None):
        self.transform = transform
        self.file_path=file_path
        self.train=train
        self.file_names=[file for _,_,files in os.walk(self.file_path) for file in files]
        self.len = len(self.file_names)
        if self.train:
            self.classes_mapping=self.get_classes()
    def __len__(self):
        return len(self.file_names)
    
    def __getitem__(self, index):
        file_name=self.file_names[index]
        image_data=self.pil_loader(self.file_path+"/"+file_name)
        if self.transform:
            image_data = self.transform(image_data)
        if self.train:
            file_name_splitted=file_name.split("_")
            Y1 = self.classes_mapping[file_name_splitted[0]]
            Y2 = self.classes_mapping[file_name_splitted[1]]
            z1,z2=torch.zeros(10),torch.zeros(10)
            z1[Y1-10],z2[Y2]=1,1
            label=torch.stack([z1,z2])

            return image_data, label

        else:
            vow_test_tensor, con_test_tensor = torch.zeros(10,dtype=torch.int64), torch.zeros(10,dtype=torch.int64)
            numeric = file_name.split('.')[0]
            if len(numeric) < 4:
              numeric = '0'*(4-len(numeric))+numeric
            if numeric == '10000':
              numeric = '9999'
            vow_test_tensor[int(numeric[0])] = 1
            con_test_tensor[int(numeric[1])] = 1
            test_label = torch.stack([vow_test_tensor,con_test_tensor])
            return image_data, test_label
          
    def pil_loader(self,path):
        with open(path, 'rb') as f:
            img = Image.open(f)
            return img.convert('RGB')

      
    def get_classes(self):
        classes=[]
        for name in self.file_names:
            name_splitted=name.split("_")
            classes.extend([name_splitted[0],name_splitted[1]])
        classes=list(set(classes))
        classes_mapping={}
        for i,cl in enumerate(sorted(classes)):
            classes_mapping[cl]=i
        return classes_mapping
    

In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import torchvision
import matplotlib.pyplot as plt
from torchvision import datasets

import torchvision.transforms as transforms

import numpy as np
import pandas as pd

train_on_gpu = torch.cuda.is_available()

In [3]:
import importlib
if importlib.util.find_spec('mlflow') is None:
  !pip install mlflow
import mlflow
import mlflow.pytorch

Collecting mlflow
[?25l  Downloading https://files.pythonhosted.org/packages/2d/bb/c79f745214c39dd70b8596b8341c4a6f93ec96f6ed7c7a769c6a826d215f/mlflow-1.9.1-py3-none-any.whl (11.9MB)
[K     |████████████████████████████████| 12.0MB 253kB/s 
[?25hCollecting azure-storage-blob>=12.0
[?25l  Downloading https://files.pythonhosted.org/packages/6d/3d/31614573e8a197db12d8ab47a7fd813f15bd4a4b5c64e85d23b865de5b9b/azure_storage_blob-12.3.2-py2.py3-none-any.whl (280kB)
[K     |████████████████████████████████| 286kB 40.6MB/s 
[?25hCollecting querystring-parser
  Downloading https://files.pythonhosted.org/packages/4a/fa/f54f5662e0eababf0c49e92fd94bf178888562c0e7b677c8941bbbcd1bd6/querystring_parser-1.2.4.tar.gz
Collecting gorilla
  Downloading https://files.pythonhosted.org/packages/e3/56/5a683944cbfc77e429c6f03c636ca50504a785f60ffae91ddd7f5f7bb520/gorilla-0.3.0-py2.py3-none-any.whl
Collecting databricks-cli>=0.8.7
[?25l  Downloading https://files.pythonhosted.org/packages/1e/57/5c2d6b83cb8

In [4]:
if importlib.util.find_spec('google.colab'):
  from google.colab import drive
  drive.mount('/content/drive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/drive


In [5]:
!mkdir -p hin_classifier
!unzip -nq "/content/drive/My Drive/hin_classifier/train.zip" -d hin_classifier
!unzip -nq "/content/drive/My Drive/hin_classifier/test.zip" -d hin_classifier

In [6]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

cuda:0


In [7]:
transform = transforms.Compose([
    transforms.ToTensor()])

In [8]:
class Params(object):
    def __init__(self, batch_size, epochs, seed, log_interval):
        self.batch_size = batch_size
        self.epochs = epochs
        self.seed = seed
        self.log_interval = log_interval

max_epochs = 16 if torch.cuda.is_available() else 2
args = Params(256, max_epochs, 0, 20)

In [9]:
full_data = VowelConsonantDataset("hin_classifier/train",train=True,transform=transform)
train_size = int(0.9 * len(full_data))
test_size = len(full_data) - train_size

train_data, validation_data = random_split(full_data, [train_size, test_size])

train_loader = torch.utils.data.DataLoader(train_data, batch_size=args.batch_size, shuffle=True)
validation_loader = torch.utils.data.DataLoader(validation_data, batch_size=args.batch_size, shuffle=True)

In [10]:
test_data = VowelConsonantDataset("hin_classifier/test",train=False,transform=transform)
test_loader = torch.utils.data.DataLoader(test_data, batch_size=args.batch_size,shuffle=False)

In [11]:
def label_tensor(actual_labels):
    return torch.LongTensor([torch.max(labels, dim = -1)[1].item() for labels in actual_labels])

In [12]:
class FeedForwardNetwork(nn.Module):
    def __init__(self): 
        super(FeedForwardNetwork, self).__init__()
        self.classifier = nn.Sequential(
            nn.Linear(12288, 48),  # 64 x 64 x 3 = 12288
            nn.ReLU(),
            nn.Linear(48, 24),
            nn.ReLU(),
            nn.Linear(24, 10)
        )
             
    def forward(self, x):
        x = x.view(x.size(0), -1)
        x = self.classifier(x)
        return x

In [13]:
def cumulative_loss(predicted, actual, compute_loss=nn.CrossEntropyLoss()):
  actual_reshaped = actual.permute(1,0,2)
  vow_labels = label_tensor(actual_reshaped[0])
  con_labels = label_tensor(actual_reshaped[1])
  vow_labels = vow_labels.to(device)
  con_labels = con_labels.to(device)

  loss = compute_loss(predicted, vow_labels)
  loss += compute_loss(predicted, con_labels)
  return loss


In [14]:
def train(network, optimise, epoch, compute_loss):
    
  network.train()
  for batch_id, data in enumerate(train_loader):
    inputs, labels = data
            
    optimise.zero_grad()
    inputs = inputs.to(device)
    outputs = network(inputs)
                  
    loss = cumulative_loss(outputs, labels)

    loss.backward()
    optimise.step()
      
    if batch_id % args.log_interval == 0:
      pos = epoch * len(train_loader) + batch_id
      mlflow.log_metric('train_loss', loss.data.item()/len(inputs)*1000)
          
      print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.3f}'.format(
              epoch, batch_id * len(inputs), len(train_loader.dataset),
              100. * batch_id / len(train_loader), loss.data.item()))

In [17]:
def test(network, epoch, compute_loss):
    
    network.eval()
    test_loss = 0
    correct = 0
    confusion_matrix = np.zeros([10, 10])
    
    with torch.no_grad():
        
        for inputs, labels in test_loader:
            inputs = inputs.to(device)
            outputs = network(inputs)
            test_loss += compute_loss(outputs, labels).data.item()
            pred = outputs.data.max(1)[1]
            labels_reshaped = labels.permute(1,0,2)
            vow_test = label_tensor(labels_reshaped[0].data).to(device)
            con_test = label_tensor(labels_reshaped[1].data).to(device)
            correct += pred.eq(vow_test).sum().item()
            correct += pred.eq(con_test).sum().item()

            
            for x, y in zip(pred.cpu().numpy(), labels.numpy()):
                confusion_matrix[x][y] += 1
            
        test_loss /= len(test_loader.dataset)
        test_accuracy = 100.0 * correct / len(test_loader.dataset)
        
        pos = (epoch + 1) * len(train_loader)
        mlflow.log_metric('test_loss', test_loss*1000)
        mlflow.log_metric('test_accuracy', test_accuracy)
        
        print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.
              format(test_loss, correct, len(test_loader.dataset), test_accuracy))
              
        if epoch == args.epochs:
            classes = np.arange(10)
            fig, ax = plt.subplots()
            im = ax.imshow(confusion_matrix, interpolation='nearest', cmap=plt.cm.Blues)
            ax.figure.colorbar(im, ax=ax)
            ax.set(xticks=np.arange(confusion_matrix.shape[1]),
                       yticks=np.arange(confusion_matrix.shape[0]),
                       xticklabels=classes, yticklabels=classes,
                       ylabel='True label',
                       xlabel='Predicted label',
                       title='Epoch %d' % epoch)
            thresh = confusion_matrix.max() / 2.
            for i in range(confusion_matrix.shape[0]):
                for j in range(confusion_matrix.shape[1]):
                    ax.text(j, i, int(confusion_matrix[i, j]),
                            ha="center", va="center",
                            color="white" if confusion_matrix[i, j] > thresh else "black")
              
            fig.tight_layout()
              
            image_path = 'images/%s.png' % (expt_id)
            plt.savefig(image_path)
            mlflow.log_artifact(image_path)


In [18]:
%%time
for epoch in range(args.epochs):
  ffn = FeedForwardNetwork().to(device)
  train(ffn, optim.Adam(ffn.parameters()), epoch,cumulative_loss)
  test(ffn, epoch,cumulative_loss)


Test set: Average loss: 0.0185, Accuracy: 1995/10000 (20%)


Test set: Average loss: 0.0185, Accuracy: 2000/10000 (20%)


Test set: Average loss: 0.0185, Accuracy: 2022/10000 (20%)


Test set: Average loss: 0.0185, Accuracy: 2071/10000 (21%)


Test set: Average loss: 0.0184, Accuracy: 2002/10000 (20%)


Test set: Average loss: 0.0184, Accuracy: 2085/10000 (21%)


Test set: Average loss: 0.0184, Accuracy: 1972/10000 (20%)


Test set: Average loss: 0.0184, Accuracy: 2021/10000 (20%)


Test set: Average loss: 0.0184, Accuracy: 2169/10000 (22%)


Test set: Average loss: 0.0184, Accuracy: 1994/10000 (20%)


Test set: Average loss: 0.0184, Accuracy: 1961/10000 (20%)


Test set: Average loss: 0.0184, Accuracy: 2000/10000 (20%)


Test set: Average loss: 0.0184, Accuracy: 2101/10000 (21%)


Test set: Average loss: 0.0184, Accuracy: 1968/10000 (20%)


Test set: Average loss: 0.0184, Accuracy: 2000/10000 (20%)


Test set: Average loss: 0.0185, Accuracy: 2000/10000 (20%)

CPU times: user 3min 35s

In [None]:
if importlib.util.find_spec('google.colab'):
  from google.colab import files
  files.download('mlruns')