In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load in 

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list the files in the input directory

import os
print(os.listdir("../input"))

# Any results you write to the current directory are saved as output.

['train', 'sample_submission.csv', 'test']


In [2]:
import os
from PIL import Image
import matplotlib.pyplot as plt
from tqdm import tqdm_notebook 
import copy

import torch
import torchvision
from torch.utils.data import DataLoader, Dataset, random_split
import torchvision.transforms as transforms
from torchvision import datasets

import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import numpy as np
import pandas as pd

In [3]:
torch.manual_seed(0)

<torch._C.Generator at 0x7fc89408b570>

In [4]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

cuda:0


In [5]:
#For converting the dataset to torchvision dataset format
class VowelConsonantDataset(Dataset):
    def __init__(self, file_path,train=True,transform=None):
        self.transform = transform
        self.file_path=file_path
        self.train=train
        self.file_names=[file for _,_,files in os.walk(self.file_path) for file in files]
        self.len = len(self.file_names)
        if self.train:
            self.classes_mapping=self.get_classes()
    def __len__(self):
        return len(self.file_names)
    
    def __getitem__(self, index):
        file_name=self.file_names[index]
        image_data=self.pil_loader(self.file_path+"/"+file_name)
        if self.transform:
            image_data = self.transform(image_data)
        if self.train:
            file_name_splitted=file_name.split("_")
            Y1 = self.classes_mapping[file_name_splitted[0]]
            Y2 = self.classes_mapping[file_name_splitted[1]]
            z1,z2=torch.zeros(10),torch.zeros(10)
            z1[Y1-10],z2[Y2]=1,1
            label=torch.stack([z1,z2])

            return image_data, label

        else:
            return image_data, file_name
          
    def pil_loader(self,path):
        with open(path, 'rb') as f:
            img = Image.open(f)
            return img.convert('RGB')

      
    def get_classes(self):
        classes=[]
        for name in self.file_names:
            name_splitted=name.split("_")
            classes.extend([name_splitted[0],name_splitted[1]])
        classes=list(set(classes))
        classes_mapping={}
        for i,cl in enumerate(sorted(classes)):
            classes_mapping[cl]=i
        return classes_mapping


In [6]:
transform = transforms.Compose([
    transforms.Resize(224),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

In [7]:
class Params(object):
    def __init__(self, batch_size, epochs, seed):
        self.batch_size = batch_size
        self.epochs = epochs
        self.seed = seed

args = Params(64, 40, 0)

In [8]:
full_data = VowelConsonantDataset("../input/train/train",train=True,transform=transform)
train_size = int(0.9 * len(full_data))
test_size = len(full_data) - train_size

train_data, validation_data = random_split(full_data, [train_size, test_size])

train_loader = torch.utils.data.DataLoader(train_data, batch_size=args.batch_size, shuffle=True)
validation_loader = torch.utils.data.DataLoader(validation_data, batch_size=args.batch_size, shuffle=True)

In [9]:
test_data = VowelConsonantDataset("../input/test/test",train=False,transform=transform)
test_loader = torch.utils.data.DataLoader(test_data, batch_size=args.batch_size,shuffle=False)

In [10]:
class CNN_Model(nn.Module):
    def __init__(self): 
        super(CNN_Model, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(64, 192, kernel_size=5, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(192, 384, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
        )
        self.fc = nn.Sequential(
            nn.Dropout(),
            nn.Linear(256 * 6 * 6, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True)
        )
        self.vowel_classifier = nn.Sequential(
            nn.Linear(4096, 10)
          )          
        self.consonant_classifier = nn.Sequential(
            nn.Linear(4096, 10)        
          )        
        
    def forward(self, x):
        x = self.features(x)
        #print(x.shape)
        x = x.view(x.size(0), -1)
        #print(x.shape)
        x = self.fc(x)
        #print(x.shape)        
        return self.vowel_classifier(x), self.consonant_classifier(x)

In [11]:
def train(epoch):
    
    model.train()
    
    for batch_id, data in enumerate(train_loader):
        inputs, labels = data        
        labels_vowel = labels[:, 0, :].data.max(1)[1]
        labels_consonant = labels[:, 1, :].data.max(1)[1]
        
        inputs = inputs.to(device)
        labels_vowel = labels_vowel.to(device)
        labels_consonant = labels_consonant.to(device)
        
        opt.zero_grad()
        outputs_vowel, outputs_consonant = model(inputs)
        loss = loss_fn(outputs_vowel, labels_vowel) + loss_fn(outputs_consonant, labels_consonant)
        loss.backward()
        opt.step()
        
        del inputs, labels_vowel, labels_consonant, outputs_vowel, outputs_consonant
        torch.cuda.empty_cache()

In [12]:
def validate(epoch):
    
    model.eval()
    validation_loss = 0
    correct = 0
    
    with torch.no_grad():
        
        for inputs, labels in validation_loader:
            labels_vowel = labels[:, 0, :].data.max(1)[1]
            labels_consonant = labels[:, 1, :].data.max(1)[1]
        
            inputs = inputs.to(device)
            labels_vowel = labels_vowel.to(device)
            labels_consonant = labels_consonant.to(device)
            
            outputs_vowel, outputs_consonant = model(inputs)
            loss = loss_fn(outputs_vowel, labels_vowel) + loss_fn(outputs_consonant, labels_consonant)
            validation_loss += loss.data.item()
            pred_vowel = outputs_vowel.data.max(1)[1]
            pred_consonant = outputs_consonant.data.max(1)[1]
            correct += (pred_vowel.eq(labels_vowel.data) & pred_consonant.eq(labels_consonant.data)).sum().item()           
            
            del inputs, labels_vowel, labels_consonant, outputs_vowel, outputs_consonant
            torch.cuda.empty_cache()            
            
        validation_loss /= len(validation_loader.dataset)
        validation_accuracy = 100.0 * correct / len(validation_loader.dataset)
        
        print('\nEpoch: {}, Validation set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.
              format(epoch, validation_loss, correct, len(validation_loader.dataset), validation_accuracy))  
        
        return validation_accuracy


In [13]:
def predict():
    model.eval()
    
    submission = {'ImageId': [], 'Class': [], 'Index': []}
    vowel_class_map = {0: 'V0', 1: 'V1', 2: 'V2', 3: 'V3', 4: 'V4', 5: 'V5', 
                       6: 'V6', 7: 'V7', 8: 'V8', 9: 'V9'}
    consonant_class_map = {0: 'C0', 1: 'C1', 2: 'C2', 3: 'C3', 4: 'C4', 5: 'C5', 
                       6: 'C6', 7: 'C7', 8: 'C8', 9: 'C9'}
    
    with torch.no_grad():
        
        for inputs, file_names in test_loader:
            inputs = inputs.to(device)
            submission['ImageId'].extend(list(file_names))
            submission['Index'].extend(list(int(file_name.split('.')[0]) for file_name in file_names))
            
            outputs_vowel, outputs_consonant = model(inputs)
            pred_vowel = list(outputs_vowel.data.max(1)[1].cpu().numpy())
            pred_consonant = list(outputs_consonant.data.max(1)[1].cpu().numpy())
            
            submission['Class'].extend(list('{}_{}'.format(vowel_class_map[v], consonant_class_map[c]) 
                                            for v, c in zip(pred_vowel, pred_consonant)))
            
    submission = pd.DataFrame(submission)
    submission = submission[['ImageId', 'Class', 'Index']]
    submission = submission.sort_values(['Index'])
    submission = submission.drop('Index', axis=1)
    submission.to_csv("submisision.csv", index=False)

In [14]:
def init_weights(m):
    if m in model.vowel_classifier or model.consonant_classifier:
        if type(m) == nn.Linear:
            nn.init.xavier_uniform_(m.weight, gain=1.0)
            m.bias.data.fill_(0.01)
    elif type(m) == nn.Conv2d or type(m) == nn.Linear:
        nn.init.kaiming_uniform_(m.weight, a=0, mode='fan_in', nonlinearity='relu')
        m.bias.data.fill_(0.01)

In [15]:
max_validation_accuracy = 0

lr = 0.015
momentum = 0.9
print('\nLR = %.3f, Momentum = %.3f\n' % (lr, momentum))

torch.manual_seed(args.seed)
model = CNN_Model()
model.apply(init_weights)
model.to(device)

loss_fn = nn.CrossEntropyLoss()
opt = optim.SGD(model.parameters(), lr=lr, momentum = momentum, nesterov=True)

for epoch in tqdm_notebook(range(1, args.epochs + 1), total=args.epochs, unit="epoch"):
    train(epoch)
    validation_accuracy = validate(epoch)
    
    if validation_accuracy > max_validation_accuracy:
        max_validation_accuracy = validation_accuracy
        best_model = copy.deepcopy(model.state_dict())
    print("Maximum validation accuracy so far: {:.0f}%".format(max_validation_accuracy))



LR = 0.015, Momentum = 0.900



HBox(children=(IntProgress(value=0, max=40), HTML(value='')))


Epoch: 1, Validation set: Average loss: 0.0718, Accuracy: 26/1000 (3%)

Maximum validation accuracy so far: 3%

Epoch: 2, Validation set: Average loss: 0.0667, Accuracy: 34/1000 (3%)

Maximum validation accuracy so far: 3%

Epoch: 3, Validation set: Average loss: 0.0727, Accuracy: 31/1000 (3%)

Maximum validation accuracy so far: 3%

Epoch: 4, Validation set: Average loss: 0.0424, Accuracy: 240/1000 (24%)

Maximum validation accuracy so far: 24%

Epoch: 5, Validation set: Average loss: 0.0291, Accuracy: 461/1000 (46%)

Maximum validation accuracy so far: 46%

Epoch: 6, Validation set: Average loss: 0.0227, Accuracy: 574/1000 (57%)

Maximum validation accuracy so far: 57%

Epoch: 7, Validation set: Average loss: 0.0176, Accuracy: 692/1000 (69%)

Maximum validation accuracy so far: 69%

Epoch: 8, Validation set: Average loss: 0.0166, Accuracy: 712/1000 (71%)

Maximum validation accuracy so far: 71%

Epoch: 9, Validation set: Average loss: 0.0145, Accuracy: 760/1000 (76%)

Maximum valida

In [16]:
model.load_state_dict(best_model)

opt = optim.SGD(model.parameters(), lr=0.001, momentum = 0.8, nesterov=True)
args.epochs = 20

for epoch in tqdm_notebook(range(1, args.epochs + 1), total=args.epochs, unit="epoch"):
    train(epoch)
    validation_accuracy = validate(epoch)

    if validation_accuracy > max_validation_accuracy:
        max_validation_accuracy = validation_accuracy
        best_model = copy.deepcopy(model.state_dict())
    print("Maximum validation accuracy so far: {:.0f}%".format(max_validation_accuracy))


HBox(children=(IntProgress(value=0, max=20), HTML(value='')))


Epoch: 1, Validation set: Average loss: 0.0101, Accuracy: 862/1000 (86%)

Maximum validation accuracy so far: 86%

Epoch: 2, Validation set: Average loss: 0.0098, Accuracy: 861/1000 (86%)

Maximum validation accuracy so far: 86%

Epoch: 3, Validation set: Average loss: 0.0096, Accuracy: 865/1000 (86%)

Maximum validation accuracy so far: 86%

Epoch: 4, Validation set: Average loss: 0.0094, Accuracy: 868/1000 (87%)

Maximum validation accuracy so far: 87%

Epoch: 5, Validation set: Average loss: 0.0096, Accuracy: 865/1000 (86%)

Maximum validation accuracy so far: 87%

Epoch: 6, Validation set: Average loss: 0.0093, Accuracy: 866/1000 (87%)

Maximum validation accuracy so far: 87%

Epoch: 7, Validation set: Average loss: 0.0094, Accuracy: 867/1000 (87%)

Maximum validation accuracy so far: 87%

Epoch: 8, Validation set: Average loss: 0.0094, Accuracy: 872/1000 (87%)

Maximum validation accuracy so far: 87%

Epoch: 9, Validation set: Average loss: 0.0095, Accuracy: 868/1000 (87%)

Maxim

In [17]:
model.load_state_dict(best_model)
predict()