In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load in 

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from fastai import *
from fastai.vision import *
# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# Any results you write to the current directory are saved as output.

/kaggle/input/Kannada-MNIST/Dig-MNIST.csv
/kaggle/input/Kannada-MNIST/train.csv
/kaggle/input/Kannada-MNIST/sample_submission.csv
/kaggle/input/Kannada-MNIST/test.csv


In [2]:
def get_data_labels(csv,label):
    fileraw = pd.read_csv(csv)
    labels = fileraw[label].to_numpy()
    data = fileraw.drop([label],axis=1).to_numpy(dtype=np.float32).reshape((fileraw.shape[0],28,28))
    data = np.expand_dims(data, axis=1)
    return data, labels

DATAPATH = Path('/kaggle/input/Kannada-MNIST')

In [3]:
train_data, train_labels = get_data_labels(DATAPATH/'train.csv','label')
test_data, test_labels = get_data_labels(DATAPATH/'test.csv','id')
dig_data, dig_labels = get_data_labels(DATAPATH/'Dig-MNIST.csv','label')

In [4]:
print(f' Train:\tdata shape {train_data.shape}\tlabel shape {train_labels.shape}\n \
Test:\tdata shape {test_data.shape}\tlabel shape {test_labels.shape}\n \
Dig-MNIST:\tdata shape {dig_data.shape}\tlabel shape {dig_labels.shape}')

 Train:	data shape (60000, 1, 28, 28)	label shape (60000,)
 Test:	data shape (5000, 1, 28, 28)	label shape (5000,)
 Dig-MNIST:	data shape (10240, 1, 28, 28)	label shape (10240,)


In [5]:
np.random.seed(60)

ran_10_pct_idx = (np.random.random_sample(train_labels.shape)) < .001

train_90_labels = train_labels[np.invert(ran_10_pct_idx)]
train_90_data = train_data[np.invert(ran_10_pct_idx)]

valid_10_labels = train_labels[ran_10_pct_idx]
valid_10_data = train_data[ran_10_pct_idx]

In [6]:
class ArrayDataset(Dataset):
    "Dataset for numpy arrays based on fastai example: "
    def __init__(self, x, y):
        self.x, self.y = x, y
        self.c = len(np.unique(y))
    
    def __len__(self):
        return len(self.x)
    
    def __getitem__(self, i):
        return self.x[i], self.y[i]

In [7]:
train_ds = ArrayDataset(train_90_data, train_90_labels)
valid_ds = ArrayDataset(valid_10_data, valid_10_labels)
dig_ds = ArrayDataset(dig_data, dig_labels)
test_ds = ArrayDataset(test_data, test_labels)
bs = 256

In [8]:
databunch = DataBunch.create(train_ds, valid_ds, test_ds=test_ds, bs=bs)
def conv2(ni, nf, stride=2, ks=5):
    return conv_layer(ni, nf, stride=stride, ks=ks)

In [9]:
my_architecture = nn.Sequential(
    conv2(1,32,stride=1,ks=5),
    conv2(32,32,stride=1,ks=5),
    conv2(32,32,stride=2,ks=8),
    nn.Dropout(0.4),
    
    conv2(32,64,stride=1,ks=5),
    conv2(64,64,stride=1,ks=5),
    conv2(64,64,stride=2,ks=5),
    nn.Dropout(0.4),
    
    Flatten(),
    nn.Linear(3136, 256),
    relu(inplace=True),
    nn.BatchNorm1d(256),
    nn.Dropout(0.4),
    nn.Linear(256,10)
)

learn = Learner(databunch, my_architecture, loss_func = nn.CrossEntropyLoss(), metrics=[accuracy] )

In [10]:
learn.fit_one_cycle(5)

epoch,train_loss,valid_loss,accuracy,time
0,0.089237,0.116848,0.980392,05:33
1,0.03373,0.00597,1.0,05:34
2,0.0239,0.000326,1.0,05:33
3,0.014854,0.005033,1.0,05:35
4,0.006794,0.000128,1.0,05:38


In [11]:
preds, ids = learn.get_preds(DatasetType.Test)
y = torch.argmax(preds, dim=1)

In [12]:
submission = pd.DataFrame({ 'id': ids,'label': y })
submission.to_csv(path_or_buf ="submission.csv", index=False)