In [1]:
import numpy as np
import pandas as pd
import torch.nn as nn
import torch
import torchvision
from sklearn.model_selection import train_test_split

In [11]:
root = "./Digitrecognizer/"
train_data = pd.read_csv(root+'train.csv',dtype=np.float32)
test_data = pd.read_csv(root+'test.csv',dtype=np.float32)

In [3]:
train_data.columns

Index(['label', 'pixel0', 'pixel1', 'pixel2', 'pixel3', 'pixel4', 'pixel5',
       'pixel6', 'pixel7', 'pixel8',
       ...
       'pixel774', 'pixel775', 'pixel776', 'pixel777', 'pixel778', 'pixel779',
       'pixel780', 'pixel781', 'pixel782', 'pixel783'],
      dtype='object', length=785)

In [4]:
train_data.head()

Unnamed: 0,label,pixel0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,...,pixel774,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783
0,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,4,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [5]:
train_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 42000 entries, 0 to 41999
Columns: 785 entries, label to pixel783
dtypes: int64(785)
memory usage: 251.5 MB


In [6]:
test_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 28000 entries, 0 to 27999
Columns: 784 entries, pixel0 to pixel783
dtypes: int64(784)
memory usage: 167.5 MB


## Preparing Dataset
* What we are doing here is taking the raw dataset and splitting into targets and features. Dividing by 255 makes each pixel value to scale between 0 and 1 instead of 0 and 255, which helps in training our model. This step in Machine Learning is generally known as Normalization. Then we split into train and test sets using sklearn's train_test_split function.

* Converting the numpy arrays into PyTorch Tensors using from_numpy function.

* Batch size is set. The batch size is usually set between 64 and 256. The batch size does have an effect on the final test accuracy. One way to think about it is that smaller batches means that the number of parameter updates per epoch is greater. 

* To pass our data into our PyTorch models we need to convert it to a PyTorch Dataset. A Tensor Dataset in this case. 

* We have the training data loaded into trainloader and we can make an iterator with iter(trainloader) that can help us grab data. Later, we'll use this to loop through the dataset for training. Each time we can pull out data of the size of the batch that is defined.

In [12]:
target = train_data.label.values

In [13]:
features = train_data.loc[:,train_data.columns!='label'].values/255

In [14]:
X_train,X_test,Y_train,Y_test = train_test_split(features,target,test_size=0.2,random_state=42)

In [15]:
len(X_train)

33600

In [16]:
train_inputs = torch.from_numpy(X_train)
train_labels = torch.from_numpy(Y_train).type(torch.LongTensor)

test_inputs = torch.from_numpy(X_test)
test_labels = torch.from_numpy(Y_test).type(torch.LongTensor)

In [17]:
# Set batch size
batch_size = 256

# Pytorch train and test sets
train = torch.utils.data.TensorDataset(train_inputs,train_labels)
test = torch.utils.data.TensorDataset(test_inputs,test_labels)

# data loader
train_loader = torch.utils.data.DataLoader(train, batch_size = batch_size, shuffle = True)
test_loader = torch.utils.data.DataLoader(test, batch_size = batch_size, shuffle = True)

# Using ANN Architecture

In [47]:
class ANN(nn.Module):
    def __init__(self):
        super(ANN,self).__init__()
        # input_features = 28*28*1
        self.fc1 = nn.Linear(28*28*1,392)
        self.fc2 = nn.Linear(392,196)
        self.fc3 = nn.Linear(196,98)
        self.fc4 = nn.Linear(98,49)
        self.fc5 = nn.Linear(49,10)
        self.log_softmax = nn.functional.log_softmax
    
    def forward(self,x):
        x = nn.functional.relu(self.fc1(x))
        x = nn.functional.relu(self.fc2(x))
        x = nn.functional.relu(self.fc3(x))
        x = nn.functional.relu(self.fc4(x))
        out = self.log_softmax(self.fc5(x),dim=1)
        return out
        

In [71]:
learning_rate = 0.0015
batch_size = 100
training_epochs = 25
ANN_model = ANN()
criterion = nn.NLLLoss()
optimizer = torch.optim.Adam(ANN_model.parameters(),lr=learning_rate)

In [69]:
import time

In [72]:
training_time = time.time()
for epochs in range(training_epochs):
    print("Training of epoch {} started".format(epochs+1))
    avg_cost = 0
    total_batch = len(X_train)//batch_size
    start_time = time.time()
    for inputs,labels in train_loader:
        optimizer.zero_grad()
        output = ANN_model(inputs)
        loss = criterion(output,labels)
        loss.backward()
        optimizer.step()
        avg_cost+=loss.data / batch_size
    print("[Epoch: {:<4}] cost = {:_<4.3} took {}s".format(epochs + 1, avg_cost.item(),time.time()-start_time))

print('Learning Finished! in {}s'.format(time.time()-training_time))

Training of epoch 1 started
[Epoch: 1   ] cost = 0.822 took 2.118333101272583s
Training of epoch 2 started
[Epoch: 2   ] cost = 0.265 took 2.3626821041107178s
Training of epoch 3 started
[Epoch: 3   ] cost = 0.171 took 2.2400095462799072s
Training of epoch 4 started
[Epoch: 4   ] cost = 0.13 took 2.1273109912872314s
Training of epoch 5 started
[Epoch: 5   ] cost = 0.0931 took 2.118335723876953s
Training of epoch 6 started
[Epoch: 6   ] cost = 0.0797 took 2.477375030517578s
Training of epoch 7 started
[Epoch: 7   ] cost = 0.0597 took 2.352707862854004s
Training of epoch 8 started
[Epoch: 8   ] cost = 0.0479 took 2.3347561359405518s
Training of epoch 9 started
[Epoch: 9   ] cost = 0.0331 took 2.312814950942993s
Training of epoch 10 started
[Epoch: 10  ] cost = 0.0346 took 2.438478708267212s
Training of epoch 11 started
[Epoch: 11  ] cost = 0.0285 took 2.636948585510254s
Training of epoch 12 started
[Epoch: 12  ] cost = 0.0155 took 2.470393180847168s
Training of epoch 13 started
[Epoch: 1

In [73]:
# Turn off gradients for validation
test_loss = 0
accuracy = 0
with torch.no_grad():
    ANN_model.eval()
for images, labels in test_loader:
    log_ps = ANN_model(images)
    test_loss += criterion(log_ps, labels)

    ps = torch.exp(log_ps)
    # Get our top predictions
    top_p, top_class = ps.topk(1, dim=1)
    equals = top_class == labels.view(*top_class.shape)
    accuracy += torch.mean(equals.type(torch.FloatTensor))
print("Accuracy of our model on test set using ANN architecture is ", (accuracy/len(test_loader))*100)

Accuracy of our model on test set using ANN architecture is  tensor(97.4587)


# Using CNN Architecture

In [26]:
target1 = target
features1 = features

In [35]:
features1 = features.reshape(features.shape[0],1,28,28)
print(features.shape," to ",features1.shape)

(42000, 784)  to  (42000, 1, 28, 28)


In [36]:
X_train1,X_test1,Y_train1,Y_test1 = train_test_split(features1,target1,test_size=0.2,random_state=42)

In [37]:
#converting numpy array to tensor
train_inputs1 = torch.from_numpy(X_train1)
train_labels1 = torch.from_numpy(Y_train1).type(torch.LongTensor)

test_inputs1 = torch.from_numpy(X_test1)
test_labels1 = torch.from_numpy(Y_test1).type(torch.LongTensor)

In [38]:
# Set batch size
batch_size = 256

# Pytorch train and test sets
train1 = torch.utils.data.TensorDataset(train_inputs1,train_labels1)
test1 = torch.utils.data.TensorDataset(test_inputs1,test_labels1)

# data loader
train_loader1 = torch.utils.data.DataLoader(train1, batch_size = batch_size, shuffle = True)
test_loader1 = torch.utils.data.DataLoader(test1, batch_size = batch_size, shuffle = True)

In [50]:
class CNN(nn.Module):
    def __init__(self):
        super(CNN,self).__init__()
        self.layer1 = nn.Sequential(nn.Conv2d(in_channels=1,out_channels=32,kernel_size=5,stride=1,padding=2),
                                    nn.ReLU(),
                                    nn.MaxPool2d(stride=2,kernel_size=2))
        self.layer2 = nn.Sequential(nn.Conv2d(in_channels=32,out_channels=64,kernel_size=3,stride=1,padding=1),
                                   nn.ReLU(),
                                   nn.MaxPool2d(kernel_size=2,stride=2))
        self.fc1 = nn.Linear(in_features=7*7*64,out_features=1500)
        self.fc2 = nn.Linear(in_features=1500,out_features=800)
        self.fc3 = nn.Linear(800,400)
        self.fc4 = nn.Linear(400,100)
        self.fc5 = nn.Linear(100,10)
        self.softmax = nn.functional.log_softmax
        nn.init.xavier_uniform_(self.fc5.weight)
    
    def forward(self,x):
        x = self.layer1(x)
        x = self.layer2(x)
        x = x.view(x.size(0),-1)
        x = nn.functional.relu(self.fc1(x))
        x = nn.functional.relu(self.fc2(x))
        x = nn.functional.relu(self.fc3(x))
        x = nn.functional.relu(self.fc4(x))
        x = self.fc5(x)
        out = self.softmax(x,dim=1)
        return out
        

In [51]:
learning_rate = 0.0015
batch_size = 256
training_epochs = 25
CNN_model = CNN()

In [52]:
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(CNN_model.parameters(),lr=learning_rate)

In [42]:
import time

In [53]:
training_time = time.time()
for epochs in range(training_epochs):
    print("Training of epoch {} started".format(epochs+1))
    avg_cost = 0
    total_batch = len(X_train)//batch_size
    start_time = time.time()
    for inputs,labels in train_loader1:
        optimizer.zero_grad()
        output = CNN_model(inputs)
        loss = criterion(output,labels)
        loss.backward()
        optimizer.step()
        avg_cost+=loss.data/batch_size
    print("[Epoch: {:<4}] cost = {:_<4.3} took {}s".format(epochs + 1, avg_cost.item(),time.time()-start_time))

print('Learning Finished! in {}s'.format(time.time()-training_time))
    
    

Training of epoch 1 started
[Epoch: 1   ] cost = 0.236 took 115.31373143196106s
Training of epoch 2 started
[Epoch: 2   ] cost = 0.0387 took 114.65327334403992s
Training of epoch 3 started
[Epoch: 3   ] cost = 0.0263 took 113.97079849243164s
Training of epoch 4 started
[Epoch: 4   ] cost = 0.0198 took 113.874835729599s
Training of epoch 5 started
[Epoch: 5   ] cost = 0.0141 took 114.50114679336548s
Training of epoch 6 started
[Epoch: 6   ] cost = 0.0125 took 132.70922207832336s
Training of epoch 7 started
[Epoch: 7   ] cost = 0.00967 took 128.55888104438782s
Training of epoch 8 started
[Epoch: 8   ] cost = 0.00665 took 134.20919823646545s
Training of epoch 9 started
[Epoch: 9   ] cost = 0.00585 took 136.073903799057s
Training of epoch 10 started
[Epoch: 10  ] cost = 0.00795 took 137.2274293899536s
Training of epoch 11 started
[Epoch: 11  ] cost = 0.00653 took 131.2337040901184s
Training of epoch 12 started
[Epoch: 12  ] cost = 0.00496 took 130.7832531929016s
Training of epoch 13 starte

In [58]:
# Turn off gradients for validation
test_loss1 = 0
accuracy1 = 0
with torch.no_grad():
    CNN_model.eval()
for images, labels in test_loader1:
    out = CNN_model(images)
    test_loss1 += criterion(out, labels)

    ps = torch.exp(out)
    # Get our top predictions
    top_p, top_class = ps.topk(1, dim=1)
    equals1 = top_class == labels.view(*top_class.shape)
    accuracy1 += torch.mean(equals1.type(torch.FloatTensor))
print("Accuracy of our model on test set using CNN architecture is ", (accuracy1/len(test_loader1))*100)

Accuracy of our model on test set using CNN architecture is  tensor(99.0148)


In [73]:
torch.save(CNN_model,"./Kaggle/digitrecognizerCNN.pt")

  "type " + obj.__name__ + ". It won't be checked "


In [None]:
modelreloaded = torch.load(Path)