In [1]:
import numpy as np
import pandas as pd
import torch.nn as nn
import torch
import torchvision
from sklearn.model_selection import train_test_split

In [3]:
root = "../DataSets/Digitrecognizer/"
train_data = pd.read_csv(root+'train.csv',dtype=np.float32)
test_data = pd.read_csv(root+'test.csv',dtype=np.float32)

In [None]:
train_data.columns

In [None]:
train_data.head()

In [None]:
train_data.info()

In [None]:
test_data.info()

## Preparing Dataset
* What we are doing here is taking the raw dataset and splitting into targets and features. Dividing by 255 makes each pixel value to scale between 0 and 1 instead of 0 and 255, which helps in training our model. This step in Machine Learning is generally known as Normalization. Then we split into train and test sets using sklearn's train_test_split function.

* Converting the numpy arrays into PyTorch Tensors using from_numpy function.

* Batch size is set. The batch size is usually set between 64 and 256. The batch size does have an effect on the final test accuracy. One way to think about it is that smaller batches means that the number of parameter updates per epoch is greater. 

* To pass our data into our PyTorch models we need to convert it to a PyTorch Dataset. A Tensor Dataset in this case. 

* We have the training data loaded into trainloader and we can make an iterator with iter(trainloader) that can help us grab data. Later, we'll use this to loop through the dataset for training. Each time we can pull out data of the size of the batch that is defined.

In [4]:
target = train_data.label.values

In [5]:
features = train_data.loc[:,train_data.columns!='label'].values/255

In [7]:
X_train,X_test,Y_train,Y_test = train_test_split(features,target,test_size=0.2,random_state=42)

In [8]:
len(X_train)

33600

In [9]:
train_inputs = torch.from_numpy(X_train)
train_labels = torch.from_numpy(Y_train).type(torch.LongTensor)

test_inputs = torch.from_numpy(X_test)
test_labels = torch.from_numpy(Y_test).type(torch.LongTensor)

In [10]:
# Set batch size
batch_size = 256

# Pytorch train and test sets
train = torch.utils.data.TensorDataset(train_inputs,train_labels)
test = torch.utils.data.TensorDataset(test_inputs,test_labels)

# data loader
train_loader = torch.utils.data.DataLoader(train, batch_size = batch_size, shuffle = True)
test_loader = torch.utils.data.DataLoader(test, batch_size = batch_size, shuffle = True)

# Using ANN Architecture

In [11]:
class ANN(nn.Module):
    def __init__(self):
        super(ANN,self).__init__()
        # input_features = 28*28*1
        self.fc1 = nn.Linear(28*28*1,392)
        self.fc2 = nn.Linear(392,196)
        self.fc3 = nn.Linear(196,98)
        self.fc4 = nn.Linear(98,49)
        self.fc5 = nn.Linear(49,10)
        self.log_softmax = nn.functional.log_softmax
    
    def forward(self,x):
        x = nn.functional.relu(self.fc1(x))
        x = nn.functional.relu(self.fc2(x))
        x = nn.functional.relu(self.fc3(x))
        x = nn.functional.relu(self.fc4(x))
        out = self.log_softmax(self.fc5(x),dim=1)
        return out
        

In [12]:
learning_rate = 0.0015
batch_size = 100
training_epochs = 25
ANN_model = ANN()
criterion = nn.NLLLoss()
optimizer = torch.optim.Adam(ANN_model.parameters(),lr=learning_rate)

In [13]:
import time

In [14]:
training_time = time.time()
for epochs in range(training_epochs):
    print("Training of epoch {} started".format(epochs+1))
    avg_cost = 0
    total_batch = len(X_train)//batch_size
    start_time = time.time()
    for inputs,labels in train_loader:
        optimizer.zero_grad()
        output = ANN_model(inputs)
        loss = criterion(output,labels)
        loss.backward()
        optimizer.step()
        avg_cost+=loss.data / batch_size
    print("[Epoch: {:<4}] cost = {:_<4.3} took {}s".format(epochs + 1, avg_cost.item(),time.time()-start_time))

print('Learning Finished! in {}s'.format(time.time()-training_time))

Training of epoch 1 started
[Epoch: 1   ] cost = 0.806 took 4.7764036655426025s
Training of epoch 2 started
[Epoch: 2   ] cost = 0.274 took 2.4843552112579346s
Training of epoch 3 started
[Epoch: 3   ] cost = 0.172 took 2.4743828773498535s
Training of epoch 4 started
[Epoch: 4   ] cost = 0.128 took 2.240009069442749s
Training of epoch 5 started
[Epoch: 5   ] cost = 0.0924 took 2.3586924076080322s
Training of epoch 6 started
[Epoch: 6   ] cost = 0.0727 took 2.734686851501465s
Training of epoch 7 started
[Epoch: 7   ] cost = 0.0566 took 2.6249911785125732s
Training of epoch 8 started
[Epoch: 8   ] cost = 0.0425 took 2.607027292251587s
Training of epoch 9 started
[Epoch: 9   ] cost = 0.0328 took 2.5162792205810547s
Training of epoch 10 started
[Epoch: 10  ] cost = 0.0313 took 2.371657371520996s
Training of epoch 11 started
[Epoch: 11  ] cost = 0.0279 took 2.359689235687256s
Training of epoch 12 started
[Epoch: 12  ] cost = 0.0272 took 2.6359503269195557s
Training of epoch 13 started
[Epoc

In [15]:
# Turn off gradients for validation
test_loss = 0
accuracy = 0
with torch.no_grad():
    ANN_model.eval()
for images, labels in test_loader:
    log_ps = ANN_model(images)
    test_loss += criterion(log_ps, labels)

    ps = torch.exp(log_ps)
    # Get our top predictions
    top_p, top_class = ps.topk(1, dim=1)
    equals = top_class == labels.view(*top_class.shape)
    accuracy += torch.mean(equals.type(torch.FloatTensor))
print("Accuracy of our model on test set using ANN architecture is ", (accuracy/len(test_loader))*100)

Accuracy of our model on test set using ANN architecture is  tensor(97.0771)


# Using CNN Architecture

In [16]:
target1 = target
features1 = features

In [17]:
features1 = features.reshape(features.shape[0],1,28,28)
print(features.shape," to ",features1.shape)

(42000, 784)  to  (42000, 1, 28, 28)


In [18]:
X_train1,X_test1,Y_train1,Y_test1 = train_test_split(features1,target1,test_size=0.2,random_state=42)

In [19]:
#converting numpy array to tensor
train_inputs1 = torch.from_numpy(X_train1)
train_labels1 = torch.from_numpy(Y_train1).type(torch.LongTensor)

test_inputs1 = torch.from_numpy(X_test1)
test_labels1 = torch.from_numpy(Y_test1).type(torch.LongTensor)

In [20]:
# Set batch size
batch_size = 256

# Pytorch train and test sets
train1 = torch.utils.data.TensorDataset(train_inputs1,train_labels1)
test1 = torch.utils.data.TensorDataset(test_inputs1,test_labels1)

# data loader
train_loader1 = torch.utils.data.DataLoader(train1, batch_size = batch_size, shuffle = True)
test_loader1 = torch.utils.data.DataLoader(test1, batch_size = batch_size, shuffle = True)

In [21]:
class CNN(nn.Module):
    def __init__(self):
        super(CNN,self).__init__()
        self.layer1 = nn.Sequential(nn.Conv2d(in_channels=1,out_channels=32,kernel_size=5,stride=1,padding=2),
                                    nn.ReLU(),
                                    nn.MaxPool2d(stride=2,kernel_size=2))
        self.layer2 = nn.Sequential(nn.Conv2d(in_channels=32,out_channels=64,kernel_size=3,stride=1,padding=1),
                                   nn.ReLU(),
                                   nn.MaxPool2d(kernel_size=2,stride=2))
        self.fc1 = nn.Linear(in_features=7*7*64,out_features=1500)
        self.fc2 = nn.Linear(in_features=1500,out_features=800)
        self.fc3 = nn.Linear(800,400)
        self.fc4 = nn.Linear(400,100)
        self.fc5 = nn.Linear(100,10)
        self.softmax = nn.functional.log_softmax
        nn.init.xavier_uniform_(self.fc5.weight)
    
    def forward(self,x):
        x = self.layer1(x)
        x = self.layer2(x)
        x = x.view(x.size(0),-1)
        x = nn.functional.relu(self.fc1(x))
        x = nn.functional.relu(self.fc2(x))
        x = nn.functional.relu(self.fc3(x))
        x = nn.functional.relu(self.fc4(x))
        x = self.fc5(x)
        out = self.softmax(x,dim=1)
        return out
        

In [22]:
learning_rate = 0.0015
batch_size = 256
training_epochs = 50
CNN_model = CNN()

In [23]:
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(CNN_model.parameters(),lr=learning_rate)

In [26]:
import time

In [27]:
training_time = time.time()
for epochs in range(training_epochs):
    print("Training of epoch {} started".format(epochs+1))
    avg_cost = 0
    total_batch = len(X_train)//batch_size
    start_time = time.time()
    for inputs,labels in train_loader1:
        optimizer.zero_grad()
        output = CNN_model(inputs)
        loss = criterion(output,labels)
        loss.backward()
        optimizer.step()
        avg_cost+=loss.data/batch_size
    print("[Epoch: {:<4}] cost = {:_<4.3} took {}s".format(epochs + 1, avg_cost.item(),time.time()-start_time))

print('Learning Finished! in {}s'.format(time.time()-training_time))
    
    

Training of epoch 1 started
[Epoch: 1   ] cost = 0.00322 took 70.86893463134766s
Training of epoch 2 started
[Epoch: 2   ] cost = 0.00325 took 77.6385223865509s
Training of epoch 3 started
[Epoch: 3   ] cost = 0.00533 took 78.26769828796387s
Training of epoch 4 started
[Epoch: 4   ] cost = 0.00341 took 81.65342402458191s
Training of epoch 5 started
[Epoch: 5   ] cost = 0.00341 took 79.82010436058044s
Training of epoch 6 started
[Epoch: 6   ] cost = 0.00237 took 87.53954267501831s
Training of epoch 7 started
[Epoch: 7   ] cost = 0.00342 took 64.5513744354248s
Training of epoch 8 started
[Epoch: 8   ] cost = 0.00361 took 82.22186398506165s
Training of epoch 9 started
[Epoch: 9   ] cost = 0.000672 took 84.16724395751953s
Training of epoch 10 started
[Epoch: 10  ] cost = 0.00324 took 82.8834547996521s
Training of epoch 11 started
[Epoch: 11  ] cost = 0.00256 took 78.45534038543701s
Training of epoch 12 started
[Epoch: 12  ] cost = 0.00213 took 81.15192866325378s
Training of epoch 13 starte

In [28]:
# Turn off gradients for validation
test_loss1 = 0
accuracy1 = 0
with torch.no_grad():
    CNN_model.eval()
for images, labels in test_loader1:
    out = CNN_model(images)
    test_loss1 += criterion(out, labels)

    ps = torch.exp(out)
    # Get our top predictions
    top_p, top_class = ps.topk(1, dim=1)
    equals1 = top_class == labels.view(*top_class.shape)
    accuracy1 += torch.mean(equals1.type(torch.FloatTensor))
print("Accuracy of our model on test set using CNN architecture is ", (accuracy1/len(test_loader1))*100)

Accuracy of our model on test set using CNN architecture is  tensor(99.0357)


In [29]:
torch.save(CNN_model,"../DataSets/Kaggle/digitrecognizerCNN.pt")

  "type " + obj.__name__ + ". It won't be checked "


In [30]:
modelreloaded = torch.load("../DataSets/Kaggle/digitrecognizerCNN.pt")

In [None]:
test = test_data.loc[:].values/255

In [None]:
len(test)

In [None]:
test = test.reshape(len(test),1,28,28)
test_tensor = torch.from_numpy(test)

In [None]:
data = torch.utils.data.TensorDataset(test_tensor)

In [None]:
load = torch.utils.data.DataLoader(data)

In [31]:


class DatasetSubmissionMNIST(torch.utils.data.Dataset):
    def __init__(self, file_path, transform=None):
        self.data = pd.read_csv(file_path)
        self.transform = transform
        
    def __len__(self):
        return len(self.data)
    
    def __getitem__(self, index):
        image = self.data.iloc[index].values.astype(np.uint8).reshape((1, 28, 28))

        
        if self.transform is not None:
            image = self.transform(image)
            
        return image



submissionset = DatasetSubmissionMNIST('../DataSets/Digitrecognizer/test.csv')
submissionloader = torch.utils.data.DataLoader(submissionset, shuffle=False)


In [32]:
ans = torch.LongTensor()
for img in submissionloader:
    img = torch.autograd.Variable(img)
    output = modelreloaded(img)
    _,predicted = torch.max(output[0],1)
    ans = torch.cat([ans,predicted.data],0)
    

RuntimeError: _thnn_conv2d_forward not supported on CPUType for Byte