In [1]:
import pandas as pd
import numpy as np
import torch

from torch import nn, optim
from torch.autograd import Variable
import torch.nn.functional as F

from sklearn.utils import shuffle
import pandas as pd

In [2]:
class LeNet(nn.Module):
	"""Small convolutional classifier: three 3x3 convolutions, then two linear layers.

	`fc1` expects 1600 flattened features, which is what the conv/pool stack
	yields for a 1x28x28 input (64 channels of 5x5). `fc2` returns 10 raw
	scores per sample; no softmax is applied here.
	"""

	def __init__(self):
		super(LeNet, self).__init__()
		# Feature extractor: unpadded, stride-1 3x3 convolutions.
		self.conv1 = nn.Conv2d(in_channels=1, out_channels=32, kernel_size=(3, 3))
		self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=(3, 3))
		self.conv3 = nn.Conv2d(in_channels=64, out_channels=64, kernel_size=(3, 3))
		# Classifier head.
		self.fc1   = nn.Linear(in_features=1600, out_features=128)
		self.fc2   = nn.Linear(in_features=128, out_features=10)

	def forward(self, x):
		"""Run a batch through the network and return one row of 10 scores per sample.

		Dropout is only active while the module is in training mode.
		"""
		dropping = self.training

		# Stage 1: conv -> conv -> 2x2 max-pool -> dropout(0.25)
		feats = F.relu(self.conv2(F.relu(self.conv1(x))))
		feats = F.dropout(F.max_pool2d(feats, 2, 2), p=0.25, training=dropping)

		# Stage 2: conv -> 2x2 max-pool -> dropout(0.35)
		feats = F.max_pool2d(F.relu(self.conv3(feats)), 2, 2)
		feats = F.dropout(feats, p=0.35, training=dropping)

		# Head: flatten everything but the batch axis, then the two linear layers.
		flat   = feats.view(-1, self.num_flat_features(feats))
		hidden = F.dropout(F.relu(self.fc1(flat)), p=0.5, training=dropping)
		return self.fc2(hidden)

	def num_flat_features(self, x):
		"""Return the product of all dimensions of `x` except the first (1 if there are none)."""
		flat_count = 1
		for extent in x.shape[1:]:
			flat_count = flat_count * extent
		return flat_count


In [3]:
# Seed PyTorch's RNG right before building the model so the initial weights
# are the same on every Restart & Run All instead of changing from run to run.
torch.manual_seed(0)
net = LeNet()
print (net)

LeNet(
  (conv1): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1))
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1))
  (conv3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1))
  (fc1): Linear(in_features=1600, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=10, bias=True)
)


In [4]:
# Pick the device once; later cells branch on `use_gpu` to move batches.
use_gpu = torch.cuda.is_available()
if not use_gpu:
	print ('USE CPU')
else:
	# Move the model's parameters to the GPU before anything captures them.
	net = net.cuda()
	print ('USE GPU')

USE GPU


In [5]:
# Loss and optimiser: cross-entropy on the network's raw scores, plain SGD
# over all of the model's parameters.
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001)

print ("1. Loading data")
# Both CSVs are read into plain NumPy arrays (one row per sample).
train = pd.read_csv("train.csv").values
# Shuffle the training rows once, with a fixed seed so the row order (and
# therefore every mini-batch) is identical across notebook re-runs.
train = shuffle(train, random_state=0)
test  = pd.read_csv("test.csv").values

1. Loading data


In [6]:
print ("2. Converting data")
# Pixels: every column after the first, reshaped to (rows, 1, 28, 28),
# cast to float and divided by 255.0.
X_data  = torch.from_numpy(
	train[:, 1:].reshape(train.shape[0], 1, 28, 28).astype(float) / 255.0
)
# Labels: the first column as integers, kept as a (rows, 1) column tensor.
X_label = torch.from_numpy(train[:, 0].astype(int)).view(train.shape[0], -1)
print (X_data.size(), X_label.size())

2. Converting data
torch.Size([42000, 1, 28, 28]) torch.Size([42000, 1])


In [7]:
print ("3. Training phase")
# Mini-batch schedule consumed by the training loop.
nb_batch = 16          # samples per optimisation step
nb_index = 0           # cursor into the training rows
nb_epoch = 30000       # number of loop iterations (one mini-batch each)
nb_train = len(train)  # total number of training rows

3. Training phase


In [8]:
# Mini-batch SGD training loop.
# NOTE: despite its name, `epoch` counts optimisation steps (one mini-batch
# each), not full passes over the training set.
net.train()  # make sure dropout is active even if an evaluation cell ran before
for epoch in range(nb_epoch):
	# Wrap to the start once fewer than nb_batch rows remain, so each step
	# gets a full batch whenever the training set holds at least nb_batch rows.
	if nb_index + nb_batch > nb_train:
		nb_index = 0
	batch_end = nb_index + nb_batch

	# Take the current window first, then advance the cursor; this way the
	# very first rows are not skipped on the first pass.
	mini_data  = X_data[nb_index:batch_end].float()
	# Flatten the (batch, 1) label column to 1-D class indices without
	# assuming the slice holds exactly nb_batch rows.
	mini_label = X_label[nb_index:batch_end].long().view(-1)
	nb_index   = batch_end
	if use_gpu:
		mini_data  = mini_data.cuda()
		mini_label = mini_label.cuda()

	optimizer.zero_grad()
	mini_out  = net(mini_data)
	mini_loss = criterion(mini_out, mini_label)
	mini_loss.backward()
	optimizer.step()

	# Report the loss of the current mini-batch every 2000 steps.
	if (epoch + 1) % 2000 == 0:
		print("Epoch = %d, Loss = %f" %(epoch+1, mini_loss.item()))


Epoch = 2000, Loss = 2.270382
Epoch = 4000, Loss = 2.214530
Epoch = 6000, Loss = 0.850757
Epoch = 8000, Loss = 0.582284
Epoch = 10000, Loss = 0.520223
Epoch = 12000, Loss = 0.594253
Epoch = 14000, Loss = 0.149828
Epoch = 16000, Loss = 0.168479
Epoch = 18000, Loss = 0.113711
Epoch = 20000, Loss = 0.142033
Epoch = 22000, Loss = 0.165984
Epoch = 24000, Loss = 0.327500
Epoch = 26000, Loss = 0.282366
Epoch = 28000, Loss = 0.037631
Epoch = 30000, Loss = 0.318886


In [9]:
print ("4. Testing phase")

nb_test = test.shape[0]
# Same preprocessing as the training pixels: reshape to (rows, 1, 28, 28),
# cast to float and divide by 255.0.
Y_data  = torch.from_numpy(test.reshape(nb_test, 1, 28, 28).astype(float) / 255.0)
print (Y_data.size())

# Switch the model to evaluation mode (turns dropout off).
net.eval()

# Uninitialised (row, 2) integer buffer; the prediction loop fills every row.
final_prediction = np.empty((nb_test, 2), dtype=int)

4. Testing phase
torch.Size([28000, 1, 28, 28])


In [10]:
# Predict the test set in chunks instead of one sample per forward pass, and
# without autograd bookkeeping, which inference does not need.
nb_eval_batch = 500  # divides 2000, so progress lines land on the same counts as before
net.eval()           # keep dropout off even if the training cell was re-run
with torch.no_grad():
	for batch_start in range(0, nb_test, nb_eval_batch):
		batch_end   = min(batch_start + nb_eval_batch, nb_test)
		sample_data = Y_data[batch_start:batch_end].float()
		if use_gpu:
			sample_data = sample_data.cuda()
		sample_out = net(sample_data)
		# Index of the highest score in each row is the predicted class.
		pred = torch.max(sample_out, 1)[1]
		# Column 0 is a 1-based running row number, column 1 the predicted class.
		final_prediction[batch_start:batch_end, 0] = np.arange(batch_start + 1, batch_end + 1)
		final_prediction[batch_start:batch_end, 1] = pred.cpu().numpy()
		if batch_end % 2000 == 0:
			print("Total tested = %d" %(batch_end))

Total tested = 2000
Total tested = 4000
Total tested = 6000
Total tested = 8000
Total tested = 10000
Total tested = 12000
Total tested = 14000
Total tested = 16000
Total tested = 18000
Total tested = 20000
Total tested = 22000
Total tested = 24000
Total tested = 26000
Total tested = 28000


In [11]:
print ('5. Generating submission file')

# Wrap the (row, 2) prediction buffer in a two-column frame and write it out
# as CSV with a header row and no index column.
submission = pd.DataFrame(
	data=final_prediction,
	columns=['ImageId', 'Label'],
	dtype=int,
)
submission.to_csv('pytorch_new.csv', header=True, index=False)

# end

5. Generating submission file
