In [1]:
import pickle
from torch.autograd import Variable
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch
from sklearn.preprocessing import OneHotEncoder
import numpy as np

In [2]:
train_file_name = '../database/CIFAR-10-Train.pkl'

In [3]:
with open(train_file_name, 'rb') as file:
    train_data = pickle.load(file)
train_data

{'data': [array([[[ 59,  43,  50, ..., 158, 152, 148],
          [ 16,   0,  18, ..., 123, 119, 122],
          [ 25,  16,  49, ..., 118, 120, 109],
          ..., 
          [208, 201, 198, ..., 160,  56,  53],
          [180, 173, 186, ..., 184,  97,  83],
          [177, 168, 179, ..., 216, 151, 123]],
  
         [[ 62,  46,  48, ..., 132, 125, 124],
          [ 20,   0,   8, ...,  88,  83,  87],
          [ 24,   7,  27, ...,  84,  84,  73],
          ..., 
          [170, 153, 161, ..., 133,  31,  34],
          [139, 123, 144, ..., 148,  62,  53],
          [144, 129, 142, ..., 184, 118,  92]],
  
         [[ 63,  45,  43, ..., 108, 102, 103],
          [ 20,   0,   0, ...,  55,  50,  57],
          [ 21,   0,   8, ...,  50,  50,  42],
          ..., 
          [ 96,  34,  26, ...,  70,   7,  20],
          [ 96,  42,  30, ...,  94,  34,  34],
          [116,  94,  87, ..., 140,  84,  72]]], dtype=uint8),
  array([[[154, 126, 105, ...,  91,  87,  79],
          [140, 145, 125, .

In [4]:
num_samples = len(train_data['data'])
num_samples

50000

In [5]:
# Process label
onehot_encoder = OneHotEncoder(sparse=False)
labels = np.array(train_data['label'])
# labels = np.array(train_data['label']).reshape(-1, 1)
# labels = onehot_encoder.fit_transform(labels)
# print(labels)
labels = labels.astype(np.int64)
labels.dtype

dtype('int64')

In [6]:
# x_train = torch.from_numpy(np.array(train_data['data']))
y_train = torch.from_numpy(labels)
# x_train
# y_train

In [7]:
# CNN with pyTorch

In [8]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.pool  = nn.MaxPool2d(2,2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1   = nn.Linear(16*5*5, 120)
        self.fc2   = nn.Linear(120, 84)
        self.fc3   = nn.Linear(84, 10)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(-1, 16*5*5)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x
net = Net()

In [9]:
criterion = nn.CrossEntropyLoss() # use a Classification Cross-Entropy loss
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

In [10]:
batch_size = 25
num_load_data_times = int(num_samples / batch_size)
num_load_data_times

2000

In [11]:
for epoch in range(1): # loop over the dataset multiple times
    
    running_loss = 0.0
    for i in range(num_load_data_times):
        
        # get the inputs
        inputs = torch.from_numpy(np.array(train_data['data'][i * batch_size:i * batch_size + batch_size]).astype(np.float32))
        labels = y_train[i * batch_size:i * batch_size + batch_size]
        
        # wrap them in Variable
        inputs, labels = Variable(inputs), Variable(labels)
        
        # zero the parameter gradients
        optimizer.zero_grad()
        
        # forward + backward + optimize
        
        outputs = net(inputs)
        
        loss = criterion(outputs, labels)
        loss.backward()        
        optimizer.step()
        
        # print statistics
        running_loss += loss.data[0]
        if i % 2000 == 1999: # print every 2000 mini-batches
            print('[%d, %5d] loss: %.3f' % (epoch+1, i+1, running_loss / 2000))
            running_loss = 0.0
print('Finished Training')

[1,  2000] loss: 1.873
Finished Training


In [None]:
# ============================
# Debug
inputs = torch.from_numpy(np.array(train_data['data'][i * batch_size:i * batch_size + batch_size]).astype(np.float32))
inputs = Variable(inputs)
outputs = net(inputs)
# inputs.size()
outputs
# ============================

In [12]:
test_file_name = '../database/CIFAR-10-Test.pkl'

In [13]:
with open(test_file_name, 'rb') as file:
    test_data = pickle.load(file)
test_data

{'data': [array([[[158, 159, 165, ..., 137, 126, 116],
          [152, 151, 159, ..., 136, 125, 119],
          [151, 151, 158, ..., 139, 130, 120],
          ..., 
          [ 68,  42,  31, ...,  38,  13,  40],
          [ 61,  49,  35, ...,  26,  29,  20],
          [ 54,  56,  45, ...,  24,  34,  21]],
  
         [[112, 111, 116, ...,  95,  91,  85],
          [112, 110, 114, ...,  95,  91,  88],
          [110, 109, 111, ...,  98,  95,  89],
          ..., 
          [124, 100,  88, ...,  97,  64,  85],
          [116, 102,  85, ...,  82,  82,  64],
          [107, 105,  89, ...,  77,  84,  67]],
  
         [[ 49,  47,  51, ...,  36,  36,  33],
          [ 51,  40,  45, ...,  31,  32,  34],
          [ 47,  33,  36, ...,  34,  34,  33],
          ..., 
          [177, 148, 137, ..., 146, 108, 127],
          [168, 148, 132, ..., 130, 126, 107],
          [160, 149, 132, ..., 124, 129, 110]]], dtype=uint8),
  array([[[235, 231, 232, ..., 233, 233, 232],
          [238, 235, 235, .

In [16]:
num_test_samples = len(test_data['data'])
num_test_samples

10000

In [17]:
test_temp = test_data['data'][0]
test_temp = torch.from_numpy(np.array([test_temp]).astype(np.float32))
test_temp


(0 ,0 ,.,.) = 
  158  159  165  ...   137  126  116
  152  151  159  ...   136  125  119
  151  151  158  ...   139  130  120
      ...         ⋱        ...      
   68   42   31  ...    38   13   40
   61   49   35  ...    26   29   20
   54   56   45  ...    24   34   21

(0 ,1 ,.,.) = 
  112  111  116  ...    95   91   85
  112  110  114  ...    95   91   88
  110  109  111  ...    98   95   89
      ...         ⋱        ...      
  124  100   88  ...    97   64   85
  116  102   85  ...    82   82   64
  107  105   89  ...    77   84   67

(0 ,2 ,.,.) = 
   49   47   51  ...    36   36   33
   51   40   45  ...    31   32   34
   47   33   36  ...    34   34   33
      ...         ⋱        ...      
  177  148  137  ...   146  108  127
  168  148  132  ...   130  126  107
  160  149  132  ...   124  129  110
[torch.FloatTensor of size 1x3x32x32]

In [18]:
test_outputs = net(Variable(test_temp))
test_outputs

Variable containing:
-0.8880 -1.3536  1.7315  2.3006  0.6166  2.2754  0.4962  0.7047 -0.4864 -1.5190
[torch.FloatTensor of size 1x10]

In [19]:
test_data['label'][0]

3

In [20]:
torch.max(test_outputs.data, 1)

(
  2.3006
 [torch.FloatTensor of size 1x1], 
  3
 [torch.LongTensor of size 1x1])

In [22]:
torch.save(net.state_dict(), './train-model.dat')

In [23]:
trained_net = Net()
trained_net.load_state_dict(torch.load('./train-model.dat'))

In [24]:
trained_net(Variable(test_temp))

Variable containing:
-0.8880 -1.3536  1.7315  2.3006  0.6166  2.2754  0.4962  0.7047 -0.4864 -1.5190
[torch.FloatTensor of size 1x10]

In [25]:
test_sample = np.random.rand(3,32,32)
test_sample = torch.from_numpy(test_sample)
trained_net(Variable(test_sample))

AssertionError: 