# Load data

In [120]:
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split

In [121]:
X, y = fetch_openml('mnist_784', version=1, return_X_y=True)

In [122]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [129]:
X_train.shape

(49000, 784)

# Define Eval

In [51]:
def do_eval(y_true, y_pred):
    from sklearn.metrics import accuracy_score
    from sklearn.metrics import f1_score
    from sklearn.metrics import precision_score
    from sklearn.metrics import recall_score
    acc = accuracy_score(y_true, y_pred)
    f1 = f1_score(y_true, y_pred, average='micro')
    pr = precision_score(y_true, y_pred, average='micro')
    recall = recall_score(y_true, y_pred, average='micro')
    print('acc = {}, f1 = {}, pr = {}, recall = {}'.format(acc, f1, pr, recall))

# Train with sklearn

In [77]:
from sklearn.neural_network import MLPClassifier

In [80]:
clf = MLPClassifier(hidden_layer_sizes=100, activation='logistic', max_iter=500, batch_size=200)

In [81]:
clf.fit(X_train, y_train)

MLPClassifier(activation='logistic', batch_size=200, hidden_layer_sizes=100,
              max_iter=500)

In [82]:
clf.predict_proba(X_test[0].reshape(1, -1))

array([[5.61777051e-06, 3.07702857e-05, 4.77922998e-04, 5.42544518e-03,
        1.93008466e-07, 8.73714400e-05, 1.25508631e-06, 2.94671005e-06,
        9.93914794e-01, 5.36834663e-05]])

In [83]:
y_pred = clf.predict(X_test)

In [84]:
do_eval(y_test, y_pred)

acc = 0.9541904761904761, f1 = 0.9541904761904761, pr = 0.9541904761904761, recall = 0.9541904761904761


## notes
1. 3 layer(input, 50 neuron, output ) 0.946
2. 4 layer (input, 50, 50, output) 0.958
3. 3 layer (input, 100, output) 0.96
4. 4 layer (input, 100, 100, output) 0.968
5. 3 layer (input, 100, output), relu 0.96, logistic 0.95, tanh 0.94, identity 0.89

# Training with torch
pytorch core component:
+ torch.Tensor
+ torch.nn, torch.functional
+ torch.optim
+ torch.autograd

In [91]:
import torch
import numpy as np
x = torch.ones(1, requires_grad=True)
print(x.grad)

None


## auto grad

In [86]:
y = x + 2
z = y * y * 2

In [87]:
z.backward()

In [88]:
print(x.grad)

tensor([12.])


In [213]:
class Perceptron(torch.nn.Module):
    def __init__(self):
        super(Perceptron, self).__init__()
        self.fc = torch.nn.Linear(1, 1)
        self.relu = torch.nn.ReLU()
        
    def forward(self, x):
        output = self.fc(x)
        output = self.relu(x)
        return output

In [215]:
net = Perceptron()
for name, param in net.named_parameters():
    print(name, param.size())

fc.weight torch.Size([1, 1])
fc.bias torch.Size([1])


# Define a 3-Layer NN
https://medium.com/biaslyai/pytorch-introduction-to-neural-network-feedforward-neural-network-model-e7231cff47cb

In [127]:
# 3 layer NN
class Feedforward(torch.nn.Module):
    def __init__(self, input_size, hidden_size):
        super(Feedforward, self).__init__()
        self.input_size = input_size
        self.hidden_size  = hidden_size
        self.hid1 = torch.nn.Linear(self.input_size, self.hidden_size)
        self.relu = torch.nn.ReLU()
        self.output = torch.nn.Linear(self.hidden_size, 10)
        self.softmax = torch.nn.Softmax(dim=1)

    def forward(self, x):
        hidden = self.relu(self.hid1(x))
        output = self.softmax(self.output(hidden))
        return output

## 1. define a NN

In [217]:
model = Feedforward(784, 50)
model

Feedforward(
  (hid1): Linear(in_features=784, out_features=50, bias=True)
  (relu): ReLU()
  (output): Linear(in_features=50, out_features=10, bias=True)
  (softmax): Softmax(dim=1)
)

In [197]:
for name, param in model.named_parameters():
    print(name, param.size())

hid1.weight torch.Size([50, 784])
hid1.bias torch.Size([50])
output.weight torch.Size([10, 50])
output.bias torch.Size([10])


In [333]:
784* 50

39200

## 2. prepare data

In [224]:
X_train_tensor = torch.FloatTensor(X_train)
y_train_tensor = torch.LongTensor([int(x) for x in y_train])
X_test_tensor = torch.FloatTensor(X_test)
y_test_tensor = torch.LongTensor([int(x) for x in y_test])

In [225]:
model(X_train_tensor[0].reshape([1, -1]))

tensor([[4.5152e-07, 3.0932e-09, 2.3359e-10, 1.0828e-26, 1.8537e-23, 5.9487e-15,
         2.6862e-29, 1.0000e+00, 1.2232e-15, 1.5186e-38]],
       grad_fn=<SoftmaxBackward>)

## 3. define loss function

In [226]:
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr = 0.0001)

## 4. train the model

In [227]:
model.eval()
y_pred_tensor = model(X_test_tensor)
before_train_loss = criterion(y_pred_tensor, y_test_tensor)
print('Test loss before training' , before_train_loss.item())

Test loss before training 2.3226265907287598


In [228]:
import time
tic = time.time()
model.train()
epoch = 2000
for epoch in range(epoch):
    optimizer.zero_grad()
    # forward pass
    y_pred_tensor = model(X_train_tensor)
    train_loss = criterion(y_pred_tensor, y_train_tensor)
    if epoch % 100 == 0:
        print("epoch {}, train loss {}".format(epoch, train_loss.item()))
    # backword pass
    train_loss.backward()
    optimizer.step()
toc = time.time()
print(toc - tic, "s")

epoch 0, train loss 2.3230514526367188
epoch 100, train loss 1.7942602634429932
epoch 200, train loss 1.6264363527297974
epoch 300, train loss 1.604680061340332
epoch 400, train loss 1.5978587865829468
epoch 500, train loss 1.593381643295288
epoch 600, train loss 1.590737223625183
epoch 700, train loss 1.5884721279144287
epoch 800, train loss 1.586982011795044
epoch 900, train loss 1.585553765296936
epoch 1000, train loss 1.5843391418457031
epoch 1100, train loss 1.5832449197769165
epoch 1200, train loss 1.5826247930526733
epoch 1300, train loss 1.5819228887557983
epoch 1400, train loss 1.581292748451233
epoch 1500, train loss 1.580118179321289
epoch 1600, train loss 1.5792722702026367
epoch 1700, train loss 1.578666090965271
epoch 1800, train loss 1.5779352188110352
epoch 1900, train loss 1.577486276626587
335.93100905418396 s


## 5. do eval

In [230]:
y_pred_tensor = model(X_test_tensor)

In [231]:
y_pred_for_skearn = [str(v) for v in torch.argmax(y_pred_tensor, dim=1).numpy()]

In [232]:
do_eval(y_pred_for_skearn, y_test)

acc = 0.8601904761904762, f1 = 0.8601904761904762, pr = 0.8601904761904762, recall = 0.8601904761904762


# Train with CNN(conv1d)

## 1. prepare data

In [255]:
X_train_tensor = torch.FloatTensor(X_train)
X_train_cnn = X_train_tensor.reshape([X_train_tensor.size()[0], 1, 784])
y_train_cnn = torch.LongTensor([int(x) for x in y_train])
X_test_tensor = torch.FloatTensor(X_test)
X_test_cnn = X_test_tensor.reshape([X_test_tensor.size()[0], 1, 784])
y_test_cnn = torch.LongTensor([int(x) for x in y_test])

In [345]:
class SimpleCNN(torch.nn.Module):
    def __init__(self):
        super(SimpleCNN, self).__init__()
        self.conv1 = torch.nn.Sequential(
            torch.nn.Conv1d(in_channels=1, out_channels=3, kernel_size=5),
            torch.nn.ReLU()
        ) # [N, 3, 784 - 5 + 1 = 780]
        self.conv2 = torch.nn.Sequential(
            torch.nn.Conv1d(3, 1, 5),
            torch.nn.ReLU()
        )# [N, 1, 780 - 5 + 1 = 776]
        self.fc = torch.nn.Sequential(
            torch.nn.Linear(776, 10),
            torch.nn.Softmax(dim=1)
        )

    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        x = torch.flatten(x, 1)
        x = self.fc(x)
        return x

In [346]:
simple_cnn_model = SimpleCNN()
simple_cnn_model

SimpleCNN(
  (conv1): Sequential(
    (0): Conv1d(1, 3, kernel_size=(5,), stride=(1,))
    (1): ReLU()
  )
  (conv2): Sequential(
    (0): Conv1d(3, 1, kernel_size=(5,), stride=(1,))
    (1): ReLU()
  )
  (fc): Sequential(
    (0): Linear(in_features=776, out_features=10, bias=True)
    (1): Softmax(dim=1)
  )
)

In [347]:
for name, param in simple_cnn_model.named_parameters():
    print(name, param.size())

conv1.0.weight torch.Size([3, 1, 5])
conv1.0.bias torch.Size([3])
conv2.0.weight torch.Size([1, 3, 5])
conv2.0.bias torch.Size([1])
fc.0.weight torch.Size([10, 776])
fc.0.bias torch.Size([10])


In [348]:
simple_cnn_model(X_train_cnn[0].reshape([1, 1, -1]))

tensor([[9.0634e-05, 1.6884e-03, 2.1729e-03, 1.3160e-01, 5.3248e-03, 6.2637e-02,
         2.0289e-02, 1.3425e-02, 2.6041e-02, 7.3674e-01]],
       grad_fn=<SoftmaxBackward>)

## 2. train the model

In [349]:
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(simple_cnn_model.parameters(), lr = 0.0001)

In [350]:
import time
tic = time.time()
simple_cnn_model.train()
epoch = 2000
for epoch in range(epoch):
    optimizer.zero_grad()
    # forward pass
    y_pred_cnn = simple_cnn_model(X_train_cnn)
    train_loss = criterion(y_pred_cnn, y_train_cnn)
    if epoch % 100 == 0:
        print("epoch {}, train loss {}".format(epoch, train_loss.item()))
    # backword pass
    train_loss.backward()
    optimizer.step()
toc = time.time()
print(toc - tic, "s")

epoch 0, train loss 2.3638970851898193
epoch 100, train loss 1.7927191257476807
epoch 200, train loss 1.6158777475357056
epoch 300, train loss 1.5797147750854492
epoch 400, train loss 1.5640912055969238
epoch 500, train loss 1.554017186164856
epoch 600, train loss 1.5466773509979248
epoch 700, train loss 1.5411839485168457
epoch 800, train loss 1.5369607210159302
epoch 900, train loss 1.5334538221359253
epoch 1000, train loss 1.5303295850753784
epoch 1100, train loss 1.527725338935852
epoch 1200, train loss 1.5254861116409302
epoch 1300, train loss 1.5234757661819458
epoch 1400, train loss 1.5217124223709106
epoch 1500, train loss 1.520141363143921
epoch 1600, train loss 1.5187087059020996
epoch 1700, train loss 1.5174540281295776
epoch 1800, train loss 1.516321063041687
epoch 1900, train loss 1.5153225660324097
16618.0857899189 s


## 3. eval model

In [352]:
y_pred_cnn = simple_cnn_model(X_test_cnn)
y_pred_for_skearn = [str(v) for v in torch.argmax(y_pred_cnn, dim=1).numpy()]
do_eval(y_pred_for_skearn, y_test)

acc = 0.9186190476190477, f1 = 0.9186190476190477, pr = 0.9186190476190477, recall = 0.9186190476190477


# Train with CNN(conv2d)

## 1. prepare data

In [375]:
X_train_tensor = torch.FloatTensor(X_train)
X_train_cnn2d = X_train_tensor.reshape([X_train_tensor.size()[0], 1, 28, 28])
y_train_cnn2d = torch.LongTensor([int(x) for x in y_train])
X_test_tensor = torch.FloatTensor(X_test)
X_test_cnn2d = X_test_tensor.reshape([X_test_tensor.size()[0], 1, 28, 28])
y_test_cnn2d = torch.LongTensor([int(x) for x in y_test])

## 2. define and train conv network

In [392]:
class CNN2D(torch.nn.Module):
    def __init__(self):
        super(CNN2D, self).__init__()
        self.conv1 = torch.nn.Sequential(
            torch.nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5),
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(2)
        ) # [N, 6, 24, 24] => max pool => [N, 6, 12, 12]
        self.conv2 = torch.nn.Sequential(
            torch.nn.Conv2d(6, 16, 5),
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(2)
        )# [N, 16, 8, 8] => max pool => [N, 16, 4, 4]
        self.fc1 = torch.nn.Sequential(
            torch.nn.Linear(16 * 4 * 4, 120),
            torch.nn.ReLU()
        )
        self.fc2 = torch.nn.Sequential(
            torch.nn.Linear(120, 84),
            torch.nn.ReLU()
        )
        self.fc3 = torch.nn.Sequential(
            torch.nn.Linear(84, 10),
            torch.nn.ReLU(),
            torch.nn.Softmax(dim=1)
        )

    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        x = torch.flatten(x, 1)
        x = self.fc1(x)
        x = self.fc2(x)
        x = self.fc3(x)
        return x

In [393]:
cnn2d_model = CNN2D()
cnn2d_model

CNN2D(
  (conv1): Sequential(
    (0): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (conv2): Sequential(
    (0): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (fc1): Sequential(
    (0): Linear(in_features=256, out_features=120, bias=True)
    (1): ReLU()
  )
  (fc2): Sequential(
    (0): Linear(in_features=120, out_features=84, bias=True)
    (1): ReLU()
  )
  (fc3): Sequential(
    (0): Linear(in_features=84, out_features=10, bias=True)
    (1): ReLU()
    (2): Softmax(dim=1)
  )
)

In [394]:
for name, param in cnn2d_model.named_parameters():
    print(name, param.size())

conv1.0.weight torch.Size([6, 1, 5, 5])
conv1.0.bias torch.Size([6])
conv2.0.weight torch.Size([16, 6, 5, 5])
conv2.0.bias torch.Size([16])
fc1.0.weight torch.Size([120, 256])
fc1.0.bias torch.Size([120])
fc2.0.weight torch.Size([84, 120])
fc2.0.bias torch.Size([84])
fc3.0.weight torch.Size([10, 84])
fc3.0.bias torch.Size([10])


In [395]:
cnn2d_model(X_train_cnn2d[0].reshape([1, 1, 28, 28]))

tensor([[9.9993e-01, 1.6051e-09, 1.6051e-09, 1.6051e-09, 1.6051e-09, 1.6051e-09,
         7.0012e-05, 1.6051e-09, 2.4357e-08, 1.6051e-09]],
       grad_fn=<SoftmaxBackward>)

In [396]:
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(cnn2d_model.parameters(), lr = 0.0001)

In [None]:
import time
tic = time.time()
cnn2d_model.train()
epoch = 2000
for epoch in range(epoch):
    optimizer.zero_grad()
    # forward pass
    y_pred_cnn2d = cnn2d_model(X_train_cnn2d)
    train_loss = criterion(y_pred_cnn2d, y_train_cnn2d)
    if epoch % 100 == 0:
        print("epoch {}, train loss {}".format(epoch, train_loss.item()))
    # backword pass
    train_loss.backward()
    optimizer.step()
toc = time.time()
print(toc - tic, "s")

epoch 0, train loss 2.3620917797088623
epoch 100, train loss 2.0110065937042236
epoch 200, train loss 1.7802565097808838
epoch 300, train loss 1.7657623291015625
epoch 400, train loss 1.75925874710083
epoch 500, train loss 1.7555022239685059
epoch 600, train loss 1.677213191986084
epoch 700, train loss 1.66169273853302
epoch 800, train loss 1.6586320400238037
epoch 900, train loss 1.6566377878189087
epoch 1000, train loss 1.4901421070098877


## 3. eval model

In [None]:
y_pred_cnn2d = simple_cnn_model(X_test_cnn2d)
y_pred_for_skearn = [str(v) for v in torch.argmax(y_pred_cnn2d, dim=1).numpy()]
do_eval(y_pred_for_skearn, y_test)

In [317]:
m = torch.nn.MaxPool1d(3, padding=0, stride=1)
input = torch.randn(20, 16, 50)
output = m(input)
output.size()

torch.Size([20, 16, 48])

In [316]:
47/3.0

15.666666666666666