# This exercise is to implement a convolutional neural network and test the influence of the architecture.

In [1]:
import torch
from torch.autograd import Variable
from torch import nn
from torch.nn import functional as F

import dlc_practical_prologue as prologue

In [2]:
# Load the train/test tensors through the course prologue helper, requesting
# one-hot targets, normalised inputs and unflattened samples.
(train_input, train_target,
 test_input, test_target) = prologue.load_data(one_hot_labels=True,
                                               normalize=True,
                                               flatten=False)

class Net(nn.Module):
    """Two convolutional layers followed by a two-layer fully connected head.

    Args:
        hidden: number of units in the hidden fully connected layer.
    """

    def __init__(self, hidden):
        super().__init__()
        # Convolutional part: 1 -> 32 -> 64 channels, both with 5x5 kernels.
        self.conv1 = nn.Conv2d(1, 32, kernel_size=5)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=5)
        # Fully connected part: 256 flattened features -> hidden -> 10 outputs.
        self.fc1 = nn.Linear(256, hidden)
        self.fc2 = nn.Linear(hidden, 10)

    def forward(self, x):
        """Return the 10 raw output scores for every sample in ``x``.

        The flattening step reshapes to rows of 256 features, so the input is
        presumably single-channel 28x28 images — TODO confirm against the
        data loader.
        """
        # Stage 1: convolution, 3x3 max-pooling with stride 3, then ReLU.
        pooled = F.max_pool2d(self.conv1(x), kernel_size=3, stride=3)
        features = F.relu(pooled)
        # Stage 2: convolution, 2x2 max-pooling with stride 2, then ReLU.
        pooled = F.max_pool2d(self.conv2(features), kernel_size=2, stride=2)
        features = F.relu(pooled)
        # Flatten to the width expected by fc1 (256) and classify.
        flat = features.view(-1, self.fc1.in_features)
        hidden_act = F.relu(self.fc1(flat))
        return self.fc2(hidden_act)

# Tensors track gradients directly in PyTorch >= 0.4, so the training data no
# longer needs to be wrapped in the deprecated `Variable` class; the wrapping
# was a no-op and has been dropped.

# Default network (200 hidden units) and the mean-squared-error criterion,
# which is applied to the one-hot targets requested from the data loader.
model, criterion = Net(200), nn.MSELoss()
# Gradient-descent step size and number of samples per mini-batch.
eta, mini_batch_size = 1e-1, 100



* Using MNIST
** Reduce the data-set (use --full for the full thing)
** Use 1000 train and 1000 test samples




In [3]:
def train_model(model, train_input, train_target, mini_batch_size, nb_epochs=100):
    """Train ``model`` in place with plain mini-batch gradient descent.

    The loss function and the step size are read from the module-level
    ``criterion`` and ``eta``. The summed loss of each epoch is printed.

    Args:
        model: module whose parameters are updated in place.
        train_input: training samples, indexed along dimension 0.
        train_target: targets aligned with ``train_input`` along dimension 0.
        mini_batch_size: maximum number of samples per gradient step.
        nb_epochs: number of passes over the training set (default 100,
            the value that used to be hard-coded).
    """
    nb_samples = train_input.size(0)
    for e in range(nb_epochs):
        sum_loss = 0
        # We do this with mini-batches
        for b in range(0, nb_samples, mini_batch_size):
            # The last batch may be shorter when the number of samples is not
            # a multiple of mini_batch_size; narrow() would raise otherwise.
            batch_size = min(mini_batch_size, nb_samples - b)
            output = model(train_input.narrow(0, b, batch_size))
            loss = criterion(output, train_target.narrow(0, b, batch_size))
            sum_loss = sum_loss + loss.item()
            model.zero_grad()
            loss.backward()
            # Manual SGD step; no_grad keeps the in-place update out of the
            # autograd graph without going through the legacy `.data` field.
            with torch.no_grad():
                for p in model.parameters():
                    # Parameters that did not take part in the loss have no
                    # gradient and are left untouched.
                    if p.grad is not None:
                        p.sub_(eta * p.grad)
        print(e, sum_loss)
        
# Predictions: count the test samples a trained model gets wrong.
def compute_nb_compute(model, test_input, test_target, mini_batch_size):
    """Count the samples of ``test_input`` that ``model`` gets wrong.

    A sample counts as an error when the entry of its target row at the
    predicted class (arg-max of the model output along dimension 1) is
    ``<= 0``.

    Args:
        model: callable mapping a batch of inputs to a 2-D tensor of scores.
        test_input: samples to evaluate, indexed along dimension 0.
        test_target: 2-D targets aligned with ``test_input``; presumably
            one-hot rows — confirm against the data loader.
        mini_batch_size: maximum number of samples evaluated per forward pass.

    Returns:
        int: the number of misclassified samples.
    """
    nb_samples = test_input.size(0)
    total_error = 0
    # Evaluation only: no gradients are needed for the forward passes.
    with torch.no_grad():
        for j in range(0, nb_samples, mini_batch_size):
            # The last batch may be shorter than mini_batch_size.
            batch_size = min(mini_batch_size, nb_samples - j)
            output = model(test_input.narrow(0, j, batch_size))
            _, pred = output.max(1)
            targets = test_target.narrow(0, j, batch_size)
            # Pick, for every sample, the target value at its predicted class.
            target_at_pred = targets.gather(1, pred.unsqueeze(1))
            total_error += int((target_at_pred <= 0).sum().item())
    return total_error
# Test error: repeat the full train/evaluate cycle on 25 freshly initialised
# networks with 200 hidden units and report the test error of each run.
for i in range(25):
    model = Net(200)
    train_model(model, train_input, train_target, mini_batch_size)
    nb_test_errors = compute_nb_compute(
        model, test_input, test_target, mini_batch_size
    )
    error_percent = (100 * nb_test_errors) / test_input.size(0)
    print('{:d} test_error {:.02f}%'.format(i, error_percent))
    
    
   






0 0.9114229381084442
1 0.786708191037178
2 0.7168004810810089
3 0.6580366119742393
4 0.6101455613970757
5 0.5773504748940468
6 0.5495828241109848
7 0.5110501646995544
8 0.48046528175473213
9 0.45653530955314636
10 0.47272537648677826
11 0.4313103035092354
12 0.39148445799946785
13 0.3896026872098446
14 0.3718101345002651
15 0.35106197744607925
16 0.3382747136056423
17 0.32805451564490795
18 0.3328677713871002
19 0.32881965674459934
20 0.3047296404838562
21 0.3003132678568363
22 0.2894335500895977
23 0.28381117433309555
24 0.27006947062909603
25 0.2659909203648567
26 0.26230955868959427
27 0.24835882522165775
28 0.24252261966466904
29 0.2524653133004904
30 0.23875216580927372
31 0.22472845762968063
32 0.2253351155668497
33 0.22932669892907143
34 0.217965517193079
35 0.20911866426467896
36 0.20298698171973228
37 0.20436720922589302
38 0.21247053518891335
39 0.19622241705656052
40 0.18884016200900078
41 0.1874116100370884
42 0.19262626767158508
43 0.19758685864508152
44 0.1794973649084568

63 0.14305988606065512
64 0.14647051505744457
65 0.1463863654062152
66 0.14348762203007936
67 0.14223817270249128
68 0.13998730666935444
69 0.13823532778769732
70 0.13701200112700462
71 0.1366683291271329
72 0.1381726348772645
73 0.13692646380513906
74 0.12956231739372015
75 0.12588264606893063
76 0.129978459328413
77 0.1305541256442666
78 0.12324800342321396
79 0.12143897637724876
80 0.12484254874289036
81 0.1275664996355772
82 0.1216212846338749
83 0.11961320973932743
84 0.11977567616850138
85 0.1184134092181921
86 0.11701384000480175
87 0.11467879358679056
88 0.11262853816151619
89 0.1145106740295887
90 0.11801356170326471
91 0.11326307337731123
92 0.10878535080701113
93 0.10857517085969448
94 0.11446032393723726
95 0.11173912137746811
96 0.10566227044910192
97 0.10401820112019777
98 0.10288694314658642
99 0.10196997039020061
3 test_error 7.10%
0 0.9223078563809395
1 0.7994844689965248
2 0.7354758083820343
3 0.6770420596003532
4 0.6261629350483418
5 0.5847909599542618
6 0.5509423762

25 0.26126753911376
26 0.27185847237706184
27 0.25204892084002495
28 0.23954573087394238
29 0.2398832757025957
30 0.24396818690001965
31 0.22937493585050106
32 0.25320329517126083
33 0.2379399724304676
34 0.21054266579449177
35 0.21024402789771557
36 0.20836388692259789
37 0.2026517204940319
38 0.20342419855296612
39 0.21380878426134586
40 0.2066117860376835
41 0.19300266169011593
42 0.1859699711203575
43 0.20424652192741632
44 0.18899658136069775
45 0.17320864275097847
46 0.17399475909769535
47 0.1829104647040367
48 0.17088144458830357
49 0.16419298853725195
50 0.17544853687286377
51 0.18511385843157768
52 0.16322857607156038
53 0.16314067877829075
54 0.1701703816652298
55 0.15804420411586761
56 0.14962370693683624
57 0.14721087086945772
58 0.15250410977751017
59 0.15823852829635143
60 0.1493393313139677
61 0.15492099709808826
62 0.15134257730096579
63 0.14046276453882456
64 0.13799727521836758
65 0.14010045025497675
66 0.14699793700128794
67 0.14034172613173723
68 0.1312330486252904


88 0.11987057980149984
89 0.11754033248871565
90 0.11973588075488806
91 0.11787739023566246
92 0.11183706112205982
93 0.11018029134720564
94 0.11552058067172766
95 0.11976806912571192
96 0.11200516391545534
97 0.10784799326211214
98 0.10596426110714674
99 0.10509873554110527
10 test_error 6.90%
0 0.9069950953125954
1 0.8255418762564659
2 0.7753104045987129
3 0.7279530093073845
4 0.681362509727478
5 0.6363313756883144
6 0.5946067050099373
7 0.5582358054816723
8 0.5280527882277966
9 0.5033182092010975
10 0.48223748430609703
11 0.4624144360423088
12 0.44229889661073685
13 0.4228380024433136
14 0.40643754228949547
15 0.3941662311553955
16 0.3910098299384117
17 0.36638203263282776
18 0.348006010055542
19 0.3662705160677433
20 0.33179246075451374
21 0.315999049693346
22 0.30830568447709084
23 0.3155390992760658
24 0.31684996001422405
25 0.28131589852273464
26 0.2789441365748644
27 0.2869705017656088
28 0.2779066562652588
29 0.255490742623806
30 0.2591141052544117
31 0.2598269749432802
32 0.2

51 0.16602643206715584
52 0.16705044731497765
53 0.16115843039005995
54 0.15961912088096142
55 0.16460649203509092
56 0.15662520751357079
57 0.15568212419748306
58 0.15701472479850054
59 0.14910588692873716
60 0.14923171885311604
61 0.1482120854780078
62 0.14283379819244146
63 0.14038949646055698
64 0.14122383669018745
65 0.14219446294009686
66 0.14062101859599352
67 0.1368167269974947
68 0.13501844834536314
69 0.13398913200944662
70 0.13226664066314697
71 0.13083809055387974
72 0.12796935252845287
73 0.12626616563647985
74 0.12774598691612482
75 0.12621068954467773
76 0.12294683046638966
77 0.12296121753752232
78 0.12265033088624477
79 0.12019075639545918
80 0.11875455547124147
81 0.11650637816637754
82 0.1148225236684084
83 0.11506258510053158
84 0.1152926292270422
85 0.11372898612171412
86 0.11179489456117153
87 0.11183639243245125
88 0.10991922114044428
89 0.10811045859009027
90 0.10643829125910997
91 0.10481741838157177
92 0.10644312296062708
93 0.10672356467694044
94 0.1039468180

12 0.3752246983349323
13 0.3916650451719761
14 0.3479282334446907
15 0.33503844402730465
16 0.33148416317999363
17 0.31748244166374207
18 0.2997220903635025
19 0.2923943065106869
20 0.3134753443300724
21 0.2944911625236273
22 0.2904530744999647
23 0.2671208381652832
24 0.26142232678830624
25 0.2524161282926798
26 0.2654981594532728
27 0.24148213677108288
28 0.2363433502614498
29 0.26907716877758503
30 0.22813303396105766
31 0.2151760384440422
32 0.21757544204592705
33 0.2214456293731928
34 0.20877122692763805
35 0.20265180431306362
36 0.22092296555638313
37 0.232655867934227
38 0.19920221343636513
39 0.20295837335288525
40 0.18765879422426224
41 0.1845992673188448
42 0.18455061968415976
43 0.18892482947558165
44 0.1872191894799471
45 0.17951475363224745
46 0.17983280681073666
47 0.17054956778883934
48 0.18544646725058556
49 0.16883709281682968
50 0.15838916320353746
51 0.1697556572034955
52 0.1636038413271308
53 0.16301147919148207
54 0.16074430383741856
55 0.15195137169212103
56 0.155

74 0.12424747925251722
75 0.11995998956263065
76 0.11740362737327814
77 0.11904392670840025
78 0.12141005415469408
79 0.11584559734910727
80 0.1142224371433258
81 0.12234766129404306
82 0.11872970778495073
83 0.1137170372530818
84 0.11367963161319494
85 0.11006216611713171
86 0.10651468392461538
87 0.10536527913063765
88 0.10732189938426018
89 0.10832828655838966
90 0.10384587571024895
91 0.1044832980260253
92 0.11059883143752813
93 0.10597552824765444
94 0.10272477846592665
95 0.10213293973356485
96 0.09938896913081408
97 0.0973439235240221
98 0.09748721867799759
99 0.09870266821235418
21 test_error 7.00%
0 0.8782722428441048
1 0.7754976004362106
2 0.7077341303229332
3 0.6548253148794174
4 0.6109516471624374
5 0.5665258951485157
6 0.528563816100359
7 0.4981091171503067
8 0.4752190597355366
9 0.45281440019607544
10 0.4174726828932762
11 0.425808098167181
12 0.41594986617565155
13 0.3782750405371189
14 0.3734884448349476
15 0.34535036608576775
16 0.3462349958717823
17 0.3475633747875690

In [None]:
# Runs another training pass on whatever network the global `model` currently
# refers to. NOTE(review): this cell has no execution count, so it was
# presumably not part of the recorded run — confirm whether it is still needed.
train_model(model, train_input, train_target, mini_batch_size)

In [None]:
# Scratch inspection: show the test targets as this cell's output.
test_target

In [4]:
# Sweep the width of the hidden fully connected layer: train a fresh network
# for each size and report its test error, labelled with the hidden size.
for i in (10, 50, 200, 500, 1000):
    model = Net(i)
    train_model(model, train_input, train_target, mini_batch_size)
    nb_test_errors = compute_nb_compute(
        model, test_input, test_target, mini_batch_size
    )
    error_percent = (100 * nb_test_errors) / test_input.size(0)
    print('{:d} test_error {:.02f}%'.format(i, error_percent))
    
    

0 1.2356549054384232
1 0.9899751991033554
2 0.8968219310045242
3 0.8545269817113876
4 0.8315159976482391
5 0.8120696917176247
6 0.8037835508584976
7 0.779967725276947
8 0.7570804134011269
9 0.7430567741394043
10 0.7339565083384514
11 0.725640133023262
12 0.705600418150425
13 0.7064817920327187
14 0.6982610374689102
15 0.6809043139219284
16 0.6679994091391563
17 0.6578317917883396
18 0.6484716236591339
19 0.6412962339818478
20 0.6375528909265995
21 0.6330124288797379
22 0.6203654445707798
23 0.6144750714302063
24 0.6359355822205544
25 0.6049691028892994
26 0.5954947918653488
27 0.5906554386019707
28 0.5868731401860714
29 0.5826430507004261
30 0.5769464261829853
31 0.5702421218156815
32 0.5651380531489849
33 0.5608419366180897
34 0.5569651536643505
35 0.5537548810243607
36 0.5520054139196873
37 0.5545418187975883
38 0.553274679929018
39 0.5422253273427486
40 0.5383787043392658
41 0.5462152883410454
42 0.5408135689795017
43 0.5303358696401119
44 0.5260577946901321
45 0.5229189097881317
46

65 0.13540377374738455
66 0.13311388529837132
67 0.13235137704759836
68 0.13223919365555048
69 0.1294406009837985
70 0.1272042728960514
71 0.1282707629725337
72 0.1297671301290393
73 0.12628283351659775
74 0.12501809746026993
75 0.1236394764855504
76 0.12046015728265047
77 0.12038727104663849
78 0.1235675634816289
79 0.1231840904802084
80 0.1172892153263092
81 0.11200323980301619
82 0.1106253769248724
83 0.11673439387232065
84 0.11299687810242176
85 0.10860747192054987
86 0.11456065531820059
87 0.1136795524507761
88 0.10655283369123936
89 0.1054677851498127
90 0.11068998742848635
91 0.10675892233848572
92 0.10106521379202604
93 0.10190895479172468
94 0.10162560362368822
95 0.09928604029119015
96 0.10091338027268648
97 0.1049458421766758
98 0.10229041147977114
99 0.10085556656122208
500 test_error 6.70%
0 0.8896979093551636
1 0.7651091441512108
2 0.6806166544556618
3 0.6107298545539379
4 0.5557088069617748
5 0.526993203908205
6 0.49443645775318146
7 0.4404837228357792
8 0.42891384288668

In [5]:
class Net2(nn.Module):
    """Three convolutional layers followed by a two-layer fully connected head.

    Args:
        hidden: number of units in the hidden fully connected layer.
    """

    def __init__(self, hidden):
        super().__init__()
        # Convolutional part: 1 -> 32 -> 32 -> 64 channels; the first two
        # layers use 5x5 kernels, the third a 2x2 kernel.
        self.conv1 = nn.Conv2d(1, 32, kernel_size=5)
        self.conv2 = nn.Conv2d(32, 32, kernel_size=5)
        self.conv3 = nn.Conv2d(32, 64, kernel_size=2)
        # Fully connected part: 9*64 flattened features -> hidden -> 10 outputs.
        self.fc1 = nn.Linear(9 * 64, hidden)
        self.fc2 = nn.Linear(hidden, 10)

    def forward(self, x):
        """Return the 10 raw output scores for every sample in ``x``.

        The flattening step reshapes to rows of 9*64 features, so the input
        is presumably single-channel 28x28 images — TODO confirm against the
        data loader.
        """
        # Stages 1 and 2: convolution, 2x2 max-pooling, then ReLU.
        stage1 = F.relu(F.max_pool2d(self.conv1(x), kernel_size=2))
        stage2 = F.relu(F.max_pool2d(self.conv2(stage1), kernel_size=2))
        # Stage 3: convolution and ReLU only, no pooling.
        stage3 = F.relu(self.conv3(stage2))
        # Flatten to the width expected by fc1 (9*64) and classify.
        flat = stage3.view(-1, self.fc1.in_features)
        hidden_act = F.relu(self.fc1(flat))
        return self.fc2(hidden_act)

# Tensors track gradients directly in PyTorch >= 0.4, so re-wrapping the
# training data in the deprecated `Variable` class was a no-op and has been
# dropped.

# NOTE(review): this builds a `Net`, not the `Net2` defined in this cell; the
# next cell rebinds `model` to a `Net2` before training, so this instance
# appears to be unused — confirm intent.
model, criterion = Net(200), nn.MSELoss()
# Gradient-descent step size and number of samples per mini-batch.
eta, mini_batch_size = 1e-1, 100

# Three convolutional layers

In [6]:
# Hidden-layer width for the three-convolution network. This cell used to read
# the loop variable `i` left over from the hidden-size sweep (1000 once that
# loop has finished); naming the value here gives the same result on a
# top-to-bottom run without depending on leftover kernel state.
nb_hidden = 1000
model = Net2(nb_hidden)
train_model(model, train_input, train_target, mini_batch_size)
nb_test_errors = compute_nb_compute(model, test_input, test_target, mini_batch_size)
# The printed label is the hidden size, as in the sweep over Net.
print('{:d} test_error {:.02f}%'
      .format(nb_hidden, (100 * nb_test_errors) / test_input.size(0)))
    
    

0 0.913086511194706
1 0.8459604829549789
2 0.8026556521654129
3 0.7522978708148003
4 0.6969285905361176
5 0.6431249640882015
6 0.5985627472400665
7 0.5646274127066135
8 0.5370636284351349
9 0.5132698528468609
10 0.49350589513778687
11 0.47785111516714096
12 0.4598428085446358
13 0.4418802559375763
14 0.4284229204058647
15 0.418483704328537
16 0.40500587970018387
17 0.3875449113547802
18 0.377601720392704
19 0.39023715257644653
20 0.36479436978697777
21 0.34448286332190037
22 0.33898106403648853
23 0.3414756879210472
24 0.3223242051899433
25 0.3092042412608862
26 0.3022250458598137
27 0.3093209136277437
28 0.2980999480932951
29 0.2781249526888132
30 0.2733121644705534
31 0.2801634185016155
32 0.26957553438842297
33 0.252764692530036
34 0.2551009990274906
35 0.25989568047225475
36 0.2394920475780964
37 0.2324938178062439
38 0.23072032630443573
39 0.2325668390840292
40 0.22539947554469109
41 0.2211578916758299
42 0.22258144803345203
43 0.2142724022269249
44 0.213254202157259
45 0.20573928