In [0]:
import numpy as np
import torch
import torch.nn as nn
from torch.nn import init
import torch.optim as optim
import math
import random
import os
from pathlib import Path 
import time
from tqdm import tqdm
import json
# from tqdm import tqdm_notebook as tqdm

In [0]:
def fetch_data():
	with open('training.json') as training_f:
		training = json.load(training_f)
	with open('validation.json') as valid_f:
		validation = json.load(valid_f)
	# If needed you can shrink the training and validation data to speed up somethings but this isn't always safe to do by setting k < 16000
	# k = #fill in
	# training = random.shuffle(training)
	# validation = random.shuffle(validation)
	# training, validation = training[:k], validation[:(k // 10)]
	tra = []
	val = []
	for elt in training:
		tra.append((elt["text"].split(),int(elt["stars"]-1)))
	for elt in validation:
		val.append((elt["text"].split(),int(elt["stars"]-1)))
    
	return tra, val

In [0]:
# train, valid = fetch_data()
# valid[99]

In [0]:
unk = '<UNK>'
# Consult the PyTorch documentation for information on the functions used below:
# https://pytorch.org/docs/stable/torch.html

class FFNN(nn.Module):
    def __init__(self, input_dim, h):
            super(FFNN, self).__init__()
            self.h = h
            self.W1 = nn.Linear(input_dim, h)
            self.activation = nn.ReLU()
            # self.activation = nn.Hardtanh()
            # self.activation = nn.LogSigmoid() # The rectified linear unit; one valid choice of activation function
            self.W2 = nn.Linear(h, 5)  #Changed
            # The below two lines are not a source for an error
            self.softmax = nn.LogSoftmax() # The softmax function that converts vectors into probability distributions; computes log probabilities for computational benefits
            self.loss = nn.NLLLoss() # The cross-entropy/negative log likelihood loss taught in class

    def compute_Loss(self, predicted_vector, gold_label):
        return self.loss(predicted_vector, gold_label)

    def forward(self, input_vector):
        # The z_i are just there to record intermediary computations for your clarity
        z1 = self.W1(input_vector)
        z3 = self.activation(z1)            # CHANGED
        z2 = self.W2(z3)
        predicted_vector = self.softmax(self.activation(z2))
        return predicted_vector


# Returns: 
# vocab = A set of strings corresponding to the vocabulary
def make_vocab(data):
    vocab = set()
    for document, _ in data:
        for word in document:
            vocab.add(word)
    return vocab 


# Returns:
# vocab = A set of strings corresponding to the vocabulary including <UNK>
# word2index = A dictionary mapping word/token to its index (a number in 0, ..., V - 1)
# index2word = A dictionary inverting the mapping of word2index
def make_indices(vocab):
    vocab_list = sorted(vocab)
    vocab_list.append(unk)
    word2index = {}
    index2word = {}
    for index, word in enumerate(vocab_list):
        word2index[word] = index 
        index2word[index] = word 
    vocab.add(unk)
    return vocab, word2index, index2word 


# Returns:
# vectorized_data = A list of pairs (vector representation of input, y)
def convert_to_vector_representation(data, word2index):
    vectorized_data = []
    for document, y in data:
        vector = torch.zeros(len(word2index)) 
        for word in document:
            index = word2index.get(word, word2index[unk])
            vector[index] += 1
        vectorized_data.append((vector, y))
    return vectorized_data


def main(hidden_dim, number_of_epochs):
     print("Fetching data")
     train_data, valid_data = fetch_data() # X_data is a list of pairs (document, y); y in {0,1,2,3,4}
     vocab = make_vocab(train_data)
     vocab, word2index, index2word = make_indices(vocab)
     print("Fetched and indexed data")
     train_data = convert_to_vector_representation(train_data, word2index) # vocab corresponding to unk will always be zero.
     valid_data = convert_to_vector_representation(valid_data, word2index)
     print("Vectorized data")
    #  print(len(train_data[0][0]))
    #  print(len(valid_data[0][0]))    
     
     model = FFNN(input_dim = len(vocab), h = hidden_dim)
     model = model.cuda()
    #  optimizer = optim.SGD(model.parameters(),lr=0.01, momentum=0.9)
     optimizer = optim.Adam(model.parameters())
     print("Training for {} epochs".format(number_of_epochs))
     for epoch in range(number_of_epochs):
         model.train()
        #  optimizer.zero_grad()    # CHANGED
         loss = None
         correct = 0
         total = 0
         start_time = time.time()
         print("Training started for epoch {}".format(epoch + 1))
         random.shuffle(train_data) # Good practice to shuffle order of training data
         minibatch_size = 16 
         N = len(train_data) 
         for minibatch_index in tqdm(range(N // minibatch_size), position=0, leave=False):
             optimizer.zero_grad()
             loss = None
             random_minibatch = random.sample(range(minibatch_size), minibatch_size)   #CHANGED
            #  for example_index in range(minibatch_size):
             for example_index in random_minibatch:
                 input_vector, gold_label = train_data[minibatch_index * minibatch_size + example_index]
                 input_vector = input_vector.cuda()
                 gold_label = torch.tensor([gold_label]).cuda()
                 predicted_vector = model(input_vector)
                 predicted_label = torch.argmax(predicted_vector)
                #  print(predicted_vector)
                #  print(predicted_label)
                #  print(torch.tensor(gold_label))
                #  break
                 correct += int(predicted_label == gold_label)
                 total += 1
                 example_loss = model.compute_Loss(predicted_vector.view(1,-1), gold_label) # torch.tensor([gold_label])
                 if loss is None:
                     loss = example_loss
                 else:
                     loss += example_loss

             loss = loss / minibatch_size      #CHANGED
             loss = loss.cuda()
             loss.backward()
             optimizer.step()
         print("Training completed for epoch {}".format(epoch + 1))
         print("Training accuracy for epoch {}: {}".format(epoch + 1, correct / total))
         print("Training time for this epoch: {}".format(time.time() - start_time))
         
         loss = None
         correct = 0
         total = 0
         start_time = time.time()
         print("Validation started for epoch {}".format(epoch + 1))
         random.shuffle(valid_data) # Good practice to shuffle order of validation data
         minibatch_size = 16 
         N = len(valid_data)
         with torch.no_grad(): # CHANGED
          # model.eval()           
          for minibatch_index in tqdm(range(N // minibatch_size), position=0, leave=False):
              optimizer.zero_grad()     # CHANGED
              loss = None
              for example_index in range(minibatch_size):
                  input_vector, gold_label = valid_data[minibatch_index * minibatch_size + example_index]
                  input_vector = input_vector.cuda()
                  gold_label = torch.tensor([gold_label]).cuda()
                  predicted_vector = model(input_vector)
                  predicted_label = torch.argmax(predicted_vector)
                  # if predicted_label == gold_label:
                  #   print(minibatch_index * minibatch_size + example_index)
                  correct += int(predicted_label == gold_label)
                  total += 1
                  example_loss = model.compute_Loss(predicted_vector.view(1,-1), gold_label)
                  if loss is None:
                      loss = example_loss
                  else:
                      loss += example_loss
              loss = loss / minibatch_size
              loss = loss.cuda()
              # loss.backward()                # CHANGED
              # optimizer.step()               # CHANGED
          print("Validation completed for epoch {}".format(epoch + 1))
          print("Validation accuracy for epoch {}: {}".format(epoch + 1, correct / total))
          print("Validation time for this epoch: {}".format(time.time() - start_time))
          print("the correct number {}".format(correct))



In [0]:
hidden_dim = 32
number_of_epochs = 1
main(hidden_dim=hidden_dim, number_of_epochs=number_of_epochs)
# if __name__ == '__main__':
# 	main()

Fetching data
Fetched and indexed data


  0%|          | 5/1000 [00:00<00:23, 41.93it/s]

Vectorized data
Training for 1 epochs
Training started for epoch 1


  6%|▌         | 6/100 [00:00<00:01, 50.25it/s]

Training completed for epoch 1
Training accuracy for epoch 1: 0.3985625
Training time for this epoch: 23.041085720062256
Validation started for epoch 1
0
2
5
11
13
16
17
20
22
23
24
26
34
40
41
43
45
51
55
62
70
71
73
74
75
76
77
78
79
80
86
87
91
93
94
99
100
103
104
105
107
108
109
110
111
114
115
116
118
119
120
126
127
128
129
134
137
138
139
141
142
144
146
148
152
154
156
157


 16%|█▌        | 16/100 [00:00<00:01, 49.74it/s]

159
161
162
164
165
176
177
180
183
187
188
189
191
193
195
196
198
199
200
201
205
206
209
210
212
217
218
222
223
226
227
232
233
237
238
239
240
241
245
247
248
250
259
262
263
264
265
266
268
276
278
281
282
289
295
296
303
304
305
306
307
309
311
315
317


 26%|██▌       | 26/100 [00:00<00:01, 49.13it/s]

320
322
324
327
330
331
332
336
338
340
341
345
348
350
351
352
354
359
360
361
363
364
366
368
370
373
375
376
378
380
382
387
389
390
392
393
394
395
400
405
406
409
410
411
413
414
415
421
424
427
428
430
433
435
436
442
443
444
446
449
450
451
452
457
458
461
462
463
464
466
471
472
474


 36%|███▌      | 36/100 [00:00<00:01, 48.68it/s]

478
479
480
483
485
496
498
502
503
504
505
506
512
514
517
521
524
525
527
529
538
540
541
542
544
546
547
551
553
554
556
557
558
560
561
562
569
570
571
572
573
574
575
577
578
579
580
582
591
592
596
597
598
602
604
606
607
608
609
611
612
614
615
620
621
623
625
627


 46%|████▌     | 46/100 [00:00<00:01, 48.38it/s]

633
638
640
643
644
646
650
651
655
656
657
660
663
665
671
672
678
680
681
682
686
687
688
690
693
695
696
697
698
702
706
707
708
709
710
711
717
718
721
722
723
725
726
727
728
729
732
733
734
736
738
739
741
749
750
751
752
755
756
758
761
765
770
774
776
777
779
781
783
785
787


 56%|█████▌    | 56/100 [00:01<00:00, 48.91it/s]

793
800
801
803
810
811
815
816
818
820
824
825
828
832
833
834
835
842
845
846
849
850
852
856
857
858
861
862
867
878
879
882
884
885
890
892
894
896
898
899
905
911
913
914
917
920
922
923
925
926
928
930
937
940
945
951


 66%|██████▌   | 66/100 [00:01<00:00, 49.04it/s]

953
956
960
961
963
966
967
971
973
974
975
978
979
981
983
986
987
988
989
992
996
997
999
1001
1005
1009
1010
1011
1013
1014
1017
1019
1021
1022
1025
1026
1027
1031
1036
1039
1040
1042
1045
1046
1047
1052
1053
1054
1056
1060
1062
1063
1066
1067
1068
1069
1074
1078
1080
1081
1083
1085
1086
1089
1090
1091
1092
1094
1095
1098
1099
1100
1101
1102
1103
1106
1107
1109


 76%|███████▌  | 76/100 [00:01<00:00, 49.04it/s]

1110
1112
1114
1115
1123
1125
1130
1133
1134
1135
1136
1137
1138
1140
1143
1144
1146
1149
1153
1155
1156
1165
1166
1168
1170
1171
1172
1175
1178
1180
1186
1189
1193
1196
1197
1199
1202
1210
1213
1214
1215
1216
1218
1219
1222
1226
1237
1241
1242
1244
1246
1247
1250
1251
1255
1256
1258
1263


 86%|████████▌ | 86/100 [00:01<00:00, 48.95it/s]

1269
1271
1279
1282
1283
1285
1288
1289
1290
1291
1292
1294
1295
1298
1300
1305
1306
1308
1310
1312
1315
1316
1317
1318
1323
1326
1327
1328
1333
1338
1341
1343
1344
1348
1349
1350
1351
1354
1356
1359
1364
1366
1367
1368
1370
1373
1374
1375
1376
1377
1378
1379
1382
1384
1385
1388
1389
1391
1394
1399
1400
1405
1406
1410
1412
1414
1416
1417
1419
1422
1423
1424


 96%|█████████▌| 96/100 [00:01<00:00, 48.74it/s]

1425
1431
1432
1434
1435
1437
1439
1440
1441
1442
1443
1444
1445
1455
1461
1462
1465
1466
1467
1468
1469
1471
1472
1474
1478
1482
1483
1484
1485
1487
1489
1497
1500
1501
1503
1504
1505
1509
1512
1513
1516
1517
1518
1519
1523
1524
1525
1526
1529
1533
1534
1535
1540
1542
1546
1549
1551
1553
1554
1560
1563
1564
1566
1571
1573
1574
1576




1585
1586
1587
1590
1591
1593
1594
Validation completed for epoch 1
Validation accuracy for epoch 1: 0.426875
Validation time for this epoch: 2.0497145652770996
the correct number 683


Training accuracy for epoch 1: 0.4165 <br>
Validation accuracy for epoch 1: 0.490625
<br><br>
Training accuracy for epoch 10: 0.703875<br>
Validation accuracy for epoch 10: 0.693125
<br><br>
Training accuracy for epoch 10: 0.7051875<br>
Validation accuracy for epoch 10: 0.53125

In [0]:
# for i in tqdm(range(4)):
#   print(i)
random.sample(range(10),10)

[9, 0, 2, 5, 8, 7, 6, 1, 4, 3]