In [1]:
import pandas as pd
import torch
from torch.utils.data import DataLoader, Dataset
import torchvision
from torchvision import transforms
import numpy as np
import torch
from torch import nn
from torch import optim
import torch.nn.functional as F
from torchvision import datasets, transforms, models
from PIL import Image

In [2]:
class DatasetFood(Dataset):
    
    def __init__(self, file_path, transform=None):
        self.data = pd.read_csv(file_path)
        self.data['img_name'] = "food-recognition-challenge/train_set/train_set/"+ self.data['img_name']
        self.transform = transform
        
    def __len__(self):
        return len(self.data)
    
    def __getitem__(self, index):
        img_name = self.data['img_name'][index]
        image = Image.open(img_name)
        label = self.data['label'][index] - 1
        if self.transform is not None:
            image = self.transform(image)
            
        return image, label

In [3]:
transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])


train_dataset = DatasetFood('food-recognition-challenge/train_labels.csv', transform)


In [4]:
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=1,
                                          shuffle=True, num_workers=4)


In [5]:
from torchvision import models
num_classes = 80
model = models.resnet101(pretrained=True)
device = torch.device("cuda" if torch.cuda.is_available() 
                                  else "cpu")

In [6]:
for param in model.parameters():
    param.requires_grad = False

    
model.fc = nn.Sequential(nn.Linear(2048, 512),
                                 nn.ReLU(),
                                 nn.Dropout(0.2),
                                 nn.Linear(512, num_classes),
                                 nn.LogSoftmax(dim=1))

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.fc.parameters(), lr=0.003)
model.to(device)

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [7]:
for epoch in range(2):  # loop over the dataset multiple times

    running_loss = 0.0
    for i, data in enumerate(train_loader, 0):
        print("BATCH", i)
        # get the inputs; data is a list of [inputs, labels]
        inputs, labels = data

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = model(inputs)
        print("OUTPUTS",outputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        if i % 1000 == 999:    # print every 2000 mini-batches
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 2000))
            running_loss = 0.0

print('Finished Training')

BATCH 0
OUTPUTS tensor([[-3.9943, -4.5387, -4.4552, -4.2421, -4.4956, -4.4521, -4.4008, -4.2444,
         -4.3997, -4.4613, -4.4881, -4.4458, -4.1470, -4.5598, -4.3038, -4.4489,
         -4.4525, -4.2792, -4.1141, -4.3420, -4.1410, -4.6512, -4.4753, -4.7473,
         -4.2285, -4.5155, -4.4185, -4.2905, -4.4559, -4.4484, -4.4102, -4.3197,
         -4.2994, -4.2813, -4.2996, -4.3994, -4.2163, -4.3152, -4.4387, -4.4414,
         -4.4639, -4.0861, -4.3522, -4.3408, -4.6585, -4.7030, -4.3950, -4.4550,
         -4.7754, -4.3310, -4.4782, -4.5658, -4.1792, -4.3530, -4.1966, -4.2610,
         -4.2967, -4.6226, -4.4519, -4.6224, -4.4055, -4.3839, -4.3425, -4.2010,
         -4.2440, -4.2970, -4.4093, -4.3528, -4.5288, -4.4446, -4.3683, -4.4215,
         -4.4880, -4.2419, -4.5014, -4.5820, -4.6503, -4.2370, -4.2727, -4.4490]],
       grad_fn=<LogSoftmaxBackward>)
BATCH 1
OUTPUTS tensor([[ -8.6468,  -9.3935,  -8.5022,  -7.3077,  -9.7129,  -8.1131,  -9.0182,
          -9.3360,  -8.5817,  -0.0164,  

OUTPUTS tensor([[-4.4531, -4.2082, -3.1582, -6.5843, -4.5153, -4.2261, -5.4638, -7.3409,
         -5.7262, -4.1060, -5.6698, -5.5900, -5.0469, -5.1357, -6.2429, -4.0005,
         -6.2608, -5.3344, -4.4870, -6.5415, -5.7596, -4.1221, -4.2757, -6.9836,
         -6.2830, -7.1494, -6.9255, -5.9619, -6.2941, -4.8301, -8.4145, -7.3096,
         -3.8583, -6.5297, -5.1470, -7.1659, -5.1710, -1.9205, -6.0152, -6.2965,
         -5.5580, -5.8593, -6.5290, -6.8402, -8.3039, -3.8542, -5.6521, -4.3418,
         -4.2400, -2.3563, -4.5113, -5.5421, -7.9821, -6.5560, -4.7501, -5.6993,
         -4.9726, -6.1346, -5.4876, -6.7378, -5.4186, -6.2619, -5.4730, -5.9000,
         -5.6496, -7.2006, -6.1855, -5.3108, -6.0481, -1.1250, -7.3161, -5.7353,
         -6.3660, -5.7218, -6.8019, -4.8051, -5.3237, -6.4664, -5.1262, -6.2529]],
       grad_fn=<LogSoftmaxBackward>)
[1,    10] loss: 0.038
BATCH 10
OUTPUTS tensor([[-2.9961, -5.4902, -4.0761, -6.5548, -5.3999, -4.7092, -5.2876, -6.3697,
         -5.2637, -1.9

OUTPUTS tensor([[-2.7642, -4.6796, -4.7636, -8.0943, -1.7850, -5.4409, -5.5584, -5.6931,
         -2.1012, -4.2865, -5.7392, -7.0727, -2.9180, -6.6107, -6.7397, -5.3148,
         -6.2367, -5.4088, -4.7402, -6.6092, -6.2364, -1.9634, -4.7494, -5.9955,
         -8.0152, -6.1440, -7.9459, -5.3682, -5.7486, -7.1745, -8.9270, -6.4407,
         -6.4781, -7.2208, -5.5776, -7.1058, -4.1365, -3.4621, -6.5293, -5.0735,
         -7.0336, -5.3895, -8.1474, -7.7333, -8.9771, -5.4169, -4.5832, -3.9070,
         -5.2773, -4.5134, -5.9648, -5.3417, -8.9402, -7.4274, -5.3739, -7.3350,
         -7.6080, -4.2722, -6.6628, -6.3477, -4.7821, -7.9476, -5.0697, -6.4562,
         -6.0126, -6.4855, -6.5086, -4.9976, -2.9006, -5.2000, -7.1603, -2.3081,
         -7.7441, -6.7262, -6.6277, -6.0221, -6.2186, -7.0617, -5.3403, -6.1044]],
       grad_fn=<LogSoftmaxBackward>)
[1,    20] loss: 0.028
BATCH 20
OUTPUTS tensor([[-3.3528, -4.6008, -5.1678, -7.9546, -1.7269, -6.4595, -5.3445, -5.4620,
         -3.1291, -3.4

OUTPUTS tensor([[-2.8380, -3.2465, -5.4003, -5.2601, -3.9064, -4.3724, -3.7572, -4.6058,
         -5.0866, -3.6469, -5.5594, -5.6818, -4.6050, -3.6815, -5.0253, -3.7423,
         -5.7596, -5.0314, -4.2981, -4.8774, -5.3879, -4.3532, -5.0544, -3.9928,
         -6.2150, -5.2106, -5.5452, -3.9671, -4.9251, -4.6322, -3.3930, -3.7576,
         -4.8938, -4.4317, -5.2757, -5.3707, -3.9729, -4.4744, -5.3754, -4.9250,
         -4.2040, -4.2293, -4.6868, -4.6153, -5.7424, -5.5548, -3.7297, -4.4553,
         -4.0512, -2.8533, -3.7205, -4.1095, -6.0599, -5.5149, -4.1853, -4.4915,
         -6.2553, -3.5240, -5.1761, -5.0412, -3.6779, -5.9509, -5.6019, -4.3309,
         -5.6074, -4.9772, -4.7815, -4.9419, -4.0844, -3.6775, -5.8667, -4.5785,
         -4.7866, -4.8087, -5.0136, -4.6088, -5.5255, -5.5090, -4.5720, -4.3962]],
       grad_fn=<LogSoftmaxBackward>)
[1,    30] loss: 0.028
BATCH 30
OUTPUTS tensor([[-1.8877, -3.8286, -5.5243, -5.2975, -4.5424, -5.2828, -5.1997, -4.9332,
         -4.1855, -3.5

OUTPUTS tensor([[-2.3539, -4.6303, -5.7670, -4.5643, -5.0660, -5.6281, -4.6649, -4.7722,
         -4.6764, -4.0655, -5.7817, -5.2076, -4.9528, -3.8334, -3.4233, -4.6316,
         -4.9840, -4.3798, -4.1763, -5.1600, -4.4526, -4.7842, -5.2652, -3.8701,
         -5.8805, -4.8318, -4.2695, -2.6629, -4.4200, -5.1267, -4.3016, -3.9293,
         -5.2131, -4.6542, -5.9254, -4.2277, -4.7190, -4.7711, -6.1199, -5.0016,
         -3.5394, -4.5202, -4.0070, -4.4640, -4.6588, -5.5884, -4.5725, -4.6018,
         -4.6941, -4.0947, -3.9097, -4.4878, -4.1465, -4.9896, -4.3015, -4.2076,
         -4.8951, -4.0529, -4.6272, -5.0402, -4.4314, -4.7160, -4.8429, -3.4998,
         -5.3253, -4.5184, -4.6220, -5.4590, -3.8699, -3.9840, -5.6538, -4.9915,
         -4.5697, -5.1476, -5.1676, -4.7261, -5.4894, -4.7841, -4.0831, -5.1171]],
       grad_fn=<LogSoftmaxBackward>)
[1,    40] loss: 0.026
BATCH 40
OUTPUTS tensor([[-4.5221, -3.8041, -4.9431, -4.7107, -4.3771, -4.4059, -4.7335, -5.2469,
         -4.7940, -4.0

OUTPUTS tensor([[-4.3304, -4.3700, -4.4356, -4.3915, -4.3406, -4.4477, -4.4351, -4.4232,
         -4.3095, -4.3220, -4.4357, -4.4252, -4.3536, -4.3563, -4.2942, -4.4579,
         -4.4023, -4.4226, -4.3614, -4.3753, -4.4010, -4.3083, -4.4713, -4.3521,
         -4.4208, -4.3945, -4.3965, -4.3233, -4.3782, -4.4079, -4.3601, -4.3059,
         -4.3777, -4.3320, -4.4500, -4.4380, -4.3216, -4.2997, -4.4018, -4.4601,
         -4.3279, -4.4274, -4.4352, -4.4101, -4.3902, -4.4142, -4.4275, -4.3619,
         -4.3402, -4.3253, -4.3238, -4.3699, -4.3402, -4.4515, -4.4040, -4.4340,
         -4.3021, -4.3561, -4.4073, -4.4439, -4.4341, -4.3374, -4.3086, -4.3767,
         -4.3933, -4.4237, -4.4330, -4.3812, -4.3244, -4.3441, -4.4107, -4.3070,
         -4.4302, -4.3172, -4.4159, -4.4038, -4.4352, -4.3999, -4.3961, -4.3972]],
       grad_fn=<LogSoftmaxBackward>)
[1,    50] loss: 0.022
BATCH 50
OUTPUTS tensor([[-4.2432, -5.2474, -5.7602, -4.8397, -2.9737, -5.3069, -5.3144, -6.0171,
         -5.2941, -4.4

OUTPUTS tensor([[-4.0161, -4.3812, -4.4544, -4.5036, -3.8461, -4.6372, -4.6240, -4.7699,
         -4.5047, -4.3317, -4.6684, -4.7495, -4.5108, -4.2619, -4.4766, -4.4739,
         -4.4303, -4.3087, -4.3486, -4.7157, -4.2942, -4.5027, -4.6847, -4.4455,
         -4.7781, -3.9341, -4.4558, -4.2113, -4.3608, -4.2511, -4.5877, -4.3994,
         -4.3661, -4.1491, -4.7101, -4.5287, -4.1985, -4.1847, -4.7437, -4.4895,
         -4.4919, -4.6394, -4.2817, -4.4596, -3.9489, -4.6816, -4.7381, -3.9772,
         -4.3390, -4.2769, -4.5394, -3.8441, -4.1122, -4.3524, -4.7092, -4.4395,
         -4.1536, -4.5357, -4.2886, -4.6069, -4.7408, -3.6791, -4.2069, -4.4138,
         -4.6477, -4.6624, -4.3792, -4.3822, -4.6074, -4.1976, -4.4252, -3.8585,
         -4.2534, -4.5151, -4.5800, -4.4790, -4.7477, -4.4289, -4.5746, -4.6300]],
       grad_fn=<LogSoftmaxBackward>)
[1,    60] loss: 0.024
BATCH 60
OUTPUTS tensor([[-4.0623, -4.3504, -4.4229, -4.4879, -3.9395, -4.6016, -4.5876, -4.6999,
         -4.4592, -4.3

OUTPUTS tensor([[-4.0549, -4.2562, -4.3320, -4.5194, -3.9950, -4.6144, -4.5958, -4.6913,
         -4.4393, -4.3048, -4.6460, -4.6930, -4.4551, -4.1956, -4.3986, -4.5043,
         -4.3037, -4.3961, -4.3728, -4.6344, -4.3798, -4.4375, -4.6330, -4.4015,
         -4.7106, -4.0670, -4.4770, -4.2440, -4.3045, -4.2096, -4.5007, -4.3625,
         -4.4239, -4.2208, -4.6696, -4.5335, -4.2462, -4.2301, -4.6742, -4.5317,
         -4.3022, -4.5991, -4.3870, -4.4837, -4.0237, -4.6199, -4.6594, -4.0949,
         -4.3374, -4.1776, -4.3136, -4.0029, -4.1985, -4.4391, -4.6310, -4.4859,
         -4.1229, -4.3732, -4.3298, -4.6087, -4.6608, -3.8703, -4.2461, -4.3930,
         -4.6163, -4.6249, -4.4451, -4.4378, -4.4960, -4.2515, -4.3078, -3.9458,
         -4.2306, -4.4489, -4.5810, -4.5015, -4.6929, -4.4736, -4.5299, -4.6000]],
       grad_fn=<LogSoftmaxBackward>)
[1,    70] loss: 0.022
BATCH 70
OUTPUTS tensor([[-3.9979, -4.2358, -4.3146, -4.5461, -3.9221, -4.6547, -4.6353, -4.7578,
         -4.4825, -4.3

OUTPUTS tensor([[-3.4981, -4.0224, -4.1910, -4.9764, -3.2932, -5.2408, -5.2099, -5.6366,
         -5.1192, -4.4678, -5.3748, -5.5953, -5.0395, -3.9224, -4.9749, -4.4438,
         -3.9578, -4.4303, -4.6073, -5.5489, -4.4160, -5.1133, -4.8707, -4.8174,
         -5.7013, -3.4309, -4.8210, -4.2678, -4.2605, -3.8101, -5.1968, -4.8053,
         -4.6598, -3.9640, -5.4523, -4.9296, -4.3097, -4.3123, -5.6281, -4.8258,
         -4.2994, -5.2540, -4.3491, -4.8173, -3.2708, -5.4515, -5.5059, -3.6154,
         -4.6125, -3.8962, -4.3824, -3.2406, -4.0860, -4.2671, -5.4713, -4.7375,
         -3.8105, -4.5043, -4.4173, -5.1752, -5.5064, -2.8703, -4.3574, -4.7229,
         -5.3881, -5.3343, -4.3251, -4.6887, -5.2892, -4.2688, -4.1685, -3.1891,
         -3.8712, -5.1386, -5.1525, -4.8910, -5.5740, -4.7737, -5.1370, -5.3102]],
       grad_fn=<LogSoftmaxBackward>)
[1,    80] loss: 0.022
BATCH 80
OUTPUTS tensor([[-3.5421, -4.0174, -4.1810, -4.9286, -3.3615, -5.1751, -5.1445, -5.5380,
         -5.0431, -4.4

OUTPUTS tensor([[-4.2727, -4.2832, -4.3706, -4.4972, -4.2948, -4.5256, -4.5039, -4.4968,
         -4.2620, -4.2612, -4.5396, -4.5206, -4.3066, -4.2855, -4.2458, -4.4445,
         -4.3585, -4.5001, -4.3775, -4.4469, -4.4843, -4.2612, -4.4555, -4.3032,
         -4.5166, -4.3315, -4.4766, -4.2830, -4.3097, -4.3564, -4.3010, -4.2596,
         -4.4719, -4.2722, -4.5376, -4.5107, -4.2796, -4.2534, -4.4857, -4.5600,
         -4.2526, -4.4878, -4.5180, -4.4848, -4.3151, -4.4469, -4.4827, -4.3185,
         -4.2916, -4.2437, -4.2571, -4.3283, -4.2954, -4.4742, -4.4529, -4.5213,
         -4.2331, -4.2907, -4.3529, -4.5542, -4.4159, -4.2763, -4.2605, -4.3265,
         -4.4940, -4.5112, -4.4413, -4.4830, -4.2707, -4.2968, -4.3475, -4.2193,
         -4.3691, -4.2591, -4.5243, -4.4934, -4.5316, -4.4792, -4.4104, -4.4915]],
       grad_fn=<LogSoftmaxBackward>)
[1,    90] loss: 0.022
BATCH 90
OUTPUTS tensor([[-4.2719, -4.2816, -4.3698, -4.4999, -4.2940, -4.5280, -4.5061, -4.4994,
         -4.2612, -4.2

OUTPUTS tensor([[-4.2654, -4.2704, -4.3643, -4.4909, -4.2884, -4.5523, -4.5286, -4.5259,
         -4.2558, -4.2525, -4.5711, -4.4967, -4.3002, -4.2716, -4.2398, -4.4359,
         -4.3488, -4.5181, -4.3872, -4.4754, -4.5031, -4.2549, -4.4468, -4.2971,
         -4.5497, -4.3242, -4.4995, -4.2772, -4.3018, -4.3505, -4.2873, -4.2536,
         -4.4551, -4.2635, -4.5484, -4.5330, -4.2740, -4.2359, -4.4730, -4.5855,
         -4.2413, -4.5111, -4.5355, -4.5065, -4.3084, -4.4679, -4.4537, -4.3133,
         -4.2845, -4.2334, -4.2496, -4.3237, -4.2898, -4.4645, -4.4756, -4.5440,
         -4.2212, -4.2829, -4.3468, -4.5844, -4.3931, -4.2698, -4.2533, -4.3203,
         -4.5256, -4.5399, -4.4282, -4.5082, -4.2634, -4.2894, -4.3412, -4.2100,
         -4.3633, -4.2447, -4.5035, -4.4957, -4.5634, -4.4527, -4.4232, -4.5213]],
       grad_fn=<LogSoftmaxBackward>)
[1,   100] loss: 0.022
BATCH 100
OUTPUTS tensor([[-4.2648, -4.2694, -4.3638, -4.4888, -4.2879, -4.5552, -4.5312, -4.5290,
         -4.2554, -4.

OUTPUTS tensor([[-4.2592, -4.2621, -4.3595, -4.4753, -4.2829, -4.5808, -4.5554, -4.5574,
         -4.2515, -4.2385, -4.6039, -4.4852, -4.2954, -4.2631, -4.2332, -4.4302,
         -4.3421, -4.5367, -4.3989, -4.5065, -4.5224, -4.2505, -4.4410, -4.2926,
         -4.5844, -4.3184, -4.5237, -4.2729, -4.2959, -4.3364, -4.2792, -4.2493,
         -4.4312, -4.2487, -4.5287, -4.5570, -4.2698, -4.2266, -4.4476, -4.6117,
         -4.2339, -4.5368, -4.5533, -4.5296, -4.3030, -4.4431, -4.4314, -4.3092,
         -4.2789, -4.2260, -4.2440, -4.3197, -4.2856, -4.4444, -4.5015, -4.5676,
         -4.2136, -4.2720, -4.3378, -4.6155, -4.3813, -4.2641, -4.2475, -4.3158,
         -4.5585, -4.5703, -4.4202, -4.5339, -4.2578, -4.2834, -4.3366, -4.2029,
         -4.3507, -4.2366, -4.4906, -4.4776, -4.5968, -4.4352, -4.4392, -4.5526]],
       grad_fn=<LogSoftmaxBackward>)
[1,   110] loss: 0.022
BATCH 110
OUTPUTS tensor([[-4.2587, -4.2615, -4.3591, -4.4743, -4.2825, -4.5837, -4.5581, -4.5605,
         -4.2512, -4.

OUTPUTS tensor([[-4.2538, -4.2559, -4.3431, -4.4672, -4.2781, -4.6091, -4.5822, -4.5884,
         -4.2482, -4.2288, -4.6357, -4.4787, -4.2917, -4.2500, -4.2153, -4.4258,
         -4.3369, -4.5556, -4.4117, -4.5371, -4.5418, -4.2470, -4.4367, -4.2892,
         -4.6180, -4.3136, -4.5480, -4.2695, -4.2911, -4.3229, -4.2736, -4.2460,
         -4.4168, -4.2369, -4.5190, -4.5812, -4.2664, -4.2205, -4.4353, -4.6376,
         -4.2282, -4.5628, -4.5713, -4.5530, -4.2983, -4.4318, -4.4203, -4.3057,
         -4.2742, -4.2138, -4.2394, -4.3163, -4.2822, -4.4333, -4.5277, -4.5426,
         -4.2010, -4.2578, -4.3210, -4.6458, -4.3737, -4.2590, -4.2318, -4.3122,
         -4.5904, -4.6003, -4.4143, -4.5593, -4.2532, -4.2782, -4.3283, -4.1894,
         -4.3362, -4.2258, -4.4835, -4.4687, -4.6293, -4.4259, -4.4565, -4.5832]],
       grad_fn=<LogSoftmaxBackward>)
[1,   120] loss: 0.021
BATCH 120
OUTPUTS tensor([[-4.2533, -4.2553, -4.3416, -4.4666, -4.2777, -4.6119, -4.5849, -4.5915,
         -4.2479, -4.

OUTPUTS tensor([[-4.2493, -4.2511, -4.3239, -4.4627, -4.2743, -4.6371, -4.6090, -4.6187,
         -4.2461, -4.2225, -4.6665, -4.4747, -4.2891, -4.2240, -4.2071, -4.4228,
         -4.3330, -4.5751, -4.3878, -4.5671, -4.5618, -4.2448, -4.4336, -4.2764,
         -4.6503, -4.3098, -4.5724, -4.2672, -4.2874, -4.3159, -4.2697, -4.2439,
         -4.4094, -4.2304, -4.5138, -4.6055, -4.2643, -4.2165, -4.4286, -4.6634,
         -4.2239, -4.5889, -4.5575, -4.5767, -4.2946, -4.4262, -4.4022, -4.3034,
         -4.2706, -4.2041, -4.2359, -4.3139, -4.2654, -4.4269, -4.5541, -4.5318,
         -4.1900, -4.2504, -4.3129, -4.6565, -4.3685, -4.2549, -4.2229, -4.3098,
         -4.6214, -4.6296, -4.4099, -4.5847, -4.2496, -4.2740, -4.3121, -4.1807,
         -4.3251, -4.2122, -4.4792, -4.4638, -4.6608, -4.4202, -4.4748, -4.6131]],
       grad_fn=<LogSoftmaxBackward>)
[1,   130] loss: 0.022
BATCH 130
OUTPUTS tensor([[-4.2490, -4.2507, -4.3206, -4.4624, -4.2740, -4.6399, -4.6117, -4.6217,
         -4.2460, -4.

OUTPUTS tensor([[-4.2464, -4.2480, -4.3014, -4.4606, -4.2720, -4.6653, -4.5980, -4.6489,
         -4.2456, -4.2185, -4.6971, -4.4678, -4.2881, -4.2105, -4.1978, -4.4214,
         -4.3307, -4.5957, -4.3788, -4.5970, -4.5830, -4.2441, -4.4322, -4.2648,
         -4.6357, -4.2990, -4.5975, -4.2665, -4.2852, -4.3125, -4.2675, -4.2434,
         -4.4056, -4.2269, -4.5113, -4.6306, -4.2637, -4.2144, -4.4250, -4.6897,
         -4.2212, -4.6155, -4.5450, -4.6011, -4.2852, -4.4238, -4.3934, -4.3026,
         -4.2685, -4.1989, -4.2340, -4.3131, -4.2564, -4.4235, -4.5213, -4.5272,
         -4.1751, -4.2466, -4.3091, -4.6382, -4.3652, -4.2504, -4.2185, -4.3090,
         -4.6521, -4.6589, -4.4073, -4.6105, -4.2477, -4.2714, -4.3054, -4.1759,
         -4.3107, -4.2062, -4.4773, -4.4616, -4.6919, -4.4171, -4.4944, -4.6428]],
       grad_fn=<LogSoftmaxBackward>)
[1,   140] loss: 0.022
BATCH 140
OUTPUTS tensor([[-4.2461, -4.2477, -4.3001, -4.4605, -4.2718, -4.6681, -4.5962, -4.6519,
         -4.2456, -4.

OUTPUTS tensor([[-4.2440, -4.2455, -4.2918, -4.4595, -4.2703, -4.6928, -4.5856, -4.6782,
         -4.2458, -4.2155, -4.7267, -4.4524, -4.2878, -4.2040, -4.1808, -4.4207,
         -4.3201, -4.6166, -4.3754, -4.6261, -4.6043, -4.2442, -4.4148, -4.2600,
         -4.6242, -4.2888, -4.6224, -4.2664, -4.2837, -4.2992, -4.2660, -4.2341,
         -4.4032, -4.2246, -4.5099, -4.6555, -4.2638, -4.2130, -4.4227, -4.7155,
         -4.2192, -4.6417, -4.5398, -4.6254, -4.2741, -4.4227, -4.3889, -4.2845,
         -4.2670, -4.1954, -4.2202, -4.3129, -4.2525, -4.4213, -4.5047, -4.5250,
         -4.1636, -4.2441, -4.3069, -4.6307, -4.3627, -4.2301, -4.2157, -4.3089,
         -4.6818, -4.6875, -4.4053, -4.6361, -4.2464, -4.2694, -4.3021, -4.1726,
         -4.3042, -4.2031, -4.4762, -4.4605, -4.7219, -4.4149, -4.5145, -4.6718]],
       grad_fn=<LogSoftmaxBackward>)
[1,   150] loss: 0.021
BATCH 150
OUTPUTS tensor([[-4.2438, -4.2453, -4.2911, -4.4594, -4.2701, -4.6955, -4.5849, -4.6810,
         -4.2458, -4.

OUTPUTS tensor([[-4.2419, -4.2433, -4.2869, -4.4588, -4.2689, -4.7196, -4.5805, -4.7063,
         -4.2346, -4.2128, -4.7551, -4.4457, -4.2878, -4.1909, -4.1574, -4.4150,
         -4.3013, -4.6374, -4.3740, -4.6540, -4.6254, -4.2445, -4.4064, -4.2575,
         -4.6193, -4.2840, -4.6468, -4.2667, -4.2825, -4.2904, -4.2648, -4.2218,
         -4.4014, -4.2228, -4.5090, -4.6798, -4.2642, -4.2119, -4.4208, -4.7408,
         -4.2175, -4.6672, -4.5374, -4.6492, -4.2690, -4.4221, -4.3859, -4.2773,
         -4.2659, -4.1926, -4.2124, -4.3130, -4.2428, -4.4194, -4.4975, -4.5237,
         -4.1438, -4.2312, -4.3053, -4.6246, -4.3605, -4.2184, -4.2135, -4.3014,
         -4.7103, -4.7150, -4.4037, -4.6611, -4.2453, -4.2676, -4.2886, -4.1699,
         -4.3007, -4.2009, -4.4754, -4.4597, -4.7507, -4.4131, -4.5344, -4.6997]],
       grad_fn=<LogSoftmaxBackward>)
[1,   160] loss: 0.022
BATCH 160
OUTPUTS tensor([[-4.2417, -4.2431, -4.2865, -4.4588, -4.2688, -4.7222, -4.5802, -4.7091,
         -4.2330, -4.

OUTPUTS tensor([[-4.2405, -4.2419, -4.2843, -4.4590, -4.2682, -4.7461, -4.5786, -4.7341,
         -4.2239, -4.2108, -4.7831, -4.4428, -4.2886, -4.1851, -4.1476, -4.3927,
         -4.2904, -4.6585, -4.3741, -4.6817, -4.6469, -4.2457, -4.4031, -4.2567,
         -4.6175, -4.2817, -4.6713, -4.2678, -4.2669, -4.2867, -4.2644, -4.2175,
         -4.4004, -4.2218, -4.5089, -4.6496, -4.2391, -4.2117, -4.4154, -4.7660,
         -4.2165, -4.6926, -4.5367, -4.6731, -4.2667, -4.4223, -4.3761, -4.2746,
         -4.2655, -4.1906, -4.2092, -4.3111, -4.2320, -4.4184, -4.4944, -4.5235,
         -4.1356, -4.2243, -4.3047, -4.6069, -4.3591, -4.2133, -4.2122, -4.2880,
         -4.7384, -4.7423, -4.4028, -4.6860, -4.2451, -4.2666, -4.2803, -4.1680,
         -4.2989, -4.1999, -4.4755, -4.4599, -4.7602, -4.4121, -4.5549, -4.7272]],
       grad_fn=<LogSoftmaxBackward>)
[1,   170] loss: 0.022
BATCH 170
OUTPUTS tensor([[-4.2404, -4.2418, -4.2842, -4.4591, -4.2682, -4.7487, -4.5786, -4.7369,
         -4.2233, -4.

KeyboardInterrupt: 

In [None]:
model_ft, hist = train_model(model_ft, dataloaders_dict, criterion, optimizer_ft,
                             num_epochs=num_epochs, is_inception=(model_name=="inception"))