In [6]:
!pip install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu116
!pip install --upgrade pandas
!pip install --upgrade torchsummary

Looking in indexes: https://pypi.org/simple, https://download.pytorch.org/whl/cu116
You should consider upgrading via the '/usr/bin/python3 -m pip install --upgrade pip' command.[0m
Requirement already up-to-date: pandas in /usr/local/lib/python3.8/dist-packages (1.5.3)
You should consider upgrading via the '/usr/bin/python3 -m pip install --upgrade pip' command.[0m
Collecting torchsummary
  Downloading torchsummary-1.5.1-py3-none-any.whl (2.8 kB)
Installing collected packages: torchsummary
Successfully installed torchsummary-1.5.1
You should consider upgrading via the '/usr/bin/python3 -m pip install --upgrade pip' command.[0m


In [7]:
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import pandas as pd
import numpy as np

from ResNet import Bottleneck, ResNet, ResNet50
from torch.utils.data import Dataset
from torch.utils.data.dataset import random_split
from torchvision import models
from torchsummary import summary

In [8]:
class MDSMDataset(Dataset):
    def __init__(self, mdsmdata_file):
        self.df = pd.read_csv(mdsmdata_file)
        rating = self.df[['ReviewID', 'reviewStar']]
        self.rating = rating.drop_duplicates('ReviewID')
        self.height = self.df['ReviewID'].value_counts().max()

        mdsm_body = self.df.drop(['reviewNo', 'reviewStar'], axis=1)
        mdsm_body['imageCnt'] = (mdsm_body['imageCnt'] - mdsm_body['imageCnt'].min())/ (mdsm_body['imageCnt'].max() - mdsm_body['imageCnt'].min())
        mdsm_body['helpfulCnt'] = (mdsm_body['helpfulCnt'] - mdsm_body['helpfulCnt'].mean())/ mdsm_body['helpfulCnt'].std()
        body_height, body_width = mdsm_body.shape;
        self.width = body_width - 1

        dummy_mdsd = np.zeros((body_height, self.height, self.width), np.float32)
        mdsm_index = np.zeros(self.rating['ReviewID'].max()+1, int)
        mdsm_count = np.zeros(self.rating['ReviewID'].max()+1, int)
        mdsm_index.fill(-1)

        max_index = int(0)
        for index, body in mdsm_body.iterrows():
            dummy_index = max_index
            if mdsm_index[int(body['ReviewID'])] != -1:
                dummy_index = mdsm_index[int(body['ReviewID'])]
            else:
                mdsm_index[int(body['ReviewID'])] = dummy_index
                max_index = max_index + 1

            dummy_mdsd[dummy_index, mdsm_count[dummy_index]] = body.drop('ReviewID')
            mdsm_count[dummy_index] = mdsm_count[dummy_index] + 1

        self.mdsm_body = dummy_mdsd

    def __len__(self):
        return self.rating.shape[0]

    def __getitem__(self, idx):
        _tensor = torch.tensor(self.mdsm_body[idx])
        rtn_tensor = _tensor.unsqueeze(0)
        return rtn_tensor, self.rating.iloc[idx, 1]

In [9]:
print('-- Loading dataset--')
dataset = MDSMDataset('amazon_hmdvr_df_tokenized_sentiment_score_extended.csv')

train_size = len(dataset) * 0.8
test_size = len(dataset) - train_size

print('-- Building train and test dataset / dataloader--')
train_dataset, test_dataset = random_split(dataset, [int(train_size),int(test_size)])

trainloader = torch.utils.data.DataLoader(train_dataset, batch_size = 64, shuffle=True, num_workers=0)
testloader = torch.utils.data.DataLoader(test_dataset, batch_size = 64, shuffle=True, num_workers=0)

classes = [0, 1, 2, 3, 4, 5]

net = ResNet50(6, 1).to('cuda')

summary(net, (1, 108, 12))

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.1, momentum=0.9, weight_decay=0.0001)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, factor = 0.1, patience=5)

-- Loading dataset--
-- Building train and test dataset / dataloader--
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1            [-1, 64, 54, 6]           3,136
       BatchNorm2d-2            [-1, 64, 54, 6]             128
              ReLU-3            [-1, 64, 54, 6]               0
         MaxPool2d-4            [-1, 64, 27, 3]               0
            Conv2d-5            [-1, 64, 27, 3]           4,160
       BatchNorm2d-6            [-1, 64, 27, 3]             128
              ReLU-7            [-1, 64, 27, 3]               0
            Conv2d-8            [-1, 64, 27, 3]          36,928
       BatchNorm2d-9            [-1, 64, 27, 3]             128
             ReLU-10            [-1, 64, 27, 3]               0
           Conv2d-11           [-1, 256, 27, 3]          16,640
      BatchNorm2d-12           [-1, 256, 27, 3]             512
           Conv2d-13           [

In [19]:
correct = 0
total = 0
EPOCHS = 10
print('-- Start training : ', EPOCHS, 'epochs')
for epoch in range(EPOCHS):
    losses = []
    running_loss = 0
    train_loss = 0
    train_acc = 0
    for i, inp in enumerate(trainloader):
        inputs, labels = inp
        inputs, labels = inputs.to('cuda'), labels.to('cuda')
        optimizer.zero_grad()

        outputs = net(inputs)
        loss = criterion(outputs, labels)
        losses.append(loss.item())

        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        train_loss += loss.item()
        
        pred = outputs.data.max(1, keepdim=True)[1]
        train_acc += pred.eq(labels.data.view_as(pred)).sum()

        if i%100 == 0 and i > 0:
            print(f'Loss [{epoch+1}/{EPOCHS}, {i}](epoch, minibatch): ', f'{running_loss / 100:.5f}')
            running_loss = 0.0

    avg_loss = sum(losses)/len(losses)
    scheduler.step(avg_loss)
    
    train_loss /= len(trainloader.dataset)
    print('Train Epoch: {} Average loss: {:.4f} Accuracy : {:.4f}%)'.format(epoch, train_loss, 100. * train_acc / len(trainloader.dataset)))

print('Training Done')

-- Start training :  10 epochs
Loss [1/10, 100](epoch, minibatch):  1.18017
Loss [1/10, 200](epoch, minibatch):  1.15644
Train Epoch: 0 Average loss: 0.0182 Accuracy : 59.2963%)
Loss [2/10, 100](epoch, minibatch):  1.15956
Loss [2/10, 200](epoch, minibatch):  1.17990
Train Epoch: 1 Average loss: 0.0183 Accuracy : 58.8338%)
Loss [3/10, 100](epoch, minibatch):  1.17675
Loss [3/10, 200](epoch, minibatch):  1.14503
Train Epoch: 2 Average loss: 0.0181 Accuracy : 59.1826%)
Loss [4/10, 100](epoch, minibatch):  1.16493
Loss [4/10, 200](epoch, minibatch):  1.15032
Train Epoch: 3 Average loss: 0.0180 Accuracy : 59.0764%)
Loss [5/10, 100](epoch, minibatch):  1.17411
Loss [5/10, 200](epoch, minibatch):  1.13587
Train Epoch: 4 Average loss: 0.0180 Accuracy : 59.3873%)
Loss [6/10, 100](epoch, minibatch):  1.14983
Loss [6/10, 200](epoch, minibatch):  1.15605
Train Epoch: 5 Average loss: 0.0180 Accuracy : 59.2812%)
Loss [7/10, 100](epoch, minibatch):  1.15064
Loss [7/10, 200](epoch, minibatch):  1.147

In [13]:
with torch.no_grad():
    for data in testloader:
        images, labels = data
        images, labels = images.to('cuda'), labels.to('cuda')
        outputs = net(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
print('Accuracy on 10,000 test images: ', f'{100*(correct/total):.3f}', '%')

Accuracy on 10,000 test images:  58.083 %
