<a href="https://colab.research.google.com/github/natalievolk/LearnAI/blob/main/LearnAI.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [25]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import Adam, SGD
from torch.utils.data import Dataset, DataLoader
from torch.nn import Linear, ReLU, CrossEntropyLoss, Sequential, Conv2d, MaxPool2d, Module, Softmax, BatchNorm2d, Dropout

# pillow to load image
from PIL import Image
import cv2
import requests
from io import BytesIO
#import urllib2



In [26]:
# training data
url_train = 'https://raw.githubusercontent.com/natalievolk/LearnAI/main/original-datasets/book30-listing-train.csv'
data_train = pd.read_csv(url_train, index_col=False, usecols=[2,5], encoding = "ISO-8859-1")
data_train.columns = ['jpg_url', 'classification']


# testing data
url_test = 'https://raw.githubusercontent.com/natalievolk/LearnAI/main/original-datasets/book30-listing-test.csv'
data_test = pd.read_csv(url_test, index_col=False, usecols=[2,5], encoding = "ISO-8859-1")
data_test.columns = ['jpg_url', 'classification']


In [27]:
# MAKING TRAINING SET

file = open("train.txt", 'w')
#Image Preprocessing
 
for index, image_name in enumerate(data_train['jpg_url']):
    if image_name[-4:] == ".jpg":
      file.write(image_name + " " + str(data_train['classification'][index]) + "\n")

file.close()


file = open("test.txt",'w')
 
for index, image_name in enumerate(data_test['jpg_url']):
    if image_name[-4:] == ".jpg":
      file.write(image_name + " " + str(data_test['classification'][index]) + "\n")
 
file.close()


In [28]:
class ImagesDataset(Dataset):

    def __init__(self, text_file):
        self.df = pd.read_csv(text_file, sep=' ')
        self.df.columns = ['jpg_url', 'classification']

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        response = requests.get(self.df['jpg_url'][idx])
        img = Image.open(BytesIO(response.content)).resize((400, 400))
        pix = np.array(img)
 
        return (pix, self.df['classification'][idx])

#dataset = ImagesDataset('train.txt')
#dataset[10]
#len(dataset)


In [29]:

class Net(Module):   
    def __init__(self):
        super(Net, self).__init__()

        self.cnn_layers = Sequential(
            # Defining a 2D convolution layer
            # in_channels = 3 (RGB), out_channels = 6
            Conv2d(3, 6, kernel_size=5, stride=2, padding=0),
            BatchNorm2d(6), # parameter = out_channels
            ReLU(inplace=True),
            MaxPool2d(kernel_size=2, stride=2),
            # Defining another 2D convolution layer
            Conv2d(6, 12, kernel_size=7, stride=2, padding=0),
            BatchNorm2d(12),
            ReLU(inplace=True),
            MaxPool2d(kernel_size=2, stride=2),
            Dropout(0.2, inplace=True)
        )

        self.linear_layers = Sequential(
            Linear(12 * 48 * 48, 128),
            Linear(128, 64),
            Linear(64, 32)
        )

    # Defining the forward pass    
    def forward(self, x):
        x = self.cnn_layers(x)
        x = x.view(x.size(0), -1)
        x = self.linear_layers(x)
        return x


In [30]:
# defining the model
model = Net()
# defining the optimizer
optimizer = Adam(model.parameters(), lr=0.07)
# defining the loss function
criterion = CrossEntropyLoss()
# checking if GPU is available
if torch.cuda.is_available():
    model = model.cuda()
    criterion = criterion.cuda()
    
print(model)

Net(
  (cnn_layers): Sequential(
    (0): Conv2d(3, 6, kernel_size=(5, 5), stride=(2, 2))
    (1): BatchNorm2d(6, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (4): Conv2d(6, 12, kernel_size=(7, 7), stride=(2, 2))
    (5): BatchNorm2d(12, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (6): ReLU(inplace=True)
    (7): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (8): Dropout(p=0.2, inplace=True)
  )
  (linear_layers): Sequential(
    (0): Linear(in_features=27648, out_features=128, bias=True)
    (1): Linear(in_features=128, out_features=64, bias=True)
    (2): Linear(in_features=64, out_features=32, bias=True)
  )
)
