<a href="https://colab.research.google.com/github/natalievolk/LearnAI/blob/main/model_highest_acc.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [8]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import Adam, SGD
from torch.utils.data import Dataset, DataLoader,TensorDataset
from torch.nn import Linear, ReLU, CrossEntropyLoss, Sequential, Conv2d, MaxPool2d, Module, Softmax, BatchNorm2d, Dropout
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
from PIL import Image
import cv2
import requests
from io import BytesIO
from tqdm.notebook import tqdm

In [9]:
train_txt = "https://raw.githubusercontent.com/natalievolk/LearnAI/main/updated_train.txt"
test_txt = "https://raw.githubusercontent.com/natalievolk/LearnAI/main/updated_test.txt"

In [10]:
class ImagesDataset(Dataset):
    def __init__(self, text_file, transform):
        self.df = pd.read_csv(text_file, sep=' ')
        self.df.columns = ['jpg_url', 'classification']
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        response = requests.get(self.df['jpg_url'][idx])
        img = Image.open(BytesIO(response.content)).resize((225, 225))
        pix = np.array(img)
        try:
          pix = self.transform(pix)
        except:
          pix = np.stack((np.array(pix, copy=True), np.array(pix, copy=True), np.array(pix, copy=True)), axis=2)
          pix = self.transform(pix)
 
        return pix, int(self.df['classification'][idx])

In [11]:
transform = transforms.Compose([transforms.ToTensor(),
                                transforms.Normalize((0.5,0.5,0.5),(0.5,0.5,0.5))])

train_dataset = ImagesDataset(train_txt,transform)
train_dataloader = DataLoader(train_dataset, batch_size=32, num_workers=16)

test_dataset = ImagesDataset(test_txt,transform)
test_dataloader = DataLoader(test_dataset, batch_size=32, num_workers=16)

In [12]:
class Net(nn.Module):   
    def __init__(self):
        super(Net, self).__init__()
        self.pool = nn.MaxPool2d(2,2)
        self.conv1 = nn.Conv2d(3,16,9)
        self.conv2 = nn.Conv2d(16, 64,11)
        self.conv3 = nn.Conv2d(64,256,13)
        self.flatten = nn.Flatten()
        self.fc1 = nn.Linear(256*18*18, 512)
        self.fc2 = nn.Linear(512, 128)
        self.fc3 = nn.Linear(128, 5)
        self.softmax = nn.Softmax()

    # Defining the forward pass    
    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = self.pool(F.relu(self.conv3(x)))
        #print(x.shape)
        x = self.flatten(x)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        x = self.softmax(x)
        return x

net = Net()
optimizer = Adam(net.parameters(), lr=0.00001)
criterion = CrossEntropyLoss()
if torch.cuda.is_available():
    net = net.cuda()
    criterion = criterion.cuda()
device = torch.device('cuda:0')

In [1]:
model_save_name = 'classifier5.pt'
path = F"/content/drive/MyDrive/Machine_Learning/{model_save_name}" 
net.load_state_dict(torch.load(path))
print(net)

NameError: ignored

In [47]:
#Training Stage
for epoch in tqdm(range(5)):  # loop over the dataset multiple times

    running_loss = 0.0
    running_accuracy = 0.0

    for i, data in enumerate(tqdm(train_dataloader)):
        # get the inputs; data is a list of [inputs, labels]
        inputs, labels = data
        inputs = inputs.to(device)
        labels = labels.to(device)

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        predictions = net.softmax(outputs).argmax(dim=1)
        # print(labels)
        # labels1 = labels.argmax(dim=1)
        num_correct = torch.sum(predictions == labels).item()
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
        # print statistics
        running_loss += loss.item()
        running_accuracy += num_correct/predictions.shape[0]

    print('[%d] loss: %f avg_accuracy: %f' %
              (epoch + 1, running_loss, (running_accuracy/i)))

print('Finished Training')
#minibatch, cnn parameter, epoch, learning rate, 

#model_save_name = 'classifier3.pt'
#path = F"/content/gdrive/MyDrive/Machine_Learning/{model_save_name}" 
#torch.save(net.state_dict(), path)


HBox(children=(FloatProgress(value=0.0, max=5.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=1490.0), HTML(value='')))




[1] loss: 1989.051372 avg_accuracy: 0.577103


HBox(children=(FloatProgress(value=0.0, max=1490.0), HTML(value='')))


[2] loss: 1981.248309 avg_accuracy: 0.583253


HBox(children=(FloatProgress(value=0.0, max=1490.0), HTML(value='')))


[3] loss: 1975.680824 avg_accuracy: 0.586409


HBox(children=(FloatProgress(value=0.0, max=1490.0), HTML(value='')))


[4] loss: 1970.570363 avg_accuracy: 0.589481


HBox(children=(FloatProgress(value=0.0, max=1490.0), HTML(value='')))


[5] loss: 1963.352851 avg_accuracy: 0.595001

Finished Training


In [48]:
model_save_name = 'classifier5.pt'
path = F"/content/drive/MyDrive/Machine_Learning/{model_save_name}" 
torch.save(net.state_dict(), path)

In [43]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [49]:
model_save_name = 'classifier5.pt'
path = F"/content/drive/MyDrive/Machine_Learning/{model_save_name}" 
net.load_state_dict(torch.load(path))
print(net)

Net(
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv1): Conv2d(3, 16, kernel_size=(9, 9), stride=(1, 1))
  (conv2): Conv2d(16, 64, kernel_size=(11, 11), stride=(1, 1))
  (conv3): Conv2d(64, 256, kernel_size=(13, 13), stride=(1, 1))
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (fc1): Linear(in_features=82944, out_features=512, bias=True)
  (fc2): Linear(in_features=512, out_features=128, bias=True)
  (fc3): Linear(in_features=128, out_features=5, bias=True)
  (softmax): Softmax(dim=None)
)


In [50]:

#TESTING PHASE
 
correct = 0
total = 0
with torch.no_grad():
    for i, data in enumerate(tqdm(test_dataloader)):
        inputs, labels = data
        inputs = inputs.to(device)
        labels = labels.to(device)
 
        outputs = net(inputs)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
 
print('Accuracy of the network on the 5,510 test images: %.4f %%' % (
    100 * correct / total))


HBox(children=(FloatProgress(value=0.0, max=166.0), HTML(value='')))




Accuracy of the network on the 5,510 test images: 33.1510 %
