In [None]:
#Installation
!pip install remo
!python -m remo_app

In [1]:
#Imports
import torch
import torch.nn as nn
import numpy as np
from PIL import Image
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from torchvision import transforms
import torchvision.models as models
import pandas as pd
import os
import tqdm
import torch.optim as optim
from torch.optim import lr_scheduler
import random
import remo
remo.set_viewer('jupyter')
from pprint import pprint

In [None]:
train_dataset = remo.create_dataset(name = 'flowers-train',
                    paths_to_upload=["flower/flowers/train", "flower/flowers/flowers_train_numeric.csv"],
                    annotation_task = "Image classification",
                    class_encoding="flower/flowers/data_encoding.csv")

valid_dataset = remo.create_dataset(name = 'flowers-valid',
                    paths_to_upload=["flower/flowers/valid", "flower/flowers/flowers_valid_numeric.csv"],
                    annotation_task = "Image classification",
                    class_encoding="flower/flowers/data_encoding.csv")

test_dataset = remo.create_dataset(name = 'flowers-test',
                    paths_to_upload=["flower/flowers/test", "flower/flowers/flowers_test_numeric.csv"],
                    annotation_task = "Image classification",
                    class_encoding="flower/flowers/data_encoding.csv")

In [2]:
remo.list_datasets()

[Dataset 16 - 'flowers-valid',
 Dataset 17 - 'flowers-train',
 Dataset 18 - 'flowers-test']

In [3]:
validation_dataset = remo.get_dataset(16)
training_dataset = remo.get_dataset(17)
testing_dataset = remo.get_dataset(18)

In [77]:
training_dataset.view()

Open http://localhost:8123/datasets/17


In [78]:
validation_dataset.view()

Open http://localhost:8123/datasets/16


In [79]:
testing_dataset.view()

Open http://localhost:8123/datasets/18


In [4]:
train_stats = training_dataset.get_annotation_statistics()
valid_stats = validation_dataset.get_annotation_statistics()
test_stats = testing_dataset.get_annotation_statistics()

pprint("Training Statistics {}".format(train_stats))
pprint("Validation Statistics {}".format(valid_stats))
pprint("Testing Statistics {}".format(test_stats))

("Training Statistics [{'AnnotationSet ID': 9, 'AnnotationSet name': 'Image "
 "classification', 'n_images': 0, 'n_classes': 102, 'n_objects': 0, "
 "'top_3_classes': [{'name': 'Petunia', 'count': 206}, {'name': 'Passion "
 "flower', 'count': 205}, {'name': 'Wallflower', 'count': 157}], "
 "'creation_date': None, 'last_modified_date': '2020-07-10T11:11:33.708416Z'}]")
("Validation Statistics [{'AnnotationSet ID': 8, 'AnnotationSet name': 'Image "
 "classification', 'n_images': 0, 'n_classes': 102, 'n_objects': 0, "
 "'top_3_classes': [{'name': 'Petunia', 'count': 28}, {'name': 'Cyclamen', "
 "'count': 25}, {'name': 'Passion flower', 'count': 21}], 'creation_date': "
 "None, 'last_modified_date': '2020-07-10T10:44:32.055997Z'}]")
("Testing Statistics [{'AnnotationSet ID': 10, 'AnnotationSet name': 'Image "
 "classification', 'n_images': 0, 'n_classes': 102, 'n_objects': 0, "
 "'top_3_classes': [{'name': 'Water lily', 'count': 28}, {'name': 'Passion "
 "flower', 'count': 25}, {'name': 'P

In [5]:
training_dataset.export_annotations_to_file("training.csv", annotation_format="csv", full_path='true')
testing_dataset.export_annotations_to_file("testing.csv", annotation_format="csv", full_path='true')
validation_dataset.export_annotations_to_file("validation.csv", annotation_format="csv", full_path='true')

In [53]:
class FlowerDataset(Dataset):
    def __init__(self, annotations, data_path, mapping, num_classes, transform=None, mode="train"):
        self.data = pd.read_csv(annotations)
        self.mapping = pd.read_csv(mapping)
        self.mapping = dict(zip(self.mapping["1"], self.mapping["0"]))
        self.data_path = data_path
        self.transform = transform
        self.num_classes = num_classes
        self.mode = mode
        
    def __len__(self):
        return len(self.data)
  
    def __getitem__(self, idx):
    
        labels = self.mapping[self.data.loc[idx, 'classes'].lower()]
        im_path = self.data_path + "/" + str(labels) + "/" + self.data.loc[idx, 'file_name']
        label_tensor = torch.as_tensor(labels-1, dtype=torch.long)
        im = Image.open(im_path)
        
        if self.transform:
            im = self.transform(im)
        if self.mode == "test":
            return {"im" : im, "labels": label_tensor, "im_name" : self.data.loc[idx, 'file_name']}
        else:
            return {"im" : im, "labels" : label_tensor}

In [33]:
model = models.resnet18(pretrained=True)

for param in model.parameters():
    param.required_grad = False

model.fc = nn.Sequential(nn.Linear(512, 256), nn.ReLU(), nn.Dropout(p=0.5), nn.Linear(256, 102), nn.LogSoftmax(dim=1))

In [34]:
def train_model(model, data_loaders, optimizer, criterion, num_epochs, dataset_size):

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    model.to(device)
    print("Training Data Size {}".format(dataset_size["train"]))
    print("Validation Data Size {}".format(dataset_size["valid"]))

    for epoch in range(num_epochs):
        model.train()
        print("Epoch Number {}".format(epoch))

        training_loss = 0.0
        val_loss = 0.0
        val_acc = 0
        correct_preds = 0
        best_acc = 0
        validation = 0.0
        total = 0


        data_loader = tqdm.tqdm(data_loaders["train"])
        for x, data in enumerate(data_loader):
            inputs, labels = data["im"].to(device), data["labels"].to(device)
            outputs = model(inputs)
            optimizer.zero_grad()

            loss = criterion(outputs, labels)


            loss.backward()
            optimizer.step()

            training_loss += loss.item()
        
        epoch_loss = training_loss / dataset_size["train"]

        print("Training Loss : {:.5f}".format(epoch_loss))

        val_data_loader = tqdm.tqdm(data_loaders["valid"])
        
        with torch.no_grad():
            model.eval()
            for x, data in enumerate(val_data_loader):
                inputs, labels = data["im"].to(device), data["labels"].to(device)
                outputs = model(inputs)

                val_loss = criterion(outputs, labels)
                _, index = torch.max(outputs, 1)

                total += labels.size(0)
                correct_preds += (index == labels).sum().item()

                validation += val_loss.item()

            val_acc = 100 * (correct_preds / total)

            print("Validation Loss : {:.5f}".format(validation / dataset_size["valid"]))
            print("Validation Accuracy is: {:.2f}%".format(val_acc))

            if best_acc < val_acc:
                best_acc = val_acc
                torch.save(model, "./saved_model_{:.2f}.pt".format(best_acc))


In [38]:
means = [0.485, 0.456, 0.406]
stds = [0.229, 0.224, 0.225]

train_transforms = transforms.Compose([
        transforms.RandomRotation(30),
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(p=0.5),
        transforms.ToTensor(),
        transforms.Normalize(means, stds)
    ])

val_transforms = transforms.Compose([
        transforms.Resize(224),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(means, stds)])

train_dataset = FlowerDataset(annotations='training.csv',
                                   data_path='./flower/train',
                                   num_classes=102,
                                   transform=train_transforms,
                                   mapping="flower/data_encoding.csv")

val_dataset = FlowerDataset(annotations='./validation.csv',
                              data_path='./flower/valid',
                              num_classes=102,
                              transform=val_transforms,
                              mapping="flower/data_encoding.csv")

train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=64, shuffle=True, num_workers=2)
val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=64, shuffle=False, num_workers=2)

data_loader = {"train" : train_loader, "valid": val_loader}
len_dict = {"train" : len(train_dataset), "valid" : len(val_dataset)}

In [39]:
optimizer = optim.Adam(model.fc.parameters(), lr=0.001)
train_model(model=model, data_loaders=data_loader, optimizer=optimizer, num_epochs=5, dataset_size=len_dict, criterion=nn.NLLLoss())

  0%|          | 0/103 [00:00<?, ?it/s]

Training Data Size 6549
Validation Data Size 818
Epoch Number 0


Exception ignored in: <bound method _MultiProcessingDataLoaderIter.__del__ of <torch.utils.data.dataloader._MultiProcessingDataLoaderIter object at 0x7f5718290e80>>Exception ignored in: 
<bound method _MultiProcessingDataLoaderIter.__del__ of <torch.utils.data.dataloader._MultiProcessingDataLoaderIter object at 0x7f5718290e80>>Traceback (most recent call last):

  File "/home/harsha/miniconda3/envs/pt_36/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 962, in __del__
    self._shutdown_workers()
  File "/home/harsha/miniconda3/envs/pt_36/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 942, in _shutdown_workers
    Traceback (most recent call last):
w.join()
  File "/home/harsha/miniconda3/envs/pt_36/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 962, in __del__
  File "/home/harsha/miniconda3/envs/pt_36/lib/python3.6/multiprocessing/process.py", line 122, in join
        assert self._parent_pid == os.getpid(), 'can only join a chil

Training Loss : 0.03449


Exception ignored in: <bound method _MultiProcessingDataLoaderIter.__del__ of <torch.utils.data.dataloader._MultiProcessingDataLoaderIter object at 0x7f5718290e80>>
Traceback (most recent call last):
Exception ignored in:   File "/home/harsha/miniconda3/envs/pt_36/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 962, in __del__
<bound method _MultiProcessingDataLoaderIter.__del__ of <torch.utils.data.dataloader._MultiProcessingDataLoaderIter object at 0x7f5718290e80>>    self._shutdown_workers()

Traceback (most recent call last):
  File "/home/harsha/miniconda3/envs/pt_36/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 942, in _shutdown_workers
  File "/home/harsha/miniconda3/envs/pt_36/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 962, in __del__
        w.join()self._shutdown_workers()

  File "/home/harsha/miniconda3/envs/pt_36/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 942, in _shutdown_workers
      Fil

Validation Loss : 0.01936
Validation Accuracy is: 73.96%
Epoch Number 1


100%|██████████| 103/103 [11:39<00:00,  6.79s/it]
  0%|          | 0/13 [00:00<?, ?it/s]

Training Loss : 0.02719


100%|██████████| 13/13 [00:28<00:00,  2.16s/it]
  0%|          | 0/103 [00:00<?, ?it/s]

Validation Loss : 0.01428
Validation Accuracy is: 79.95%
Epoch Number 2


100%|██████████| 103/103 [11:30<00:00,  6.70s/it]
  0%|          | 0/13 [00:00<?, ?it/s]

Training Loss : 0.02345


100%|██████████| 13/13 [00:27<00:00,  2.14s/it]
  0%|          | 0/103 [00:00<?, ?it/s]

Validation Loss : 0.01185
Validation Accuracy is: 82.76%
Epoch Number 3


100%|██████████| 103/103 [11:32<00:00,  6.72s/it]
  0%|          | 0/13 [00:00<?, ?it/s]

Training Loss : 0.02125


100%|██████████| 13/13 [00:27<00:00,  2.15s/it]
  0%|          | 0/103 [00:00<?, ?it/s]

Validation Loss : 0.00981
Validation Accuracy is: 86.31%
Epoch Number 4


100%|██████████| 103/103 [11:33<00:00,  6.73s/it]
  0%|          | 0/13 [00:00<?, ?it/s]

Training Loss : 0.01979


100%|██████████| 13/13 [00:27<00:00,  2.08s/it]

Validation Loss : 0.00885
Validation Accuracy is: 86.55%





In [58]:
test_transform = transforms.Compose([
        transforms.Resize(224),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(means, stds)
    ])

test_dataset = FlowerDataset(annotations='./testing.csv',
                              data_path='./flower/test',
                              num_classes=102,
                              transform=test_transform,
                             mapping="./flower/data_encoding.csv",
                             mode="test")

test_dataset_loader = torch.utils.data.DataLoader(test_dataset,
                                             batch_size=1,
                                             shuffle=False,
                                             num_workers=2)


In [71]:
def test(dataloader):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    test_model = torch.load("./saved_model_86.55.pt")
    
    test_model.eval()
    tk0 = tqdm.tqdm(test_dataset_loader)
    
    total = 0
    correct_preds = 0
    pred_list = {}
    
    with torch.no_grad():
        for x, data in enumerate(tk0):
            single_im, label = data["im"].to(device), data["labels"].to(device)
            im_name = data["im_name"]
            
            pred = test_model(single_im)

            _, index = torch.max(pred, 1)

            total += label.size(0)
            correct_preds += (index == label).sum().item()
            
            pred_list[im_name[0]] = (index+1).item()
            
    df = pd.DataFrame(pred_list.items(), columns=['file_name', 'class_name'])
    with open("results.csv", "w") as f:
        df.to_csv(f, index=False)
    print('Accuracy of the network on the test images: %d %%' % (100 * (correct_preds / total)))

In [72]:
test(test_dataset_loader)

100%|██████████| 819/819 [00:41<00:00, 19.60it/s]


           file_name  class_name
0    image_05013.jpg          47
1    image_04966.jpg          47
2    image_04993.jpg          47
3    image_00474.jpg          88
4    image_00563.jpg          88
..               ...         ...
814  image_05653.jpg           9
815  image_05636.jpg          43
816  image_05678.jpg          80
817  image_05637.jpg           4
818  image_05658.jpg          88

[819 rows x 2 columns]
Accuracy of the network on the test images: 84 %


In [76]:
testing_dataset.view()

Open http://localhost:8123/datasets/18
