In [1]:
import sys
# MC: need to specify path to remo in notebook
# Windows
#local_path_to_repo =  'C:/Users/crows/Documents/GitHub/remo-python'
# Mac
local_path_to_repo = '/Users/melodi/Docs/remo-python'
sys.path.insert(0, local_path_to_repo)

In [2]:
%load_ext autoreload
%autoreload 2
import remo


    (\(\ 
    (>':') Remo server is running: {'version': '0.3.4'}
                


## Get Dataset

In [3]:
remo.list_datasets()

[Dataset 1 - 'Vova's dataset',
 Dataset 2 - 'open images detection',
 Dataset 3 - 'Subset of OID',
 Dataset 4 - 'train',
 Dataset 6 - 'test',
 Dataset 12 - 'oid 100',
 Dataset 13 - 'open images sample data',
 Dataset 19 - 'oid building ',
 Dataset 21 - 'oid building and car',
 Dataset 22 - 'oid building car detection',
 Dataset 25 - 'open images data',
 Dataset 26 - 'oid car',
 Dataset 28 - 'oid car and person',
 Dataset 29 - 'test upload',
 Dataset 30 - 'test upload 2',
 Dataset 31 - 'cats and dogs']

In [4]:
my_dataset = remo.get_dataset(31)

In [5]:
my_dataset.view()

Open http://localhost:8000/datasets/31


![dataset](view_data2.gif)

In [6]:
my_dataset.view_annotation_statistics()

Open http://localhost:8000/annotation-detail/32/intro


![annotation](view_annotation_statistics.gif)

## Train-Test Split

In [6]:
len(my_dataset)

200

In [7]:
val = my_dataset[0:len(my_dataset) // 3]

In [8]:
train = my_dataset[len(my_dataset) // 3:]

In [9]:
val.export_annotation_to_csv('val.csv')

In [10]:
train.export_annotation_to_csv('train.csv')

In [11]:
import pandas as pd
df = pd.read_csv('train.csv')

In [12]:
df.head()

Unnamed: 0,file_name,class
0,cat.8.jpg,Cat
1,cat.9.jpg,Cat
2,cat.10.jpg,Cat
3,cat.11.jpg,Cat
4,cat.12.jpg,Cat


## Prepare for Image Classification

### Arrange Train & Validation Data

In [17]:
import glob
import os
from shutil import copyfile
import pandas as pd

def arrange_folders(phase, cls, root):
    """
    Given dataframe and directory path containing the images
    Arranges samples as given below: 
        root/phase/class/xxx.ext
    Args:
        root: string. Root directory path.
        phase: string. 'train' or 'val'
        cls: string.
    """
    df = pd.read_csv(phase + '_cats_dogs.csv')
    images = df.loc[df['class'] == cls].file_name.values
    phase_path = os.path.join(root, phase)
    if not os.path.exists(phase_path):
        os.mkdir(phase_path)
    class_path = os.path.join(phase_path, cls)
    if not os.path.exists(class_path):
        os.mkdir(class_path)
    for im in images:
        copyfile(os.path.join(root+cls, im), os.path.join(class_path, im))
        

In [38]:
arrange_folders(phase='val', cls='Cat', root='/Users/melodi/dogscats/sample/')

In [39]:
arrange_folders(phase='val', cls='Dog', root='/Users/melodi/dogscats/sample/')

In [40]:
arrange_folders(phase='train', cls='Cat', root='/Users/melodi/dogscats/sample/')

In [41]:
arrange_folders(phase='train', cls='Dog', root='/Users/melodi/dogscats/sample/')

Now we can continue to build our classification model with PyTorch.

## PyTorch

In [14]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt
import copy

In [15]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [18]:
train_dataset = datasets.ImageFolder(os.path.join('/Users/melodi/dogscats/sample/', 'train'), 
                                     transforms.Compose(
                                         [transforms.RandomResizedCrop(224),
                                          transforms.RandomHorizontalFlip(), 
                                          transforms.ToTensor(),
                                          transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])]))

In [19]:
train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=4, shuffle=True, num_workers=4)

In [20]:
val_dataset = datasets.ImageFolder(os.path.join('/Users/melodi/dogscats/sample/', 'val'), 
                                   transforms.Compose(
                                       [transforms.Resize(256),
                                        transforms.CenterCrop(224),
                                        transforms.ToTensor(),
                                        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])]))

In [21]:
val_dataloader =  torch.utils.data.DataLoader(val_dataset, batch_size=4, shuffle=True, num_workers=4)

In [22]:
datasizes = {x: len(val_dataset) if x == 'val' else len(train_dataset) for x in ['train','val'] }

In [23]:
datasizes 

{'train': 134, 'val': 66}

In [24]:
# load pre-trained resnet18
model_ft = models.resnet18(pretrained=True)
num_ftrs = model_ft.fc.in_features

num_classes = len(train_dataset.classes)
# reset the final fully connected layer
model_ft.fc = nn.Linear(num_ftrs, num_classes)


In [25]:
model_ft = model_ft.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model_ft.parameters(), lr=0.001, momentum=0.9)

best_model_weights = copy.deepcopy(model_ft.state_dict())
best_acc = 0.0

In [26]:
epochs = 3

In [27]:
for epoch in range(epochs):      
    #print(f"Epoch {epoch}/{epochs-1}")
    #print(" ")
    for phase in ['train', 'val']:

        running_loss = 0.0
        running_corrects = 0

        if phase == 'train':
            model_ft.train()
            dataloader = train_dataloader
        else:
            model_ft.eval()
            dataloader = val_dataloader


        for images, labels in dataloader:
            images = images.to(device)
            labels = labels.to(device)

            optimizer.zero_grad()

            outputs = model_ft(images)
            _, preds = torch.max(outputs, 1)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item() * images.size(0)
            running_corrects += torch.sum(preds == labels.data)
        else:
            epoch_acc = running_corrects.double() / datasizes[phase]
            epoch_loss = running_loss / datasizes[phase]
            #print(f"{phase} loss: {epoch_loss}")
            #print(f"{phase} acc: {epoch_acc}")

        if phase == 'val':
            if epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_weights = copy.deepcopy(model_ft.state_dict())

print(f'Best Val Acc: {best_acc}')
torch.save(model_ft.state_dict(), 'model_cat_dog.pth')
model_ft.load_state_dict(best_model_weights)

Best Val Acc: 0.9848484848484849


<All keys matched successfully>

In [28]:
model_ft.load_state_dict(torch.load('/Users/melodi/Docs/remo-python/example/model_cat_dog.pth'))

<All keys matched successfully>

In [29]:
class_names = train_dataset.classes

In [30]:
class_names

['Cat', 'Dog']

Now we will feed our validation dataset into the model and combine the predictions in a dataframe and export as .csv to upload Remo.

In [50]:
df_preds = pd.DataFrame(columns=['file_name','class'])

with torch.no_grad():
    for i, (inputs, labels) in enumerate(val_dataloader,0):
        inputs = inputs.to(device)
        labels = labels.to(device)
        outputs = model_ft(inputs)
        _, preds = torch.max(outputs, 1)
        sample_fname, _ = val_dataloader.dataset.samples[i]
        sample_basename = sample_fname.split('/')[-1]
        for k in range(inputs.shape[0]):
            df_preds.loc[len(df_preds)+1] = [sample_basename, class_names[preds[k]]]

In [14]:
df_preds.to_csv('preds_raw_cats_dogs.csv')

Now, we can push our prediction annotations:

We first create a new annotation set called "Predictions"

In [8]:
my_dataset.create_annotation_set(annotation_task='Image classification', name='Predictions',classes=["Dog","Cat"])

{'id': 37,
 'name': 'Predictions',
 'released_at': None,
 'updated_at': '2020-01-23T14:00:21.217342Z',
 'task': 3,
 'dataset': 31,
 'last_annotated_date': None,
 'classes': [{'id': 37, 'name': 'Cat'}, {'id': 15, 'name': 'Dog'}],
 'is_last_modified': False,
 'type': 'image',
 'is_public': False}

Then we feed by providing the path and the annotation set id.

In [None]:
my_dataset.add_annotations_by_csv('/Users/melodi/Docs/remo-python/example/preds_raw_cats_dogs.csv', 37)

After adding our predictions we need to update our dataset using fetch() function

In [8]:
my_dataset.fetch()

In [7]:
my_dataset.annotation_sets

[Annotation set 32 - 'Image classification', task: Image classification, #classes: 2]

In [10]:
my_dataset.view()

Open http://localhost:8000/datasets
