In [2]:
# Mount Google Drive inside the Colab runtime so later cells can read project files.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Mounted at /content/drive


In [3]:
%cd drive/MyDrive/DePaul\ Research

/content/drive/MyDrive/DePaul Research


In [2]:
import json
import torch
import os
import torchvision
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from PIL import Image
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from PIL import ImageDraw
from sklearn.metrics import classification_report
from tqdm import tqdm

from utils.data_utils import *
from engine import train_one_epoch, evaluate
import utils
import transforms as T

## Data Imports
- Annotation csv's
    - Annotation files created from AnimalDetector.ipynb
    - Loosely annotated objects in images, using the following labels:
        - Animal, Human, Vehicle
- Labelled Classification File
    - Provided by Lincoln Zoo
    - Contains a label for each image, drawn from the following classes (no bounding boxes):
    
```python
label_encoding = {
    0  : 'bird',
    1  : 'cat', 
    2  : 'coyote',
    3  : 'dog',
    4  : 'e. cottontail',
    5  : 'empty',
    6  : 'human',
    7  : 'lawn mower',
    8  : 'raccoon',
    9  : 'rat',
    10 : 'squirrel',
    11 : 'striped skunk',
    12 : 'v. opossum',
    13 : 'w. t. deer'
}
```

In [5]:
def _load_detections(path):
    """Read one detection-annotation JSON file and return the parsed object."""
    with open(path, 'rb') as f:
        return json.load(f)


# NOTE(review): the original cell read './data/annotations/D.json' for all four
# variables, so j_/r_/s_detections were copies of d_detections and `images`
# contained the D entries four times. The J/R/S file names below are inferred
# from the variable names -- confirm they match the files on disk.
d_detections = _load_detections('./data/annotations/D.json')
j_detections = _load_detections('./data/annotations/J.json')
r_detections = _load_detections('./data/annotations/R.json')
s_detections = _load_detections('./data/annotations/S.json')

In [6]:
# The category map is taken from the D detections; the image records of all four
# detection objects are gathered into one list in D, J, R, S order.
labels = d_detections['detection_categories']
images = [
    image_record
    for detections in (d_detections, j_detections, r_detections, s_detections)
    for image_record in detections['images']
]

In [8]:
# Load the detections table; later cells read its 'Directory' and 'ShortName' columns.
DETECTIONS_TABLE = 'updated_2018_detections'
df = pd.read_csv(DETECTIONS_TABLE)

In [11]:
# Keep only the rows whose 'Directory' value contains 'D02'.
in_d02 = df['Directory'].str.contains('D02')
sample = df[in_d02]

In [13]:
# Create stratified samples to get a more balanced representation of all classes
strat_sample = stratify_sample(df, 500)

# Only these classes are topped up from the stratified sample.
supplement_classes = ['bird', 'cat', 'striped skunk', 'rat', 'v. opossum', 'w. t. deer']
sample_supplement = strat_sample[strat_sample['ShortName'].isin(supplement_classes)]

# DataFrame.append was removed in pandas 2.0; pd.concat stacks the rows the same way.
final_sample = pd.concat([sample, sample_supplement])

In [15]:
# Number of rows per class in the combined sample.
short_names = final_sample['ShortName']
short_names.value_counts()

empty            5039
human             979
bird              592
v. opossum        507
w. t. deer        500
squirrel          454
dog               395
raccoon           314
striped skunk     211
cat               207
rat               146
e. cottontail     142
coyote              8
lawn mower          1
Name: ShortName, dtype: int64

In [None]:
# Build `im_dict` (passed to WildlifeDataLoader below) together with `le`, whose
# `inverse_transform` is used during evaluation to turn predicted label ids back
# into class names.
im_dict, le = create_image_dict(images, final_sample)

In [21]:
# Sanity check: build the dataset without passing a transform and display its
# first item (the output below shows a PIL image and a target dict).
dataset = WildlifeDataLoader(im_dict)
dataset[0]

(<PIL.Image.Image image mode=RGB size=760x512 at 0x7FD5AA6DE150>,
 {'area': tensor([4305.9658]),
  'boxes': tensor([[381.3680, 312.3712, 439.6904, 386.2016]]),
  'image_id': tensor([0]),
  'iscrowd': tensor([0]),
  'labels': tensor([4])})

### Begin Training Model
- Create DataLoaders
- Instantiate Model

In [24]:
# Use our dataset and defined transformations.
# NOTE(review): `get_transform(train=False)` is also applied to the training
# subset -- confirm that skipping the train-time transforms is intended.
dataset = WildlifeDataLoader(im_dict, get_transform(train=False))


def split_ind(train_percent):
    """Return the index separating the first `train_percent` of the dataset from the rest."""
    return int(len(dataset) * train_percent)


# Split the dataset into train and test sets. The seed is fixed so the random
# permutation (and therefore the split) is the same on every run.
torch.manual_seed(1)
indices = torch.randperm(len(dataset)).tolist()
train_end = split_ind(0.9)

dataset_train = torch.utils.data.Subset(dataset, indices[:train_end])
dataset_test = torch.utils.data.Subset(dataset, indices[train_end:])

# Define training and validation data loaders.
# The training loader must draw from `dataset_train`: iterating the full
# `dataset` would also train on the held-out test images.
data_loader = torch.utils.data.DataLoader(
    dataset_train, batch_size=8, shuffle=True, num_workers=2,
    collate_fn=utils.collate_fn)

data_loader_test = torch.utils.data.DataLoader(
    dataset_test, batch_size=2, shuffle=False, num_workers=0,
    collate_fn=utils.collate_fn)

In [None]:
# Path to a checkpoint state_dict to resume from; leave as None to start from the
# model returned by get_instance_segmentation_model(num_classes).
checkpoint = None

device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
# One output class per entry of the label encoding listed under "Data Imports".
# NOTE(review): torchvision detection models conventionally reserve label 0 for
# background -- confirm the encoded labels account for that.
num_classes = 14

if checkpoint:
    model = get_instance_segmentation_model(num_classes, pretrained=False)
    # map_location lets a checkpoint that was saved on a GPU load on a CPU-only runtime.
    model.load_state_dict(torch.load(checkpoint, map_location=device))
else:
    model = get_instance_segmentation_model(num_classes)
model.to(device)

# Optimise only the parameters that are not frozen.
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.Adam(params, lr=0.0001)


num_epochs = 25
for epoch in range(num_epochs):
    # One pass over the training loader, then an evaluation pass on the test loader.
    train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq=50)
    evaluate(model, data_loader_test, device=device)

In [30]:
# Persist only the weights (state_dict) of the trained model.
trained_weights = model.state_dict()
torch.save(trained_weights, './5_16.pth')
print('model saved')

model saved


In [26]:
def evaluate_performance(dataframe, threshold=0.5):
    """Predict one class per image and collect predictions next to the true labels.

    Relies on the notebook-level names `model`, `le` and `image_loader`
    (the last presumably provided by `utils.data_utils` -- confirm).

    For every row, the image at ``./images/<Directory>`` is passed to `model`.
    Only the first detection is considered (assumes the model returns
    detections ordered by descending score -- TODO confirm). If its score is
    strictly greater than `threshold`, its label is decoded with
    `le.inverse_transform`; otherwise the image is predicted as ``'empty'``.
    Images with no detections at all are predicted ``'empty'`` with
    confidence ``0.0``.

    Args:
        dataframe: DataFrame with 'Directory' and 'ShortName' columns.
        threshold: Minimum score (exclusive) for accepting the first detection.

    Returns:
        Tuple ``(preds, actual, ims, confidence)`` of equally long lists:
        predicted class names, true class names, image paths as given in
        'Directory', and the first detection's score as a Python float.

    Raises:
        Any error raised while loading an image, running the model or decoding
        a label is propagated rather than being recorded as 'empty'.
    """
    preds = []
    actual = []
    ims = []
    confidence = []

    model.eval()
    with torch.no_grad():
        for image_dir, act_lab in tqdm(dataframe[['Directory', 'ShortName']].values):
            detections = model([image_loader(f'./images/{image_dir}')])[0]
            scores = detections['scores']

            if len(scores) == 0:
                # No detections at all: explicit check instead of a bare `except`,
                # so genuine failures are not silently counted as 'empty'.
                preds.append('empty')
                confidence.append(0.0)
            else:
                # .item() yields plain Python numbers, so results do not hold on
                # to (possibly GPU-resident) tensors.
                top_score = scores[0].item()
                if top_score > threshold:
                    top_label = detections['labels'][0].item()
                    preds.append(le.inverse_transform([top_label])[0])
                else:
                    preds.append('empty')
                confidence.append(top_score)

            actual.append(act_lab)
            ims.append(image_dir)

    return preds, actual, ims, confidence

In [27]:
# Seed the sampling so the evaluation subset is the same on every run.
# NOTE(review): the sample is drawn from `df`, the same table the training
# sample was built from, so it may include images the model was trained on.
EVAL_SAMPLE_SIZE = 10000
preds, actual, ims, confidence = evaluate_performance(
    df.sample(EVAL_SAMPLE_SIZE, random_state=1))

100%|██████████| 10000/10000 [1:17:22<00:00,  2.15it/s]


In [28]:
# One row per evaluated image: the four result lists are stacked as labelled
# rows and then transposed so that each list becomes a column.
result_columns = ['Image', 'Prediction', 'Actual', 'Confidence']
results = pd.DataFrame([ims, preds, actual, confidence], index=result_columns).T