<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#Data-and-paths" data-toc-modified-id="Data-and-paths-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Data and paths</a></span></li><li><span><a href="#Prepare-data" data-toc-modified-id="Prepare-data-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>Prepare data</a></span></li><li><span><a href="#Train-a-model" data-toc-modified-id="Train-a-model-3"><span class="toc-item-num">3&nbsp;&nbsp;</span>Train a model</a></span></li></ul></div>

In [None]:
# Record the interpreter version in the notebook output
import sys

python_version = sys.version
print(python_version)

In [None]:
import pandas as pd
import json
import os

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
# we need to crop images
from PIL import Image


In [None]:
from imageai.Prediction.Custom import ModelTraining
from imageai.Prediction.Custom import CustomImagePrediction

## Data and paths

In [None]:
# Project root directory.
# Defaults to the original author's checkout; set the SILLGRISSLA_HOME
# environment variable (for example to the output of os.getcwd()) to run the
# notebook on another machine without editing this cell.
path_home = os.environ.get(
    'SILLGRISSLA_HOME',
    '/Users/henriknyberg/Documents/jobb/sillgrissla',
)

# The annotation data and the generated train/test folders live below this folder
data_path = os.path.join(path_home, "data")
print(data_path)

In [None]:
# Folders holding the annotated source images: the common root plus one
# sub-folder per annotation category
image_path = os.path.join(data_path, 'Annotation/AnnotationImagesSeabirds')
image_path_a1, image_path_a2, image_path_a3 = (
    os.path.join(image_path, category_folder)
    for category_folder in ('Adult birds', 'Chicks', 'Egg')
)


## Prepare data

Arrange data according to instructions at https://imageai.readthedocs.io/en/latest/custom/index.html

* Create a dataset folder with the name you would like your dataset to have (e.g. pets)

* In the dataset folder, create a folder by the name train

* In the dataset folder, create a folder by the name test

* In the train folder, create a folder for each object you want the model to predict and give the folder a name that corresponds to the respective object name (e.g. dog, cat, squirrel, snake)

* In the test folder, create a folder for each object you want the model to predict and give the folder a name that corresponds to the respective object name (e.g. dog, cat, squirrel, snake)

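* In each folder present in the train folder, put the images of each object in its respective folder. These images are the ones used to train the model. Do the same for the folders in the test folder; those images are used to evaluate the model while it trains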

In [None]:
# Create the folders for training and test data in the data folder.
# Safe to re-run: folders that already exist are left untouched, and any
# class sub-folder that is missing is (re)created.
# Skip this if done manually
# paths:
train_data_path = os.path.join(data_path, 'train')
test_data_path = os.path.join(data_path, 'test')
train_adult = os.path.join(train_data_path, 'Adult birds')
train_chick = os.path.join(train_data_path, 'Chicks')
train_egg = os.path.join(train_data_path, 'Egg')
test_adult = os.path.join(test_data_path, 'Adult birds')
test_chick = os.path.join(test_data_path, 'Chicks')
test_egg = os.path.join(test_data_path, 'Egg')

for split_label, split_root, class_folders in (
    ('Training', train_data_path, (train_adult, train_chick, train_egg)),
    ('Test', test_data_path, (test_adult, test_chick, test_egg)),
):
    # Remember whether the split folder was there before, only to pick the message
    existed_before = os.path.isdir(split_root)
    for class_folder in class_folders:
        # makedirs also creates the split folder itself. exist_ok avoids the old
        # blanket try/except, which hid real errors (e.g. permission problems)
        # and skipped missing sub-folders whenever the split folder existed.
        os.makedirs(class_folder, exist_ok=True)
    if existed_before:
        print('{} folder already exists'.format(split_label))
    else:
        print('{} data folder created'.format(split_label))

In [None]:
# Load the annotation export: one row per image, carrying JSON formatted
# information about the objects in that image
annotation_file_parts = ('Annotation', 'Export AIIOS Birds_v2.csv')
path1 = os.path.join(data_path, *annotation_file_parts)
df = pd.read_csv(path1, header=0, sep=';')
print(df.shape)

In [None]:
# Split into training data and test data: 80 % of the rows go to training.
# The fixed random_state makes the split reproducible, so re-running the
# notebook does not move images between the train and test folders.
df_train = df.sample(frac=0.8, axis=0, random_state=42)
print(df_train.shape)
df_train.head()

In [None]:
# Every row that was not sampled into df_train is used for testing
sampled_for_training = df.index.isin(df_train.index)
df_test = df.loc[~sampled_for_training]
print(df_test.shape)
df_test.head()

In [None]:
# Crop every annotated object out of the training images and save each crop
# in the training folder of its class.
#
# The first column of each df_train row holds '<image file name>,<JSON>';
# the JSON has an 'objects' list whose entries carry a 'type' and a
# 'boundaries' list. Only the first boundary of an object is used.

image_nr = 0    # running number used in the crop file names
im_part = None  # most recent crop; stays None when nothing could be cropped

# Folders that may contain the source image, searched in this order
image_search_dirs = (image_path, image_path_a1, image_path_a2, image_path_a3)
# Annotation type -> destination folder; other types are cropped but not saved
train_dir_by_type = {'AdultBird': train_adult, 'Chick': train_chick, 'Egg': train_egg}

# Iterating the first column replaces Series.get_values(), removed in pandas 1.0
for row_text in df_train.iloc[:, 0]:
    pname, json_string = row_text.split(',', 1)
    print('File name: ', pname)
    obj_list = json.loads(json_string)['objects']

    # Locate the image once per row instead of listing every folder per object
    image_file = None
    for folder in image_search_dirs:
        candidate = os.path.join(folder, pname)
        if os.path.isfile(candidate):
            image_file = candidate
            break
    if image_file is None:
        print('Image {} not found!'.format(pname))
        continue

    # Open the image once for all its objects; the context manager closes the file
    with Image.open(image_file) as im:
        for obj in obj_list:
            object_type = obj['type']  # 'AdultBird', 'Chick' or 'Egg'
            boundaries = obj['boundaries']
            boundary_points = boundaries[0]['boundaryPoints'] if boundaries else []
            if not boundary_points:
                # No boundary at all, or a boundary without points: nothing to crop
                print('Missing coordinates, skipping')
                continue
            print(object_type)

            # Axis-aligned bounding box (left, top, right, bottom) around all points
            x_vals = [point['coords'][0] for point in boundary_points]
            y_vals = [point['coords'][1] for point in boundary_points]
            box1 = (min(x_vals), min(y_vals), max(x_vals), max(y_vals))
            im_part = im.crop(box1)

            # The counter advances for every crop, saved or not, as before
            image_nr += 1
            target_dir = train_dir_by_type.get(object_type)
            if target_dir is not None:
                fname = 'obj_' + str(image_nr) + '.jpg'
                im_part.save(os.path.join(target_dir, fname))

print('Done')
if im_part is None:
    # Previously this case ended in a NameError on the plot call
    print('No objects were cropped')
else:
    print('Last object:')
    plt.imshow(im_part)
    plt.show()

print('Training images in these folders:')
for pp in [train_adult, train_chick, train_egg]:
    print(pp)

In [None]:
# Crop every annotated object out of the test images and save each crop
# in the test folder of its class.
#
# The first column of each df_test row holds '<image file name>,<JSON>';
# the JSON has an 'objects' list whose entries carry a 'type' and a
# 'boundaries' list. Only the first boundary of an object is used.

# The counter starts at 0 here instead of continuing from the training cell.
# That makes this cell runnable on its own and idempotent: re-running it
# overwrites the same files instead of adding duplicates under new numbers.
# NOTE: crops saved by earlier runs under the old continued numbering are not
# removed; clear the test folders once if they hold such files.
image_nr = 0
im_part = None  # most recent crop of this cell; stays None when nothing was cropped

# Folders that may contain the source image, searched in this order
image_search_dirs = (image_path, image_path_a1, image_path_a2, image_path_a3)
# Annotation type -> destination folder; other types are cropped but not saved
test_dir_by_type = {'AdultBird': test_adult, 'Chick': test_chick, 'Egg': test_egg}

# Iterating the first column replaces Series.get_values(), removed in pandas 1.0
for row_text in df_test.iloc[:, 0]:
    pname, json_string = row_text.split(',', 1)
    print('File name: ', pname)
    obj_list = json.loads(json_string)['objects']

    # Locate the image once per row instead of listing every folder per object
    image_file = None
    for folder in image_search_dirs:
        candidate = os.path.join(folder, pname)
        if os.path.isfile(candidate):
            image_file = candidate
            break
    if image_file is None:
        print('Image {} not found!'.format(pname))
        continue

    # Open the image once for all its objects; the context manager closes the file
    with Image.open(image_file) as im:
        for obj in obj_list:
            object_type = obj['type']  # 'AdultBird', 'Chick' or 'Egg'
            boundaries = obj['boundaries']
            boundary_points = boundaries[0]['boundaryPoints'] if boundaries else []
            if not boundary_points:
                # No boundary at all, or a boundary without points: nothing to crop
                print('Missing coordinates, skipping')
                continue
            print(object_type)

            # Axis-aligned bounding box (left, top, right, bottom) around all points
            x_vals = [point['coords'][0] for point in boundary_points]
            y_vals = [point['coords'][1] for point in boundary_points]
            box1 = (min(x_vals), min(y_vals), max(x_vals), max(y_vals))
            im_part = im.crop(box1)

            # The counter advances for every crop, saved or not
            image_nr += 1
            target_dir = test_dir_by_type.get(object_type)
            if target_dir is not None:
                fname = 'obj_' + str(image_nr) + '.jpg'
                im_part.save(os.path.join(target_dir, fname))

print('Done')
if im_part is None:
    # Avoids showing a stale crop from another cell (or failing with a NameError)
    print('No objects were cropped')
else:
    print('Last object:')
    plt.imshow(im_part)
    plt.show()

print('Test images in these folders:')
for pp in [test_adult, test_chick, test_egg]:
    print(pp)

## Train a model

FIXME

In [None]:
# Network architectures to choose from, and the one picked for this run
model_type_list = 'SqueezeNet ResNet InceptionV3 DenseNet'.split()
selected_model_index = 1
model_type = model_type_list[selected_model_index]
print('Model type: ', model_type)

In [None]:
# Train an image classifier on the crops stored under <data_path>/train and
# <data_path>/test (the folder layout described in the "Prepare data" section).
model_trainer = ModelTraining()

# Use the architecture selected in `model_type` instead of a hard-coded ResNet.
# The ImageAI setters are named 'setModelTypeAs<Name>' for every entry in
# model_type_list; getattr raises AttributeError for an unsupported name.
getattr(model_trainer, 'setModelTypeAs' + model_type)()
model_trainer.setDataDirectory(data_path)

# num_objects must match the number of class folders in the training data
# (it was 10, but only 'Adult birds', 'Chicks' and 'Egg' are created above).
# Counting the sub-folders keeps the two in sync if classes are added later.
train_root = os.path.join(data_path, 'train')
num_classes = sum(
    os.path.isdir(os.path.join(train_root, entry))
    for entry in os.listdir(train_root)
)

model_trainer.trainModel(num_objects=num_classes, num_experiments=100, enhance_data=True, batch_size=32,
                         show_network_summary=True)