# Wildtrack AI - Individual Identification Model Training


# 1. Initialization

In [0]:
#Set up Tensor flow 2.0

from __future__ import absolute_import, division, print_function, unicode_literals

try:
  # %tensorflow_version only exists in Colab.
  %tensorflow_version 2.x
except Exception:
  pass

import tensorflow as tf

In [0]:
#General
import cv2
import csv
import os
import numpy as np
from numpy import genfromtxt
import pandas as pd
from google.colab.patches import cv2_imshow
from collections import defaultdict
import random
from sklearn.preprocessing import LabelEncoder
from sklearn.neighbors import KNeighborsClassifier
from sklearn import metrics

import pickle

# TF2.0/ Keras Libraries


from tensorflow.keras import backend as K





# Keras Imagenet pre-trained models and pre-processors
from keras.preprocessing import image as KImage
from keras.preprocessing.image import ImageDataGenerator

from tensorflow.keras.applications.vgg16 import VGG16
from tensorflow.keras.applications.vgg16 import preprocess_input as VGG16Pre

from tensorflow.keras.applications.xception import Xception
from tensorflow.keras.applications.xception import preprocess_input as XceptionPre

from tensorflow.keras.applications.mobilenet_v2 import MobileNetV2
from tensorflow.keras.applications.mobilenet_v2 import preprocess_input as MNPre

from tensorflow.keras.applications.imagenet_utils import preprocess_input


# TF2/ Keras Modeling utilities
from tensorflow.keras.models import load_model
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, ZeroPadding2D, Activation, Input, concatenate
from tensorflow.keras.models import Model
from tensorflow.keras.layers import MaxPooling2D, AveragePooling2D, BatchNormalization, GlobalMaxPooling2D, GlobalAveragePooling2D
from tensorflow.keras.layers import Concatenate
from tensorflow.keras.layers import Lambda, Flatten, Dense, Dropout
from tensorflow.keras.layers import Layer
from tensorflow.keras.regularizers import l2
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.initializers import he_normal
from tensorflow.keras.initializers import lecun_normal
from tensorflow.keras.initializers import he_uniform
from tensorflow.keras.initializers import glorot_uniform
from tensorflow.keras.initializers import glorot_normal



# Plotting/ Visualization

from sklearn.decomposition import PCA
from sklearn.manifold import TSNE

%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd

random.seed(42)

Using TensorFlow backend.
  import pandas.util.testing as tm


In [0]:
# Mount Google Drive. NOTE: this mounts your personal Google Drive at the
# directory passed to drive.mount(); the folder paths configured later in the
# notebook all live below '/content/drive'.

from google.colab import drive
drive.mount('/content/drive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/drive


In [0]:
# Folder locations on the mounted Google Drive.
# Change the sub-folder names below to match your own setup in Google Drive.
_drive_root='/content/drive/My Drive'
_project_root=_drive_root+'/U C Berkeley - Darragh'

# csv files containing the pre-processed images (also receives the triples files)
csvpath=_project_root+'/csv'
# Training / test data folders (not referenced by the cells visible in this notebook)
path=_project_root+'/Training Data'
test_path=_project_root+'/Test Data'
# Folder the pretrained species model is loaded from; checkpoints go in its 'testing' sub-folder
modelpath=_drive_root+'/WildAI/csv'

## 2. Load Images and setup Data Structures

### 2-1. Load Images

Previously pre-processed images are loaded from csv files. The pre-processing is done per the input requirements for the model being used.  Pre-processing implementation can be found here: https://colab.research.google.com/drive/1tVg9y71wbf_-bpgOue4LAFbCSXuu2SCD?usp=sharing

In [0]:
BASE_MODEL="vgg16"  #Choices are: vgg16, mobilenetv2, xception (matched case-insensitively)

# Normalise the selection so that spellings such as "Xception" or "VGG16" are
# accepted as well; BASE_MODEL itself is left exactly as the user typed it.
_base_model_key=BASE_MODEL.strip().lower()

# Each branch selects the csv/label files holding the images pre-processed for
# that network, the pretrained species-classification model to start from, the
# matching Keras preprocess_input function and the stem used for saved weights.
if _base_model_key=='vgg16':
  train_imagefile="Training-Images-224.csv"
  train_labelfile="Training-Labels-224.txt"
  test_imagefile="Test-Images-224.csv"
  test_labelfile="Test-Labels-224.txt"
  input_shape=(224,224,3)
  pretrained_model='species_classification_vgg16_model.h5'
  preprocessor=VGG16Pre
  savefile='vgg16_best_model'
elif _base_model_key=="mobilenetv2":
  train_imagefile="Train-Images-Mobile-224.csv"
  train_labelfile="Train-Labels-Mobile-224.txt"
  test_imagefile="Test-Images-Mobile-224.csv"
  test_labelfile="Test-Labels-Mobile-224.txt"
  input_shape=(224,224,3)
  pretrained_model='species_classification_mobilenetv2_model.h5'
  preprocessor=MNPre
  savefile='mobilenetv2_best_model'
elif _base_model_key=="xception":
  train_imagefile="Training-Images-Xception-224.csv"
  train_labelfile="Training-Labels-Xception-224.txt"
  test_imagefile="Test-Images-Xception-224.csv"
  test_labelfile="Test-Labels-Xception-224.txt"
  input_shape=(224,224,3)
  pretrained_model='species_classification_xception_model.h5'
  preprocessor=XceptionPre
  savefile='xception_best_model'
else:
  # Fail here with a clear message instead of a NameError in a later cell.
  raise ValueError("Unknown BASE_MODEL '%s'. Choices are: vgg16, mobilenetv2, xception" % BASE_MODEL)

# Previously only defined for vgg16; derive it for every base model
# (for vgg16 this is still 'vgg16_best_model.h5').
savemodel=savefile+'.h5'

In [0]:
#Function to load processed image data in csv files (both training and test, input data labels)
def LoadData(train_imagefile=train_imagefile,train_labelfile=train_labelfile,
             test_imagefile=test_imagefile,test_labelfile=test_labelfile,
             image_shape=(224,224,3)):
  """Load the pre-processed training and test images together with their labels.

  All four files are read from the module-level ``csvpath`` folder. An image
  file is a comma-delimited text file with one flattened image per row; a label
  file has one label per line, in the same order as the image rows. The species
  of a label is the text before its first "-".

  Args:
    train_imagefile: csv file name of the training images.
    train_labelfile: label file name for the training images.
    test_imagefile: csv file name of the test images.
    test_labelfile: label file name for the test images.
    image_shape: shape every csv row is reshaped to (default (224,224,3)).

  Returns:
    Tuple ``(Ind_DB, X_Test, Species_Test, Individuals_Test)`` where
    ``Ind_DB[species][individual]`` is the list of training images of that
    individual, ``X_Test`` is the array of test images and the last two items
    are the per-image species and individual labels of the test set.

  Raises:
    ValueError: if an image file and its label file hold different numbers of
      entries (the labels could otherwise be silently misaligned).
  """

  def _read_labels(labelfile):
    # Returns (species, individuals), one entry per label line.
    with open(os.path.join(csvpath,labelfile),'r') as f:
      individuals=[line.rstrip() for line in f]
    # Blank lines at the end of the file are not labels.
    while individuals and individuals[-1]=="":
      individuals.pop()
    species=[label.split("-")[0] for label in individuals]
    return species,individuals

  def _read_images(imagefile,expected):
    # ndmin=2 keeps a file holding a single image as one row instead of a flat vector.
    rows=np.loadtxt(os.path.join(csvpath,imagefile),delimiter=",",ndmin=2)
    if len(rows)!=expected:
      raise ValueError("%s holds %d images but %d labels were read"
                       % (imagefile,len(rows),expected))
    return [row.reshape(image_shape) for row in rows]

  #Training Data Set: group the images by species, then by individual
  train_species,train_individuals=_read_labels(train_labelfile)
  train_images=_read_images(train_imagefile,len(train_individuals))
  Ind_DB=defaultdict(defaultdict)
  for image,species,individual in zip(train_images,train_species,train_individuals):
    Ind_DB[species].setdefault(individual,[]).append(image)

  #Test Data Set: kept flat, labels aligned with the rows of X_Test
  Species_Test,Individuals_Test=_read_labels(test_labelfile)
  X_Test=np.asarray(_read_images(test_imagefile,len(Individuals_Test)))

  return (Ind_DB,X_Test,Species_Test,Individuals_Test)

In [0]:
# Load Pre-Processed Images
# Ind_DB holds the training images grouped by species and individual; the other
# three values are the test images and their per-image species / individual labels.
Ind_DB,X_Test,Species_Test,Individuals_Test=LoadData(train_imagefile=train_imagefile, train_labelfile=train_labelfile,
                                                     test_imagefile=test_imagefile, test_labelfile=test_labelfile)

### 2-2. Generate Triples

A "triple" is defined as 2 footprints from the same individual plus 1 footprint from a different individual - all within the same species. This section generates all possible triples from the data we have and writes them out to one file per species, then splits each file into a training subset and a validation/dev subset.
During model training/evaluation, the triple definitions are read back from these files to generate the training/validation data.

In [0]:
# Functions to create triples (A1,A2,B) for individuals within a species. All possible combinations are enumerated and written back to file

# Module-level list of species names: LoadDataSet appends every species it
# processes, and later cells iterate over it.
Species=[]

# Turn every existing pair into a triple by appending one more footprint
def UpdateTriples(doubles,footprint):
  """Return one (first, second, footprint) triple for each pair in ``doubles``.

  A pair that already contains ``footprint`` is reported on stdout, but its
  triple is still included in the result.
  """
  triples=[]
  for pair in doubles:
    first,second=pair[0],pair[1]
    if first==footprint or second==footprint:
      print("Error in Update: ",pair,footprint)
    triples.append((first,second,footprint))
  return triples

def AddTriples(singles,previous,footprint):
  """Pair ``footprint`` with every entry of ``previous`` and extend each pair with every single.

  Returns ``(new_doubles, new_triples)``: the (previous_entry, footprint) pairs
  and the (previous_entry, footprint, single) triples. A single that equals
  either member of its pair is reported on stdout, but the triple is still
  included.
  """
  pairs=[(anchor,footprint) for anchor in previous]
  triples=[]
  for anchor,_ in pairs:
    for outsider in singles:
      if anchor==outsider or footprint==outsider:
        print("Error in add: ",anchor,footprint,outsider)
      triples.append((anchor,footprint,outsider))
  return pairs,triples


# Function to go through each species and generate triples 
# that are then written back out to file. 

def LoadDataSet(DB,output_folder,outfile):
  """Enumerate all triples of every species in ``DB`` and write them to csv files.

  For each species a file ``<outfile>_<species>.csv`` is written to
  ``output_folder`` with one triple per row. A footprint is identified as
  ``<individual>|<index>``, the index being its position in the individual's
  image list. The species name is also recorded in the module-level ``Species``
  list.

  Args:
    DB: mapping species -> {individual -> list of images}.
    output_folder: folder the csv files are written to.
    outfile: file name prefix of the csv files.
  """
  for species in DB.keys():

    # Record each species once only, so re-running the cell does not duplicate entries.
    if species not in Species:
      Species.append(species)
    filename=outfile+"_"+species+".csv"

    print("\n\n*** SPECIES:  ",species)

    doubles=[]   # same-individual pairs of all individuals processed so far
    singles=[]   # footprints of all individuals processed so far
    individuals=DB[species]

    # newline="" is what the csv module asks for; the with-block closes the
    # file even if writing fails part way through.
    with open(os.path.join(output_folder,filename),"w",newline="") as f:
      writer = csv.writer(f)

      for individual,printlist in individuals.items():
        print("\n* INDIVIDUAL: ",individual)
        previous=[]       # footprints of this individual seen so far
        prev_doubles=[]   # pairs built from this individual's footprints

        for i in range(len(printlist)):
          uniq_print=individual+'|'+str(i)
          # pairs of earlier individuals + this footprint as the odd one out
          new_triples1=UpdateTriples(doubles,uniq_print)
          # pairs within this individual + footprints of earlier individuals
          new_doubles,new_triples2=AddTriples(singles,previous,uniq_print)
          prev_doubles.extend(new_doubles)
          writer.writerows(new_triples1)
          writer.writerows(new_triples2)
          previous.append(uniq_print)

        # Only expose this individual's pairs/footprints once it is complete,
        # so it is never combined with itself as the "different" individual.
        doubles.extend(prev_doubles)
        singles.extend(previous)


In [0]:
# Create Triples: writes one "triples_<species>.csv" file per species into
# csvpath and records each species name in the module-level Species list.
LoadDataSet(Ind_DB,csvpath,"triples")



*** SPECIES:   Amur Tiger

* INDIVIDUAL:  Amur Tiger-261

* INDIVIDUAL:  Amur Tiger-237

* INDIVIDUAL:  Amur Tiger-279

* INDIVIDUAL:  Amur Tiger-440

* INDIVIDUAL:  Amur Tiger-565

* INDIVIDUAL:  Amur Tiger-682

* INDIVIDUAL:  Amur Tiger-1020


*** SPECIES:   Bengal Tiger

* INDIVIDUAL:  Bengal Tiger-Aria

* INDIVIDUAL:  Bengal Tiger-Fenimore

* INDIVIDUAL:  Bengal Tiger-India

* INDIVIDUAL:  Bengal Tiger-Lucky

* INDIVIDUAL:  Bengal Tiger-Moki

* INDIVIDUAL:  Bengal Tiger-Mona

* INDIVIDUAL:  Bengal Tiger-Rajah

* INDIVIDUAL:  Bengal Tiger-Rajaji


*** SPECIES:   Cheetah

* INDIVIDUAL:  Cheetah-Aiko

* INDIVIDUAL:  Cheetah-Alvin

* INDIVIDUAL:  Cheetah-Chiquita

* INDIVIDUAL:  Cheetah-Jamu

* INDIVIDUAL:  Cheetah-Kiki

* INDIVIDUAL:  Cheetah-Pano

* INDIVIDUAL:  Cheetah-Rusty

* INDIVIDUAL:  Cheetah-Sandy

* INDIVIDUAL:  Cheetah-Tearmark


*** SPECIES:   Leopard

* INDIVIDUAL:  Leopard-Keanu

* INDIVIDUAL:  Leopard-Lewa

* INDIVIDUAL:  Leopard-Mick

* INDIVIDUAL:  Leopard-Ombeli

*

In [0]:
# Split each species' triples file into a training (75%) and a validation/dev (25%) file.
# The subsets are written with pandas' default index column and header row;
# triples_generator reads the triple members from columns 1-3 accordingly.
from sklearn.model_selection import train_test_split
for species in Species:
  dataset=pd.read_csv(os.path.join(csvpath,'triples_'+species+'.csv'),header=None)
  trainset,devset = train_test_split(dataset, test_size=0.25, random_state=42)
  for subset,suffix in ((trainset,'_train.csv'),(devset,'_dev.csv')):
    fname='triples_'+species+suffix
    subset.to_csv(os.path.join(csvpath,fname))

### 2-3. Label Encoding for Species

In [0]:
# Integer-encode the species names collected in the Species list
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical
le = LabelEncoder()
# fit() returns the encoder itself, so fitting and transforming can be chained
Y = le.fit(Species).transform(Species)
print(le.classes_)

['African elephant' 'African lion' 'Amur Tiger' 'Bengal Tiger'
 'Black Rhino' 'Bongo' 'Cheetah' 'Leopard' 'Lowland Tapir' 'Puma'
 'White Rhino']


## 3. Model Set up 

Reference implementation for Individual Identification done with VGG16 pretrained model

### 3-1. Common Functions


In [0]:
# Function returns image array given species and key

def get_img(DB,species,key):
  """Return the image identified by ``key`` for the given species.

  Args:
    DB: mapping species -> {individual -> list of images}.
    species: species name used as the first-level key of ``DB``.
    key: footprint identifier of the form ``<individual>|<index>``.

  Returns:
    The stored image, or an all-zero (224,224,3) array (after printing a
    message) when the species, individual or index cannot be found.

  Raises:
    IndexError: if ``key`` contains no '|'.
    ValueError: if the part after the last '|' is not an integer.
  """
  # Split on the LAST pipe only, so an individual name that itself contains
  # '|' is still parsed correctly.
  parts=key.rsplit('|',1)
  individual=parts[0]
  indx=int(parts[1])
  try:
    return DB[species][individual][indx]
  except (LookupError,TypeError):
    # Best-effort fallback for failed look-ups only; a bare except here would
    # also swallow KeyboardInterrupt / SystemExit.
    print("Error with loading ",key)
    return np.zeros((224,224,3))


# Generator that yields batches of image triples for a given species,
# given the batch size and the number of steps per pass.

def triples_generator(folder,DB,species,dataset="train",batch_size=32,num_steps=100):
  """Endlessly yield ``(triples, target)`` batches for one species.

  The triple definitions are read from ``triples_<species>_<dataset>.csv`` in
  ``folder``; the three footprint keys of a row are taken from columns 1-3
  (column 0 is presumably the index column written by DataFrame.to_csv -
  confirm against the cell that writes the file). Rows are drawn at random
  with replacement, ``num_steps*batch_size`` at a time.

  Yields:
    A tuple ``(triples, target)``: ``triples`` is a list of three
    (batch_size,224,224,3) arrays and ``target`` is a constant all-zero
    (batch_size,768) array (the triplet loss in this notebook never reads
    y_true; 768 presumably matches the concatenated embedding width - TODO confirm).
  """
  csv_name='triples_'+str(species)+'_'+dataset+'.csv'
  frame=pd.read_csv(os.path.join(folder,csv_name))
  num_rows=frame.shape[0]
  draws_per_pass=int(num_steps*batch_size)
  target=np.zeros((batch_size,768))

  while True:
    # Draw the row numbers for one complete pass of num_steps batches up front
    picks=np.random.randint(0,num_rows,size=draws_per_pass)

    for step in range(num_steps):
      batch=[np.zeros((batch_size,224,224,3)) for _ in range(3)]
      start=step*batch_size

      for slot,row in enumerate(picks[start:start+batch_size]):
        for member in range(3):
          batch[member][slot,:,:,:]=get_img(DB,species,frame.iloc[row,member+1])

      yield (batch, target)


#Custom loss function for Triplets Network
def triplet_loss(y_true,y_pred,alpha=1.2):
  """Triplet margin loss on concatenated (anchor, positive, negative) embeddings.

  Args:
    y_true: ignored; only present because Keras passes it to every loss.
    y_pred: tensor whose last axis holds the anchor, positive and negative
      embeddings one after the other, each one third of the axis.
    alpha: margin required between the positive and the negative distance.

  Returns:
    Scalar tensor: mean over the batch of
    ``max(||a-p|| - ||a-n|| + alpha, 0)``.
  """
  ln=y_pred.shape.as_list()[-1]
  first_cut=ln//3
  second_cut=(2*ln)//3
  anchor=y_pred[:,0:first_cut]
  positive=y_pred[:,first_cut:second_cut]
  negative=y_pred[:,second_cut:ln]

  p_sq=K.sum(K.square(anchor-positive),axis=1)
  n_sq=K.sum(K.square(anchor-negative),axis=1)
  # The gradient of sqrt is unbounded at 0, so two identical embeddings would
  # produce NaN/inf gradients; keep the argument at or above epsilon.
  p_dist=K.sqrt(K.maximum(p_sq,K.epsilon()))
  n_dist=K.sqrt(K.maximum(n_sq,K.epsilon()))
  loss=K.maximum(p_dist-n_dist+alpha,0.0)
  return K.mean(loss)  

#Return the norm of the difference between X and each entry of prints
def calcl2(X,prints):
  """Return a list with ``np.linalg.norm(X - prints[i])`` for every index ``i`` of ``prints``."""
  return [np.linalg.norm(X - prints[idx]) for idx in range(len(prints))]

### 3-2. Set up Triplets Model functions for Training

In [0]:
#Function to create triplets model starting with a base model pre-trained as a species classifier

def Create_TripletTrainer(csvpath,pretrained_model=pretrained_model,input_shape=(224,224,3)):
  """Build and compile the three-input triplet training model.

  The pretrained model is loaded from ``csvpath``, cut at its 'Embedding' layer
  and followed by L2 normalisation. The resulting encoder is applied (with
  shared weights) to three image inputs and the three embeddings are
  concatenated along the last axis, which is the layout ``triplet_loss`` expects.

  Args:
    csvpath: folder containing the pretrained model file.
    pretrained_model: file name of the pretrained Keras model.
    input_shape: shape of each of the three image inputs.

  Returns:
    The compiled Keras model (triplet_loss, Adam with learning rate 0.000005).
  """
  zero_model = load_model(os.path.join(csvpath,pretrained_model))
  embedding=zero_model.get_layer('Embedding').output
  embedding = Lambda(lambda t: K.l2_normalize(t,axis=1))(embedding)
  triplet_model=Model(inputs=zero_model.input,outputs=embedding)

  # Honour the caller-supplied shape; it used to be overwritten by a
  # hard-coded [224,224,3], which made the parameter a no-op.
  input_shape=list(input_shape)
  X1=Input(input_shape)
  X2=Input(input_shape)
  X3=Input(input_shape)
  encoded1 = triplet_model(X1)
  encoded2 = triplet_model(X2)
  encoded3 = triplet_model(X3)

  concat_vector=concatenate([encoded1,encoded2,encoded3],axis=-1,name='concat')
  model=Model(inputs=[X1,X2,X3],outputs=concat_vector)
  model.compile(loss=triplet_loss,optimizer=Adam(0.000005))

  return model

In [0]:
# Function to Train a model for a given species using a pre-trained model as a base. 
# Trained model weights are saved to file passed into the savefile parameter. 

def TrainModel(DB,species="Leopard",pretrained_model=pretrained_model,input_shape=(224,224,3),savefile=savefile):
  """Train the triplet model for one species and checkpoint its best weights.

  The pretrained model is loaded from the module-level ``modelpath``; training
  and validation triples are read from the module-level ``csvpath``. Weights
  are written to ``<modelpath>/testing/<savefile>_<species>.h5``.

  Args:
    DB: mapping species -> {individual -> list of images}.
    species: species to train for.
    pretrained_model: file name of the pretrained species-classification model.
    input_shape: image input shape passed on to Create_TripletTrainer.
    savefile: file name stem of the weights checkpoint.

  Returns:
    True once training has finished.
  """
  tf.keras.backend.clear_session()

  # Pass the caller's input_shape through instead of a hard-coded (224,224,3).
  model=Create_TripletTrainer(modelpath,pretrained_model,input_shape=input_shape)

  # Shared by the generators and by fit() so they cannot drift apart.
  batch_size=50
  train_steps=200
  val_steps=40

  es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=5)
  chkpoint=savefile+'_'+str(species)+'.h5'
  checkpoint_dir=os.path.join(modelpath,'testing')
  # Create the checkpoint folder up front rather than relying on it already existing.
  os.makedirs(checkpoint_dir,exist_ok=True)
  # save_best_only=True keeps the weights of the epoch with the lowest
  # val_loss. Without it the file is overwritten after every epoch and ends
  # up holding the last (post-patience) epoch rather than the best one.
  mc = ModelCheckpoint(os.path.join(checkpoint_dir,chkpoint), save_weights_only=True,
                       monitor='val_loss', mode='min', save_best_only=True)
  train_gen=triples_generator(csvpath,DB,species,dataset="train",batch_size=batch_size,num_steps=train_steps)
  val_gen=triples_generator(csvpath,DB,species,dataset="dev",batch_size=batch_size,num_steps=val_steps)

  print("Training for Species: ",species)
  
  model.fit(train_gen,steps_per_epoch=train_steps, epochs=30,verbose=1,
            validation_data=val_gen,validation_steps=val_steps,callbacks=[es,mc])
  
  return True



## 4. Model Training

Train Model for each species and save weights to file

In [0]:
TrainModel(Ind_DB,"Amur Tiger")

Training for Species:  Amur Tiger
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 00014: early stopping


True

In [0]:
TrainModel(Ind_DB,"Bengal Tiger")

Training for Species:  Bengal Tiger
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 00008: early stopping


True

In [0]:
TrainModel(Ind_DB,"Black Rhino")

Training for Species:  Black Rhino
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 00007: early stopping


True

In [0]:
TrainModel(Ind_DB,"Cheetah")

Training for Species:  Cheetah
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30

In [0]:
TrainModel(Ind_DB,'Leopard')

Training for Species:  Leopard
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 00009: early stopping


True

In [0]:
TrainModel(Ind_DB,"Lowland Tapir")

In [0]:
TrainModel(Ind_DB,"Puma")

In [0]:
TrainModel(Ind_DB,"White Rhino")

In [0]:
TrainModel(Ind_DB,"Bongo")

Training for Species:  Bongo
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 00008: early stopping


True

In [0]:
TrainModel(Ind_DB,"African elephant")

Training for Species:  African elephant
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 00009: early stopping


True

In [0]:
TrainModel(Ind_DB,"African lion")

Training for Species:  African lion
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 00010: early stopping


True