<a href="https://colab.research.google.com/github/jtdsouza/Course-Overview/blob/master/WildAID.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Initialization and Load Image Data

In [0]:
#Set up Tensor flow 2.0

from __future__ import absolute_import, division, print_function, unicode_literals

try:
  # %tensorflow_version only exists in Colab.
  %tensorflow_version 2.x
except Exception:
  pass

import tensorflow as tf

In [0]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, ZeroPadding2D, Activation, Input, concatenate
from tensorflow.keras.models import Model
from tensorflow.keras.layers import MaxPooling2D, AveragePooling2D, BatchNormalization, GlobalMaxPooling2D, GlobalAveragePooling2D
from tensorflow.keras.layers import Concatenate
from tensorflow.keras.layers import Lambda, Flatten, Dense, Dropout
from tensorflow.keras.layers import Layer
from tensorflow.keras.regularizers import l2
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.initializers import he_normal
from tensorflow.keras.initializers import lecun_normal
from tensorflow.keras.initializers import he_uniform
from tensorflow.keras.initializers import glorot_uniform
from tensorflow.keras.initializers import glorot_normal
from tensorflow.keras import backend as K
from keras.preprocessing import image as KImage
from keras.preprocessing.image import ImageDataGenerator

import cv2
import csv
import os
import numpy as np
from numpy import genfromtxt
import pandas as pd
from google.colab.patches import cv2_imshow
from collections import defaultdict
import random
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.applications.vgg16 import VGG16
from tensorflow.keras.applications.vgg16 import preprocess_input as VGG16Pre
from tensorflow.keras.applications.vgg19 import VGG19
from tensorflow.keras.applications.vgg19 import preprocess_input as VGG19Pre
from tensorflow.keras.applications.inception_v3 import InceptionV3
from tensorflow.keras.applications.inception_v3 import preprocess_input as InceptionPre
from tensorflow.keras.applications.xception import Xception
from tensorflow.keras.applications.xception import preprocess_input as XceptionPre
from tensorflow.keras.applications.mobilenet_v2 import MobileNetV2
from tensorflow.keras.applications.mobilenet_v2 import preprocess_input as MNPre
from tensorflow.keras.applications.resnet_v2 import ResNet152V2
from tensorflow.keras.applications.resnet_v2 import ResNet50V2
from tensorflow.keras.applications.nasnet import NASNetMobile
from tensorflow.keras.applications.densenet import DenseNet121

from tensorflow.keras.applications.imagenet_utils import preprocess_input

from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.callbacks import ModelCheckpoint
import pickle

from sklearn.neighbors import KNeighborsClassifier
from sklearn import metrics

from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd

Using TensorFlow backend.
  import pandas.util.testing as tm


In [0]:
#Mount Google Drive - Note this mounts your personal GDrive to the directory stated

from google.colab import drive
drive.mount('/content/drive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/drive


## Load Images and setup Data Structures

Images are loaded from csv files that contain previously processed data sets.
The preprocessing implementation can be found here: https://colab.research.google.com/drive/1tVg9y71wbf_-bpgOue4LAFbCSXuu2SCD?usp=sharing

In [0]:
  #Set up path for csv files containing preprocessed images. CHange subfolder names to match your setup in google drive
  csvpath='/content/drive/My Drive/WildAI/csv'

  #Function to load processed image data in csv files (both training and test, input data labels)
  def LoadData(train_imagefile="Training-Images-224.csv",train_labelfile="Training-Labels-224.txt",
               test_imagefile="Test-Images-224.csv",test_labelfile="Test-Labels-224.txt"):
    
    #Training Data Set
    X=[]
    Individuals=[]
    Species=[]
    Ind_DB=defaultdict(defaultdict)

    
    dataset=np.loadtxt(os.path.join(csvpath,train_imagefile),delimiter=",")
    f=open(os.path.join(csvpath,train_labelfile),'r')
    lines=f.readlines()
    for line in lines:
      vals=line.rstrip()
      Species.append(vals.split("-")[0])
      Individuals.append(vals)
    i=0
    for x in dataset:
      image=x.reshape(224,224,3)
      X.append(image)
      species=Species[i]
      key=Individuals[i]
      spec_DB=Ind_DB[species]
      if key not in spec_DB.keys():
        spec_DB[key]=[image]
      else:
        spec_DB[key].append(image)
      i=i+1


    #Test Data Set
    X_Test=[]
    Individuals_Test=[]
    Species_Test=[]
    dataset=np.loadtxt(os.path.join(csvpath,test_imagefile),delimiter=",")

    for x in dataset:
      image=x.reshape(224,224,3)
      X_Test.append(image)

    f=open(os.path.join(csvpath,test_labelfile),'r')
    lines=f.readlines()
    for line in lines:
      vals=line.rstrip()
      Species_Test.append(vals.split("-")[0])
      Individuals_Test.append(vals)

    X_Test=np.asarray(X_Test)
    X=np.asarray(X)
    return (X,Species,Individuals,Ind_DB,X_Test,Species_Test,Individuals_Test)

In [0]:
# Load Pre-Processed Images

X,Species,Individuals, Ind_DB,X_Test,Species_Test,Individuals_Test=LoadData()

#Use this line for augmented images
#X,Species,Individuals,Ind_DB,X_Test,Species_Test,Individuals_Test=LoadData(train_imagefile="Training-Images-224.csv",train_labelfile="Training-Labels-224.txt",
#              test_imagefile="Test-Images-224.csv",test_labelfile="Test-Labels-224.txt")

In [0]:
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical
le = LabelEncoder()
le.fit(Species)
Y=le.transform(Species)
Y_Test=le.transform(Species_Test)
Y1=to_categorical(np.array(Y))
Y_Test1=to_categorical(np.array(Y_Test))
print(Y1.shape)
print(le.classes_)



(1437, 8)
['Amur Tiger' 'Bengal Tiger' 'Black Rhino' 'Cheetah' 'Leopard'
 'Lowland Tapir' 'Puma' 'White Rhino']


In [0]:

#For Species Classification
from sklearn.model_selection import train_test_split

X_Train, X_Val, Y_Train, Y_Val = train_test_split(X, Y1, test_size=0.10, random_state=42)

In [0]:
## DYNAMIC AUGMENTATION

datagen = ImageDataGenerator(rescale=1/255.,rotation_range=30,
	zoom_range=0.15,
	width_shift_range=0.2,
	height_shift_range=0.2,
	shear_range=0.15,
	fill_mode="nearest",validation_split=0.0)
training_generator = datagen.flow(X_Train, Y_Train, batch_size=32,shuffle=True,seed=7)
validation_generator = datagen.flow(X_Val, Y_Val, batch_size=32,shuffle=True,seed=7)



# VGG16 Model

Reference implementation for both Species Classification and Individual Identification done with VGG16 pretrained model

## Load/Setup Base Model

In [0]:
input_shape=(224,224,3)
vgg=VGG16(weights='imagenet',include_top=False,input_shape=input_shape)
vgg.summary()

Model: "vgg16"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_4 (InputLayer)         [(None, 224, 224, 3)]     0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 224, 224, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 224, 224, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 112, 112, 64)      0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 112, 112, 128)     73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 112, 112, 128)     147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 56, 56, 128)       0     

## Species Classification Task

### PreTrained Network - Train all Layers

In [0]:
#Set all layers of pretrained VGG16 model as trainable. Add a few dense layers on top

vgg_model=Sequential()
vgg_model.add(VGG16(weights='imagenet',include_top=False,input_shape=input_shape))
vgg_model.add(Flatten())
vgg_model.add(Dropout(0.4))
vgg_model.add(Dense(256, activation='relu',name="Dense1"))
vgg_model.add(Dense(128, activation='relu'))
vgg_model.add(Dense(64, activation='relu'))
vgg_model.add(Dropout(0.4))
vgg_model.add(Dense(8))

vgg_model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
vgg16 (Model)                (None, 7, 7, 512)         14714688  
_________________________________________________________________
flatten_1 (Flatten)          (None, 25088)             0         
_________________________________________________________________
dropout_2 (Dropout)          (None, 25088)             0         
_________________________________________________________________
Dense1 (Dense)               (None, 256)               6422784   
_________________________________________________________________
dense_3 (Dense)              (None, 128)               32896     
_________________________________________________________________
dense_4 (Dense)              (None, 64)                8256      
_________________________________________________________________
dropout_3 (Dropout)          (None, 64)               

#### First Time training Run

In [0]:
loss_fn = tf.keras.losses.CategoricalCrossentropy(from_logits=True)
vgg_model.compile(optimizer=tf.keras.optimizers.Adam(lr=0.00001),
              loss=loss_fn,
              metrics=['accuracy'])
#STATIC
vgg_model.fit(X_Train,Y_Train,validation_data=(X_Val,Y_Val),epochs=30)

#DYNAMIC ** Note: tried Validation without augmentation (from above) and got ~20% accuracy..
#history = vgg_model.fit_generator(training_generator,steps_per_epoch=(len(X_Train))//32, validation_data=validation_generator,validation_steps=len(X_Val)//32,epochs=30)


NameError: ignored

In [0]:
#Save model
vgg_model.save_weights(os.path.join(csvpath,"vgg-model.h5"))
#vgg_model.save_weights(os.path.join(csvpath,"vgg-model-augmented.h5"))


In [0]:
#EVal;uate on Test Data WITHOUT Augmentation
vgg_model.evaluate(X_Test,  Y_Test1, verbose=2)

5/5 - 1s - loss: 0.0579 - accuracy: 0.9851


[0.057863593101501465, 0.9850746393203735]

In [0]:
#Evaluate on Test Data WITH Augmentation
test_generator = datagen.flow(X_Test, Y_Test1, batch_size=32,shuffle=True,seed=7)
vgg_model.evaluate(test_generator,steps=len(X_Test)//32)



[0.1945144236087799, 0.9453125]

#### Subsequent Runs. - Reload model

In [0]:
vgg_model.load_weights(os.path.join(csvpath,"vgg-model.h5"))

In [0]:
loss_fn = tf.keras.losses.CategoricalCrossentropy(from_logits=True)
vgg_model.compile(optimizer=tf.keras.optimizers.Adam(lr=0.00001),
              loss=loss_fn,
              metrics=['accuracy'])

In [0]:
test_generator = datagen.flow(X_Test, Y_Test1, batch_size=32,shuffle=True,seed=7)
vgg_model.evaluate(X_Test,  Y_Test1, verbose=2)

## Individual Identification Task


### Common Functions

In [0]:
## Functions used in TRIPLES Network Architecture (for Identification)

# Assumes Ind_DB (see load data section) populated with training images, 
# creates set of triples for training a triplets network using batch size specified below.

def GetTriples(batch_size=20,rnd=False):
  if rnd:
    sample_size=int(batch_size/len(Ind_DB.keys()))
  triples=[np.zeros((batch_size,224,224,3))for i in range(3)]
  cnt=0
  while (cnt<batch_size):
    for spec,inds in list(Ind_DB.items()):
      names=list(inds.keys())
      pop_size=len(names)
      if pop_size<2:
        continue
      else:
        if rnd and pop_size>sample_size:
          sample=random.sample(names,sample_size)
        else:
          sample=names

      #print("Sample: ",sample)

      for ind in sample:
        nonmatch=""
        #print(ind)
        key=str(ind)
        pair=random.sample(inds[key],2)
        triples[0][cnt,:,:,:]=pair[0]
        triples[1][cnt,:,:,:]=pair[1]
        while len(nonmatch)==0: 
          x = str(random.sample(names,1)[0])
          #print(x)
          if x!=key:
            nonmatch=x
            #print(nonmatch)
        triples[2][cnt,:,:,:]=random.sample(inds[nonmatch],1)[0]
        cnt=cnt+1
        #print("Iteration complete: ",cnt)
        if cnt==batch_size:
          break
      if cnt==batch_size:
        break
  target=np.zeros((batch_size,768))
  return triples,target


# For use when using keras.modelsfit_generator
def batch_gen(batch_size=20,rnd=False):
  #print("IN!")
  while True:
    triples,targets=GetTriples(batch_size,rnd)
    x= (triples,targets)
    #print(len(triples))
    yield (triples,targets)


#CUstom loss function for Triplets Network
def triplet_loss(y_true,y_pred,alpha=1.0):
  ln=y_pred.shape.as_list()[-1]
  anchor=y_pred[:,0:int(ln/3)]
  positive=y_pred[:,int(ln/3):int(2*ln/3)]
  negative=y_pred[:,int(2*ln/3):ln]

  p_dist=K.sqrt(K.sum(K.square(anchor-positive),axis=1))
  n_dist=K.sqrt(K.sum(K.square(anchor-negative),axis=1))
  loss=K.maximum(p_dist-n_dist+alpha,0.0)
  return K.mean(loss)  


def calcl2(X,prints):
  l2norm=[]
  for i in range(len(prints)):
    l2norm.append(np.linalg.norm(X - prints[i]))
  return l2norm

def Validate(test_data,master_DB,trained_model):
  X=test_data[0]
  species=test_data[1]
  distary=[]
  indary=[]
  support_DB=master_DB[species]
  for individual,prints in support_DB.items():
    prints=np.asarray(prints)
    prints_encoded=trained_model.predict(prints)
    dist=calcl2(X,prints_encoded)
    ind=[individual]*len(dist)
    distary.extend(dist)
    indary.extend(ind)
  order=np.argsort(distary)
  #print(distary)
  #print(order)
  #print(indary)
  #print("Target Individual: ",true_ind)
  #print(distary[i],indary[i])
  return indary[order[0]],indary[order[1]],indary[order[2]]



### Set up Triplets Model

In [0]:
#STart with VGG model used for SPecies classification (assumes loaded per previous section)
vgg_model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
vgg16 (Model)                (None, 7, 7, 512)         14714688  
_________________________________________________________________
flatten_1 (Flatten)          (None, 25088)             0         
_________________________________________________________________
dropout_2 (Dropout)          (None, 25088)             0         
_________________________________________________________________
Dense1 (Dense)               (None, 256)               6422784   
_________________________________________________________________
dense_3 (Dense)              (None, 128)               32896     
_________________________________________________________________
dense_4 (Dense)              (None, 64)                8256      
_________________________________________________________________
dropout_3 (Dropout)          (None, 64)               

In [0]:
#Create Triplets Model Network

x=vgg_model.get_layer('Dense1').output
x = Lambda(lambda  x: K.l2_normalize(x,axis=1))(x)
triplet_model=Model(inputs=vgg_model.input,outputs=x)
input_shape=[224,224,3]
X1=Input(input_shape)
X2=Input(input_shape)
X3=Input(input_shape)
encoded1 = triplet_model(X1)
encoded2 = triplet_model(X2)
encoded3 = triplet_model(X3)

concat_vector=concatenate([encoded1,encoded2,encoded3],axis=-1,name='concat')
model=Model(inputs=[X1,X2,X3],outputs=concat_vector)
model.compile(loss=triplet_loss,optimizer=Adam(0.00001))
model.summary()

Model: "model_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_6 (InputLayer)            [(None, 224, 224, 3) 0                                            
__________________________________________________________________________________________________
input_7 (InputLayer)            [(None, 224, 224, 3) 0                                            
__________________________________________________________________________________________________
input_8 (InputLayer)            [(None, 224, 224, 3) 0                                            
__________________________________________________________________________________________________
model (Model)                   (None, 256)          21137472    input_6[0][0]                    
                                                                 input_7[0][0]              

### First time Training 

In [0]:
triples,targets=GetTriples(3000,True)
Anchor = triples[0]
Positive = triples[1]
Negative = triples[2]
Y=targets
es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=10)

mc = ModelCheckpoint('/content/drive/My Drive/WildAI/csv/best_model.h5', monitor='val_loss', mode='min')

model.fit([Anchor,Positive,Negative],y=targets, batch_size=50, epochs=120,verbose=2,validation_split=0.1,callbacks=[es,mc])

In [0]:

model.save_weights("ind-model.h5")
#trained_model=Model(inputs=X1,outputs=encoded1)
#trained_model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
#trained_model.load_weights("ind-model.h5")
#trained_model.load_weights("/content/drive/My Drive/U C Berkeley - Darragh/csv/best_model.h5")

### EValuate /Test trained Model

In [0]:
#Load previously trained model
trained_model=Model(inputs=X1,outputs=encoded1)
trained_model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
#trained_model.load_weights("ind-model.h5")
trained_model.load_weights("/content/drive/My Drive/WildAI/csv/best_model.h5")

### Updated Method: using Mean Reference Evmeddings

In [0]:
#ALternate to KNNs - Establish a Mean vector for each Individual
from sklearn.ensemble import IsolationForest



def create_reference(input,type=0):
  if type==0:   #Simple Mean
    result=np.mean(input,axis=0)
  elif type==1:  #Remove outliers
    std=np.std(input,axis=0, dtype=np.float64)
    mean=np.mean(input,axis=0,dtype=np.float64)
    cutoff=std
    ll=mean-cutoff
    ul=mean+cutoff
    new_mean=np.zeros(input.shape[1])
    for j in range(input.shape[1]):
      keep=[]
      for i in range(input.shape[0]):
        if input[i,j]<ll[j] or input[i,j]>ul[j]:
          continue
        else:
          keep.append(input[i,j])
      new_mean[j]=np.mean(np.array(keep))
    result=new_mean
  elif type==2:  #Random Isolation Forests
    clf = IsolationForest( max_samples=10, random_state = 1, contamination= 'auto')
    preds = clf.fit_predict(input)
    new_mean=np.zeros(input.shape[1])
    for j in range(input.shape[1]):
      keep=[]
      for i in range(input.shape[0]):
        if preds[i]==1:
          keep.append(input[i,j])
      new_mean[j]=np.mean(np.array(keep))
    result=new_mean



  return result  



def FindReferenceEmbeddings(DB,model):
  Ref_Embeddings=defaultdict(defaultdict)
  for species in DB.keys():
    support_DB=DB[species]
    Ref_DB=Ref_Embeddings[species]
    X=[]
    Y=[]
    for individual,prints in support_DB.items():
      if 'Unknown' in individual:
        continue
      else:
        prints=np.asarray(prints)
        prints_encoded=model.predict(prints)
        reference_print=create_reference(prints_encoded,type=2)
        Ref_DB[individual]=reference_print
  return Ref_Embeddings

def findnearest(Ref_Individuals,X):
  inds=[]
  dist=[]
  for individual,embedding in Ref_Individuals.items():
    inds.append(individual)
    dist.append(np.linalg.norm(X - embedding))
  i=np.argmin(np.asarray(dist))
  found=inds[i]
  return found


In [0]:
#Test Results

#trained_model.load_weights("best-model.h5")
Ref_Embeddings=FindReferenceEmbeddings(Ind_DB,trained_model)

X_Test_encoded=trained_model.predict(X_Test)
num=len(X_Test_encoded)
count=defaultdict(int)
correct_count=defaultdict(int)
correct=0
for i in range(num):
  #X_encoded=trained_mode.predict(X_Test[i])  
  x=X_Test_encoded[i]
  species=Species_Test[i]
  count[species]+=1
  true=Individuals_Test[i]
  predicted=findnearest(Ref_Embeddings[species],x.reshape(1,-1))
  #predicted=predict(knns[species],pcas[species],x.reshape(1,-1))
  if true==predicted:
    correct=correct+1
    correct_count[species]+=1
  else:
    print(predicted,'  ----    ',true)
Accuracy=correct/num
print("Overall Accuracy = ",Accuracy)
for species in count.keys():
  print("Accuracy for ",species,": ",correct_count[species]/count[species] )


  % (self.max_samples, n_samples))
  % (self.max_samples, n_samples))
  % (self.max_samples, n_samples))
  % (self.max_samples, n_samples))
  % (self.max_samples, n_samples))
  % (self.max_samples, n_samples))
  % (self.max_samples, n_samples))


Amur Tiger-682   ----     Amur Tiger-261
Amur Tiger-440   ----     Amur Tiger-279
Amur Tiger-682   ----     Amur Tiger-440
Amur Tiger-237   ----     Amur Tiger-682
Amur Tiger-261   ----     Amur Tiger-1020
Bengal Tiger-India   ----     Bengal Tiger-Fenimore
Bengal Tiger-Aria   ----     Bengal Tiger-India
Bengal Tiger-Rajaji   ----     Bengal Tiger-Lucky
Cheetah-Alvin   ----     Cheetah-Aiko
Cheetah-Kiki   ----     Cheetah-Chiquita
Cheetah-Alvin   ----     Cheetah-Pano
Leopard-Timbila   ----     Leopard-Keanu
Leopard-Timbila   ----     Leopard-Keanu
Leopard-Keanu   ----     Leopard-Shakira
Leopard-Lewa   ----     Leopard-Shakira
Leopard-Timbila   ----     Leopard-Shakira
Leopard-Lewa   ----     Leopard-Mick
Leopard-Lewa   ----     Leopard-Timbila
Leopard-Keanu   ----     Leopard-Wahoo
Lowland Tapir-Chuvisco M   ----     Lowland Tapir-Chuva F
Lowland Tapir-Chuva F   ----     Lowland Tapir-Feminha F
Lowland Tapir-Sorocaba 5   ----     Lowland Tapir-Sorocaba
Lowland Tapir-Sorocaba 5   ----

### Original method - Using K Nearest Neighbors

In [0]:
# Use K-Newares Neighbors to evaluate training/ test results

def FindKNN(X,Y,X_Test,Y_Test):
  k_range=range(1,20)
  scores={}
  scores_list=[]
  for k in k_range:
    knn=KNeighborsClassifier(n_neighbors=k)
    knn.fit(X,Y)
    Y_Pred=knn.predict(X_Test)
    acc=metrics.accuracy_score(Y_Test,Y_Pred)
    scores[k]=acc
    scores_list.append(acc)
  #print(scores)
  return np.argmax(scores_list)


def FitKNNs(DB,model):
  knns={}
  #tsnes={}
  for species in DB.keys():
    support_DB=DB[species]
    X=[]
    Y=[]
    for individual,prints in support_DB.items():
      if 'Unknown' in individual:
        continue
      else:
        prints=np.asarray(prints)
        prints_encoded=model.predict(prints)
        ind=[individual]*len(prints_encoded)
        X.extend(prints_encoded)
        Y.extend(ind)
    #df=pd.DataFrame(X)
    #tsne = TSNE(n_components=2, verbose=1, perplexity=40, n_iter=300)
    #tsne_results = tsne.fit_transform(df)
    #XT=pd.DataFrame()
    #YT=pd.DataFrame(Y)
    #XT['tsne-2d-one'] = tsne_results[:,0]
    #XT['tsne-2d-two'] = tsne_results[:,1]
    X_Train, X_Val, Y_Train, Y_Val = train_test_split(X, Y, test_size=0.20, random_state=42)
    #X_Train, X_Val, Y_Train, Y_Val = train_test_split(XT, YT, test_size=0.20, random_state=42)
    k=FindKNN(X_Train,Y_Train,X_Val,Y_Val)+1
    #print(species," : ",k)
    #k=3
    knn=KNeighborsClassifier(n_neighbors=k)
    knn.fit(X_Train,Y_Train)
    knns[species]=knn
    #tsnes[species]=tsne
  #return knns,tsnes
  return knns

def predict(knn,x):
  #dfx=pd.DataFrame(x)
  #tsne_results = tsne.transform(dfx)
  #XT=pd.DataFrame()
  #XT['tsne-2d-one'] = tsne_results[:,0]
  #XT['tsne-2d-two'] = tsne_results[:,1]
  predicted=knn.predict(x)
  return(predicted)


In [0]:
#Test Results

#trained_model.load_weights("best-model.h5")
knns=FitKNNs(Ind_DB,trained_model)


X_Test_encoded=trained_model.predict(X_Test)
num=len(X_Test_encoded)
count=defaultdict(int)
correct_count=defaultdict(int)
correct=0
for i in range(num):
  #X_encoded=trained_mode.predict(X_Test[i])  
  x=X_Test_encoded[i]
  species=Species_Test[i]
  count[species]+=1
  true=Individuals_Test[i]
  predicted=predict(knns[species],x.reshape(1,-1))
  #predicted=predict(knns[species],pcas[species],x.reshape(1,-1))
  if true==predicted[0]:
    correct=correct+1
    correct_count[species]+=1
  else:
    print(predicted[0],'  ----    ',true)
Accuracy=correct/num
print("Overall Accuracy = ",Accuracy)
for species in count.keys():
  print("Accuracy for ",species,": ",correct_count[species]/count[species] )


Amur Tiger-682   ----     Amur Tiger-261
Amur Tiger-440   ----     Amur Tiger-279
Amur Tiger-682   ----     Amur Tiger-440
Amur Tiger-237   ----     Amur Tiger-682
Amur Tiger-237   ----     Amur Tiger-682
Amur Tiger-261   ----     Amur Tiger-1020
Bengal Tiger-India   ----     Bengal Tiger-Fenimore
Bengal Tiger-Aria   ----     Bengal Tiger-India
Bengal Tiger-Rajaji   ----     Bengal Tiger-Lucky
Bengal Tiger-India   ----     Bengal Tiger-Moki
Cheetah-Kiki   ----     Cheetah-Chiquita
Cheetah-Tearmark   ----     Cheetah-Chiquita
Cheetah-Alvin   ----     Cheetah-Pano
Leopard-Timbila   ----     Leopard-Keanu
Leopard-Timbila   ----     Leopard-Keanu
Leopard-Keanu   ----     Leopard-Shakira
Leopard-Lewa   ----     Leopard-Shakira
Leopard-Timbila   ----     Leopard-Shakira
Leopard-Lewa   ----     Leopard-Mick
Leopard-Lewa   ----     Leopard-Timbila
Leopard-Keanu   ----     Leopard-Wahoo
Lowland Tapir-Chuvisco M   ----     Lowland Tapir-Chuva F
Lowland Tapir-Chuva F   ----     Lowland Tapir-Femi

In [0]:
#ALternate prediction (WORK IN PROGRESS)
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import normalize

def norm_predict(x):
  

SyntaxError: ignored

## Visualization of Results

In [0]:
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd

In [0]:
Y=le.transform(Species)
X_encoded=trained_model.predict(X)
le.classes_

In [0]:
df1=pd.DataFrame(X_encoded)
df1['y']=Y
df1['Names']=Individuals 
df_ATiger=df1[df1.y==0]

df_BTiger=df1[df1.y==1]

df_BRhino=df1[df1.y==2]

df_Cheetah=df1[df1.y==3]

df_Leopard=df1[df1.y==4]

df_LTapir=df1[df1.y==5]

df_Puma=df1[df1.y==6]

df_WRhino=df1[df1.y==7]



In [0]:
def plotprints(df):
  dfx=df.drop(['y','Names'],axis=1)
  tsne = TSNE(n_components=2, verbose=1, perplexity=40, n_iter=300)
  tsne_results = tsne.fit_transform(dfx)
  df['tsne-2d-one'] = tsne_results[:,0]
  df['tsne-2d-two'] = tsne_results[:,1]
  plt.figure(figsize=(16,10))
  num=df['Names'].nunique()

  sns.scatterplot(x="tsne-2d-one", y="tsne-2d-two",
                  hue="Names",
                  palette=sns.color_palette("hls", num),
                  data=df,
                  legend="full",
                  alpha=0.6)



In [0]:
#Plot projected 2D clusters for each species
plotprints(df_WRhino)
plotprints(df_BRhino)
plotprints(df_ATiger)
plotprints(df_BTiger)
plotprints(df_Puma)
plotprints(df_Cheetah)
plotprints(df_Leopard)
plotprints(df_LTapir)

# MobileNetV2 - UNDER CONSTRUCTION

Placeholder Section to reproduce work done above (with VGG16) using MobileNet V2 . 
Note: INCOMPLETE: Needs to be completed

In [0]:
input_shape=(224,224,3)
mnet=MobileNetV2(weights='imagenet',include_top=False,input_shape=input_shape)
mnet.summary()

In [0]:
# Load Pre-Processed Images
  csvpath='/content/drive/My Drive/U C Berkeley - Darragh/csv'
  X,Species,Individuals, Ind_DB,X_Test,Species_Test,Individuals_Test=LoadData("Train-Images-Mobile-224.csv","Train-Labels-Mobile-224.txt","Test-Images-Mobile-224.csv","Test-Labels-Mobile-224.txt")

In [0]:
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical
le = LabelEncoder()
le.fit(Species)
Y=le.transform(Species)
Y_Test=le.transform(Species_Test)
Y1=to_categorical(np.array(Y))
Y_Test1=to_categorical(np.array(Y_Test))
print(Y1.shape)
print(le.classes_)
#For Species Classification
from sklearn.model_selection import train_test_split

X_Train, X_Val, Y_Train, Y_Val = train_test_split(X, Y1, test_size=0.10, random_state=42)

In [0]:

mnet_model=Sequential()
mnet_model.add(mnet)
mnet_model.add(Flatten())
mnet_model.add(Dense(128, activation='relu',name="Dense1"))
mnet_model.add(Dense(64, activation='relu',name="Dense2"))
mnet_model.add(Dropout(0.8))
mnet_model.add(Dense(8))

mnet_model.summary()

In [0]:
loss_fn = tf.keras.losses.CategoricalCrossentropy(from_logits=True)
mnet_model.compile(optimizer=tf.keras.optimizers.Adam(lr=0.00001),
              loss=loss_fn,
              metrics=['accuracy'])
mnet_model.fit(X_Train,Y_Train,validation_data=(X_Val,Y_Val),epochs=50)

In [0]:
mnet_model.evaluate(X_Test,  Y_Test1, verbose=2)

### Identification Task

In [0]:
mnet_model.summary()

In [0]:
triplet_model=Model(inputs=mnet_model.input,outputs=mnet_model.get_layer('Dense1').output)
triplet_model.summary()

In [0]:
input_shape=[224,224,3]
X1=Input(input_shape)
X2=Input(input_shape)
X3=Input(input_shape)
encoded1 = triplet_model(X1)
encoded2 = triplet_model(X2)
encoded3 = triplet_model(X3)

concat_vector=concatenate([encoded1,encoded2,encoded3],axis=-1,name='concat')
model=Model(inputs=[X1,X2,X3],outputs=concat_vector)
model.compile(loss=triplet_loss,optimizer=Adam(0.00001))
model.summary()

In [0]:
triples,targets=GetTriples(1500,True)
Anchor = triples[0]
Positive = triples[1]
Negative = triples[2]
Y=targets

model.fit([Anchor,Positive,Negative],y=targets, batch_size=40, epochs=60,verbose=2)

In [0]:
model.save_weights("ind-model-mnet.h5")
trained_model=Model(inputs=X1,outputs=encoded1)
trained_model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
trained_model.load_weights("ind-model-mnet.h5")

num=len(X_Test)
correct=0
X_encoded=trained_model.predict(X_Test)

for i in range(num):
  #img=x = np.expand_dims(X_Test[i], axis=0)
  #X_encoded=trained_model.predict(X_Test[i])  
  test_data=[X_encoded[i],Species_Test[i]]
  true=Individuals_Test[i]
  predicted,silver,bronze=Validate(test_data,Ind_DB,trained_model)
  print(true,predicted,silver,bronze)
  if true==predicted:
    correct=correct+1
Accuracy=correct/num
print("Accuracy = ",Accuracy)