In [62]:
import pickle
import imageio
import cv2
import pandas as pd
from sklearn.model_selection import train_test_split

from tqdm import tqdm

import numpy as np
import os
from os import listdir

## Adjust CSV

In [63]:
# Load the training metadata and normalise the species column:
# misspelled names are corrected and aliases are merged into one label.
SPECIES_NAME_FIXES = {
    "globis": "short_finned_pilot_whale",
    "pilot_whale": "short_finned_pilot_whale",
    "kiler_whale": "killer_whale",
    "bottlenose_dolpin": "bottlenose_dolphin",
}

train_csv = "train.csv"
train_df = pd.read_csv(train_csv)
# Assign the result back to the column. Calling replace(..., inplace=True) on
# `train_df.species` is a chained in-place update: recent pandas warns about it
# and, with Copy-on-Write enabled, leaves train_df unchanged.
train_df["species"] = train_df["species"].replace(SPECIES_NAME_FIXES)

# Distinct species in order of first appearance; the position in this list is
# what the encoding cell uses as the integer class id.
species_labels = list(train_df["species"].unique())
images = train_df['image']
sid = train_df['individual_id']
train_df

Unnamed: 0,image,species,individual_id
0,00021adfb725ed.jpg,melon_headed_whale,cadddb1636b9
1,000562241d384d.jpg,humpback_whale,1a71fbb72250
2,0007c33415ce37.jpg,false_killer_whale,60008f293a2b
3,0007d9bca26a99.jpg,bottlenose_dolphin,4b00fe572063
4,00087baf5cef7a.jpg,humpback_whale,8e5253662392
...,...,...,...
51028,fff639a7a78b3f.jpg,beluga,5ac053677ed1
51029,fff8b32daff17e.jpg,cuviers_beaked_whale,1184686361b3
51030,fff94675cc1aef.jpg,blue_whale,5401612696b9
51031,fffbc5dd642d8c.jpg,beluga,4000b3d7c24e


In [64]:
def get_id(sp):
    """Return the integer class id of species name ``sp``.

    The id is the position of ``sp`` in the module-level ``species_labels`` list.

    Raises:
        ValueError: if ``sp`` is not present in ``species_labels``.
    """
    return species_labels.index(sp)


#encode species
# Series.map applies get_id element-wise, avoiding a row-wise DataFrame.apply.
# The dtype guard keeps the cell re-runnable: once the column holds integer ids,
# a second run would otherwise fail looking those ids up in species_labels.
if not pd.api.types.is_integer_dtype(train_df["species"]):
    train_df["species"] = train_df["species"].map(get_id)


#train_df = pd.concat([train_df, pd.get_dummies(train_df["species"],prefix='species_',drop_first=True)], axis = 1)
#train_df.drop(['species'],axis=1, inplace=True)
train_df

Unnamed: 0,image,species,individual_id
0,00021adfb725ed.jpg,0,cadddb1636b9
1,000562241d384d.jpg,1,1a71fbb72250
2,0007c33415ce37.jpg,2,60008f293a2b
3,0007d9bca26a99.jpg,3,4b00fe572063
4,00087baf5cef7a.jpg,1,8e5253662392
...,...,...,...
51028,fff639a7a78b3f.jpg,4,5ac053677ed1
51029,fff8b32daff17e.jpg,17,1184686361b3
51030,fff94675cc1aef.jpg,7,5401612696b9
51031,fffbc5dd642d8c.jpg,4,4000b3d7c24e


## Load images

In [None]:
#Training Data
# Read every training image listed in the CSV and resize it to a
# MAX_SIZE x MAX_SIZE square, appending in CSV row order.
MAX_SIZE = 64  # side length, in pixels, of the resized images
train_dir = "train_images"
dataset = []
for img in tqdm(images):
    image = imageio.imread(os.path.join(train_dir, img))
    # Use MAX_SIZE rather than a second hard-coded 64 so the size is set in one place.
    image = cv2.resize(image, dsize=(MAX_SIZE, MAX_SIZE), interpolation=cv2.INTER_CUBIC)
    dataset.append(image)


 32%|███████████████████████▉                                                    | 16112/51033 [10:30<28:07, 20.69it/s]

In [38]:
#Testing Data
# Read every file in test_dir and resize it the same way as the training images.
test_dir = "test_images"
# os.listdir returns entries in arbitrary order; sorting makes the order
# reproducible, and keeping the names lets row i of the predictions be
# matched to test_files[i].
test_files = sorted(os.listdir(test_dir))
test_dataset = []
for img in tqdm(test_files):
    image = imageio.imread(os.path.join(test_dir, img))
    # MAX_SIZE is the resize constant defined in the training-data cell.
    image = cv2.resize(image, dsize=(MAX_SIZE, MAX_SIZE), interpolation=cv2.INTER_CUBIC)
    test_dataset.append(image)

  1%|▋                                                                             | 261/27956 [00:11<19:45, 23.37it/s]


KeyboardInterrupt: 

In [None]:
# Build the model inputs: stack the resized images into arrays, scale pixel
# values to [0, 1] and hold out the first N_VALID training samples for validation.
N_VALID = 5000

# np.array raises if the images do not all share one shape, which surfaces
# inconsistent inputs here instead of inside the model.
X = np.array(dataset)
# The species column already holds integer class ids (see the encoding cell),
# so it is used directly. Slicing to len(X) keeps labels aligned with the
# images if image loading was stopped before the end of the CSV.
Y = train_df["species"].to_numpy()[:len(X)]

# Cast to float32 before scaling: dividing the integer image array directly
# would produce float64 and double the memory footprint.
X_valid = X[:N_VALID].astype(np.float32) / 255.0
X_train = X[N_VALID:].astype(np.float32) / 255.0
y_valid, y_train = Y[:N_VALID], Y[N_VALID:]
# test_dataset is a Python list, which cannot be divided by a float;
# convert it to an array first.
X_test = np.array(test_dataset, dtype=np.float32) / 255.0


data_set = (X_train,y_train)

#save_label = open("base_test.pickle","wb")
#pickle.dump(label, save_label)
#save_label.close()


## Modelling

In [None]:
#generating model
# Fully connected baseline: flatten each image, two hidden ReLU layers,
# softmax over the species classes.
# NOTE(review): `keras` and `layers` are not imported in the visible cells —
# confirm the import cell provides them.

# One output unit per species. A fixed 10 is too small: the encoded labels
# go beyond 9, and sparse categorical cross-entropy needs every label to be
# a valid output index.
num_classes = len(species_labels)

model = keras.Sequential([
    # Take the per-sample shape from the data so the model matches the
    # loaded images, including any colour-channel axis.
    keras.Input(shape=X_train.shape[1:]),
    layers.Flatten(),
    layers.Dense(128, activation="relu"),
    layers.Dense(64, activation="relu"),
    layers.Dense(num_classes, activation="softmax"),
])
model.summary()
keras.utils.plot_model(model, show_shapes=True)

In [None]:
#compiling model
# Integer class labels -> sparse categorical loss and accuracy; SGD with its default settings.
xent_loss = keras.losses.SparseCategoricalCrossentropy()
sgd_optimizer = keras.optimizers.SGD()
tracked_metrics = [keras.metrics.SparseCategoricalAccuracy()]

model.compile(
    loss=xent_loss,
    optimizer=sgd_optimizer,
    metrics=tracked_metrics,
)

In [None]:
#training model
# Fit on the training split and score the validation split after every epoch.
epochs = 20
validation_pair = (X_valid, y_valid)
history = model.fit(
    X_train,
    y_train,
    epochs=epochs,
    validation_data=validation_pair,
)

#saving trained model
# NOTE(review): whether a Keras model can be pickled depends on the installed
# Keras version — TODO confirm; model.save() is the documented way to persist one.
with open('base_model.pkl', 'wb') as model_file:
    pickle.dump(model, model_file)

In [None]:
#visualize model performance
# Plot the per-epoch training and validation curves recorded by model.fit.
# NOTE(review): `plt` is not imported in the visible cells — confirm that
# matplotlib.pyplot is imported as plt.
accuracy = history.history['sparse_categorical_accuracy']
val_accuracy = history.history['val_sparse_categorical_accuracy']

loss = history.history['loss']
val_loss = history.history['val_loss']

# Take the x-axis from the recorded history rather than the `epochs` variable,
# so the plot stays correct if fewer epochs were actually run.
epoch_axis = range(len(loss))

# Accuracy and loss get separate panels: accuracy lives in [0, 1], while the
# loss can exceed 1 and would be cut off by a shared 0-1 y-limit.
fig, (acc_ax, loss_ax) = plt.subplots(1, 2, figsize=(14, 6))

acc_ax.plot(epoch_axis, accuracy, color="r", label="Training Accuracy")
acc_ax.plot(epoch_axis, val_accuracy, color="orange", label="Validation Accuracy")
acc_ax.set(xlabel="Epoch", ylabel="Accuracy", title="Accuracy", ylim=(0, 1))
acc_ax.legend(loc="lower right")
acc_ax.grid(True)

loss_ax.plot(epoch_axis, loss, color="b", label="Training Loss")
loss_ax.plot(epoch_axis, val_loss, color="g", label="Validation Loss")
loss_ax.set(xlabel="Epoch", ylabel="Loss", title="Loss")
loss_ax.legend(loc="upper right")
loss_ax.grid(True)

plt.show()

In [None]:
#evaluate model
# The visible cells never define `y_test` (the test images are loaded without
# labels), so the model is scored on the held-out validation split instead.
model.evaluate(X_valid, y_valid)

## Predicting

In [None]:
# Reuse the in-memory model when there is one; otherwise restore the saved copy.
# The name must be quoted: globals() is keyed by variable-name strings, and the
# bare name `model` would raise NameError in exactly the case this guards.
if "model" not in globals():
    # NOTE: pickle.load can execute arbitrary code from the file — only load a
    # base_model.pkl that you created yourself.
    with open('base_model.pkl', 'rb') as model_file:
        model = pickle.load(model_file)

# Class probabilities for the test images, one row per image.
X_new = X_test
y_proba = model.predict(X_new)
