<a href="https://colab.research.google.com/github/mmuokubamichael/12th-place-solution-Turtle-Recall-Conservation-Challenge/blob/main/tutle_classification_solution_with_EDA.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import os

# Remote archive holding every image, and the local directory it is unpacked into.
SOURCE_URL = 'https://storage.googleapis.com/dm-turtle-recall/images.tar'
IMAGE_DIR = './turtle_recall/images'
# The archive is saved inside IMAGE_DIR itself and removed again after extraction.
TAR_PATH = os.path.join(IMAGE_DIR, os.path.basename(SOURCE_URL))
# Number of directory entries expected in IMAGE_DIR once the archive is unpacked.
EXPECTED_IMAGE_COUNT = 13891

# Create the target directory (including missing parents).
%sx mkdir --parents "{IMAGE_DIR}"
# Download and unpack only when the directory does not already hold exactly
# the expected number of entries, so re-running the cell is cheap.
if len(os.listdir(IMAGE_DIR)) != EXPECTED_IMAGE_COUNT:
  # NOTE(review): --no-check-certificate turns off TLS certificate
  # verification for this download — confirm it is really required.
  %sx wget --no-check-certificate -O "{TAR_PATH}" "{SOURCE_URL}"
  %sx tar --extract --file="{TAR_PATH}" --directory="{IMAGE_DIR}"
  %sx rm "{TAR_PATH}"

print(f'The total number of images is: {len(os.listdir(IMAGE_DIR))}')

In [None]:
import pandas as pd 
import requests
import io
import urllib.parse

# Bucket that hosts the challenge metadata CSV files.
BASE_URL = 'https://storage.googleapis.com/dm-turtle-recall/'

def read_csv_from_web(file_name, timeout=60):
  """Downloads a CSV file located under BASE_URL and parses it.

  Args:
    file_name: File name relative to BASE_URL, e.g. 'train.csv'.
    timeout: Seconds to wait for the server before the request is aborted.

  Returns:
    A pandas DataFrame parsed from the UTF-8 decoded response body.

  Raises:
    requests.HTTPError: If the server answers with a 4xx/5xx status code.
    requests.Timeout: If the server does not answer within `timeout` seconds.
  """
  url = urllib.parse.urljoin(BASE_URL, file_name)
  response = requests.get(url, timeout=timeout)
  # Fail loudly on an error response instead of parsing its body as CSV.
  response.raise_for_status()
  return pd.read_csv(io.StringIO(response.content.decode('utf-8')))

# Fetch the three metadata tables published with the challenge.
train = read_csv_from_web('train.csv')
test = read_csv_from_web('test.csv')
sample_submission = read_csv_from_web('sample_submission.csv')

# Normalise the casing of image_location and verify that only the three
# expected values remain in each table.
for df in (train, test):
  df['image_location'] = df['image_location'].map(lambda location: location.lower())
  assert set(df['image_location'].unique()) == {'left', 'right', 'top'}

In [None]:
# (rows, columns) of the training metadata table.
train.shape

In [None]:
import numpy as np


In [None]:
# Class vocabulary: every turtle id seen in training plus a catch-all
# 'new_turtle' entry, so num_classes is one more than the number of distinct
# training turtles.
turtle_ids = sorted(np.unique(train.turtle_id)) + ['new_turtle']
labels = dict(zip(turtle_ids, np.arange(len(turtle_ids))))
label_lookup = {v: k for k, v in labels.items()}
num_classes = len(labels)
image_to_turtle = dict(zip(train.image_id, train.turtle_id))

# Keep only the files whose stem is a training image id. The keys of
# image_to_turtle are exactly those ids, so the dict gives a constant-time
# lookup instead of scanning the whole id array once per file.
image_files = [os.path.join(IMAGE_DIR, f) for f in os.listdir(IMAGE_DIR)
               if f.split('.')[0] in image_to_turtle]

# Ids and turtle labels listed in the same order as image_files.
image_ids = [os.path.basename(f).split('.')[0] for f in image_files]
image_turtle_ids = [image_to_turtle[image_id] for image_id in image_ids]

In [None]:
# One-column frame holding the turtle id of every training image.
y_target = pd.DataFrame({"target": image_turtle_ids})

In [None]:
from sklearn.preprocessing import OneHotEncoder,LabelEncoder
target_encode = LabelEncoder()
# Fit on the raw turtle ids rather than on the column that is overwritten
# here. Re-running the cell then keeps the encoder's classes as turtle ids;
# fitting on the already-encoded column would replace them with integers and
# make the later inverse_transform return numbers instead of ids.
y_target['target'] = target_encode.fit_transform(image_turtle_ids)

In [None]:
# Expand the integer class codes into a dense one-hot matrix.
target_onehot = OneHotEncoder()
target_onehot.fit(y_target[['target']])
main_target = target_onehot.transform(y_target[['target']]).toarray()

In [None]:
# Inspect the one-hot encoded target matrix.
main_target

In [None]:
import functools
from PIL import Image  # Image utilities.
import tqdm

In [None]:
def crop_and_resize(pil_img, size=(224, 224)):
  """Crops the largest centered square from an image and resizes it.

  Args:
    pil_img: Image object exposing `.size` as (width, height) together with
      `.crop(box)` and `.resize(size)`, e.g. a PIL image.
    size: Target (width, height) of the result. Defaults to (224, 224).

  Returns:
    The result of resizing the centered square crop to `size`.
  """
  w, h = pil_img.size
  crop_size = min(w, h)
  # Integer division keeps the box on pixel boundaries; adding crop_size to
  # the top-left corner gives the same box as the symmetric (w + c) // 2 form.
  left = (w - crop_size) // 2
  top = (h - crop_size) // 2
  crop = pil_img.crop((left, top, left + crop_size, top + crop_size))
  return crop.resize(size)


# Drop any progress bars tqdm still tracks (presumably left over from earlier
# or interrupted runs) before starting a new one.
tqdm.tqdm._instances.clear()
# Open, center-crop and resize every training image, with a progress bar.
loaded_images = [
    crop_and_resize(Image.open(image_path))
    for image_path in tqdm.tqdm(image_files)
]

In [None]:
# Convert every loaded image into a numpy array.
# NOTE: this rebinds `train`, which held the training metadata DataFrame.
train = [np.array(image) for image in loaded_images]


In [None]:
# Stack the per-image arrays and copy the one-hot targets into the arrays
# that are passed to the models.
x_train, y_train = np.array(train), np.array(main_target)

In [None]:
# Shapes of the image array and of the one-hot target matrix.
x_train.shape,y_train.shape


In [None]:
from keras.applications.vgg16 import VGG16
from keras.applications.inception_v3 import InceptionV3
from keras.applications.xception import Xception
from keras.applications.resnet_v2 import ResNet101V2
from keras.applications.resnet_v2 import ResNet152V2
from keras.applications.resnet import ResNet50
from keras.applications.inception_resnet_v2 import InceptionResNetV2
from keras.models import Model
from keras.layers import Dense
from keras.layers import Flatten
from keras import Sequential
from keras.layers import Dense, LSTM
from keras.layers import TimeDistributed

In [None]:
def estimator(model, num_classes=100, hidden_units=1024):
  """Puts a new, trainable classification head on top of a frozen backbone.

  Args:
    model: Keras model used as feature extractor. Every one of its layers is
      marked non-trainable (the passed model is modified in place).
    num_classes: Width of the softmax output layer. Must equal the number of
      columns of the one-hot targets used for training. Defaults to 100.
    hidden_units: Width of the ReLU layer placed before the output layer.

  Returns:
    A new compiled Keras Model (Adam optimizer, categorical cross-entropy
    loss, categorical accuracy metric) sharing the backbone's inputs.
  """
  # Freeze the backbone so that only the new head is trained.
  for layer in model.layers:
    layer.trainable = False
  # New classifier head on top of the backbone's last layer.
  features = Flatten()(model.layers[-1].output)
  hidden = Dense(hidden_units, activation='relu')(features)
  probabilities = Dense(num_classes, activation='softmax')(hidden)
  # Assemble and compile the full classifier.
  classifier = Model(inputs=model.inputs, outputs=probabilities)
  classifier.compile(optimizer="adam",loss="categorical_crossentropy",metrics=[
          'categorical_accuracy'
      ])
  return classifier

In [None]:
# Every backbone is built without its original top and takes 224x224
# three-channel input.
backbone_options = dict(include_top=False, input_shape=(224, 224, 3))

resnet = ResNet50(**backbone_options)
# NOTE: despite its name, `nasnet` holds an InceptionResNetV2 backbone.
nasnet = InceptionResNetV2(**backbone_options)
vg16 = VGG16(**backbone_options)
inception = InceptionV3(**backbone_options)
xception = Xception(**backbone_options)
resnet101v2 = ResNet101V2(**backbone_options)
resnet152v2 = ResNet152V2(**backbone_options)


# Replace each backbone with its frozen-backbone classifier.
vg16 = estimator(vg16)
inception = estimator(inception)
xception = estimator(xception)
resnet101v2 = estimator(resnet101v2)
resnet152v2 = estimator(resnet152v2)
resnet = estimator(resnet)
nasnet = estimator(nasnet)

In [None]:
# Train the InceptionResNetV2-based classifier (`nasnet`) on all training images.
nasnet.fit(x=x_train, y=y_train, epochs=30)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<keras.callbacks.History at 0x7f0810f9b790>

In [None]:
# Train the ResNet152V2-based classifier on all training images.
resnet152v2.fit(x=x_train, y=y_train, epochs=30)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<keras.callbacks.History at 0x7f080c0f0710>

In [None]:
# Train the ResNet101V2-based classifier on all training images.
resnet101v2.fit(x=x_train, y=y_train, epochs=30)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<keras.callbacks.History at 0x7f079e903ad0>

In [None]:
# Train the Xception-based classifier on all training images.
xception.fit(x=x_train, y=y_train, epochs=30)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<keras.callbacks.History at 0x7f079d126e10>

In [None]:
# Train the VGG16-based classifier on all training images.
vg16.fit(x=x_train, y=y_train, epochs=30)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<keras.callbacks.History at 0x7f079da04fd0>

In [None]:
# Train the InceptionV3-based classifier on all training images.
inception.fit(x=x_train, y=y_train, epochs=30)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<keras.callbacks.History at 0x7f079c808510>

In [None]:
# Train the ResNet50-based classifier on all training images.
resnet.fit(x=x_train, y=y_train, epochs=30)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<keras.callbacks.History at 0x7f079b4b3cd0>

In [None]:
from sklearn.model_selection import KFold
# 10-fold splitter shared by model_train.
# NOTE: despite the name `skf`, this is a plain KFold, not a StratifiedKFold.
skf=KFold(n_splits=10)

In [None]:
def model_train(estimator,train,label,test,epochs=30,splitter=None,reset_weights=False):
    """Fits `estimator` on every cross-validation fold and sums its test predictions.

    Args:
        estimator: Compiled Keras-style model exposing `fit` and `predict`
            (and `get_weights`/`set_weights` when `reset_weights` is True).
        train: Array of training inputs, indexable by integer index arrays.
        label: One-hot targets of shape (n_samples, n_classes).
        test: Array of inputs to predict after every fold.
        epochs: Training epochs per fold. Defaults to 30.
        splitter: Object with a `split(train, label)` method yielding
            (train_index, validation_index) pairs. Defaults to the
            module-level `skf`.
        reset_weights: When True, the weights the estimator had on entry are
            restored before every fold. When False (default) training simply
            continues from the previous fold, so validation metrics of later
            folds are computed on samples the model has already been fitted on.

    Returns:
        Array of shape (len(test), n_classes) holding the SUM (not the mean)
        of the per-fold predictions; the ranking of classes per row is the
        same as for the mean.
    """
    cv = skf if splitter is None else splitter
    initial_weights = estimator.get_weights() if reset_weights else None
    # One column per class, taken from the targets instead of a fixed 100.
    test_pred_y = np.zeros((test.shape[0], label.shape[1]))
    for count, (train_index, val_index) in enumerate(cv.split(train, label)):
        x_fold, x_val = train[train_index], train[val_index]
        y_fold, y_val = label[train_index], label[val_index]
        print(f'========================Fold{count +1}==========================')
        if initial_weights is not None:
            estimator.set_weights(initial_weights)
        # The held-out fold is reported as validation data; it does not
        # influence the weight updates.
        estimator.fit(x_fold, y_fold, epochs=epochs, validation_data=(x_val, y_val))

        test_pred_y += estimator.predict(test)

    return test_pred_y

In [None]:
# K-fold training of the ResNet50-based classifier (already fitted once
# above) and summed predictions for the test images.
# NOTE: `test_set` is only created in a later cell, so this cell fails under
# Restart & Run All unless that cell has been executed first.
predicts = model_train(estimator=resnet, train=x_train, label=y_train, test=test_set)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 2

In [None]:
# Shape of the summed fold predictions returned by model_train.
predicts.shape

(490, 100)

In [None]:
# Drop any progress bars tqdm still tracks (presumably left over from earlier
# or interrupted runs) before starting a new one.
tqdm.tqdm._instances.clear()
# Build the id set once so each file is checked with a constant-time lookup
# instead of a scan over the whole id array.
test_image_id_set = set(test.image_id)
test_image_files = [os.path.join(IMAGE_DIR, f) for f in os.listdir(IMAGE_DIR)
                    if f.split('.')[0] in test_image_id_set]
# Ids in the same order as test_image_files; the predictions are matched to
# images through this list when the submission is assembled.
test_image_ids = [os.path.basename(f).split('.')[0] for f in test_image_files]
loaded_test_images = [crop_and_resize(Image.open(f)) for f in tqdm.tqdm(test_image_files)]

100%|██████████| 490/490 [00:20<00:00, 23.92it/s]


In [None]:
# Convert every loaded test image into a numpy array.
# NOTE: this rebinds `test`, which held the test metadata DataFrame.
test = [np.array(image) for image in loaded_test_images]


In [None]:
# Stack the per-image arrays into the single array passed to predict().
test_set=np.array(test)

In [None]:
# Class probabilities for the test images from two of the trained models.
prediction_resnet = resnet.predict(x=test_set)
prediction_vg16 = vg16.predict(x=test_set)

In [None]:
# Simple ensemble: element-wise sum of the two probability matrices.
prediction = np.add(prediction_resnet, prediction_vg16)

In [None]:
# Shape of the combined prediction matrix.
prediction.shape

(490, 100)

In [None]:
# Test-set probabilities from the remaining models (vg16 is predicted again).
prediction_vg16 = vg16.predict(x=test_set)
prediction_inception = inception.predict(x=test_set)
prediction_xception = xception.predict(x=test_set)
prediction_resnet101v2 = resnet101v2.predict(x=test_set)
prediction_resnet152v2 = resnet152v2.predict(x=test_set)
prediction_nasnet = nasnet.predict(x=test_set)

(100,)

In [None]:
# Use the k-fold ResNet50 predictions for the submission. This replaces the
# resnet + vg16 sum that was bound to `prediction` earlier.
prediction=predicts

In [None]:
# For every test image keep the indices of the five highest-scoring classes,
# best first. Iterating over the rows avoids hard-coding the number of test
# images. A stable ascending sort followed by a reversal keeps the same
# tie-breaking as sorting the indices with Python's sorted().
classes = []
for class_prob in prediction:
  top_values_index = np.argsort(class_prob, kind='stable')[-5:][::-1].tolist()
  classes.append(top_values_index)


In [None]:
# Inspect the top-5 class indices per test image (prints the whole list).
classes

In [None]:
# Show the fitted label encoder used to map class indices back to turtle ids.
target_encode

In [None]:
# Map each row of top-5 class indices back to the original turtle ids.
transform_full = [target_encode.inverse_transform(top_five) for top_five in classes]
  

In [None]:
# Scratch list of turtle ids kept from exploration. The cell previously held
# an unterminated fragment of this list and raised a SyntaxError.
['t_id_C0wevyOl', 't_id_EEbWq5Pj', 't_id_G5eoqwD8', 't_id_8b8sprYe']

In [None]:
# Inspect the decoded top-5 turtle ids per test image (prints the whole list).
transform_full

In [None]:
# Repeated display of the decoded top-5 turtle ids; this cell can be deleted.
transform_full

In [None]:
# Repeated display of the decoded top-5 turtle ids; this cell can be deleted.
transform_full

In [None]:
# Peek at the sample submission to see the expected column layout.
sample_submission.head()

Unnamed: 0,image_id,prediction1,prediction2,prediction3,prediction4,prediction5
0,ID_6NEDKOYZ,new_turtle,t_id_d6aYXtor,t_id_qZ0iZYsC,new_turtle,t_id_d6aYXtor
1,ID_57QZ4S9N,new_turtle,t_id_d6aYXtor,t_id_qZ0iZYsC,new_turtle,t_id_d6aYXtor
2,ID_OCGGJS5X,new_turtle,t_id_d6aYXtor,t_id_qZ0iZYsC,new_turtle,t_id_d6aYXtor
3,ID_R2993S3S,new_turtle,t_id_d6aYXtor,t_id_qZ0iZYsC,new_turtle,t_id_d6aYXtor
4,ID_2E011NB0,new_turtle,t_id_d6aYXtor,t_id_qZ0iZYsC,new_turtle,t_id_d6aYXtor


In [None]:
# Inspect the test image ids (prints the whole list).
test_image_ids

In [None]:
# One row per test image; columns prediction1..prediction5 hold the ranked guesses.
transform_full_data_frame = pd.DataFrame(
    transform_full,
    columns=[f'prediction{rank}' for rank in range(1, 6)],
)

In [None]:
# Attach the image ids; they are listed in the same order as the prediction rows.
transform_full_data_frame = transform_full_data_frame.assign(image_id=test_image_ids)

In [None]:
# Preview the submission frame before the columns are reordered.
transform_full_data_frame.head()

Unnamed: 0,prediction1,prediction2,prediction3,prediction4,prediction5,image_id
0,t_id_EEbWq5Pj,t_id_X3bKBWYW,t_id_2QmcRkNj,t_id_8b8sprYe,t_id_G5eoqwD8,ID_JUQM4S99
1,t_id_8b8sprYe,t_id_n2FBHk6d,t_id_9GFmcOd5,t_id_SwQZGIpa,t_id_D0gA44av,ID_DFT8JWF0
2,t_id_VP2NW7aV,t_id_2E8o5Jtl,t_id_ROFhVsy2,t_id_HxxqrdTx,t_id_AOWArhGb,ID_3QP1M0SV
3,t_id_Ts5LyVQz,t_id_AOWArhGb,t_id_SwQZGIpa,t_id_C0wevyOl,t_id_fxTQ5vHC,ID_G5S2G6IF
4,t_id_4ZfTUmwL,t_id_Ts5LyVQz,t_id_D0gA44av,t_id_B7LaSiac,t_id_d6aYXtor,ID_91E1O0FJ


In [None]:
# Put image_id first, followed by the five ranked predictions.
transform_full_data_frame = transform_full_data_frame.loc[
    :,
    ["image_id", "prediction1", "prediction2", "prediction3", "prediction4", "prediction5"],
]

In [None]:
# Write the submission file without the row index.
transform_full_data_frame.to_csv(path_or_buf="firstsub2.csv", index=False)

In [None]:
# Toy example of the top-5 selection: order the indices by ascending
# probability and keep the last five.
class_prob = [0.98, 0.50, 0.60, 0.90, 0.87, 0.79, 0.87]
top_values_index = [
    index
    for index, _ in sorted(enumerate(class_prob), key=lambda pair: pair[1])
][-5:]

In [None]:
# Flip in place so the most probable class index comes first.
# NOTE: re-running this cell flips the list back again.
top_values_index.reverse()

In [None]:
# Show the five selected class indices of the toy example.
top_values_index

[0, 3, 6, 4, 5]