In [1]:
pip install gdown

In [2]:
import gdown

# Download the dataset archive from Google Drive into the current directory.
url = 'https://drive.google.com/uc?id=1UOJcE-YEfPqhF65Gc1wrZx9N16kLLmf6'
output = 'train_test_zip'
gdown.download(url=url, output=output)

In [3]:
# Extract the downloaded archive into the current working directory.
!unzip train_test_zip

In [4]:
import tensorflow as tf
import pandas as pd
import numpy as np
import tensorflow.keras as tfk
import tensorflow.keras.layers as tfkl
import random
import os
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications.vgg16 import preprocess_input
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
from sklearn.metrics import confusion_matrix
import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt
import shutil
import scipy

In [7]:
# Random seed for reproducibility: seed every RNG used by the notebook.
seed = 42

random.seed(seed)
# NOTE: PYTHONHASHSEED is read when an interpreter starts, so this assignment
# does not change string hashing in the already-running kernel.
os.environ['PYTHONHASHSEED'] = str(seed)
np.random.seed(seed)
tf.random.set_seed(seed)
tf.compat.v1.set_random_seed(seed)


# Dataset location. The path is absolute so that the image generators, which
# open files lazily during fit()/predict(), keep working no matter which
# directory a later cell has os.chdir()'d into.
dataset_dir  = '/kaggle/working'
training_dir = os.path.join(dataset_dir, 'train')
val_dir      = os.path.join(dataset_dir, 'val')

# Class index -> class name.
# NOTE(review): assumed to match the index order flow_from_directory assigns
# to the class folders -- confirm against `valid_gen.class_indices`.
labels       = {
    0:'Apple',
    1:'Blueberry',
    2:'Cherry',
    3:'Corn',
    4:'Grape',
    5:'Orange',
    6:'Peach',
    7:'Pepper',
    8:'Potato',
    9:'Raspberry',
    10:'Soybean',
    11:'Squash',
    12:'Strawberry',
    13:'Tomato'
}

In [8]:
# Image generators without data augmentation: the only transformation applied
# is the ResNet50 input preprocessing.
_resnet_preprocess = tfk.applications.resnet50.preprocess_input
noaug_train_data_gen = ImageDataGenerator(preprocessing_function=_resnet_preprocess)
valid_data_gen       = ImageDataGenerator(preprocessing_function=_resnet_preprocess)
test_data_gen        = ImageDataGenerator(preprocessing_function=_resnet_preprocess)

# Move to the filesystem root before the dataset directories are scanned.
os.chdir('/')

# Settings shared by the training and validation iterators.
_flow_kwargs = dict(
    target_size=(256,256),
    color_mode='rgb',
    classes=None,
    class_mode='categorical',
    batch_size=1,
    seed=seed,
)

noaug_train_gen = noaug_train_data_gen.flow_from_directory(
    directory=training_dir,
    shuffle=True,
    **_flow_kwargs,
)

valid_gen = valid_data_gen.flow_from_directory(
    directory=val_dir,
    shuffle=False,  # no need to shuffle validation set
    **_flow_kwargs,
)

# 1st Model

In [None]:
# ResNet50 backbone with ImageNet weights and without its classification head
# (include_top=False), built for 256x256 RGB inputs.
supernet = tfk.applications.ResNet50(
    include_top=False,
    weights="imagenet",
    input_shape=(256,256,3),
)

In [None]:
# Transfer-learning model: frozen ResNet50 backbone used as a feature
# extractor, followed by a small fully connected classification head.
supernet.trainable = False

inputs = tfk.Input(shape=(256,256,3))
backbone_features = supernet(inputs)
x = tfkl.GlobalAveragePooling2D(name="gl_avg_pool")(backbone_features)

# Two Dense(relu) + Dropout stages, with seeded initialisers for repeatability.
for hidden_units in (512, 256):
    x = tfkl.Dense(
        hidden_units,
        activation='relu',
        kernel_initializer=tfk.initializers.GlorotUniform(seed),
    )(x)
    x = tfkl.Dropout(0.3, seed=seed)(x)

# 14-way softmax output.
outputs = tfkl.Dense(
    14,
    activation='softmax',
    kernel_initializer=tfk.initializers.GlorotUniform(seed),
)(x)

# Connect input and output through the Model class
tl_model = tfk.models.Model(inputs=inputs, outputs=outputs, name='model')

# Compile the model
tl_model.compile(
    loss=tf.keras.losses.CategoricalCrossentropy(),
    optimizer=tfk.optimizers.Adam(learning_rate = 1e-4),
    metrics=[tfk.metrics.CategoricalAccuracy(), tfk.metrics.AUC()],
)

In [None]:
# Fine-tuning set-up: make the backbone trainable, then re-freeze every layer
# except its last 14.
supernet.trainable = True
backbone = tl_model.get_layer('resnet50')
for layer in backbone.layers[:-14]:
    layer.trainable = False

# `trainable` was changed after tl_model.compile(); Keras documents that such
# changes are only guaranteed to take effect once compile() is called again,
# so recompile with the same loss / optimizer / metrics as the original call.
tl_model.compile(
    loss=tf.keras.losses.CategoricalCrossentropy(),
    optimizer=tfk.optimizers.Adam(learning_rate=1e-4),
    metrics=[tfk.metrics.CategoricalAccuracy(), tfk.metrics.AUC()],
)

# Show the resulting trainable flag of every backbone layer.
for i, layer in enumerate(backbone.layers):
    print(i, layer.name, layer.trainable)

In [None]:
# Train the transfer-learning model, stopping early on validation loss.
os.chdir('/')

early_stopping = tfk.callbacks.EarlyStopping(
    monitor='val_loss',
    mode='min',
    patience=5,
    restore_best_weights=True,
)
tl_fit_result = tl_model.fit(
    x=noaug_train_gen,
    epochs=150,
    validation_data=valid_gen,
    callbacks=[early_stopping],
)
# Keep only the per-epoch metrics dictionary.
tl_history = tl_fit_result.history

In [None]:
# Save the transfer-learning model inside /kaggle/working.
os.chdir('/kaggle/working')
tl_model.save('rn_model_with_data_aug')

In [None]:
from IPython.display import FileLink

# Zip the saved-model directory and display a download link to the archive.
shutil.make_archive(base_name="rn", format="zip", root_dir="rn_model_with_data_aug")
FileLink(r'rn.zip')

# Model Number 2

In [None]:
# Model Number 2: a small CNN built from four conv -> max-pool stages and a
# fully connected head.
input_layer = tfkl.Input(shape=(256, 256, 3), name='Input')

# Convolutional stages; the filter count doubles at every stage.
feature_map = input_layer
for n_filters in (16, 32, 64, 128):
    feature_map = tfkl.Conv2D(
        filters=n_filters,
        kernel_size=(3, 3),
        strides=(1, 1),
        padding='same',
        activation='relu',
        kernel_initializer=tfk.initializers.GlorotUniform(seed),
    )(feature_map)
    feature_map = tfkl.MaxPooling2D(pool_size=(2, 2))(feature_map)

# Classification head: global average pooling, then three Dropout -> Dense(512)
# stages, a final Dropout and the 14-way softmax.
head = tfkl.GlobalAveragePooling2D(name="gl_avg_pool")(feature_map)
for _ in range(3):
    head = tfkl.Dropout(0.3, seed=seed)(head)
    head = tfkl.Dense(units=512, activation='relu')(head)
head = tfkl.Dropout(0.3, seed=seed)(head)
output_layer = tfkl.Dense(units=14, activation='softmax', name='Output')(head)

simple_model = tfk.models.Model(inputs=input_layer, outputs=output_layer)
simple_model.compile(
    loss=tfk.losses.CategoricalCrossentropy(),
    optimizer=tfk.optimizers.Adam(),
    metrics=[tfk.metrics.CategoricalAccuracy(), tfk.metrics.AUC()],
)

In [None]:
# Train the first simple CNN, stopping early on validation loss.
# The data generators were created while the working directory was '/', and
# the previous save cell leaves the kernel in /kaggle/working; go back to '/'
# first, exactly as the other training cells do, so image paths still resolve.
os.chdir('/')
sm_history = simple_model.fit(
    x = noaug_train_gen,
    epochs = 150,
    validation_data = valid_gen,
    callbacks = [tfk.callbacks.EarlyStopping(monitor='val_loss', mode='min', patience=5, restore_best_weights=True)]
).history

In [None]:
# Save the first simple CNN inside /kaggle/working and zip the saved directory.
os.chdir('/kaggle/working')
simple_model.save('simple_model_with_data_aug')
shutil.make_archive(base_name="sm_model", format="zip", root_dir="simple_model_with_data_aug")

In [None]:
from IPython.display import FileLink
# Display a download link for the zipped first simple model.
FileLink(r'sm_model.zip')

# Model Number 3

In [None]:
# Model Number 3: the same four conv -> max-pool stages as the previous CNN,
# followed by four extra 128-filter convolutions without pooling.
input_layer = tfkl.Input(shape=(256, 256, 3), name='Input')


def _conv3x3(n_filters):
    """Return a new 3x3, stride-1, same-padded ReLU convolution with a seeded initialiser."""
    return tfkl.Conv2D(
        filters=n_filters,
        kernel_size=(3, 3),
        strides=(1, 1),
        padding='same',
        activation='relu',
        kernel_initializer=tfk.initializers.GlorotUniform(seed),
    )


feature_map = input_layer

# Down-sampling stages (conv1/pool1 ... conv4/pool4).
for n_filters in (16, 32, 64, 128):
    feature_map = _conv3x3(n_filters)(feature_map)
    feature_map = tfkl.MaxPooling2D(pool_size=(2, 2))(feature_map)

# Additional convolutions at constant resolution (conv5 ... conv8).
for _ in range(4):
    feature_map = _conv3x3(128)(feature_map)

# Classification head: global average pooling, then three Dropout -> Dense(512)
# stages, a final Dropout and the 14-way softmax.
head = tfkl.GlobalAveragePooling2D(name="gl_avg_pool")(feature_map)
for _ in range(3):
    head = tfkl.Dropout(0.3, seed=seed)(head)
    head = tfkl.Dense(units=512, activation='relu')(head)
head = tfkl.Dropout(0.3, seed=seed)(head)
output_layer = tfkl.Dense(units=14, activation='softmax', name='Output')(head)

simple_model2 = tfk.models.Model(inputs=input_layer, outputs=output_layer)
simple_model2.compile(
    loss=tfk.losses.CategoricalCrossentropy(),
    optimizer=tfk.optimizers.Adam(),
    metrics=[tfk.metrics.CategoricalAccuracy(), tfk.metrics.AUC()],
)

In [None]:
# Train the second simple CNN, stopping early on validation loss.
os.chdir('/')

sm2_early_stopping = tfk.callbacks.EarlyStopping(
    monitor='val_loss',
    mode='min',
    patience=5,
    restore_best_weights=True,
)
sm2_fit_result = simple_model2.fit(
    x=noaug_train_gen,
    epochs=150,
    validation_data=valid_gen,
    callbacks=[sm2_early_stopping],
)
# Keep only the per-epoch metrics dictionary.
sm2_history = sm2_fit_result.history

In [None]:
# Save the second simple CNN inside /kaggle/working and zip the saved directory.
os.chdir('/')
os.chdir('kaggle/working')
# Save simple_model2 (the model trained in the previous cell); this cell used
# to save `simple_model`, so the 'sm2' artefacts contained the wrong network.
simple_model2.save('sm2_with_data_aug')
shutil.make_archive("sm2_model", "zip", "sm2_with_data_aug")

In [None]:
from IPython.display import FileLink
# Link to the archive produced for the second simple model (sm2_model.zip);
# the link used to point at the first model's sm_model.zip.
FileLink(r'sm2_model.zip')

# Implementing hard voting

In [9]:
# The model folders created by the following cells are placed under /kaggle/working.
os.chdir('/kaggle/working')

In [10]:
os.mkdir('inc_rn')
os.chdir('inc_rn')
url = 'https://drive.google.com/uc?id=1WwKL9qFj8Qxj-bselgg-5x5NyRCZsoh2'
output = 'inc_rn'
gdown.download(url, output)
!unzip inc_rn
os.chdir('..')

In [11]:
os.mkdir('rn')
os.chdir('rn')
url = 'https://drive.google.com/uc?id=1e4xa2sJFGQMfyoKKuuS-YKKhRL2KDO8z'
output = 'rn'
gdown.download(url, output)
!unzip rn
os.chdir('..')

In [12]:
os.mkdir('sm')
os.chdir('sm')
url = 'https://drive.google.com/uc?id=13Q4eYoPFVlVdaJdHhhHJKMJAbDPV4Ez1'
output = 'sm'
gdown.download(url, output)
!unzip sm
os.chdir('..')

In [13]:
os.mkdir('sm2')
os.chdir('sm2')
url = 'https://drive.google.com/uc?id=1C1dqu7B1dEE5vrUcSB6NPg0U-sPSlgQK'
output = 'sm2'
gdown.download(url, output)
!unzip sm2
os.chdir('..')

In [None]:
# Sanity check: display the current working directory after the download cells.
os.getcwd()

In [14]:
# Load the four saved models that are combined by voting in the cells below:
#   inc_rn : InceptionResNetV2
#   rn     : ResNet50
#   sm     : simple model 1
#   sm2    : simple model 2
os.chdir('/kaggle/working')

inc_rn, rn, sm, sm2 = (
    tfk.models.load_model(model_dir)
    for model_dir in ('inc_rn', 'rn', 'sm', 'sm2')
)

In [None]:
# Collect, for every ensemble member, the predicted class index of each
# validation sample.
os.chdir('/')
models = [inc_rn, rn, sm, sm2]
models_preds = []
for member in models:
    class_scores = member.predict(valid_gen)
    models_preds.append(class_scores.argmax(axis=1))
    print(f"{member!s} done!")


In [None]:
# Hard voting: put the votes in (sample, model) layout and keep, for every
# sample, the most frequent class index across the models.
votes_by_sample = np.asarray(models_preds).transpose(1, 0)
models_preds = np.squeeze(scipy.stats.mode(votes_by_sample, axis=1)[0])

In [None]:
# Accuracy of the voted predictions against the validation labels.
total = len(models_preds)
pos   = sum(1 for idx in range(total) if models_preds[idx] == valid_gen.labels[idx])
acc   = pos / total
print("accuracy is: {}".format(acc))

In [None]:
# NOTE(review): looks like a leftover kernel-alive check; nothing below reads its output.
print('hi')

In [47]:
class CustomLayer(tfk.layers.Layer):
    """Weight-less layer that runs every ensemble member on the same input batch.

    The members are the models bound to the notebook-level names ``inc_rn``,
    ``rn``, ``sm`` and ``sm2``; they must be loaded before the layer is called.
    For each member the layer returns the arg-max class index of every sample.
    The majority vote over those indices is taken outside the layer.

    Args:
        units: Stored on the instance and returned by ``get_config``; the
            forward pass does not use it.
        **kwargs: Forwarded to ``tfk.layers.Layer``.
    """

    def __init__(self, units = 1, **kwargs):
        super(CustomLayer, self).__init__(**kwargs)
        # Keep the value the caller passed (it used to be hard-coded to 1).
        self.units = units

    def call(self, inputs):
        """Return a list holding one tensor of predicted class indices per member.

        The list order is ``[inc_rn, rn, sm, sm2]``. The working directory is
        deliberately left untouched: changing it from inside ``call`` is a
        process-wide side effect that can break code resolving relative paths
        while the model runs.
        """
        members = [inc_rn, rn, sm, sm2]
        return [tf.math.argmax(member(inputs), axis=1) for member in members]

    def get_config(self):
        """Return the layer configuration, including ``units``, for serialisation."""
        config = super(CustomLayer, self).get_config()
        config.update({"units": self.units})
        return config

        

In [None]:
# Inspect the predicted class index per validation sample for `sm2` alone.
os.chdir('/')
np.argmax(sm2.predict(valid_gen), axis=1)

In [48]:
# Wrap the ensemble in a single Keras model: image batch in, CustomLayer
# output (the members' predicted class indices) out.
os.chdir('/')
inputs1 = tfk.Input(shape=(256, 256, 3))
inter_layer1 = CustomLayer(units=1)
layer1_output = inter_layer1(inputs1)
final_model = tfk.Model(inputs=inputs1, outputs=layer1_output)

In [49]:
# Compile the ensemble wrapper.
# NOTE(review): in this notebook final_model is only used for predict() and
# save(), never fit(), so the loss/optimizer chosen here appear to be
# placeholders -- confirm before relying on them.
final_model.compile(loss=tfk.losses.CategoricalCrossentropy(), #continu together
              optimizer=tfk.optimizers.Adam(),
              metrics=['accuracy'])

In [None]:
# Print the layer overview of the ensemble wrapper.
final_model.summary()

In [51]:
# NOTE(review): the next cell immediately switches to '/', so this directory
# change looks like a leftover -- confirm before removing.
os.chdir('/kaggle/working/val')

In [52]:
# Run the ensemble wrapper over the validation generator.
# The cells below index the result as preds[model][sample], i.e. they treat it
# as one sequence of class indices per ensemble member.
os.chdir('/')
preds = final_model.predict(valid_gen)

In [54]:
# Save the ensemble wrapper as 'ensemble_model' inside /kaggle/working.
os.chdir('/kaggle/working')
final_model.save('ensemble_model')

In [34]:
# The statement was left unfinished (`from IPython import` with no name), which
# is a SyntaxError. NOTE(review): `display` is a guess at the intended name --
# adjust or delete this cell if something else was meant.
from IPython import display

In [29]:
from statistics import mode

# For every validation sample, gather the class predicted by each ensemble
# member and print the most common one (the hard vote).
for sample_votes in zip(*preds):
    print(mode(list(sample_votes)))
    #     print(preds[1][1])

In [None]:
from statistics import mode

# Accuracy of the hard-voting ensemble on the validation set.
#
# `preds` is indexed as preds[model][sample] (see the voting cell above), so
# comparing preds[i] with a label would compare a whole per-model array and
# len(preds) would be the number of models. Reduce the members' votes to one
# class per sample first, using the same statistics.mode rule as the voting
# cell, and only then compare with the ground truth.
ensemble_preds = [mode(list(sample_votes)) for sample_votes in zip(*preds)]
true_labels = valid_gen.labels

total = len(ensemble_preds)
if total != len(true_labels):
    raise ValueError(
        "got {} predictions for {} labels".format(total, len(true_labels))
    )

pos = sum(1 for voted, truth in zip(ensemble_preds, true_labels) if voted == truth)
acc = pos / total if total else 0.0  # avoid ZeroDivisionError on an empty set
print("accuracy is: {}".format(acc))

In [None]:
# Toy example: row-wise arg-max of a small matrix, stored as floats.
inputs = np.array([[1,2,3,4],
                   [5,1,2,9],
                   [3,2,56,3]])
n_rows, n_cols = inputs.shape
# np.argmax reports the first maximum; scanning the column-reversed matrix and
# mapping the index back yields the last one, which is what a left-to-right
# scan that keeps overwriting its answer ends up with.
last_max_col = (n_cols - 1) - np.argmax(inputs[:, ::-1], axis=1)
argmax_list = np.zeros((n_rows))
argmax_list[:] = last_max_col

In [None]:
# Display the row-wise arg-max indices computed in the previous cell.
argmax_list