# Data preparation for model training #

In [None]:
# Mount Google Drive into the Colab filesystem at /content/drive.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# RUN THIS
import os
import shutil
import numpy as np
import zipfile
import math
from functools import partial
import tensorflow as tf
import random

In [None]:
# Force-reinstall the newest kaggle CLI without touching its dependencies (the version is not pinned).
!pip install --upgrade --force-reinstall --no-deps kaggle

In [None]:
# Directory where the kaggle CLI looks for its API token.
# -p keeps the cell re-runnable: a plain mkdir reports an error once the directory exists.
! mkdir -p ~/.kaggle

In [None]:
# Copy the API token into the kaggle config directory; assumes kaggle.json sits in the current working directory.
! cp kaggle.json ~/.kaggle/

In [None]:
# Restrict the token file to owner read/write only.
! chmod 600 ~/.kaggle/kaggle.json

In [None]:
# Download the "tpu-getting-started" competition archive (the next cell expects it at /content/tpu-getting-started.zip).
! kaggle competitions download tpu-getting-started

In [None]:
# Unpack the downloaded competition archive into /content.
archive_path = "/content/tpu-getting-started.zip"
with zipfile.ZipFile(archive_path, mode='r') as archive:
    archive.extractall(path="/content")

In [None]:
# RUN THIS

# Create the working directory tree used by the rest of the notebook.
# exist_ok=True makes the cell safe to re-run: a plain os.mkdir raises
# FileExistsError as soon as the directories are already there.
for subdirectory in ("data_before_split", "training", "validation", "test"):
  os.makedirs(os.path.join("flower_classification", subdirectory), exist_ok=True)

In [None]:
# RUN THIS

# Flower names ordered by class index (five per row; the comment gives the index range).
# NOTE: 'cyclamen ' and 'hippeastrum ' carry a trailing space. They are kept
# exactly as-is because these names are used to build directory names.
temp_classes = [
    'pink primrose', 'hard-leaved pocket orchid', 'canterbury bells', 'sweet pea', 'wild geranium',        # 000 - 004
    'tiger lily', 'moon orchid', 'bird of paradise', 'monkshood', 'globe thistle',                         # 005 - 009
    'snapdragon', "colt's foot", 'king protea', 'spear thistle', 'yellow iris',                            # 010 - 014
    'globe-flower', 'purple coneflower', 'peruvian lily', 'balloon flower', 'giant white arum lily',       # 015 - 019
    'fire lily', 'pincushion flower', 'fritillary', 'red ginger', 'grape hyacinth',                        # 020 - 024
    'corn poppy', 'prince of wales feathers', 'stemless gentian', 'artichoke', 'sweet william',            # 025 - 029
    'carnation', 'garden phlox', 'love in the mist', 'cosmos', 'alpine sea holly',                         # 030 - 034
    'ruby-lipped cattleya', 'cape flower', 'great masterwort', 'siam tulip', 'lenten rose',                # 035 - 039
    'barberton daisy', 'daffodil', 'sword lily', 'poinsettia', 'bolero deep blue',                         # 040 - 044
    'wallflower', 'marigold', 'buttercup', 'daisy', 'common dandelion',                                    # 045 - 049
    'petunia', 'wild pansy', 'primula', 'sunflower', 'lilac hibiscus',                                     # 050 - 054
    'bishop of llandaff', 'gaura', 'geranium', 'orange dahlia', 'pink-yellow dahlia',                      # 055 - 059
    'cautleya spicata', 'japanese anemone', 'black-eyed susan', 'silverbush', 'californian poppy',         # 060 - 064
    'osteospermum', 'spring crocus', 'iris', 'windflower', 'tree poppy',                                   # 065 - 069
    'gazania', 'azalea', 'water lily', 'rose', 'thorn apple',                                              # 070 - 074
    'morning glory', 'passion flower', 'lotus', 'toad lily', 'anthurium',                                  # 075 - 079
    'frangipani', 'clematis', 'hibiscus', 'columbine', 'desert-rose',                                      # 080 - 084
    'tree mallow', 'magnolia', 'cyclamen ', 'watercress', 'canna lily',                                    # 085 - 089
    'hippeastrum ', 'bee balm', 'pink quill', 'foxglove', 'bougainvillea',                                 # 090 - 094
    'camellia', 'mallow', 'mexican petunia', 'bromelia', 'blanket flower',                                 # 095 - 099
    'trumpet creeper', 'blackberry lily', 'common tulip', 'wild rose',                                     # 100 - 103
]

# Lookup table: class index -> flower name.
classes = dict(enumerate(temp_classes))
  

In [None]:
# RUN THIS AFTER RUNNING THE NEXT TWO CELLS


# TFRecord variants of the dataset, one directory per image resolution.
datasets = [
    "../input/tpu-getting-started/tfrecords-jpeg-192x192",
    "../input/tpu-getting-started/tfrecords-jpeg-224x224",
    "../input/tpu-getting-started/tfrecords-jpeg-331x331",
    "../input/tpu-getting-started/tfrecords-jpeg-512x512",
]
# image_sizes[k] is the pixel size of the images stored in datasets[k].
image_sizes = [[192, 192], [224, 224], [331, 331], [512, 512]]

# Running file ids. They keep counting across all resolutions, so files
# written into the same folder never overwrite each other.
id_labeled = 0
id_unlabeled = 0

for dataset_dir, image_size in zip(datasets, image_sizes):
  # "train" and "val" records both carry labels and are pooled together.
  labeled_data_file_names = [
      dataset_dir + split + "/" + record_file
      for split in ("/train", "/val")
      for record_file in os.listdir(dataset_dir + split)
  ]
  unlabeled_data_file_names = [
      dataset_dir + "/test" + "/" + record_file
      for record_file in os.listdir(dataset_dir + "/test")
  ]

  # Labeled images go into one folder per class under data_before_split.
  for labeled_example in convert_tfrecords_to_jpeg(labeled_data_file_names):
    save_image(labeled_example, image_size, "./flower_classification/data_before_split/", id_labeled)
    id_labeled += 1

  # Unlabeled images are written flat into the test folder.
  for unlabeled_example in convert_tfrecords_to_jpeg(unlabeled_data_file_names, labeled=False):
    save_image(unlabeled_example, image_size, "./flower_classification/test/", id_unlabeled, labeled=False)
    id_unlabeled += 1

In [None]:
# RUN THIS

def save_image( example , image_size , save_path , id , labeled = True ):
  """Decode one parsed TFRecord example and write it to disk as a JPEG file.

  Args:
    example: mapping with the encoded JPEG under 'image' and, when `labeled`
      is True, an integer class index under 'class'.
    image_size: two-element size the decoded image must have; the reshape
      below raises if the decoded image does not match it.
    save_path: destination directory prefix. It is concatenated as-is, so it
      has to end with a path separator.
    id: number used as the file name (`<id>.jpeg`).
    labeled: when True the file is written into a sub-directory named after
      the class (looked up in the module-level `classes` dict and created on
      demand); otherwise it is written directly under `save_path`.
  """
  image = tf.image.decode_jpeg( example['image'] , channels=3 )
  image = tf.cast( image , tf.float32 )
  image = tf.reshape( image , [ *image_size , 3 ] )
  image = image.numpy()

  if labeled:
    # Resolve the class name once instead of repeating the tensor -> numpy
    # conversion for every path. int() gives a plain hashable key whether
    # squeeze() returns a numpy scalar or a 0-d array.
    class_name = classes[ int( example['class'].numpy().squeeze() ) ]
    target_dir = save_path + class_name
    # exist_ok replaces the separate isdir() check and tolerates re-runs.
    os.makedirs( target_dir , exist_ok=True )
    target_file = target_dir + "/" + str(id) + '.jpeg'
  else:
    target_file = save_path + str(id) + '.jpeg'

  # NOTE(review): scale=True asks Keras to rescale pixel values before
  # encoding -- confirm this is wanted for images that are already 0-255.
  tf.keras.utils.save_img( target_file , image , scale=True )

In [None]:
# RUN THIS

def convert_tfrecords_to_jpeg( files_paths , labeled = True ):
  """Parse every record of the given TFRecord files into feature dicts.

  Despite the name, nothing is decoded or written here: each returned example
  still holds the encoded image string under 'image'. All examples are kept
  in memory at once.

  Args:
    files_paths: iterable of TFRecord file paths, read in the given order.
    labeled: when True every record must also carry an int64 'class' feature.

  Returns:
    A list with one parsed example per record, in reading order.
  """
  # Feature schema: unlabeled records only contain the image.
  read_features = { "image": tf.io.FixedLenFeature([], tf.string) }
  if labeled:
    read_features["class"] = tf.io.FixedLenFeature([], tf.int64)

  examples = []
  for path in files_paths:
    # tf.data.TFRecordDataset is the documented replacement for the deprecated
    # tf.compat.v1.io.tf_record_iterator; iterating it (eager mode) yields the
    # serialized records of the file in order.
    for serialized_record in tf.data.TFRecordDataset( path ):
      examples.append( tf.io.parse_single_example( serialized_record , read_features ) )
  return examples

In [None]:
# RUN THIS


# Copy every class folder into a 90% training / 10% validation split.
SPLIT_SEED = 42        # fixed seed: the same files land in the same split on every run
TRAIN_FRACTION = 0.9
source_root = "./flower_classification/data_before_split"
split_rng = random.Random(SPLIT_SEED)

# NOTE(review): if data_before_split holds the same photo at several
# resolutions, a per-file split can place copies of one photo on both sides
# of the split -- confirm whether that matters for the validation score.

# os.listdir returns entries in arbitrary order, so both listings are sorted
# to make the seeded shuffle reproducible.
for class_name in sorted(os.listdir(source_root)):
  class_source_dir = os.path.join(source_root, class_name)
  class_train_dir = os.path.join("./flower_classification/training", class_name)
  class_val_dir = os.path.join("./flower_classification/validation", class_name)
  # exist_ok=True keeps the cell re-runnable (os.mkdir raises on existing folders).
  os.makedirs(class_train_dir, exist_ok=True)
  os.makedirs(class_val_dir, exist_ok=True)

  class_images = sorted(os.listdir(class_source_dir))
  split_rng.shuffle(class_images)
  split_at = int(TRAIN_FRACTION * len(class_images))
  training_images = class_images[:split_at]
  val_images = class_images[split_at:]

  for image in training_images:
    shutil.copyfile(os.path.join(class_source_dir, image), os.path.join(class_train_dir, image))
  for image in val_images:
    shutil.copyfile(os.path.join(class_source_dir, image), os.path.join(class_val_dir, image))

In [None]:
# Flush pending writes and unmount the Google Drive mounted at the top of the notebook.
from google.colab import drive
drive.flush_and_unmount()

# Model training #

----------------------------------------------- Setting up the image data generators ------------------------------------

In [None]:
# TensorFlow Addons provides the F1 metric used when the model is compiled; the version is pinned.
!pip install tensorflow_addons==0.15.0

In [None]:
import math
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import random 
import tensorflow.keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation, Dropout, Flatten,Conv2D, MaxPooling2D,BatchNormalization,LayerNormalization
from tensorflow.keras.applications.inception_resnet_v2 import InceptionResNetV2
from keras.models import Model
from keras import optimizers
import tensorflow_addons as tfa
from tensorflow.keras.metrics import Accuracy

In [None]:
# Batch sizes and the resolution every image is resized to.
train_batch_size = 1024
val_batch_size = 512
image_size = (299, 299)

# Training images: pixel values multiplied by 1/255 plus random augmentation.
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    zoom_range=0.2,
    fill_mode='nearest',
    horizontal_flip=True,
)

# Validation images: same rescaling, no augmentation.
val_datagen = ImageDataGenerator(rescale=1./255)

train_path = "./flower_classification/training"

val_path = "./flower_classification/validation"

# Options shared by both directory iterators.
_flow_options = dict(target_size=image_size, class_mode='categorical')

train_data = train_datagen.flow_from_directory(
    train_path, batch_size=train_batch_size, **_flow_options)

val_data = val_datagen.flow_from_directory(
    val_path, batch_size=val_batch_size, **_flow_options)

In [None]:
# InceptionResNetV2 backbone with ImageNet weights and without its classifier.
inception_resnet = InceptionResNetV2(
    weights='imagenet',
    include_top=False,
    input_shape=(299, 299, 3),
)

# Freeze every backbone layer; only the head added below stays trainable.
for backbone_layer in inception_resnet.layers:
  backbone_layer.trainable = False

# Head: flatten -> 256 -> 128 -> 128 (ReLU) -> 104-way softmax.
x = Flatten()(inception_resnet.output)
for hidden_units in (256, 128, 128):
  x = Dense(hidden_units, activation='relu')(x)
x = Dense(104, activation='softmax')(x)

model = Model(inputs=inception_resnet.input, outputs=x)
model.summary()

In [None]:
# Delete checkpoints left over from an earlier run. Missing files are skipped,
# so the cell also works on a fresh session instead of raising FileNotFoundError.
# NOTE: a later cell loads ./fscore_weights.h5 -- skip this cell when resuming.
for stale_weights_path in ("./fscore_weights.h5", "./acc_weights.h5"):
  if os.path.exists(stale_weights_path):
    os.remove(stale_weights_path)

In [None]:
import os
# Drop the training log of a previous run, if there is one; a fresh session
# has no log yet and must not fail here.
if os.path.exists("./model_history_log.csv"):
  os.remove("./model_history_log.csv")

In [None]:
accuracy_checkpoint_filepath = './acc_weights.h5'
fscore_checkpoint_filepath = './fscore_weights.h5'

# Both checkpoints store weights only and rewrite their file each time the
# monitored validation metric reaches a new maximum.
_best_only_options = dict(save_weights_only=True, mode='max', save_best_only=True)

accuracy_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=accuracy_checkpoint_filepath,
    monitor='val_accuracy',
    **_best_only_options)

fscore_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=fscore_checkpoint_filepath,
    monitor='val_my_fscore',
    **_best_only_options)

In [None]:
# Macro-averaged F1 over the 104 classes, reported under the name 'my_fscore'.
fscore_metric = tfa.metrics.F1Score(
    name='my_fscore',
    num_classes=104,
    average='macro',
)

adam = tf.keras.optimizers.Adam(learning_rate=0.001)

model.compile(
    loss="categorical_crossentropy",
    optimizer=adam,
    metrics=[fscore_metric, 'accuracy'],
)

In [None]:
from keras.callbacks import CSVLogger
# append=True adds rows to an existing log file instead of overwriting it, so resumed runs extend the same history.
csv_logger = CSVLogger("./model_history_log.csv", append=True)

In [None]:
# Resume from the best-F1 checkpoint when one exists. On a first run (or after
# the checkpoint files were deleted) there is nothing to load, and training
# starts from the current weights instead of crashing.
if os.path.exists("./fscore_weights.h5"):
  model.load_weights("./fscore_weights.h5")
else:
  print("No checkpoint at ./fscore_weights.h5 - starting from the current weights.")

In [None]:
# Train for 15 epochs, checkpointing the best F1 / accuracy weights and logging to CSV.
# Step counts come from the iterators themselves: len() is the number of
# batches in one full pass, including the final partial batch. The previous
# hard-coded sample counts (59234 / 6626) go stale whenever the split changes,
# and flooring them skipped part of the validation set every epoch.
model_history = model.fit(
    x=train_data,
    epochs=15,
    verbose='auto',
    validation_data=val_data,
    steps_per_epoch=len(train_data),
    validation_steps=len(val_data),
    callbacks=[fscore_callback, accuracy_callback, csv_logger],
)