# Step 1: Imports #

We begin by importing several Python packages.

In [None]:
import math, re, os
import numpy as np
import tensorflow as tf


from tensorflow.keras.applications import VGG16
from tensorflow.keras import Sequential
from keras.layers import *
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from sklearn.metrics import classification_report,confusion_matrix
import seaborn as sns
tf.random.set_seed(20)
np.random.seed(20)
print("Tensorflow version " + tf.__version__)







# Step 2: Distribution Strategy #

A TPU has eight different *cores* and each of these cores acts as its own accelerator. (A TPU is sort of like having eight GPUs in one machine.) We tell TensorFlow how to make use of all these cores at once through a **distribution strategy**. Run the following cell to create the distribution strategy that we'll later apply to our model.

In [None]:
# Detect TPU, return appropriate distribution strategy
try:
    # A ValueError from the resolver is treated as "no TPU available".
    tpu = tf.distribute.cluster_resolver.TPUClusterResolver()
    print('Running on TPU ', tpu.master())
except ValueError:
    tpu = None

if not tpu:
    # No TPU: use whatever strategy TensorFlow currently has in scope.
    strategy = tf.distribute.get_strategy()
else:
    # Connect to the TPU cluster and initialise it before building the strategy.
    tf.config.experimental_connect_to_cluster(tpu)
    tf.tpu.experimental.initialize_tpu_system(tpu)
    strategy = tf.distribute.experimental.TPUStrategy(tpu)

print("REPLICAS: ", strategy.num_replicas_in_sync)

We'll use the distribution strategy when we create our neural network model. Then, TensorFlow will distribute the training among the eight TPU cores by creating eight different *replicas* of the model, one for each core.

# Step 3: Loading the Competition Data #

## Get GCS Path ##

When used with TPUs, datasets need to be stored in a [Google Cloud Storage bucket](https://cloud.google.com/storage/). You can use data from any public GCS bucket by giving its path just like you would data from `'/kaggle/input'`. The following will retrieve the GCS path for this competition's dataset.

In [None]:
from kaggle_datasets import KaggleDatasets

# Look up the GCS path for the 'tpu-getting-started' Kaggle dataset; the
# TFRecord file patterns below are built on top of this path.
GCS_DS_PATH = KaggleDatasets().get_gcs_path('tpu-getting-started')
print(GCS_DS_PATH) # what do gcs paths look like?

You can use data from any public dataset here on Kaggle in just the same way. If you'd like to use data from one of your private datasets, see [here](https://www.kaggle.com/docs/tpu#tpu3pt5).

## Load Data ##

When used with TPUs, datasets are often serialized into [TFRecords](https://www.kaggle.com/ryanholbrook/tfrecords-basics). This is a format convenient for distributing data to each of the TPU's cores. We've hidden the cell that reads the TFRecords for our dataset since the process is a bit long. You could come back to it later for some guidance on using your own datasets with TPUs.

In [None]:

# Image side lengths; must agree with the TFRecord folder selected below.
IMAGE_SIZE = [192, 192]
GCS_PATH = f"{GCS_DS_PATH}/tfrecords-jpeg-192x192"
# Lets tf.data choose the parallelism level on its own.
AUTO = tf.data.experimental.AUTOTUNE

# TFRecord shards for each split, matched by glob pattern.
TRAINING_FILENAMES = tf.io.gfile.glob(f"{GCS_PATH}/train/*.tfrec")
VALIDATION_FILENAMES = tf.io.gfile.glob(f"{GCS_PATH}/val/*.tfrec")
TEST_FILENAMES = tf.io.gfile.glob(f"{GCS_PATH}/test/*.tfrec")

# Flower class names, ten per row; the trailing comment on each row gives the
# list indices of that row. Later cells select classes by these indices
# (presumably the integer "class" ids stored in the TFRecords - confirm).
CLASSES = ['pink primrose',    'hard-leaved pocket orchid', 'canterbury bells', 'sweet pea',     'wild geranium',     'tiger lily',           'moon orchid',              'bird of paradise', 'monkshood',        'globe thistle',         # 00 - 09
           'snapdragon',       "colt's foot",               'king protea',      'spear thistle', 'yellow iris',       'globe-flower',         'purple coneflower',        'peruvian lily',    'balloon flower',   'giant white arum lily', # 10 - 19
           'fire lily',        'pincushion flower',         'fritillary',       'red ginger',    'grape hyacinth',    'corn poppy',           'prince of wales feathers', 'stemless gentian', 'artichoke',        'sweet william',         # 20 - 29
           'carnation',        'garden phlox',              'love in the mist', 'cosmos',        'alpine sea holly',  'ruby-lipped cattleya', 'cape flower',              'great masterwort', 'siam tulip',       'lenten rose',           # 30 - 39
           'barberton daisy',  'daffodil',                  'sword lily',       'poinsettia',    'bolero deep blue',  'wallflower',           'marigold',                 'buttercup',        'daisy',            'common dandelion',      # 40 - 49
           'petunia',          'wild pansy',                'primula',          'sunflower',     'lilac hibiscus',    'bishop of llandaff',   'gaura',                    'geranium',         'orange dahlia',    'pink-yellow dahlia',    # 50 - 59
           'cautleya spicata', 'japanese anemone',          'black-eyed susan', 'silverbush',    'californian poppy', 'osteospermum',         'spring crocus',            'iris',             'windflower',       'tree poppy',            # 60 - 69
           'gazania',          'azalea',                    'water lily',       'rose',          'thorn apple',       'morning glory',        'passion flower',           'lotus',            'toad lily',        'anthurium',             # 70 - 79
           'frangipani',       'clematis',                  'hibiscus',         'columbine',     'desert-rose',       'tree mallow',          'magnolia',                 'cyclamen ',        'watercress',       'canna lily',            # 80 - 89
           'hippeastrum ',     'bee balm',                  'pink quill',       'foxglove',      'bougainvillea',     'camellia',             'mallow',                   'mexican petunia',  'bromelia',         'blanket flower',        # 90 - 99
           'trumpet creeper',  'blackberry lily',           'common tulip',     'wild rose']                                                                                                                                               # 100 - 103


def decode_image(image_data):
    """Decode JPEG bytes into a float32 image tensor scaled to [0, 1].

    The result is reshaped to ``[*IMAGE_SIZE, 3]`` so that its shape is
    explicit (the original author notes this is needed for TPU).
    """
    pixels = tf.image.decode_jpeg(image_data, channels=3)
    scaled = tf.cast(pixels, tf.float32) / 255.0
    return tf.reshape(scaled, [*IMAGE_SIZE, 3])

def read_labeled_tfrecord(example):
    """Parse one serialized labeled example into an ``(image, label)`` pair.

    The "image" feature is decoded with ``decode_image``; the scalar int64
    "class" feature is cast to int32.
    """
    feature_spec = {
        "image": tf.io.FixedLenFeature([], tf.string),  # JPEG bytestring
        "class": tf.io.FixedLenFeature([], tf.int64),   # shape [] = scalar
    }
    parsed = tf.io.parse_single_example(example, feature_spec)
    return decode_image(parsed['image']), tf.cast(parsed['class'], tf.int32)

def read_unlabeled_tfrecord(example):
    """Parse one serialized unlabeled example into an ``(image, id)`` pair.

    These records carry no "class" feature; only the decoded image and the
    string "id" feature are returned.
    """
    feature_spec = {
        "image": tf.io.FixedLenFeature([], tf.string),  # JPEG bytestring
        "id": tf.io.FixedLenFeature([], tf.string),     # shape [] = scalar
    }
    parsed = tf.io.parse_single_example(example, feature_spec)
    return decode_image(parsed['image']), parsed['id']

def load_dataset(filenames, labeled=True, ordered=False):
    """Build a ``tf.data`` pipeline that reads and parses TFRecord files.

    Args:
        filenames: TFRecord file paths to read.
        labeled: If True, records are parsed with ``read_labeled_tfrecord``
            (image, label); otherwise with ``read_unlabeled_tfrecord``
            (image, id).
        ordered: If False (default), deterministic ordering is switched off
            so records are consumed as soon as they stream in.

    Returns:
        The parsed dataset.
    """
    options = tf.data.Options()
    if not ordered:
        # Trade reproducible ordering for read throughput.
        options.experimental_deterministic = False

    parse_record = read_labeled_tfrecord if labeled else read_unlabeled_tfrecord

    # Reads from multiple files in parallel, then applies the options before parsing.
    records = tf.data.TFRecordDataset(filenames, num_parallel_reads=AUTO)
    records = records.with_options(options)
    return records.map(parse_record, num_parallel_calls=AUTO)

In [None]:
TRAINING_FILENAMES

In [None]:
# Labeled training dataset of (image, label) pairs. `ordered` is left at its
# default (False), so element order is not deterministic.
mydataset = load_dataset(TRAINING_FILENAMES, labeled=True)
mydataset

In [None]:
# The ten flower classes used in this experiment. The list position matches
# the new label id (0-9) that the later bucketing cells assign to each class.
only10classes = [
    'pink primrose',
    'snapdragon',
    'purple coneflower',
    'king protea',
    'wild geranium',
    'tiger lily',
    'peruvian lily',
    'bird of paradise',
    'monkshood',
    'globe thistle',
]
len(only10classes)

In [None]:
def display_20_image(images):
    """Show up to the first 20 entries of ``images`` in a 5-row by 4-column grid.

    Args:
        images: Indexable, sized collection (list or array) of image arrays
            that ``plt.imshow`` can render. If it holds fewer than 20 images,
            only those are drawn and the remaining grid cells stay empty.

    Changes from the earlier version: the leftover template code that drew
    sine curves on top of the 3rd and 20th images has been removed, as has the
    unused random array that was generated on every iteration.
    """
    columns = 4
    rows = 5
    fig = plt.figure(figsize=(9, 13))

    # Never index past the end when fewer than rows*columns images are given.
    shown = min(rows * columns, len(images))
    for i in range(shown):
        fig.add_subplot(rows, columns, i + 1)  # subplot positions are 1-based
        plt.imshow(images[i])

    plt.show()  # finally, render the plot


In [None]:
mydataset

In [None]:

# One bucket of decoded images per selected flower class.
class0images, class1images, class2images, class3images, class4images = [], [], [], [], []
class5images, class6images, class7images, class8images, class9images = [], [], [], [], []

# Original dataset label id -> bucket that collects that class's images.
buckets_by_label = {
    0: class0images,
    10: class1images,
    16: class2images,
    12: class3images,
    4: class4images,
    5: class5images,
    17: class6images,
    7: class7images,
    8: class8images,
    9: class9images,
}

# Walk the whole dataset once, keeping only images of the ten selected classes.
for image, label in mydataset:
    bucket = buckets_by_label.get(int(label.numpy()))
    if bucket is not None:
        bucket.append(image.numpy())
print("done")

In [None]:
# Report how many images were collected for each class, in class order 0..9.
for class_images in (
    class0images, class1images, class2images, class3images, class4images,
    class5images, class6images, class7images, class8images, class9images,
):
    print(len(class_images))


In [None]:
# Keep at most the first 50 images of each class, stacked in class order 0..9.
train = np.concatenate([
    class0images[:50],
    class1images[:50],
    class2images[:50],
    class3images[:50],
    class4images[:50],
    class5images[:50],
    class6images[:50],
    class7images[:50],
    class8images[:50],
    class9images[:50],
])

In [None]:
train.shape

In [None]:
# Human-readable label lists: one copy of the class name per collected image.
# NOTE: the following cell rebinds label0..label9 to numeric strings, so these
# name lists are informational only.
label0 = ["pink_primrose"] * len(class0images)
label1 = ["snapdragon"] * len(class1images)
label2 = ["purple_coneflower"] * len(class2images)
label3 = ["king_protea"] * len(class3images)
label4 = ["wild_geranium"] * len(class4images)
label5 = ["tiger_lily"] * len(class5images)
label6 = ["peruvian_lily"] * len(class6images)
label7 = ["bird_of_paradise"] * len(class7images)  # was misspelled "birdof_p_aradise"
label8 = ["monkshood"] * len(class8images)
label9 = ["globe_thistle"] * len(class9images)

In [None]:
# Numeric labels kept as strings: one "k" per image collected for class k.
label0 = ["0"] * len(class0images)
label1 = ["1"] * len(class1images)
label2 = ["2"] * len(class2images)
label3 = ["3"] * len(class3images)
label4 = ["4"] * len(class4images)
label5 = ["5"] * len(class5images)
label6 = ["6"] * len(class6images)
label7 = ["7"] * len(class7images)
label8 = ["8"] * len(class8images)
label9 = ["9"] * len(class9images)

In [None]:
# Labels for the same "first 50 per class" selection used to build `train`.
labels = np.concatenate([
    label0[:50],
    label1[:50],
    label2[:50],
    label3[:50],
    label4[:50],
    label5[:50],
    label6[:50],
    label7[:50],
    label8[:50],
    label9[:50],
])
labels

In [None]:
labels.shape

In [None]:
display_20_image(train)

In [None]:
from skimage.exposure import equalize_adapthist
from skimage.transform import resize

# Apply adaptive histogram equalization to every training image.
new_train = [equalize_adapthist(image) for image in train]

# Preview the equalized images, then convert the list to a single array.
display_20_image(new_train)
new_train = np.array(new_train)

In [None]:
# Rebind the name to an empty list so it no longer refers to the TFRecord dataset.
mydataset=[]

In [None]:
# 80/20 train/test split of the equalized images, with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    new_train,
    labels,
    test_size=0.2,
    random_state=42,
)


In [None]:
# ImageNet-pretrained VGG16 without its classifier head; pooling='avg' adds a
# global average pooling layer as the final layer.
# NOTE(review): images in this notebook are scaled to [0, 1]; ImageNet VGG16
# weights are normally paired with keras' vgg16.preprocess_input - confirm
# the current scaling is intended.
Base_model = VGG16(
    weights='imagenet',
    include_top=False,
    pooling='avg',
    input_shape=(192, 192, 3),
)
# Freeze the base so its weights are not updated during training.
Base_model.trainable = False

In [None]:
# Random flip + small random rotation used to enlarge the training set.
data_augmentation = tf.keras.Sequential([
    RandomFlip("horizontal_and_vertical"),
    RandomRotation(0.1),
])

# Produce 8 augmented variants of every training image; each variant keeps
# the label of the image it was derived from.
# NOTE(review): the layers are called outside fit(); confirm the random
# transforms are active in the installed Keras version.
new_X = []
new_y = []
for image, target in zip(X_train, y_train):
    for _ in range(8):
        new_X.append(data_augmentation(image))
        new_y.append(target)
new_X = np.array(new_X)
new_X.shape

In [None]:
# Stop training once the training loss has not improved for 3 epochs.
es = tf.keras.callbacks.EarlyStopping(
    monitor='loss',
    patience=3,
)


In [None]:

# Experiment 1: equalized + augmented training data.
# The classifier reuses the VGG16 base layers and adds three dense layers
# (with dropout after the first two) and a 10-way softmax head.
model = Sequential(Base_model.layers)

model.add(Dense(1000, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(1000, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(1000, activation='relu'))

# adding prediction(softmax) layer
model.add(Dense(10, activation="softmax"))
model.summary()

# Metric names are pinned explicitly: unnamed Recall()/Precision() instances
# get auto-suffixed names (recall_1, precision_1, ...) whenever they are
# created again in the same session, which would break the
# history['recall'] / history['precision'] lookups in the plotting cell.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=[
        'accuracy',
        tf.keras.metrics.Recall(name='recall'),
        tf.keras.metrics.Precision(name='precision'),
    ],
)

# One-hot targets for the 10 classes.
y_tr = tf.keras.utils.to_categorical(new_y, num_classes=10)
y_ts = tf.keras.utils.to_categorical(y_test, num_classes=10)

history = model.fit(
    new_X,
    y_tr,
    epochs=20,
    batch_size=256,
    validation_split=0.2,
    callbacks=[es],
)

# Evaluate on the held-out test split.
y_pred = model.predict(X_test)

print(classification_report(y_test.astype('int'), np.argmax(y_pred, axis=1)))

In [None]:
# Classification report on the augmented training set itself.
y_pred = model.predict(new_X)

print(
    classification_report(
        np.array(new_y).astype('int'),
        np.argmax(y_pred, axis=1),
    )
)

In [None]:
import matplotlib.pyplot as plt

# Per-epoch curves recorded by the last model.fit call.
history_dict = history.history
loss_values, val_loss_values = history_dict['loss'], history_dict['val_loss']
accuracy, val_accuracy = history_dict['accuracy'], history_dict['val_accuracy']

epochs = range(1, len(loss_values) + 1)
fig, ax = plt.subplots(1, 2, figsize=(14, 6))
acc_ax, loss_ax = ax

# Left panel: accuracy vs. epochs
acc_ax.plot(epochs, accuracy, 'r', label='Training accuracy')
acc_ax.plot(epochs, val_accuracy, 'b', label='Validation accuracy')
acc_ax.set_title('Training & Validation Accuracy', fontsize=16)
acc_ax.set_xlabel('Epochs', fontsize=16)
acc_ax.set_ylabel('Accuracy', fontsize=16)
acc_ax.legend()

# Right panel: loss vs. epochs
loss_ax.plot(epochs, loss_values, 'r', label='Training loss')
loss_ax.plot(epochs, val_loss_values, 'b', label='Validation loss')
loss_ax.set_title('Training & Validation Loss', fontsize=16)
loss_ax.set_xlabel('Epochs', fontsize=16)
loss_ax.set_ylabel('Loss', fontsize=16)
loss_ax.legend()

In [None]:
import matplotlib.pyplot as plt

# Per-epoch recall / precision recorded by the last model.fit call.
# The previous version plotted the precision series on the panel titled
# "Recall" and the recall series on the panel titled "precision"; each series
# is now drawn on the panel that names it.
history_dict = history.history
recall_values = history_dict['recall']
val_recall_values = history_dict['val_recall']
precision_values = history_dict['precision']
val_precision_values = history_dict['val_precision']

epochs = range(1, len(recall_values) + 1)
fig, ax = plt.subplots(1, 2, figsize=(14, 6))
#
# Plot recall vs Epochs
#
ax[0].plot(epochs, recall_values, 'r', label='Training Recall')
ax[0].plot(epochs, val_recall_values, 'b', label='Validation Recall')
ax[0].set_title('Training & Validation Recall', fontsize=16)
ax[0].set_xlabel('Epochs', fontsize=16)
ax[0].set_ylabel('Recall', fontsize=16)
ax[0].legend()
#
# Plot precision vs Epochs
#
ax[1].plot(epochs, precision_values, 'r', label='Training precision')
ax[1].plot(epochs, val_precision_values, 'b', label='Validation precision')
ax[1].set_title('Training & Validation precision', fontsize=16)
ax[1].set_xlabel('Epochs', fontsize=16)
ax[1].set_ylabel('precision', fontsize=16)
ax[1].legend()

In [None]:
# Confusion matrix against the augmented training labels.
# Assumes y_pred currently holds the predictions for new_X - verify cell order.
cm = confusion_matrix(
    np.array(new_y).astype('int'),
    np.argmax(y_pred, axis=1),
)
sns.heatmap(cm, annot=True)

In [None]:
# Experiment 2: equalized images, no augmentation.
X_train, X_test, y_train, y_test = train_test_split(new_train, labels, test_size=0.2, random_state=42)

# Re-seed so this run starts from the same random state as the notebook's first seeding.
tf.random.set_seed(20)
np.random.seed(20)
model = Sequential(Base_model.layers)

model.add(Dense(1000, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(1000, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(1000, activation='relu'))

# adding prediction(softmax) layer
model.add(Dense(10, activation="softmax"))
model.summary()

# Metric names are pinned so history keys and log output stay 'recall' /
# 'precision' instead of auto-suffixed names such as 'recall_1', which Keras
# assigns when unnamed metrics are instantiated more than once per session.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=[
        'accuracy',
        tf.keras.metrics.Recall(name='recall'),
        tf.keras.metrics.Precision(name='precision'),
    ],
)

# One-hot targets for the 10 classes.
y_tr = tf.keras.utils.to_categorical(y_train, num_classes=10)
y_ts = tf.keras.utils.to_categorical(y_test, num_classes=10)

history = model.fit(
    X_train,
    y_tr,
    epochs=20,
    batch_size=256,
    validation_split=0.2,
    callbacks=[es],
)

# Evaluate on the held-out test split.
y_pred = model.predict(X_test)

print(classification_report(y_test.astype('int'), np.argmax(y_pred, axis=1)))

In [None]:
# Confusion matrix on the held-out test split.
cm = confusion_matrix(
    y_test.astype('int'),
    np.argmax(y_pred, axis=1),
)
sns.heatmap(cm, annot=True)

In [None]:
# Experiment 3: raw images (`train`), i.e. no equalization and no augmentation.
X_train, X_test, y_train, y_test = train_test_split(train, labels, test_size=0.2, random_state=42)

# Re-seed so this run starts from the same random state as the notebook's first seeding.
tf.random.set_seed(20)
np.random.seed(20)
model = Sequential(Base_model.layers)

model.add(Dense(1000, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(1000, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(1000, activation='relu'))

# adding prediction(softmax) layer
model.add(Dense(10, activation="softmax"))
model.summary()

# Metric names are pinned so history keys and log output stay 'recall' /
# 'precision' instead of auto-suffixed names such as 'recall_2', which Keras
# assigns when unnamed metrics are instantiated more than once per session.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=[
        'accuracy',
        tf.keras.metrics.Recall(name='recall'),
        tf.keras.metrics.Precision(name='precision'),
    ],
)

# One-hot targets for the 10 classes.
y_tr = tf.keras.utils.to_categorical(y_train, num_classes=10)
y_ts = tf.keras.utils.to_categorical(y_test, num_classes=10)

history = model.fit(
    X_train,
    y_tr,
    epochs=20,
    batch_size=256,
    validation_split=0.2,
    callbacks=[es],
)

# Evaluate on the held-out test split.
y_pred = model.predict(X_test)

print(classification_report(y_test.astype('int'), np.argmax(y_pred, axis=1)))