In [None]:
!pip -q install tensorflow==2.3.0

In [None]:
# Basics / Data manipulation
import numpy as np
import pandas as pd
from tqdm.notebook import tqdm
import zipfile
import os

# Visualization
import matplotlib.pyplot as plt
from PIL import Image
import cv2
import skimage.io

# ML
from keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split

%matplotlib inline

# Data
10k+ .tiff images
*    **80%** for training 
*    **20%** for internal testing
            *  10% Validation
            *  10% Testing

# Checking if a TPU is available (falling back to a CPU/GPU strategy)

# Local import: `tf` is used below but is not imported by the notebook's import cells.
import tensorflow as tf

try:
    # The ValueError handler below treats a failed resolver construction as "no TPU attached".
    tpu = tf.distribute.cluster_resolver.TPUClusterResolver()  # TPU detection
    print("Running on TPU ", tpu.cluster_spec().as_dict()["worker"])
    tf.config.experimental_connect_to_cluster(tpu)
    tf.tpu.experimental.initialize_tpu_system(tpu)
    strategy = tf.distribute.experimental.TPUStrategy(tpu)
except ValueError:
    print("Not connected to a TPU runtime. Using CPU/GPU strategy")
    strategy = tf.distribute.MirroredStrategy()
# NOTE(review): `strategy` is not referenced again in the visible cells; the model
# would have to be built inside `strategy.scope()` for it to take effect - confirm intent.

# Unzipping the images
# Extract the three dataset archives into the current working directory, in this order.
for archive_path in (
    "../input/pc-data-dataset-gen/train.zip",
    "../input/pc-data-dataset-gen/validation.zip",
    "../input/pc-data-dataset-gen/test.zip",
):
    with zipfile.ZipFile(archive_path, "r") as archive:
        archive.extractall(".")

# Set-up NASNetMobile

In [None]:
from tensorflow.keras import models
from tensorflow.keras import layers
from keras.preprocessing.image import ImageDataGenerator
from sklearn import model_selection
from tensorflow.keras import optimizers
# Print the devices TensorFlow reports locally; use this to check that the GPU is configured correctly.
from tensorflow.python.client import device_lib
print(device_lib.list_local_devices())


from tensorflow.keras.applications import NASNetMobile

In [None]:
from tensorflow.keras.applications.efficientnet import EfficientNetB6
# Pretrained NASNetMobile used as the convolutional base of the classifier.
conv_base = NASNetMobile(
    input_shape=(224, 224, 3),  # 224x224 images with 3 channels
    include_top=False,          # drop the original classification head
    weights="imagenet",
)

In [None]:
# Classifier: pretrained convolutional base followed by pooling, dropout and a softmax head.
model = models.Sequential()
model.add(conv_base)
# NOTE(review): the layer is named "gap" although it performs global *max* pooling;
# the name is left unchanged so layer names in saved models stay the same.
model.add(layers.GlobalMaxPooling2D(name="gap"))
# Avoid overfitting
model.add(layers.Dropout(rate=0.5))
model.add(layers.Dense(10, activation="softmax", name="fc_out"))
# The pretrained base is left trainable, so it is fine-tuned together with the head.
conv_base.trainable = True

model.compile(
    loss="categorical_crossentropy",
    # `learning_rate` is the supported keyword; `lr` is a deprecated alias.
    optimizer=optimizers.RMSprop(learning_rate=2e-5),
    # Short name on purpose: later cells read the history keys "acc" / "val_acc".
    metrics=["acc"],
)

In [None]:
### CAUTION ###

# Letters identifying the dataset variations; the 1-based position of a letter in
# this list is part of the input directory name used by zippity().
variations = ["A", "B", "C", "D", "E", "F", "G", "H"]
#variations = ["A", "B"]

def zippity(variant):
    """Extract the train, validation and test archives of one dataset variation.

    The archives are read from
    ``../input/8-fold-pc-dataset-gen-<position>-8-<variant lowercased>/`` and
    extracted into the current working directory, in the order train,
    validation, test.

    Args:
        variant: One of the letters listed in ``variations``.

    Raises:
        ValueError: If ``variant`` is not present in ``variations``.
    """
    print(f'Variation {variant}')

    # Directory shared by the three archives of this variation.
    dataset_dir = f'../input/8-fold-pc-dataset-gen-{variations.index(variant) + 1}-8-{variant.lower()}'
    for split in ("train", "validation", "test"):
        with zipfile.ZipFile(f'{dataset_dir}/{split}{variant}.zip', 'r') as archive:
            archive.extractall(".")

In [None]:
def zappity(folders=("train", "validation")):
    """Delete extracted image folders from the current working directory.

    Pure-Python replacement for the former ``!rm -r`` shell magics, so the
    function no longer depends on IPython. Folders that do not exist are
    skipped silently.

    Args:
        folders: Names of the directories to remove. The default removes
            ``train`` and ``validation``; ``test`` is left in place by
            default, as its removal was commented out in the original cell.
    """
    import shutil  # local import keeps this cell self-contained

    # Deleting image folders to avoid over-saturating the output
    for folder in folders:
        shutil.rmtree(folder, ignore_errors=True)

In [None]:
 # Image generator shared by the directory iterators created later in the notebook.
 image_gen = ImageDataGenerator(
     rescale=1/255,            # scale pixel values by 1/255
     width_shift_range=0.1,    # random horizontal shift
     height_shift_range=0.1,   # random vertical shift
     shear_range=0.2,
     zoom_range=0.2,
     horizontal_flip=True,
     fill_mode="nearest",      # how pixels exposed by a transform are filled
 )

from tensorflow.keras.applications import EfficientNetB6
from keras.layers import Dense
# Use the EfficientNetB6 class imported just above: the bare name `tensorflow`
# is never bound in this notebook, so the fully qualified path raised NameError.
conv_base = EfficientNetB6(weights="imagenet", include_top=False, input_shape=(224, 224, 3))

# Configuration of the NASNetMobile
# NOTE(review): this rebinds `conv_base`, discarding the EfficientNetB6 base created
# by the previous statement - confirm which backbone is intended.
conv_base = NASNetMobile(weights="imagenet", include_top=False, input_shape=(224, 224, 3))

# Model 
The model has the following configuration:
______________
1st layer: NASNetMobile (224, 224, 3) input images
______________
2nd layer: GlobalMaxPooling2D
______________
3rd layer: Dropout with rate = 0.5 (the learning rate of 2e-5 belongs to the RMSprop optimizer, not to this layer)
______________
4th layer: Dense softmax layer with 10 output units that classifies the image

In [None]:
# Print the layer-by-layer summary of the assembled model.
model.summary()

## Data Augmentation

Before training, we apply light preprocessing and augmentation to the images in order to improve prediction performance.

In [None]:
from keras.preprocessing.image import ImageDataGenerator

In [None]:
batch_size = 32

def which_image_gen(which):
    """Create a directory iterator for one of the dataset splits.

    Args:
        which: Split keyword: ``"train"``, ``"valid"`` or ``"test"``.

    Returns:
        The iterator returned by ``image_gen.flow_from_directory`` for the
        split's folder (``./train``, ``./validation`` or ``./test``), using
        224x224 targets, the module-level ``batch_size`` and categorical labels.

    Raises:
        ValueError: If ``which`` is not one of the supported keywords
            (previously this surfaced as an unbound-variable error).
    """
    split_directories = {
        "train": "./train",
        "valid": "./validation",
        "test": "./test",
    }
    if which not in split_directories:
        raise ValueError(
            f"Unknown split {which!r}; expected one of {sorted(split_directories)}"
        )

    # NOTE(review): every split, including validation and test, goes through the
    # same `image_gen`, so any augmentation configured on it also applies to
    # evaluation data - confirm this is intended.
    return image_gen.flow_from_directory(split_directories[which],
                                         target_size=(224, 224),
                                         batch_size=batch_size,
                                         class_mode="categorical")

In [None]:
# For every dataset variation: extract its archives, then create the three directory iterators.
# NOTE(review): the iterator variables are rebound on each pass, so after the loop
# they refer to the iterators created during the last iteration.
for variety in variations:
    zippity(variety)

    train_image_gen, validation_image_gen, test_image_gen = (
        which_image_gen(split) for split in ("train", "valid", "test")
    )

# Flowing through directories to see the classes and the number of images
for split_dir in ("./train", "./validation", "./test"):
    print(image_gen.flow_from_directory(split_dir))

# Print each mapping explicitly: a bare expression is only displayed when it is
# the last statement of a cell, so the first two used to produce no output.
print(train_image_gen.class_indices)
print(validation_image_gen.class_indices)
print(test_image_gen.class_indices)

In [None]:
# Image counts per split, taken as the number of rows of the corresponding CSV file.
NUMBER_OF_TRAINING_IMAGES = pd.read_csv('../input/8-fold-pc-dataset-gen-0-8/training.csv').shape[0]
NUMBER_OF_VALIDATION_IMAGES = pd.read_csv('../input/8-fold-pc-dataset-gen-0-8/validation.csv').shape[0]
NUMBER_OF_TESTING_IMAGES = pd.read_csv('../input/8-fold-pc-dataset-gen-0-8/testing.csv').shape[0]

In [None]:
# for variety in variations:
#     zippity(variety)
    
#     train_image_gen = which_image_gen("train")
#     validation_image_gen = which_image_gen("valid")
    

# Number of whole batches per epoch for each split (a trailing partial batch is not counted).
train_steps = NUMBER_OF_TRAINING_IMAGES // batch_size
validation_steps = NUMBER_OF_VALIDATION_IMAGES // batch_size

# Train for 100 epochs, validating after each one; batches are loaded by 4 worker processes.
results = model.fit(
    train_image_gen,
    epochs=100,
    steps_per_epoch=train_steps,
    validation_data=validation_image_gen,
    validation_steps=validation_steps,
    workers=4,
    use_multiprocessing=True,
    verbose=1,
)

In [None]:
# Training options for a 30-epoch run on the same generators.
# NOTE(review): this rebinds `results`, so the history of any earlier fit held in that name is replaced.
fit_options = dict(
    steps_per_epoch=NUMBER_OF_TRAINING_IMAGES // batch_size,
    epochs=30,
    validation_data=validation_image_gen,
    validation_steps=NUMBER_OF_VALIDATION_IMAGES // batch_size,
    verbose=1,
    use_multiprocessing=True,
    workers=4,
)
results = model.fit(train_image_gen, **fit_options)

In [None]:
# Save the whole model (not only its weights) to a single HDF5 file.
model.save("./NASNetMobile-model.h5")

In [None]:
# zappity()

In [None]:
# One row per epoch (numbered from 1) with the metrics recorded in `results.history`.
epoch_numbers = list(range(1, len(results.history["acc"]) + 1))
results_df = pd.DataFrame({
    "epoch": epoch_numbers,
    **{metric: results.history[metric] for metric in ("acc", "val_acc", "loss", "val_loss")},
})
results_df

In [None]:
def plot_hist_acc(hist):
    """Plot the training and validation accuracy curves.

    Args:
        hist: Object exposing a ``history`` mapping that contains the
            ``"acc"`` and ``"val_acc"`` series (for example the value
            returned by ``model.fit``).
    """
    # Training curve first, validation second, matching the legend order below.
    for metric_key in ("acc", "val_acc"):
        plt.plot(hist.history[metric_key])
    plt.title("Model Accuracy")
    plt.ylabel("Accuracy")
    plt.xlabel("Epoch")
    plt.legend(["Accuracy", "Validation Accuracy"], loc="upper left")
    plt.show()

# Show the accuracy curves stored in `results`.
plot_hist_acc(results)

In [None]:
def plot_hist_loss(hist):
    """Plot the training and validation loss curves.

    Args:
        hist: Object exposing a ``history`` mapping that contains the
            ``"loss"`` and ``"val_loss"`` series (for example the value
            returned by ``model.fit``).
    """
    plt.plot(hist.history["loss"])
    plt.plot(hist.history["val_loss"])
    plt.title("Model Loss")
    # The y-axis shows loss values; it was previously mislabelled "Accuracy".
    plt.ylabel("Loss")
    plt.xlabel("Epoch")
    plt.legend(["Loss", "Validation Loss"], loc="upper left")
    plt.show()

# Show the loss curves stored in `results`.
plot_hist_loss(results)

## Testing

from sklearn.metrics import confusion_matrix, classification_report


# Apply the same 1/255 rescaling as the training generator; without it the model
# is evaluated on inputs in a different value range than it was trained on.
test_generator = ImageDataGenerator(rescale=1/255)
test_data_generator = test_generator.flow_from_directory(
    "./test", # Put your path here
    target_size=(224, 224),
    batch_size=32,
    # Keep the file order fixed so predictions line up with `.classes` below.
    shuffle=False)
# Ceiling division: number of batches needed to cover every test sample once.
test_steps_per_epoch = (
    test_data_generator.samples + test_data_generator.batch_size - 1
) // test_data_generator.batch_size

# `predict` accepts generators directly; `predict_generator` is deprecated.
predictions = model.predict(test_data_generator, steps=test_steps_per_epoch)
# Get most likely class
predicted_classes = np.argmax(predictions, axis=1)

true_classes = test_data_generator.classes
class_labels = list(test_data_generator.class_indices.keys())

# Per-class precision / recall / F1 on the test split.
report = classification_report(true_classes, predicted_classes, target_names=class_labels)
print(report)

In [None]:
# Delete the extracted image folders so that they do not clutter the notebook's output.
!rm -r train
!rm -r validation
!rm -r test