In [None]:
import numpy as np
import pandas as pd
import cv2
import os
import random

import matplotlib.pyplot as plt
import matplotlib.image as mpimg

import sys
import config

from tensorflow.keras.models import load_model

# Make the project packages importable. sys.path entries must be directories,
# so the project root is registered once (the notebook already treats the
# working directory as the project root when it builds its data paths).
project_root = os.getcwd()
if project_root not in sys.path:
    sys.path.append(project_root)

# NOTE(review): star imports can silently shadow one another in import order;
# prefer explicit names once the set of helpers used by this notebook is settled.
from model_building.create_image_folders import *
from model_building.cnn_model_keras import *
from model_building.new_keras_model import *
from model_test.test_model import *

import warnings
warnings.simplefilter("ignore", UserWarning)
import logging
logging.getLogger('tensorflow').disabled = True

## Load Data

In [None]:
## Set paths
# All data folders live under <working directory>/data/ai_ready
ai_ready_dir = os.path.join(os.getcwd(), "data", "ai_ready")
img_folder = os.path.join(ai_ready_dir, "images")
train_img = os.path.join(ai_ready_dir, "train_images")
val_img = os.path.join(ai_ready_dir, "val_images")
test_img = os.path.join(ai_ready_dir, "test_images")
labels_image = os.path.join(ai_ready_dir, "x-ai_data.csv")
create_images = False

## Create Subfolder

Use the function `subfolders` to do the same thing as the cells below. DO NOT run it twice, to avoid creating duplicates.

In [None]:
# Call the project helper `subfolders` with the label CSV path and the source,
# train and validation image folders.
# NOTE(review): `create_images` is defined in the paths cell but never consulted —
# presumably it was meant to gate this call so it is not run twice; confirm.
subfolders(labels_image, img_folder, train_img, val_img)

In [None]:
## Set paths
# Derive the folders from the working directory instead of one user's absolute
# home path, so the notebook runs on any checkout of the project.
# The trailing separator of the folder paths is preserved so that code building
# file paths by plain string concatenation keeps working.
data_dir = os.path.join(os.getcwd(), "data", "ai_ready")
img_folder = os.path.join(data_dir, "images", "")
train_img = os.path.join(data_dir, "train_images", "")
val_img = os.path.join(data_dir, "val_images", "")
test_img = os.path.join(data_dir, "test_images", "")
labels_image = os.path.join(data_dir, "x-ai_data.csv")

In [None]:
labels_image = pd.read_csv(labels_image)
train = labels_image.loc[labels_image['split']=='train']
val = labels_image.loc[labels_image['split']=='validation']

In [None]:
train0 = train.loc[train['class']==0]
train1 = train.loc[train['class']==1]

val0 = val.loc[val['class']==0]
val1 = val.loc[val['class']==1]


In [None]:
# Preview only the first rows of the class-0 validation subset instead of dumping the whole frame
val0.head()

In [None]:
# Copy the TRAIN images into one sub-folder per class label (<train_img>/0 and <train_img>/1).
# Paths are built with os.path.join so the cell works whether or not the folder
# variables end with a path separator.
for class_dir, subset in (("0", train0), ("1", train1)):
    target_dir = os.path.join(train_img, class_dir)
    # cv2.imwrite does not create missing folders, so make sure the target exists
    os.makedirs(target_dir, exist_ok=True)
    for filename in subset['filename']:
        source_path = os.path.join(img_folder, filename)
        im = cv2.imread(source_path)
        if im is None:
            # cv2.imread returns None (it does not raise) when a file is missing or unreadable
            raise FileNotFoundError(f"Could not read image: {source_path}")
        cv2.imwrite(os.path.join(target_dir, filename), im)

In [None]:
# Copy the VALIDATION images into one sub-folder per class label (<val_img>/0 and <val_img>/1).
# Paths are built with os.path.join so the cell works whether or not the folder
# variables end with a path separator.
for class_dir, subset in (("0", val0), ("1", val1)):
    target_dir = os.path.join(val_img, class_dir)
    # cv2.imwrite does not create missing folders, so make sure the target exists
    os.makedirs(target_dir, exist_ok=True)
    for filename in subset['filename']:
        source_path = os.path.join(img_folder, filename)
        im = cv2.imread(source_path)
        if im is None:
            # cv2.imread returns None (it does not raise) when a file is missing or unreadable
            raise FileNotFoundError(f"Could not read image: {source_path}")
        cv2.imwrite(os.path.join(target_dir, filename), im)

In [None]:
import shutil

## Remove the hidden Jupyter checkpoint folders from the train/validation image folders
# os.path.join works with or without a trailing separator on the folder paths;
# ignore_errors=True because the checkpoint folder may simply not exist.
shutil.rmtree(os.path.join(train_img, ".ipynb_checkpoints"), ignore_errors=True)
shutil.rmtree(os.path.join(val_img, ".ipynb_checkpoints"), ignore_errors=True)

## Model

In [None]:
## Train and Val dataset
# Enable memory growth on every visible GPU: TensorFlow requires the setting to
# be the same across GPUs, so configuring only the first one fails on
# multi-GPU machines.
physical_devices = tf.config.list_physical_devices("GPU")
print("Num GPUs Available: ", len(physical_devices))
for gpu in physical_devices:
    tf.config.experimental.set_memory_growth(gpu, True)

# NOTE(review): `train` and `val` are presumably the dataset builders pulled in
# by the star imports — confirm they have not been rebound to something else
# (e.g. a DataFrame) before this cell runs.
train_ds = train(train_img, config.image_size, config.batch_size)
val_ds = val(val_img, config.image_size, config.batch_size)

In [None]:
# Show the first four images of training batch 1 in a 2x2 grid, each titled with its integer label
plt.figure(figsize=(10, 10))
images, labels = train_ds[1]
for idx in range(4):
    plt.subplot(2, 2, idx + 1)
    plt.imshow(images[idx])
    plt.title(int(labels[idx]))
    plt.axis("off")

In [None]:
## Model
# Build the model with the project helper `make_model`. The input shape is
# config.image_size with a trailing 3 appended (presumably the RGB channel
# count — confirm), and two output classes are requested.
model = make_model(input_shape=config.image_size + (3,), num_classes=2)

In [None]:
# Train on the train/validation datasets; the last argument (2) is presumably
# the number of epochs — confirm against train_model's signature.
# NOTE(review): a later cell calls train_model with a model-name string as first
# argument; verify which `train_model` definition is in scope after the star imports.
train_model(model, train_ds, val_ds, 2)

In [None]:
# Save the model to "model2.h5" (relative path, i.e. in the working directory)
model.save("model2.h5")

In [None]:
# Visualize data augmentation: draw nine random augmentations of one training image
data_augmentation = keras.Sequential(
    [
        layers.RandomFlip("horizontal_and_vertical"),
        layers.RandomRotation(0.1),
        layers.RandomContrast([0,1]),
        layers.RandomTranslation(height_factor=0.2, width_factor=0.2)
    ]
)
plt.figure(figsize=(10, 10))
# Fetch batch 4 explicitly so the preview does not depend on an `images`
# variable left over from an earlier cell. The labels are not needed here.
images, _unused_labels = train_ds[4]
for i in range(9):
    # Each call draws fresh random transformations for the whole batch
    augmented_images = data_augmentation(images)
    ax = plt.subplot(3, 3, i + 1)
    # assumes pixel values are in [0, 1], hence the 255 scaling — TODO confirm
    plt.imshow((255*augmented_images[0].numpy()).astype("uint8"))
    plt.axis("off")

## Second Model

In [None]:
# Rows of the label table whose 'split' column is 'test'
# (expects `labels_image` to be the loaded DataFrame, not the CSV path)
test = labels_image.loc[labels_image['split']=='test']

In [None]:
# Display the current value of the test-image folder path
test_img

In [None]:
# Copy the TEST images into <test_img>/test, i.e. a test folder containing a single
# sub-folder that holds every test image. Only run once.
test_dir = os.path.join(test_img, "test")
# cv2.imwrite does not create missing folders, so make sure the target exists
os.makedirs(test_dir, exist_ok=True)
for filename in test['filename']:
    source_path = os.path.join(img_folder, filename)
    im = cv2.imread(source_path)
    if im is None:
        # cv2.imread returns None (it does not raise) when a file is missing or unreadable
        raise FileNotFoundError(f"Could not read image: {source_path}")
    cv2.imwrite(os.path.join(test_dir, filename), im)

In [None]:
# Imported here so the cell does not depend on an earlier, optional cell having run
import shutil

# Remove the hidden Jupyter checkpoint folder from <test_img>/test, if it exists.
# os.path.join works with or without a trailing separator on test_img.
shutil.rmtree(os.path.join(test_img, "test", ".ipynb_checkpoints"), ignore_errors=True)

In [None]:
from numpy import expand_dims
from keras.preprocessing.image import ImageDataGenerator
from matplotlib import pyplot
from tensorflow.keras.preprocessing.image import load_img
from tensorflow.keras.preprocessing.image import img_to_array
from PIL import Image


def myFunc(image):
    """Convert an image array to HSV using OpenCV's BGR->HSV conversion.

    Defined in this cell because the generator below needs it on a fresh kernel.
    NOTE(review): load_img presumably yields RGB channel order, so
    cv2.COLOR_RGB2HSV may be the intended conversion — confirm.
    """
    return cv2.cvtColor(image,cv2.COLOR_BGR2HSV)


# Load one sample image and add a leading batch axis for the generator
image = load_img(os.path.join(img_folder,'silos_256-0-0--6-16-536-28464.png'))
data = img_to_array(image)
samples = np.expand_dims(data, 0)

# Generator whose only transformation is the HSV conversion above
datagen = ImageDataGenerator(
   preprocessing_function = myFunc,
    )

# Creating an iterator for data augmentation
it = datagen.flow(samples, batch_size=1)

# One figure for both previews; creating a figure inside the loop would put
# each subplot in its own, mostly empty, figure.
plt.figure(figsize=(10,10))
for i in range(2):
    plt.subplot(1, 2, i+1)
    # next(it) uses the iterator protocol and does not rely on a .next() method
    batch = next(it)
    # Convert to unsigned integers for viewing
    img = batch[0].astype('uint8')
    plt.imshow(img)
    plt.axis('off')

plt.show()

In [None]:
# Reload the sample image and display it as loaded from disk
sample_file = os.path.join(img_folder,'silos_256-0-0--6-16-536-28464.png')
image = load_img(sample_file)
data = img_to_array(image)
samples = data[np.newaxis, ...]  # same array with a leading batch axis

plt.figure(figsize=(10,10))
plt.axis('off')
plt.imshow(image)

In [None]:
from numpy import expand_dims
from keras.preprocessing.image import ImageDataGenerator
from matplotlib import pyplot
from tensorflow.keras.preprocessing.image import load_img
from tensorflow.keras.preprocessing.image import img_to_array
from PIL import Image

# Load one sample image as an integer array and add a leading batch axis
image = load_img(os.path.join(img_folder,'silos_256-0-0--6-16-536-28464.png'))
image = img_to_array(image).astype(int)
data = np.expand_dims(image, 0)

def myFunc(image):
    """Convert an image array to HSV using OpenCV's BGR->HSV conversion.

    NOTE(review): load_img presumably yields RGB channel order, so
    cv2.COLOR_RGB2HSV may be the intended conversion — confirm.
    """
    return cv2.cvtColor(image,cv2.COLOR_BGR2HSV)

# Augmenting generator: HSV conversion, 1/255 rescale, random rotations and flips.
# NOTE(review): zca_whitening presumably only takes effect after
# train_datagen.fit(...) has been called on sample data — confirm in the Keras docs.
train_datagen = ImageDataGenerator(
        rescale=1. / 255,
        rotation_range=90,
        # shear_range=0.2,
        horizontal_flip=True,
        vertical_flip=True,
        zca_whitening =True,
        # # brightness_range = [0.5, 2.0],
        preprocessing_function = myFunc,
        )

# Creating an iterator for data augmentation from the generator configured in
# this cell (not from a generator left over from an earlier cell).
it = train_datagen.flow(data, batch_size=1)

# Preparing the samples and plot for displaying output
for i in range(6):
    # preparing the subplot
    pyplot.subplot(330 + 1 + i)
    # next(it) uses the iterator protocol and does not rely on a .next() method
    batch = next(it)
    # The generator rescales by 1/255, so scale back before casting to
    # unsigned integers; otherwise every pixel would truncate to 0.
    preview = np.clip(batch[0] * 255, 0, 255).astype('uint8')
    # Plotting the data
    pyplot.imshow(preview)

pyplot.show()


In [None]:
## Train, Val, and Test dataset
# Build the three datasets with the project helpers train_set / val_set / test_set,
# each from its image folder plus the image size and batch size held in `config`.
train_ds = train_set(train_img, config.image_size, config.batch_size)
val_ds = val_set(val_img, config.image_size, config.batch_size)
test_ds = test_set(test_img, config.image_size, config.batch_size)

In [None]:
# Create the second model with the project helper `keras_model`, passing the
# configured input shape and the train/validation datasets.
model = keras_model(config.input_shape, train_ds, val_ds)

In [None]:
# Train for config.number_epochs; the first argument is a model name string here.
# NOTE(review): presumably the helper saves to "<name>.h5", since the next cell
# loads "last_keras_model.h5" — confirm against train_model's implementation.
train_model("last_keras_model", train_ds, val_ds, config.number_epochs)

In [None]:
# Load the saved model from disk and run the project helper `test_model` on the test dataset
model_keras = load_model("last_keras_model.h5")
preds = test_model(test_ds, model_keras, config.batch_size)

In [None]:
# Hand the predictions to the project helper `output_preds`; the last argument is
# presumably the base name of the output it writes — confirm.
output_preds(preds, train_ds, test_ds, 'last_keras_model')

In [None]:
# Reload the saved model and rebuild the test dataset, then predict again.
# Note that test_model receives 1 as its last argument here, whereas the earlier
# prediction cell passes config.batch_size.
model = load_model('last_keras_model.h5')

test_ds = test_set(test_img, config.image_size, config.batch_size)
y_preds = test_model(test_ds, model, 1)

In [None]:
# Ground-truth classes of the test split, in label-table row order.
# NOTE(review): y_preds follows the order in which test_ds yields its files —
# verify that this matches the CSV row order before comparing the two.
y_test = labels_image[labels_image['split']=="test"]['class'].values

In [None]:
# f1_score is imported explicitly so the cell does not rely on a star import providing it
from sklearn.metrics import roc_curve,auc, roc_auc_score,accuracy_score, f1_score

# Binarise the predictions once at the 0.5 decision threshold
y_pred_labels = 1*(y_preds>0.5)
f1_score_test = f1_score(y_test, y_pred_labels)
accuracy_test = accuracy_score(y_test, y_pred_labels)
print(f"the f1_score test is {f1_score_test}, the accucary is {accuracy_test}")
# The ROC curve and its area use the raw scores, not the thresholded labels
fpr, tpr, threshold = roc_curve(y_test, y_preds)
roc_auc = auc(fpr, tpr)

In [None]:
# Store the test rows together with their predicted scores in df_preds.csv
test_rows = labels_image['split']=="test"
df_preds = labels_image[test_rows].copy()
df_preds["preds_proba"] = y_preds
df_preds.to_csv('df_preds.csv')

# ROC curve with the chance diagonal as reference, saved to disk and shown
plt.title('Receiver Operating Characteristic')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.xlim([0, 1])
plt.ylim([0, 1])
plt.plot(fpr, tpr, color='#285430', label = f'AUC = {roc_auc :0.2f}')
plt.plot([0, 1], [0, 1],color='#fed049',linestyle='--')
plt.legend(loc = 'lower right')
plt.savefig('auc_curve')
plt.show()
    

## With TEST Data

In [None]:
## Set paths
# All data folders live under <working directory>/data/ai_ready
ai_ready_dir = os.path.join(os.getcwd(), "data", "ai_ready")
img_folder = os.path.join(ai_ready_dir, "images")
train_img = os.path.join(ai_ready_dir, "train_images")
val_img = os.path.join(ai_ready_dir, "val_images")
test_img = os.path.join(ai_ready_dir, "test_images")
labels_image = os.path.join(ai_ready_dir, "x-ai_data.csv")
create_images = False

In [None]:
## Train, Val, and Test dataset
# Rebuild the three datasets with the project helpers train_set / val_set / test_set,
# each from its image folder plus the image size and batch size held in `config`.
train_ds = train_set(train_img, config.image_size, config.batch_size)
val_ds = val_set(val_img, config.image_size, config.batch_size)
test_ds = test_set(test_img, config.image_size, config.batch_size)

In [None]:
# Load a different saved model ('keras_model_theo.h5'), rebuild the test dataset
# and predict with the project helper `test_model` (last argument 1).
model = load_model('keras_model_theo.h5')

test_ds = test_set(test_img, config.image_size, config.batch_size)
y_preds = test_model(test_ds, model, 1)

In [None]:
# Hand the predictions to the project helper `output_preds`; the last argument is
# presumably the base name of the output it writes — confirm.
output_preds(y_preds, train_ds, test_ds, 'predictions')

In [None]:
# Round the predictions to integer class indices
predicted_class_indices= np.round(y_preds).astype(int)
# class_indices maps label -> index; invert it to translate indices back to labels
labels = {index: label for label, index in train_ds.class_indices.items()}
# Only column 0 of the prediction array is used
predictions = [labels[k] for k in predicted_class_indices[:,0]]

# One row per test file with its predicted class label
filenames=test_ds.filenames
results=pd.DataFrame({"filename":filenames,
                      "class_predicted":predictions})

# Keep only the file name. os.path.basename follows the platform's path
# separator and works at any folder depth, unlike splitting on '/' and taking
# the second component.
results['filename'] = results['filename'].map(os.path.basename)

In [None]:
# Display the results ordered by file name (returns a sorted copy; `results` itself is unchanged)
results.sort_values('filename', ascending=True)

In [None]:
# Display the rows whose predicted class label is the string '0'
results.loc[results['class_predicted']=='0']