In [None]:
from pathlib import Path
import pandas as pd
import numpy as np
from tqdm import tqdm
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

import tensorflow as tf
from tensorflow.keras.utils import load_img
from tensorflow.keras.utils import img_to_array
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Flatten
from tensorflow.keras.layers import Conv2D, MaxPooling2D
from tensorflow.keras.metrics import AUC
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras import layers
from tensorflow.keras.callbacks import EarlyStopping


In [None]:
#Functions


from keras import backend as K

def oversampler(X, y):
    """Balance a binary dataset by duplicating positive samples.

    Samples whose label equals 1 are appended again, cycling through them in
    their original order, until they make up at least half of the returned
    dataset. The original samples stay first and keep their order.

    Parameters
    ----------
    X : array-like
        Samples, indexed along the first axis; same length as ``y``.
    y : array-like
        One-dimensional labels; entries equal to 1 are the positive class.

    Returns
    -------
    tuple of numpy.ndarray
        ``(X_balanced, y_balanced)``. When positives already account for at
        least half of ``y`` (or ``y`` is empty) the inputs are returned as
        arrays without any duplication.

    Raises
    ------
    ValueError
        If balancing is required but ``y`` contains no positive sample, since
        there would be nothing to duplicate.
    """
    X = np.asarray(X)
    y = np.asarray(y)

    n_samples = len(y)
    positive_idx = np.flatnonzero(y == 1)
    n_positive = len(positive_idx)

    # Smallest m with (n_positive + m) / (n_samples + m) >= 0.5.
    # Counting positives as integers avoids the float truncation of
    # int(y.mean() * len(y)), which can under-count (e.g. 1/49 * 49 < 1).
    n_missing = n_samples - 2 * n_positive
    if n_missing <= 0:
        return X, y

    if n_positive == 0:
        raise ValueError("oversampler: y contains no positive sample (label 1) to duplicate")

    # Cycle through the positives in their original order.
    extra_idx = positive_idx[np.arange(n_missing) % n_positive]
    # Single gather instead of growing X / y one element at a time.
    keep_idx = np.concatenate([np.arange(n_samples), extra_idx])
    return X[keep_idx], y[keep_idx]

def recall_m(y_true, y_pred):
    """Recall over the given tensors.

    Both the element-wise product ``y_true * y_pred`` and ``y_true`` are
    clipped to [0, 1] and rounded before being summed, so predictions are
    effectively thresholded at 0.5. ``K.epsilon()`` is added to the
    denominator to avoid dividing by zero.
    """
    eps = K.epsilon()
    hit_mask = K.round(K.clip(y_true * y_pred, 0, 1))
    actual_mask = K.round(K.clip(y_true, 0, 1))
    return K.sum(hit_mask) / (K.sum(actual_mask) + eps)

def precision_m(y_true, y_pred):
    """Precision over the given tensors.

    The element-wise product ``y_true * y_pred`` and ``y_pred`` are each
    clipped to [0, 1] and rounded before being summed, so predictions are
    effectively thresholded at 0.5. ``K.epsilon()`` is added to the
    denominator to avoid dividing by zero.
    """
    eps = K.epsilon()
    hit_mask = K.round(K.clip(y_true * y_pred, 0, 1))
    predicted_mask = K.round(K.clip(y_pred, 0, 1))
    return K.sum(hit_mask) / (K.sum(predicted_mask) + eps)

def f1_m(y_true, y_pred):
    """F1 score: harmonic mean of ``precision_m`` and ``recall_m``.

    ``K.epsilon()`` is added to the denominator so the result is defined when
    both precision and recall are zero.
    """
    p = precision_m(y_true, y_pred)
    r = recall_m(y_true, y_pred)
    ratio = (p * r) / (p + r + K.epsilon())
    return 2 * ratio

In [None]:
# Root folder of the challenge data, relative to the working directory.
# The trailing slash matters: file paths are built by plain string concatenation.
dataset_path = "hfactory_magic_folders/colas_data_challenge/computer_vision_challenge/dataset/"

In [None]:
# Label table for the training images: a `filename` column plus the label columns.
labels_csv_path = dataset_path + "labels_train.csv"
train_labels = pd.read_csv(labels_csv_path)


In [None]:
# Display the label table as a quick visual check of the loaded CSV.
train_labels

In [None]:
# Load every training image listed in the label table, resized to 224x224
# and scaled to [0, 1], then stack them into one array `X`.
image_dir = dataset_path + "train/"
train_image = []
# Iterate over the filenames themselves rather than positional labels, so the
# loop does not depend on `train_labels` having a default 0..n-1 index.
for filename in tqdm(train_labels["filename"]):
    # target_size is (height, width); channels come from color_mode (default "rgb").
    img = load_img(image_dir + filename, target_size=(224, 224))
    img = img_to_array(img)
    img = img / 255
    train_image.append(img)
X = np.array(train_image)

In [None]:
# Visual sanity check: show the third loaded image.
fig, ax = plt.subplots()
ax.imshow(X[2])

In [None]:
# Full label matrix: every column of the label table except `filename`.
label_columns = train_labels.drop(columns=["filename"])
y = np.array(label_columns)

### One Model for Each Output

In [None]:
from tensorflow.keras.applications.vgg16 import VGG16

# Pretrained VGG16 convolutional stack used as a fixed feature extractor.
# NOTE(review): the images are fed scaled to [0, 1], whereas the ImageNet
# weights are normally paired with `vgg16.preprocess_input` - confirm this
# is intended.
base_model = VGG16(
    weights='imagenet',          # ImageNet-pretrained weights
    include_top=False,           # leave out the fully connected classifier
    input_shape=(224, 224, 3),   # shape of our images
)

# Freeze every pretrained layer so that only layers added on top are trained.
for vgg_layer in base_model.layers:
    vgg_layer.trainable = False

In [None]:
# Flatten the output layer to 1 dimension
x = layers.Flatten()(base_model.output)

# Add a fully connected layer with 512 hidden units and ReLU activation
x = layers.Dense(512, activation='relu')(x)

# Add a dropout rate of 0.5
x = layers.Dropout(0.5)(x)

# Add a final sigmoid layer with 1 node for classification output
x = layers.Dense(1, activation='sigmoid')(x)

model = tf.keras.models.Model(base_model.input, x)

model.compile(optimizer = tf.keras.optimizers.RMSprop(learning_rate=0.0001), loss = 'binary_crossentropy',metrics = ['acc', AUC()])

In [None]:
y_fissure = np.array(train_labels["FISSURE"])
y_reparation = np.array(train_labels["REPARATION"])
y_longi = np.array(train_labels["FISSURE LONGITUDINALE"])
y_faience = np.array(train_labels["FAÏENCAGE"])
y_med = np.array(train_labels["MISE EN DALLE"])

In [None]:
print(y_fissure.mean())
print(y_reparation.mean())
print(y_longi.mean())
print(y_faience.mean())
print(y_med.mean())
print(len(y_fissure))

#### FISSURE

In [None]:
X_fissure_train, X_fissure_test, y_fissure_train, y_fissure_test = train_test_split(X, y_fissure, test_size=0.2)


In [None]:
X_fissure_train, y_fissure_train = oversampler(X_fissure_train, y_fissure_train) 

In [None]:
print(len(y_fissure_train))
print(y_fissure_train.mean())
print(X_fissure_train.shape)

In [None]:
vgg1hist = model.fit(X_fissure_train, y_fissure_train, validation_data = (X_fissure_test, y_fissure_test), epochs = 20, batch_size = 32)


#### REPARATION

In [None]:
X_reparation_train, X_reparation_test, y_reparation_train, y_reparation_test = train_test_split(X, y_reparation, test_size=0.2)


In [None]:
X_reparation_train, y_reparation_train = oversampler(X_reparation_train, y_reparation_train) 

In [None]:
print(len(y_reparation_train))
print(y_reparation_train.mean())
print(X_reparation_train.shape)

In [None]:
vgg2hist = model.fit(X_reparation_train, y_reparation_train, validation_data = (X_reparation_test, y_reparation_test), epochs = 20, batch_size = 32)


#### FISSURE LONGITUDINALE

In [None]:
X_longi_train, X_longi_test, y_longi_train, y_longi_test = train_test_split(X, y_longi, test_size=0.2)


In [None]:
X_longi_train, y_longi_train = oversampler(X_longi_train, y_longi_train) 

In [None]:
print(len(y_longi_train))
print(y_longi_train.mean())
print(X_longi_train.shape)

In [None]:
vgg3hist = model.fit(X_longi_train, y_longi_train, validation_data = (X_longi_test, y_longi_test), epochs = 20, batch_size = 32)


#### FAIENÇAGE

In [None]:
X_faience_train, X_faience_test, y_faience_train, y_faience_test = train_test_split(X, y_faience, test_size=0.2)


In [None]:
X_faience_train, y_faience_train = oversampler(X_faience_train, y_faience_train) 

In [None]:
print(len(y_faience_train))
print(y_faience_train.mean())
print(X_faience_train.shape)

In [None]:
vgg4hist = model.fit(X_faience_train, y_faience_train, validation_data = (X_faience_test, y_faience_test), epochs = 20, batch_size = 32)


#### MISE EN DALLE

In [None]:
X_med_train, X_med_test, y_med_train, y_med_test = train_test_split(X, y_med, test_size=0.2)

In [None]:
X_med_train, y_med_train = oversampler(X_med_train, y_med_train) 

In [None]:
print(len(y_med_train))
print(y_med_train.mean())
print(X_med_train.shape)

In [None]:
vgg5hist = model.fit(X_med_train, y_med_train, validation_data = (X_med_test, y_med_test), epochs = 20, batch_size = 32)
