In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found under the Kaggle input directory.
for folder, _subdirs, names in os.walk('/kaggle/input'):
    for path in (os.path.join(folder, name) for name in names):
        print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
train_df = pd.read_csv('/kaggle/input/early-detection-of-3d-printing-issues/train.csv')

In [None]:
test_df = pd.read_csv('/kaggle/input/early-detection-of-3d-printing-issues/test.csv')

In [None]:
test_df.head()

In [None]:
train_df.head()

In [None]:
# Mapping between the label strings and their integer class ids (and back).
labels2id = {
    '0': 0,
    '1': 1
}

id2labels = {v: k for k, v in labels2id.items()}

# Class names in insertion order; this fixes the position of each class in the
# one-hot vector produced by label_encoder.
label_classes = labels2id.keys()

def label_encoder(x):
    """One-hot encode a single class label.

    Args:
        x: The label, given either as an integer (``1``, ``numpy.int64(1)``)
            or as an integer-like string (``'1'``).

    Returns:
        list[int]: One entry per class in ``label_classes`` order, with a 1 at
        the position of the matching class and 0 elsewhere. A label that
        matches no class yields all zeros.

    Raises:
        ValueError, TypeError: If ``x`` cannot be converted with ``int()``.
    """
    # Normalise once so that string labels ('1') match as well as integers;
    # comparing int(label) against a raw string would never be equal.
    target = int(x)
    return [1 if int(label) == target else 0 for label in label_classes]

# Replace every value of the label column with the list returned by label_encoder.
# NOTE: the column is overwritten in place, so this cell is not idempotent —
# reload train_df before running it a second time.
train_df['has_under_extrusion'] = train_df['has_under_extrusion'].map(label_encoder)

In [None]:
train_df.head()

In [None]:
pip install tensorflow_addons

In [None]:
pip install seaborn

In [None]:
pip install sklearn

In [None]:
pip install scikit-learn scipy matplotlib

In [None]:
import tensorflow as tf
import tensorflow_addons as tfa
from tensorflow import keras
from tensorflow.keras import layers
import numpy as np
import pandas as pd
from kaggle_datasets import KaggleDatasets
import matplotlib.pyplot as plt
import matplotlib.style as style
import seaborn as sns

import math, re, os, warnings
from PIL import Image

from functools import partial
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MultiLabelBinarizer

print("Tensorflow version " + tf.__version__)

In [None]:
# Choose a tf.distribute strategy that matches the available hardware.
try:
    # No arguments are passed to the resolver; it is expected to discover the
    # TPU from the environment (TPU_NAME, which Kaggle sets).
    tpu = tf.distribute.cluster_resolver.TPUClusterResolver()
    print(tpu.master())
except ValueError:
    # Raised when no TPU can be resolved.
    tpu = None

if not tpu:
    # Default TensorFlow strategy: works on CPU and on a single GPU.
    strategy = tf.distribute.get_strategy()
else:
    # Connect to the TPU cluster and initialise it before building the strategy.
    tf.config.experimental_connect_to_cluster(tpu)
    tf.tpu.experimental.initialize_tpu_system(tpu)
    strategy = tf.distribute.experimental.TPUStrategy(tpu)

print("replicas: ", strategy.num_replicas_in_sync)

In [None]:
# Global configuration shared by the input pipeline and the model cells.
image_size = [512,512]  # target size handed to tf.image.resize and the model input shape
batch_size = 16 * strategy.num_replicas_in_sync  # global batch size; scales with the number of replicas
channels = 3  # channel count requested from decode_jpeg and used in the model input shape
seed = 1111  # random_state of the train/validation split (rebound later in the model cell)
num_classes = 2  # number of units in the model's output layer and classes for the F1 metric
AUTOTUNE = tf.data.experimental.AUTOTUNE  # passed to map/prefetch so tf.data picks the parallelism

In [None]:
train_path = "/kaggle/input/early-detection-of-3d-printing-issues/images/"

In [None]:
from kaggle_datasets import KaggleDatasets
GCS_PATH = KaggleDatasets().get_gcs_path('early-detection-of-3d-printing-issues')

In [None]:
train_path = GCS_PATH + '/images/'
# Important: the images must be loaded from the GCS path, otherwise training on the TPU will not work.

In [None]:
train_path

In [None]:
# Build the image paths from GCS_PATH instead of `train_path`: the split cell
# later rebinds `train_path` to the array of training paths, so a re-run of this
# cell after the split would no longer work if it depended on that name.
image_root = GCS_PATH + '/images/'
image_file_path = np.array([image_root + rel_path for rel_path in train_df['img_path']])
# One label entry per row of train_df, in the same order as image_file_path.
labels = train_df['has_under_extrusion'].to_numpy()

In [None]:
# Test images live under the same GCS image folder. GCS_PATH is used directly
# because `train_path` is rebound to an array of paths by the split cell.
image_file_path_test = np.array([GCS_PATH + '/images/' + rel_path for rel_path in test_df['img_path']])

In [None]:
from sklearn.model_selection import StratifiedShuffleSplit

# One stratified shuffle split holding out 25% of the samples for validation.
split1 = StratifiedShuffleSplit(n_splits=1, test_size=0.25, random_state=seed)

# n_splits=1, so the generator yields exactly one (train, valid) index pair.
train_index, valid_index = next(split1.split(image_file_path, labels))

# NOTE: `train_path` is rebound here from a folder prefix to an array of paths.
train_path = image_file_path[train_index]
valid_path = image_file_path[valid_index]
train_labels = labels[train_index]
valid_labels = labels[valid_index]

In [None]:
print('train : ',len(train_path),'---',len(train_labels))
print('val : ',len(valid_path),'---',len(valid_labels))

In [None]:
train_path[:3]

In [None]:
train_labels[:3]

In [None]:
# Wrap every label in a tf.constant, keeping the results in plain Python lists.
train_labels = list(map(tf.constant, train_labels))
valid_labels = list(map(tf.constant, valid_labels))

In [None]:
def load_image(image_path, label):
    """Load one JPEG file and pair it with its label.

    The file at ``image_path`` is read, decoded with ``channels`` channels,
    converted to float32 and resized to ``image_size``.

    Args:
        image_path: Path of the JPEG file to read.
        label: Label belonging to the image; returned unchanged.

    Returns:
        tuple: ``(image, label)`` with the resized float32 image tensor.
    """
    raw_bytes = tf.io.read_file(image_path)
    decoded = tf.io.decode_jpeg(raw_bytes, channels=channels)
    as_float = tf.image.convert_image_dtype(decoded, tf.float32)
    return tf.image.resize(as_float, image_size), label


# Random geometric augmentations, applied in the order listed:
# flip -> rotation -> zoom -> translation.
_preprocessing = layers.experimental.preprocessing
_random_transforms = [
    _preprocessing.RandomFlip(mode="horizontal_and_vertical"),
    _preprocessing.RandomRotation(factor=0.1, fill_mode='reflect'),
    _preprocessing.RandomZoom(height_factor=0.2, width_factor=0.2),
    _preprocessing.RandomTranslation(height_factor=0.15, width_factor=0.15, fill_mode='nearest'),
]
augmentation = keras.Sequential(_random_transforms)

def augment(image, label):
    """Apply the random ``augmentation`` pipeline to a single image.

    Args:
        image: A single (unbatched) image tensor.
        label: Label belonging to the image; returned unchanged.

    Returns:
        tuple: ``(augmented_image, label)``.
    """
    # The Keras preprocessing layers operate on batches, so add a batch axis of
    # size 1 and strip it again afterwards.
    batched = tf.expand_dims(image, axis=0)
    # training=True is passed explicitly: this function runs inside
    # tf.data.map, outside of model.fit, where the random layers may otherwise
    # behave as identity depending on the Keras version.
    augmented = augmentation(batched, training=True)[0]
    # Colour jitter (brightness / contrast / saturation / hue) and random
    # erasing were experimented with and are intentionally not applied.
    return augmented, label

In [None]:
# Training pipeline: load -> cache -> shuffle -> augment -> batch -> prefetch.
train_ds = tf.data.Dataset.from_tensor_slices((train_path, train_labels))
train_ds = train_ds.map(load_image, num_parallel_calls=AUTOTUNE)
# Cache the decoded images BEFORE the random augmentation. Caching after
# `augment` would store the first epoch's augmented images and replay exactly
# the same ones in every later epoch, defeating the augmentation.
train_ds = train_ds.cache().shuffle(2048)
train_ds = train_ds.map(augment, num_parallel_calls=AUTOTUNE)
train_ds = train_ds.batch(batch_size).prefetch(AUTOTUNE)


# Validation pipeline: deterministic (no shuffle, no augmentation), so caching
# the loaded images directly is safe.
val_ds = tf.data.Dataset.from_tensor_slices((valid_path, valid_labels))
val_ds = val_ds.map(load_image, num_parallel_calls=AUTOTUNE)
val_ds = val_ds.cache().batch(batch_size).prefetch(AUTOTUNE)

In [None]:
train_ds

In [None]:
def plot_learning_curve(history):
    """Plot training and validation curves for every tracked metric.

    Args:
        history: Object whose ``history`` attribute maps metric names to
            per-epoch values (e.g. the return value of ``model.fit``). It must
            contain 'loss', 'accuracy', 'precision', 'recall' and 'f1_score'
            together with their 'val_'-prefixed counterparts.

    Raises:
        KeyError: If one of the expected metrics is missing from the history.
    """
    records = history.history
    metrics_names = ['loss','accuracy','precision','recall','f1_score']
    plt.figure(figsize=(8, 35))
    for i, name in enumerate(metrics_names):
        plt.subplot(len(metrics_names), 1, i + 1)
        plt.plot(records[name], label='training ' + name)
        plt.plot(records['val_' + name], label='validation ' + name)
        plt.legend(loc='lower right')
        plt.ylabel(name)
        if name == 'loss':
            # The loss has no upper bound; a fixed [0, 1] window would clip
            # any part of the curve above 1, so only pin the lower limit.
            plt.ylim(bottom=0)
        else:
            # The remaining metrics are ratios in [0, 1].
            plt.ylim([0, 1])
        plt.title('training and validation ' + name)
    plt.show()

In [None]:
# Stage 1: train a new classification head on top of a frozen ResNet101V2.
with strategy.scope():
    # NOTE: this rebinds the notebook-level `seed` used earlier for the split.
    seed = 1200
    tf.random.set_seed(seed)
    base_model = keras.applications.ResNet101V2(weights='imagenet', include_top=False,
                                                input_shape=(*image_size, channels))
    print(base_model.input)
    print(base_model.output)

    # Freeze the backbone so only the new head is trained in this stage.
    base_model.trainable = False

    inputs = layers.Input(shape=[*image_size, channels])
    # The input pipeline yields float images in [0, 1] (convert_image_dtype),
    # whereas the ResNetV2 ImageNet weights expect inputs scaled to [-1, 1].
    # Rescale inside the model so training and prediction stay consistent.
    x = layers.experimental.preprocessing.Rescaling(scale=2.0, offset=-1.0)(inputs)
    # training=False keeps the backbone's BatchNorm layers in inference mode,
    # also after the backbone is unfrozen for fine-tuning.
    x = base_model(x, training=False)
    x = layers.GlobalAveragePooling2D()(x)
    x = layers.Dense(256, activation='relu')(x)
    x = layers.Dropout(0.2)(x)
    x = layers.BatchNormalization()(x)
    x = layers.Dense(128, activation='relu')(x)
    x = layers.Dropout(0.2)(x)
    x = layers.BatchNormalization()(x)
    x = layers.Dense(64, activation='relu')(x)
    x = layers.Dropout(0.1)(x)
    x = layers.BatchNormalization()(x)
    # One sigmoid unit per class, trained against the one-hot label vectors.
    outputs = layers.Dense(num_classes, activation='sigmoid')(x)

    model = keras.Model(inputs, outputs)

    model.summary()

    # All callbacks track the validation macro-F1 logged as 'val_f1_score'.
    # EarlyStopping must be given that log key as a string; a metric object is
    # never found in the logs, which silently disables early stopping.
    # NOTE(review): ReduceLROnPlateau's patience (4) exceeds EarlyStopping's
    # (3), so the LR reduction is unlikely to fire before training stops.
    callbacks = [
        keras.callbacks.EarlyStopping(monitor='val_f1_score', patience=3, mode='max',
                                      restore_best_weights=True),
        keras.callbacks.ModelCheckpoint('ResNet101V2.h5', monitor='val_f1_score', mode='max',
                                        save_best_only=True, verbose=1),
        keras.callbacks.ReduceLROnPlateau(factor=0.5, patience=4, verbose=1, min_delta=0.00001,
                                          monitor='val_f1_score', mode='max'),
    ]

    # `learning_rate` replaces the deprecated `lr` keyword.
    model.compile(loss=tf.keras.losses.BinaryCrossentropy(),
                  optimizer=keras.optimizers.Adam(learning_rate=0.001),
                  metrics=[
                      'accuracy',
                      tf.keras.metrics.Precision(name='precision'),
                      tf.keras.metrics.Recall(name='recall'),
                      tfa.metrics.F1Score(num_classes=num_classes, average='macro', name='f1_score')])

    history = model.fit(train_ds, epochs=25, validation_data=val_ds, callbacks=callbacks)

In [None]:
def plot_learning_curve(history):
    """Plot training and validation curves for every tracked metric.

    Args:
        history: Object whose ``history`` attribute maps metric names to
            per-epoch values (e.g. the return value of ``model.fit``). It must
            contain 'loss', 'accuracy', 'precision', 'recall' and 'f1_score'
            together with their 'val_'-prefixed counterparts.

    Raises:
        KeyError: If one of the expected metrics is missing from the history.
    """
    records = history.history
    metrics_names = ['loss','accuracy','precision','recall','f1_score']
    plt.figure(figsize=(8, 35))
    for i, name in enumerate(metrics_names):
        plt.subplot(len(metrics_names), 1, i + 1)
        plt.plot(records[name], label='training ' + name)
        plt.plot(records['val_' + name], label='validation ' + name)
        plt.legend(loc='lower right')
        plt.ylabel(name)
        if name == 'loss':
            # The loss has no upper bound; a fixed [0, 1] window would clip
            # any part of the curve above 1, so only pin the lower limit.
            plt.ylim(bottom=0)
        else:
            # The remaining metrics are ratios in [0, 1].
            plt.ylim([0, 1])
        plt.title('training and validation ' + name)
    plt.show()

In [None]:
plot_learning_curve(history)

In [None]:
# Stage 2: unfreeze the backbone and fine-tune the whole model.
with strategy.scope():

    base_model.trainable = True

    model.summary()

    # Changing `trainable` only takes effect after re-compiling. A very small
    # learning rate is used for fine-tuning the pretrained weights;
    # `learning_rate` replaces the deprecated `lr` keyword.
    model.compile(loss=tf.keras.losses.BinaryCrossentropy(),
                  optimizer=keras.optimizers.Adam(learning_rate=1e-6),
                  metrics=[
                      'accuracy',
                      tf.keras.metrics.Precision(name='precision'),
                      tf.keras.metrics.Recall(name='recall'),
                      tfa.metrics.F1Score(num_classes=num_classes, average='macro', name='f1_score')])

    # Continue epoch numbering right after the last completed stage-1 epoch.
    # history.epoch holds 0-based indices, so the next epoch to run is
    # len(history.epoch); using history.epoch[-1] would repeat the last index.
    history_fine = model.fit(train_ds,
                             epochs=35,
                             initial_epoch=len(history.epoch),
                             validation_data=val_ds, callbacks=callbacks)

In [None]:
plot_learning_curve(history_fine)

In [None]:
submission = pd.read_csv('/kaggle/input/early-detection-of-3d-printing-issues/sample_submission.csv')

In [None]:
def load_predict_image(image_path):
    """Load one JPEG file for inference.

    The file at ``image_path`` is read, decoded with ``channels`` channels,
    converted to float32 and resized to ``image_size``.

    Args:
        image_path: Path of the JPEG file to read.

    Returns:
        The resized float32 image tensor.
    """
    return tf.image.resize(
        tf.image.convert_image_dtype(
            tf.io.decode_jpeg(tf.io.read_file(image_path), channels=channels),
            tf.float32,
        ),
        image_size,
    )

In [None]:
test_path = GCS_PATH + '/images/'

In [None]:
test_file_path = np.array([test_path + i for i in submission['img_path'].to_list()])

In [None]:
# Inference pipeline: paths -> loaded images -> cache -> batches -> prefetch.
# No shuffling, so predictions keep the order of test_file_path.
test_ds = (
    tf.data.Dataset.from_tensor_slices(test_file_path)
    .map(load_predict_image, num_parallel_calls=AUTOTUNE)
    .cache()
    .batch(batch_size)
    .prefetch(AUTOTUNE)
)

In [None]:
test_ds

In [None]:
# test_ds is a tf.data.Dataset that is already batched, so `batch_size` must
# not be passed to predict(); Keras takes the batching from the dataset.
prediction = model.predict(test_ds)

In [None]:
predict = prediction[:,-1]

In [None]:
submission['has_under_extrusion'] = predict.tolist()

In [None]:
submission

In [None]:
submission.to_csv('submission_ResNet101V2.csv',index=False)