DD KolektorSDD2

In [None]:
from numba import cuda 
# Fetch the CUDA device that numba currently has selected and reset it before
# TensorFlow is used below.
# NOTE(review): presumably this is meant to release GPU memory left over from
# a previous run of the notebook — confirm; it needs a CUDA-capable GPU.
device = cuda.get_current_device()
device.reset()

In [None]:
import datetime
import json
import os
import pickle
import time
import tensorflow as tf
import keras
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
from keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from keras.layers import (
    Activation,
    AveragePooling2D,
    Conv2D,
    Dense,
    Dropout,
    Flatten,
    MaxPooling2D,
    Rescaling,
    Input,
    BatchNormalization, 
    Add, 
    Activation
)
from keras.models import Model
from keras.models import Sequential
from keras.preprocessing import image
from PIL import Image, ImageFont
from sklearn.decomposition import PCA
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score,roc_auc_score,roc_curve
from sklearn.model_selection import GridSearchCV
from sklearn.tree import DecisionTreeClassifier

# Reduce TensorFlow's C++ log output (informational messages about the GPU).
# NOTE(review): TensorFlow has already been imported at this point; the
# variable is normally expected to be set before the import — confirm that it
# still has the intended effect here.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

# Set the random seed.
# The seed is applied to NumPy and TensorFlow (not just stored in a variable)
# so that weight initialisation and random augmentation are repeatable.
random_seed = 42
np.random.seed(random_seed)
tf.random.set_seed(random_seed)

# Dataset location and loading parameters shared by the cells below.
data_dir = "dataset/KolektorSDD2"

batch_size = 32
image_size = (512, 512)
input_shape = (512, 512, 3)  # image_size plus three colour channels
color_mode = "rgb"
seed_train_validation = 42
shuffle_value = True
validation_split = 0.25

In [None]:
# Set memory growth for GPU
# Every detected GPU is configured, and the loop is simply a no-op on a
# CPU-only machine (indexing gpus[0] would raise IndexError there).
gpus = tf.config.list_physical_devices("GPU")
for gpu in gpus:
    tf.config.experimental.set_memory_growth(gpu, True)

In [None]:
# Load the dataset
# NOTE(review): the values below override data_dir / validation_split from the
# configuration cell above (KolektorSDD2, 0.25) — confirm which dataset and
# split this notebook is meant to use.
data_dir = "dataset/casting_512x512"

batch_size = 32
image_size = (512, 512)
seed_train_validation = 42
shuffle_value = True
validation_split = 0.3

# Arguments shared by the training and validation loaders. Both calls use the
# same seed, validation_split and batch_size so the two subsets come from one
# consistent split of the directory.
common_loader_args = dict(
    directory=data_dir,
    labels='inferred',
    label_mode='int',
    batch_size=batch_size,
    image_size=image_size,
    validation_split=validation_split,
    seed=seed_train_validation,
    color_mode=color_mode,
    shuffle=shuffle_value,
)

train_ds = tf.keras.utils.image_dataset_from_directory(
    subset="training", **common_loader_args
)
val_ds = tf.keras.utils.image_dataset_from_directory(
    subset="validation", **common_loader_args
)

# Carve a test set out of the validation subset: the first two thirds of its
# batches become the test set, the remaining third stays for validation.
# NOTE(review): with shuffle=True the underlying dataset may be reshuffled on
# each iteration, in which case take()/skip() would not yield stable, disjoint
# subsets — confirm.
val_batches = val_ds.cardinality()
test_batches = (2 * val_batches) // 3
test_ds = val_ds.take(test_batches)
val_ds = val_ds.skip(test_batches)

In [None]:
# Preview the first nine images of one training batch in a 3x3 grid, each
# titled with its integer label.
plt.figure(figsize=(10, 10))
for images, labels in train_ds.take(1):
    for position in range(1, 10):
        ax = plt.subplot(3, 3, position)
        sample = images[position - 1].numpy().astype("uint8")
        ax.imshow(sample, cmap='gray')
        ax.set_title(int(labels[position - 1]))
        ax.axis("off")
        

In [None]:
# Data augmentation layer
# Random rotation, zoom and vertical/horizontal flips, bundled in one
# Sequential block that the models below place in front of their first layer.
# The layers come from keras.layers (like the Rescaling import at the top of
# the file) instead of the deprecated experimental.preprocessing namespace.
data_augmentation = keras.Sequential(
    [
        keras.layers.RandomRotation(0.1),
        keras.layers.RandomZoom(0.1),
        keras.layers.RandomFlip("vertical"),
        keras.layers.RandomFlip("horizontal"),
    ]
)


In [None]:
# Create a rescaling layer that multiplies pixel values by 1/255.
scaling_layer = Rescaling(1.0 / 255)

# Scale the datasets (labels pass through unchanged).
train_data_scaled = train_ds.map(lambda x, y: (scaling_layer(x), y))
test_data_scaled = test_ds.map(lambda x, y: (scaling_layer(x), y))
val_data_scaled = val_ds.map(lambda x, y: (scaling_layer(x), y))

# Test that the scaling has worked by printing the min and max value from one
# of the images.
image_batch, labels_batch = next(iter(train_data_scaled))

# Named sample_image so it does not shadow the `image` module imported from
# keras.preprocessing at the top of the file.
sample_image = image_batch[1]
print(np.min(sample_image), np.max(sample_image))

In [None]:
# Autotune the datasets
AUTOTUNE = tf.data.AUTOTUNE


def _cache_and_prefetch(dataset):
    """Return *dataset* with cache() followed by an autotuned prefetch()."""
    return dataset.cache().prefetch(buffer_size=AUTOTUNE)


train_data_scaled = _cache_and_prefetch(train_data_scaled)
test_data_scaled = _cache_and_prefetch(test_data_scaled)
val_data_scaled = _cache_and_prefetch(val_data_scaled)


In [None]:
# define callbacks
# All three callbacks watch the validation loss with the same mode/verbosity.
watch_val_loss = {"monitor": "val_loss", "mode": "auto", "verbose": 1}

# Stop after 15 epochs without improvement and roll back to the best weights.
early_stopping = EarlyStopping(
    patience=15,
    restore_best_weights=True,
    **watch_val_loss,
)

# Multiply the learning rate by 0.1 after 5 epochs without improvement.
reduce_lr = ReduceLROnPlateau(
    factor=0.1,
    patience=5,
    min_delta=0.0001,
    cooldown=0,
    min_lr=0,
    **watch_val_loss,
)

# Save the full model (not only the weights) whenever val_loss improves,
# checked once per epoch.
model_checkpoint = ModelCheckpoint(
    filepath="models/model.{epoch:02d}-{val_loss:.2f}.h5",
    save_best_only=True,
    save_weights_only=False,
    save_freq="epoch",
    **watch_val_loss,
)

callback_list = [early_stopping, reduce_lr, model_checkpoint]


Models 

In [None]:
#simple_conv_model
# Baseline CNN: four Conv2D + MaxPooling2D stages (32/64/128/256 filters)
# followed by a dense head with a single sigmoid unit for binary output.
# The input shape is declared with an explicit Input entry; passing
# `input_shape` to a Conv2D that is not the first layer does not define it.
simple_conv_model = Sequential([
    Input(shape=input_shape),
    data_augmentation,

    Conv2D(32, 3, padding='same', activation='relu'),
    MaxPooling2D(),

    Conv2D(64, 3, padding='same', activation='relu'),
    MaxPooling2D(),

    Conv2D(128, 3, padding='same', activation='relu'),
    MaxPooling2D(),

    Conv2D(256, 3, padding='same', activation='relu'),
    MaxPooling2D(),
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.2),
    Dense(1, activation='sigmoid')
])

# Compile the simple_conv_model
simple_conv_model.compile(optimizer='adam',
                loss='binary_crossentropy',
                metrics=['accuracy'])

# Print the simple_conv_model summary
# simple_conv_model.summary()

# Use a checkpoint dedicated to this model: a ModelCheckpoint instance shared
# between models would carry its best val_loss over from an earlier fit() and
# write every model under the same "models/model.*" file names.
simple_conv_callbacks = [
    early_stopping,
    reduce_lr,
    ModelCheckpoint(
        "models/simple_conv.{epoch:02d}-{val_loss:.2f}.h5",
        monitor="val_loss",
        verbose=1,
        save_best_only=True,
        save_weights_only=False,
        mode="auto",
        save_freq="epoch",
    ),
]

# Train the simple_conv_model
epochs = 100
history = simple_conv_model.fit(
    train_data_scaled,
    validation_data=val_data_scaled,
    epochs=epochs,
    callbacks=simple_conv_callbacks,
)

# Evaluate the simple_conv_model on the test data using `evaluate`.
# The dataset is already batched, so no batch_size is passed.
print("Evaluate on test data")
results = simple_conv_model.evaluate(test_data_scaled)
print("test loss, test acc:", results)

# Plot the simple_conv_model accuracy and loss curves using `matplotlib`
plt.figure(figsize=(10, 5))
for position, metric in enumerate(("accuracy", "loss"), start=1):
    plt.subplot(1, 2, position)
    plt.plot(history.history[metric], label=f"train_{metric}")
    plt.plot(history.history[f"val_{metric}"], label=f"val_{metric}")
    plt.xlabel("Epoch")
    plt.ylabel(metric.capitalize())
    plt.legend()
plt.show()


In [11]:
# keras vgg16 model
from keras.applications import VGG16
from keras.applications.vgg16 import preprocess_input as vgg16_preprocess_input
from keras.layers import GlobalAveragePooling2D, Lambda

# Load the VGG16 model without the top classification layer
vgg16_base = VGG16(weights='imagenet', include_top=False, input_shape=input_shape)

# Freeze the pre-trained layers in the VGG16 model
vgg16_base.trainable = False

# Create a sequential model.
# The datasets are scaled to [0, 1], but the ImageNet VGG16 weights expect the
# input produced by vgg16.preprocess_input on 0-255 RGB images. The pixels are
# therefore scaled back to 0-255 and passed through that function before they
# reach the frozen base.
vgg16_model = Sequential([
    data_augmentation,
    Rescaling(255.0),
    Lambda(vgg16_preprocess_input, name="vgg16_preprocess"),
    vgg16_base,  # Add the VGG16 base model
    GlobalAveragePooling2D(),
    Dense(128, activation='relu'),
    Dropout(0.2),
    Dense(1, activation='sigmoid')
])

# Compile the VGG16 model
vgg16_model.compile(optimizer='adam',
                    loss='binary_crossentropy',
                    metrics=['accuracy'])

# Use a checkpoint dedicated to this model: a ModelCheckpoint instance shared
# between models would carry its best val_loss over from an earlier fit() and
# write every model under the same "models/model.*" file names.
vgg16_callbacks = [
    early_stopping,
    reduce_lr,
    ModelCheckpoint(
        "models/vgg16.{epoch:02d}-{val_loss:.2f}.h5",
        monitor="val_loss",
        verbose=1,
        save_best_only=True,
        save_weights_only=False,
        mode="auto",
        save_freq="epoch",
    ),
]

# Train the VGG16 model
epochs = 100
history = vgg16_model.fit(
    train_data_scaled,
    validation_data=val_data_scaled,
    epochs=epochs,
    callbacks=vgg16_callbacks,
)

# Evaluate the VGG16 model on the test data using `evaluate`.
# The dataset is already batched, so no batch_size is passed.
print("Evaluate on test data")
results = vgg16_model.evaluate(test_data_scaled)
print("test loss, test acc:", results)

# Plot the VGG16 model accuracy and loss curves using `matplotlib`
plt.figure(figsize=(10, 5))
for position, metric in enumerate(("accuracy", "loss"), start=1):
    plt.subplot(1, 2, position)
    plt.plot(history.history[metric], label=f"train_{metric}")
    plt.plot(history.history[f"val_{metric}"], label=f"val_{metric}")
    plt.xlabel("Epoch")
    plt.ylabel(metric.capitalize())
    plt.legend()
plt.show()

KeyboardInterrupt: 

In [None]:
# keras Inception model
from keras.applications import InceptionV3
from keras.layers import GlobalAveragePooling2D

# Load the InceptionV3 model without the top classification layer
inception_base = InceptionV3(weights='imagenet', include_top=False, input_shape=input_shape)

# Freeze the pre-trained layers in the InceptionV3 model
inception_base.trainable = False

# Create a sequential model.
# The datasets are scaled to [0, 1], while the ImageNet InceptionV3 weights
# expect inputs in [-1, 1]; Rescaling(2.0, offset=-1.0) maps one range onto
# the other before the frozen base.
inception_model = Sequential([
    data_augmentation,
    Rescaling(2.0, offset=-1.0),
    inception_base,  # Add the InceptionV3 base model
    GlobalAveragePooling2D(),
    Dense(128, activation='relu'),
    Dropout(0.2),
    Dense(1, activation='sigmoid')
])

# Compile the InceptionV3 model
inception_model.compile(optimizer='adam',
                    loss='binary_crossentropy',
                    metrics=['accuracy'])

# Use a checkpoint dedicated to this model: a ModelCheckpoint instance shared
# between models would carry its best val_loss over from an earlier fit() and
# write every model under the same "models/model.*" file names.
inception_callbacks = [
    early_stopping,
    reduce_lr,
    ModelCheckpoint(
        "models/inception_v3.{epoch:02d}-{val_loss:.2f}.h5",
        monitor="val_loss",
        verbose=1,
        save_best_only=True,
        save_weights_only=False,
        mode="auto",
        save_freq="epoch",
    ),
]

# Train the InceptionV3 model
epochs = 100
history = inception_model.fit(
    train_data_scaled,
    validation_data=val_data_scaled,
    epochs=epochs,
    callbacks=inception_callbacks,
)

# Evaluate the InceptionV3 model on the test data using `evaluate`.
# The dataset is already batched, so no batch_size is passed.
print("Evaluate on test data")
results = inception_model.evaluate(test_data_scaled)
print("test loss, test acc:", results)

# Plot the InceptionV3 model accuracy and loss curves using `matplotlib`
plt.figure(figsize=(10, 5))
for position, metric in enumerate(("accuracy", "loss"), start=1):
    plt.subplot(1, 2, position)
    plt.plot(history.history[metric], label=f"train_{metric}")
    plt.plot(history.history[f"val_{metric}"], label=f"val_{metric}")
    plt.xlabel("Epoch")
    plt.ylabel(metric.capitalize())
    plt.legend()
plt.show()



In [None]:
# keras mobilenet model
from keras.applications import MobileNet
from keras.layers import GlobalAveragePooling2D

# Load the MobileNet model without the top classification layer
mobilenet_base = MobileNet(weights='imagenet', include_top=False, input_shape=input_shape)

# Freeze the pre-trained layers in the MobileNet model
mobilenet_base.trainable = False

# Create a sequential model.
# The datasets are scaled to [0, 1], while the ImageNet MobileNet weights
# expect inputs in [-1, 1]; Rescaling(2.0, offset=-1.0) maps one range onto
# the other before the frozen base.
mobilenet_model = Sequential([
    data_augmentation,
    Rescaling(2.0, offset=-1.0),
    mobilenet_base,  # Add the MobileNet base model
    GlobalAveragePooling2D(),
    Dense(128, activation='relu'),
    Dropout(0.2),
    Dense(1, activation='sigmoid')
])

# Compile the MobileNet model
mobilenet_model.compile(optimizer='adam',
                    loss='binary_crossentropy',
                    metrics=['accuracy'])

# Use a checkpoint dedicated to this model: a ModelCheckpoint instance shared
# between models would carry its best val_loss over from an earlier fit() and
# write every model under the same "models/model.*" file names.
mobilenet_callbacks = [
    early_stopping,
    reduce_lr,
    ModelCheckpoint(
        "models/mobilenet.{epoch:02d}-{val_loss:.2f}.h5",
        monitor="val_loss",
        verbose=1,
        save_best_only=True,
        save_weights_only=False,
        mode="auto",
        save_freq="epoch",
    ),
]

# Train the MobileNet model
epochs = 100
history = mobilenet_model.fit(
    train_data_scaled,
    validation_data=val_data_scaled,
    epochs=epochs,
    callbacks=mobilenet_callbacks,
)

# Evaluate the MobileNet model on the test data using `evaluate`.
# The dataset is already batched, so no batch_size is passed.
print("Evaluate on test data")
results = mobilenet_model.evaluate(test_data_scaled)
print("test loss, test acc:", results)

# Plot the MobileNet model accuracy and loss curves using `matplotlib`
plt.figure(figsize=(10, 5))
for position, metric in enumerate(("accuracy", "loss"), start=1):
    plt.subplot(1, 2, position)
    plt.plot(history.history[metric], label=f"train_{metric}")
    plt.plot(history.history[f"val_{metric}"], label=f"val_{metric}")
    plt.xlabel("Epoch")
    plt.ylabel(metric.capitalize())
    plt.legend()
plt.show()


In [None]:
# keras resnet model
from keras.applications import ResNet50
from keras.applications.resnet import preprocess_input as resnet_preprocess_input
from keras.layers import GlobalAveragePooling2D, Lambda

# Load the ResNet50 model without the top classification layer
resnet_base = ResNet50(weights='imagenet', include_top=False, input_shape=input_shape)

# Freeze the pre-trained layers in the ResNet50 model
resnet_base.trainable = False

# Create a sequential model.
# The datasets are scaled to [0, 1], but the ImageNet ResNet50 weights expect
# the input produced by resnet.preprocess_input on 0-255 RGB images. The
# pixels are therefore scaled back to 0-255 and passed through that function
# before they reach the frozen base.
resnet_model = Sequential([
    data_augmentation,
    Rescaling(255.0),
    Lambda(resnet_preprocess_input, name="resnet_preprocess"),
    resnet_base,  # Add the ResNet50 base model
    GlobalAveragePooling2D(),
    Dense(128, activation='relu'),
    Dropout(0.2),
    Dense(1, activation='sigmoid')
])

# Compile the ResNet50 model
resnet_model.compile(optimizer='adam',
                    loss='binary_crossentropy',
                    metrics=['accuracy'])

# Use a checkpoint dedicated to this model: a ModelCheckpoint instance shared
# between models would carry its best val_loss over from an earlier fit() and
# write every model under the same "models/model.*" file names.
resnet_callbacks = [
    early_stopping,
    reduce_lr,
    ModelCheckpoint(
        "models/resnet50.{epoch:02d}-{val_loss:.2f}.h5",
        monitor="val_loss",
        verbose=1,
        save_best_only=True,
        save_weights_only=False,
        mode="auto",
        save_freq="epoch",
    ),
]

# Train the ResNet50 model
epochs = 100
history = resnet_model.fit(
    train_data_scaled,
    validation_data=val_data_scaled,
    epochs=epochs,
    callbacks=resnet_callbacks,
)

# Evaluate the ResNet50 model on the test data using `evaluate`.
# The dataset is already batched, so no batch_size is passed.
print("Evaluate on test data")
results = resnet_model.evaluate(test_data_scaled)
print("test loss, test acc:", results)

# Plot the ResNet50 model accuracy and loss curves using `matplotlib`
plt.figure(figsize=(10, 5))
for position, metric in enumerate(("accuracy", "loss"), start=1):
    plt.subplot(1, 2, position)
    plt.plot(history.history[metric], label=f"train_{metric}")
    plt.plot(history.history[f"val_{metric}"], label=f"val_{metric}")
    plt.xlabel("Epoch")
    plt.ylabel(metric.capitalize())
    plt.legend()
plt.show()
