In [None]:
import pandas as pd
from pathlib import Path
from collections import defaultdict
import matplotlib.pyplot as plt
import seaborn as sns
import os, sys
import shutil
from distutils.dir_util import copy_tree
from shutil import unpack_archive
from subprocess import check_output



In [None]:
# Create the working directory and copy the input data set into it.
# All later data manipulation and analysis happens under '../outputs'.

fromDirectory = '../input/severstal-steel-defect-detection/'
toDirectory = '../outputs'

# exist_ok makes the cell safe to re-run; a plain mkdir raised
# FileExistsError the second time the cell was executed.
os.makedirs(toDirectory, exist_ok=True)

# copy_tree returns the list of every copied file; keep it in a variable and
# show only the count instead of dumping thousands of paths as cell output.
copied_files = copy_tree(fromDirectory, toDirectory)
len(copied_files)

In [None]:
# Sanity check: print every entry found at the top level of '../outputs'.

for entry in os.listdir('../outputs'):
    print(entry)

In [None]:
# Load the training annotations from the working copy and preview the first rows.
train_csv = '../outputs/train.csv'
train_data = pd.read_csv(train_csv)
train_data.head()

In [None]:
# Single-column frame with the ImageId of every annotation row in train.csv.
# NOTE(review): presumably train.csv lists only images that have defects,
# which is what the later y/n categorization relies on -- confirm.
imageId = train_data['ImageId'].to_frame()
imageId.head()

In [None]:
# Summarize the ImageId frame (row count, non-null count, dtype, memory use).
imageId.info()

In [None]:
# Annotation rows per defect class, rarest class first; missing ClassIds
# (NaN) are counted as their own entry.
class_counts = train_data["ClassId"].value_counts(dropna=False, ascending=True)
class_counts

In [None]:
# Visualize the number of annotation rows per defect class on a bar plot.

defect_visual = train_data["ClassId"].value_counts()
plt.figure(figsize=(7,4))
# x and y are passed by keyword: seaborn deprecated positional data arguments
# in 0.11 and rejects them from 0.12 on; keywords work on every version.
sns.barplot(x=defect_visual.index, y=defect_visual.values, alpha=0.8)
plt.title("Number of Steel Defect in Train Dataset")
plt.xlabel("Defect Class")
plt.ylabel("Defect Count")
# Render the figure explicitly so the cell does not echo the Text repr of ylabel.
plt.show()

In [None]:
images_path = Path("../outputs/train_images")
# os.listdir returns entries in arbitrary order; sorting gives every later
# step (notably the train/validation split) a stable input order.
images = sorted(os.listdir(images_path))

# No. of Images in Training Folder
len(images)

In [None]:
# Create Validation Set from Training Set
from sklearn.model_selection import train_test_split

# 80 % of the file names go to training, the rest to validation.
# A fixed random_state means the same input list always yields the same split,
# so results can be reproduced after a kernel restart.
train, val = train_test_split(images, train_size=0.8, random_state=42)

In [None]:
# No. of Images Used for Training
# (number of file names assigned to the training side of the split)
len(train)

In [None]:
# Create Validation Folder

# mkdir returns None, so assigning its result left val_folder bound to None.
# Keep the path itself and create the directory; exist_ok makes the cell
# re-runnable instead of raising FileExistsError.
val_folder = Path('../outputs/val_images')
val_folder.mkdir(exist_ok=True)

In [None]:
# Confirm the validation folder now shows up among the entries of '../outputs'.
for entry in os.listdir('../outputs'):
    print(entry)

In [None]:
# Path objects for the two image roots; they are later handed to os.walk and
# to the Keras directory iterators.
val_folder = Path("../outputs/val_images")
train_folder = Path("../outputs/train_images")

In [None]:
# Move Validation Images to Validation Folder
# `val` is the complement of `train` within `images` (both come from the same
# train_test_split call), so iterate it directly instead of scanning the whole
# `train` list once per image.
for i in val:
    old_path = "../outputs/train_images/" + i
    new_path = '../outputs/val_images/' + i
    shutil.move(old_path, new_path)

In [None]:
# Create Folders to Separate Images With ('y') and Without ('n') Defects
# One loop instead of four copy-pasted calls; exist_ok keeps the cell
# re-runnable. Parents are not created on purpose: a missing split folder
# should still fail loudly.
for split_dir in ("../outputs/train_images", "../outputs/val_images"):
    for label in ("y", "n"):
        (Path(split_dir) / label).mkdir(exist_ok=True)

In [None]:
# Extract imageId with Defects into an Array
# Direct column access replaces the loop over DataFrame.iteritems(), which was
# deprecated in pandas 1.5 and removed in 2.0. The loop only ever kept the
# values of the frame's single 'ImageId' column, so the result is the same.
values = imageId['ImageId'].values

values

In [None]:
# Categorize Images in Training Folder
# Files whose name appears among the annotated ImageIds go to 'y', all others
# to 'n'. The lookup set is built once: `in` on the array compares against
# every element for each file, a set membership test does not.
defect_ids = set(values)
for i in train:
    old_path = '../outputs/train_images/' + i
    if i in defect_ids:
        new_path = '../outputs/train_images/y/' + i
    else:
        new_path = '../outputs/train_images/n/' + i
    shutil.move(old_path, new_path)

In [None]:
# Categorize Images in Validation Folder
# Files whose name appears among the annotated ImageIds go to 'y', all others
# to 'n'. The lookup set is built once: `in` on the array compares against
# every element for each file, a set membership test does not.
defect_ids = set(values)
for i in val:
    old_path = '../outputs/val_images/' + i
    if i in defect_ids:
        new_path = '../outputs/val_images/y/' + i
    else:
        new_path = '../outputs/val_images/n/' + i
    shutil.move(old_path, new_path)

In [None]:
# Walk the validation folder and report, per directory, how many files sit
# directly inside it.
print("Directory, Number of Files")
for dirpath, _dirnames, filenames in os.walk(val_folder):
    file_count = len(filenames)
    print(dirpath, file_count)

In [None]:
# Walk the training folder and report, per directory, how many files sit
# directly inside it.
print("Directory, Number of Files")
for dirpath, _dirnames, filenames in os.walk(train_folder):
    file_count = len(filenames)
    print(dirpath, file_count)

In [None]:
from keras.preprocessing.image import ImageDataGenerator
from keras import backend as K

def recall_m(y_true, y_pred):
    """Recall of the rounded predictions: true positives / actual positives.

    Both tensors are clipped to [0, 1] and rounded before counting, so a
    prediction counts as positive once it rounds to 1. K.epsilon() in the
    denominator avoids a division by zero when there are no positives.
    NOTE(review): when used as a Keras metric this is presumably evaluated
    per batch and averaged by the framework -- confirm.
    """
    hits = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    actual_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
    return hits / (actual_positives + K.epsilon())

def precision_m(y_true, y_pred):
    """Precision of the rounded predictions: true positives / predicted positives.

    Both tensors are clipped to [0, 1] and rounded before counting, so a
    prediction counts as positive once it rounds to 1. K.epsilon() in the
    denominator avoids a division by zero when nothing is predicted positive.
    NOTE(review): when used as a Keras metric this is presumably evaluated
    per batch and averaged by the framework -- confirm.
    """
    hits = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    flagged = K.sum(K.round(K.clip(y_pred, 0, 1)))
    return hits / (flagged + K.epsilon())

def f1_m(y_true, y_pred):
    """F1 score: harmonic mean of precision_m and recall_m on the same tensors.

    K.epsilon() is added to the denominator so the result is 0 rather than
    NaN when precision and recall are both 0.
    """
    p = precision_m(y_true, y_pred)
    r = recall_m(y_true, y_pred)
    harmonic_half = (p * r) / (p + r + K.epsilon())
    return 2 * harmonic_half

In [None]:
# Every image is rescaled by 1./255. Training images additionally get random
# augmentation (rotation, shifts, shear, zoom, flips); validation images are
# only rescaled.
augmentation_options = dict(
    rotation_range=40,
    width_shift_range=0.1,
    height_shift_range=0.1,
    rescale=1./255,
    shear_range=0.1,
    zoom_range=0.1,
    horizontal_flip=True,
    vertical_flip=True,
    fill_mode='nearest',
)
train_datagen = ImageDataGenerator(**augmentation_options)

test_datagen = ImageDataGenerator(rescale=1./255)

In [None]:
bs = 24
# Keras `target_size` is (height, width).
img_size = (256, 512)

# Settings shared by the training and validation iterators. With
# class_mode='binary' each sub-folder of the directory becomes one class.
flow_options = dict(
    target_size=img_size,
    batch_size=bs,
    class_mode='binary',
)

train_gen = train_datagen.flow_from_directory(directory=train_folder, **flow_options)

test_gen = test_datagen.flow_from_directory(directory=val_folder, **flow_options)

In [None]:
from keras.applications import DenseNet121
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, GlobalAveragePooling2D
from keras.layers import Flatten, Dense, Dropout, BatchNormalization

def buildModel1(input_shape=(256, 512, 3)):
  """Build and compile a binary defect / no-defect classifier on DenseNet121.

  The ImageNet-pretrained DenseNet121 (without its classification top) is
  followed by global average pooling, a 512-unit ReLU layer and a single
  sigmoid output; BatchNormalization and Dropout(0.5) precede each dense layer.

  Args:
    input_shape: (height, width, channels) of the input images. The default
      matches the (256, 512) target size used by the data generators.

  Returns:
    A compiled Sequential model (binary cross-entropy loss, Adam optimizer,
    metrics: accuracy, f1_m, precision_m, recall_m).
  """
  dense_net = DenseNet121(
      include_top=False,
      input_shape=input_shape, # (height, width, colorchannel), channels last
      weights='imagenet'
  )

  model = Sequential()
  model.add(dense_net)
  model.add(GlobalAveragePooling2D())
  model.add(BatchNormalization())
  model.add(Dropout(0.5))
  model.add(Dense(512, activation='relu'))
  model.add(BatchNormalization())
  model.add(Dropout(0.5))
  model.add(Dense(1, activation='sigmoid'))

  model.compile(
      loss='binary_crossentropy',
      optimizer='adam',
      metrics=['accuracy', f1_m, precision_m, recall_m]
  )

  return model

In [None]:
# Keep a handle on the model so it can be evaluated or saved after training;
# the original call chain discarded it.
model1 = buildModel1()

# len() of a Keras directory iterator is its number of batches per pass,
# ceil(samples / batch_size). Deriving the step counts from it keeps them in
# sync with the batch size and the actual train/validation split; the previous
# hard-coded 524 / 158 were computed for a different batch size and image
# count, so an "epoch" did not correspond to one pass over the data.
history1 = model1.fit_generator(
          train_gen,
          steps_per_epoch=len(train_gen),
          epochs=25,
          validation_data=test_gen,
          validation_steps=len(test_gen)
)

In [None]:
# Plot training & validation accuracy values
# The training result is stored in `history1`; `history` is never defined in
# this notebook and raised NameError. Keras logs accuracy under 'acc' up to
# 2.2.x and under 'accuracy' from 2.3 on, so pick whichever key is present.
acc_key = 'acc' if 'acc' in history1.history else 'accuracy'
plt.plot(history1.history[acc_key])
plt.plot(history1.history['val_' + acc_key])
plt.title('Model accuracy')
plt.ylabel('Accuracy')
plt.xlabel('Epoch')
plt.legend(['Train', 'Test'], loc='upper left')
plt.show()

In [None]:
# Plot training & validation loss values
# The training result is stored in `history1`; `history` is never defined in
# this notebook and raised NameError.
plt.plot(history1.history['loss'])
plt.plot(history1.history['val_loss'])
plt.title('Model loss')
plt.ylabel('Loss')
plt.xlabel('Epoch')
plt.legend(['Train', 'Test'], loc='upper left')
plt.show()