In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns; sns.set()
import numpy as np
from scipy.stats import mode

from sklearn.datasets import load_digits
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
from sklearn.cluster import DBSCAN, MiniBatchKMeans, KMeans, AgglomerativeClustering

from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
import pandas as pd

from time import time
import os
import pathlib
import shutil
from skimage import io
import cv2

In [None]:
# Load the complete training table (this variant still contains the class 0
# rows, which are filtered out in a later cell) and preview the first rows.
df = pd.read_csv('data/train_complete.csv')
df.head()

In [None]:
# Load the training table that was exported without class 0 and preview it.
# NOTE(review): `df2` is not referenced by any other cell shown in this
# notebook — confirm whether it is still needed.
df2 = pd.read_csv('data/train.csv')
df2.head()

In [None]:
# Self-written helper scripts live in the `Python_Scripts` folder; put that
# folder at the front of the module search path so `import util` can find them.
import sys
sys.path.insert(0, 'Python_Scripts')

import util

In [None]:
# Isolate only the images that have 0 or 1 defect.
# NOTE(review): the return value is not assigned, so this call only affects the
# rest of the notebook if `isolate_single_defects` modifies `df` in place —
# confirm against Python_Scripts/util.py.
util.isolate_single_defects(df)

In [None]:
# Drop the class 0 rows from the table, then show the remaining
# (rows, columns) count.
df = df.query('ClassId!=0')
df.shape

In [None]:
# Copy every image listed in `df` from data/train_images into the working
# folder data/single_defect_train_images, which the image-loading cell reads.
path = pathlib.Path.cwd()
origin_train_path = path.joinpath('data', 'train_images')
target_directory = path.joinpath('data', 'single_defect_train_images')

# exist_ok=True keeps the cell re-runnable without a try/except: the copy below
# must run on the very first execution (folder just created) as well as on
# later ones, and real errors (e.g. a missing `data` folder) should surface
# instead of being reported as "folder already exists".
target_directory.mkdir(exist_ok=True)

# Iterate the `ImageId` column — the same column the image-loading cell uses to
# build file names — and copy each distinct file once.
for source_file in df['ImageId'].unique():
    shutil.copy2(origin_train_path.joinpath(source_file),
                 target_directory.joinpath(source_file))


In [None]:
# Preview the first rows of the filtered table.
df.head()


In [None]:
# Load every image listed in `df`, shrink it to IMG_SIZE x IMG_SIZE and store
# it as one flat feature vector per image (row order follows `df`).
# Note: IMG_SIZE is assigned again (to 64) in the single-image exploration
# further down; this cell sets its own value, so it is safe to re-run.
IMG_SIZE = 32

flattened_images = []
for image_id in df['ImageId']:
    image = io.imread('data/single_defect_train_images/' + image_id)
    # Resize so that every image yields a vector of the same length.
    image_resized = cv2.resize(image, (IMG_SIZE, IMG_SIZE))
    # reshape(-1) collapses all axes into a single 1-D vector. Reshaping to
    # (len(image_resized), -1) would only merge the last two axes and leave a
    # 2-D (IMG_SIZE, IMG_SIZE * channels) array, which is not a feature vector.
    flattened_images.append(image_resized.reshape(-1))

In [None]:

# Load one sample image to walk through the preprocessing steps on a single
# example, and report its original array shape.
images = io.imread('data/single_defect_train_images/0002cc93b.jpg')
print('Org image shape --> ',images.shape)

In [None]:
# Disable axes grid lines so they are not drawn over the image, then display
# the original sample image.
plt.rcParams['axes.grid'] = False
plt.imshow(images)

In [None]:
#images[:,0]

In [None]:
# Largest value found in the original image array.
print(images.max())

In [None]:
# Resize the sample image to IMG_SIZE x IMG_SIZE and report the new shape.
# Note: this rebinds the notebook-wide IMG_SIZE, which the batch-loading cell
# above sets to 32, to 64.

IMG_SIZE = 64
image_resized=cv2.resize(images,(IMG_SIZE,IMG_SIZE))
print('After resizing shape --> ',image_resized.shape)

In [None]:
# Disable axes grid lines again (same setting as for the original image) and
# display the resized sample image.
plt.rcParams['axes.grid'] = False
plt.imshow(image_resized)

In [None]:
# Length of the first axis (number of pixel rows) of the resized image.
print(len(image_resized))

In [None]:
# Largest value in the resized image array, for comparison with the original.
print(image_resized.max())

In [None]:
# K-Means expects one flat feature vector per sample, so collapse the resized
# sample image into a single row of shape (1, n_features).
# Passing -1 lets NumPy infer the row length from the array itself, so the cell
# keeps working if IMG_SIZE changes or the image does not have 3 channels.
# (Reshaping to (len(image_resized), -1) would only merge the last two axes and
# leave a 2-D image rather than a single vector.)
image_flat = image_resized.reshape(1, -1)

print('After Flattening shape --> ',image_flat.shape)

In [None]:
# Display the flattened pixel values.
image_flat

In [None]:
# Render the flattened array as an image; because it has a single row, this
# draws one strip of pixels rather than a recognisable picture.
plt.imshow(image_flat)

In [None]:
# Largest value in the flattened array (flattening does not change the values).
print(image_flat.max())

In [None]:
# First column of the flattened array, i.e. the first value of its single row.
image_flat[:,0]

In [None]:
# Data normalization
# Stack the per-image arrays collected in `flattened_images` into one
# (n_samples, n_features) float32 matrix and divide by 255 to scale the pixel
# values (assumed to be 8-bit, as the divisor implies) into the range [0, 1].
#
# NOTE(review): this cell previously used typographic quotes around 'float32'
# (a SyntaxError) and converted `x_train` / `x_test` arrays that no cell in
# this notebook creates. `flattened_images` is the only image data built above,
# so `x_train` is derived from it; the notebook has no held-out split from
# which an `x_test` could be built.
x_train = np.asarray(flattened_images, dtype='float32')
# The reshape is a no-op for 1-D vectors and flattens any per-image 2-D arrays.
x_train = x_train.reshape(len(flattened_images), -1) / 255.0

In [None]:
# Build the (n_samples, n_features) matrix K-Means needs: one flattened row per
# image, in the same order as the rows of `df`. `df` only holds the label
# table, so the pixel data comes from `flattened_images`.
features = np.asarray(flattened_images, dtype='float32')
features = features.reshape(len(flattened_images), -1)

# Create the KMeans model and assign every image to one of 5 clusters;
# random_state is fixed so repeated runs give the same clustering.
# NOTE(review): 5 clusters were chosen "due to 5 classes", but the class 0 rows
# were dropped from `df` earlier — confirm how many classes actually remain.
kmeans = KMeans(n_clusters=5, random_state=0)
clusters = kmeans.fit_predict(features)
kmeans.cluster_centers_.shape

In [None]:
# Map each learned cluster id to the true class that occurs most often among
# its members, so the cluster assignments can be compared with the true labels.
# The ground truth is the `ClassId` column of `df`; this assumes `clusters`
# holds one entry per row of `df`, in row order.
true_labels = df['ClassId'].to_numpy()

labels = np.zeros_like(clusters)
# Iterate over the cluster ids that actually occur, so an empty cluster can
# never be passed to `mode`.
for cluster_id in np.unique(clusters):
    mask = (clusters == cluster_id)
    # Depending on the SciPy version, mode(...)[0] is a length-1 array or a
    # scalar; np.ravel(...)[0] extracts the value in both cases.
    labels[mask] = np.ravel(mode(true_labels[mask])[0])[0]

In [None]:
# Evaluate the cluster-derived labels against the true classes.
# The ground truth is the `ClassId` column of `df`; this assumes `labels` holds
# one predicted class per row of `df`, in row order.
true_labels = df['ClassId'].to_numpy()

# Fix the class order once and pass it to confusion_matrix, so the matrix
# rows/columns and the heatmap tick labels are guaranteed to agree.
class_ids = np.unique(np.concatenate([true_labels, labels]))

# Plot the confusion matrix (transposed: x axis = true label, y axis = predicted label).
mat = confusion_matrix(true_labels, labels, labels=class_ids)
sns.heatmap(mat.T, square=True, annot=True, fmt='d', cbar=False,
            xticklabels=class_ids,
            yticklabels=class_ids)
plt.xlabel('true label')
plt.ylabel('predicted label');

# Text summary: overall accuracy, the raw matrix and per-class metrics.
print('Accuracy: ', accuracy_score(true_labels, labels))
print('==============================================================')
print(mat)
print('==============================================================')
print(classification_report(true_labels, labels))