In [None]:
from glob import glob
import random
import math
import os

import numpy as np
import pandas as pd 
import matplotlib.pyplot as plot
import seaborn

import time
from random import sample 
import cv2

In [None]:
# Dataset variant to analyse; it names the sub-folder under mura_data/RGB/.
dataset = "mura_pca"

# glob() returns paths in a filesystem-dependent order, so sort them to keep
# every later step (sampling, row order of the stats tables) repeatable.
normal_train_img_fn = sorted(glob(f'mura_data/RGB/{dataset}/train_data/normal/*.png'))
defect_test_img_fn = sorted(glob(f'mura_data/RGB/{dataset}/test_data/defect/*.png'))

In [None]:
# Draw a reproducible random subset of each class for the mean/std comparison.
SAMPLE_SEED = 42           # fixed seed -> the same images are drawn on every run
N_IMAGES_PER_CLASS = 1000  # upper bound; fewer are used if a folder holds fewer files

# Fail early with a readable message instead of an opaque sampling error.
if not normal_train_img_fn or not defect_test_img_fn:
    raise FileNotFoundError("No images found - check `dataset` and the mura_data/RGB/ folder layout.")

sample_rng = random.Random(SAMPLE_SEED)
# sorted(): glob order depends on the filesystem, which would defeat the seed.
# min():    random.sample raises ValueError when asked for more items than exist.
normal_img = sample_rng.sample(
    sorted(normal_train_img_fn),
    min(N_IMAGES_PER_CLASS, len(normal_train_img_fn)),
)
defect_img = sample_rng.sample(
    sorted(defect_test_img_fn),
    min(N_IMAGES_PER_CLASS, len(defect_test_img_fn)),
)

# Empty results table with the columns collected for each sampled image.
df_analysis = pd.DataFrame(columns=['image_path','mean','std', 'class'])
df_analysis

In [None]:
# Compute the pixel mean and standard deviation of every sampled image and
# tag each row with its class (0 = normal sample, 1 = defect sample).
#
# Rows are gathered in a plain list and turned into a DataFrame once:
# DataFrame.append was removed in pandas 2.0 and re-copied the whole frame on
# every call. Rebuilding the frame from scratch also makes this cell safe to
# re-run (appending would have duplicated every row).
analysis_rows = []
for class_label, img_paths in ((0, normal_img), (1, defect_img)):
    for img_path in img_paths:
        image = cv2.imread(img_path)
        if image is None:
            # cv2.imread returns None (it does not raise) for unreadable files.
            raise FileNotFoundError(f"Could not read image: {img_path}")
        analysis_rows.append({
            "image_path": img_path,
            "mean": image.mean(),
            "std": image.std(),
            "class": class_label,
        })

# Passing `columns` keeps the expected layout even when no rows were collected.
df_analysis = pd.DataFrame(analysis_rows, columns=['image_path', 'mean', 'std', 'class'])

In [None]:
# Peek at the last five rows of the per-image statistics table.
df_analysis.tail(5)

In [None]:
# Scatter of per-image mean against std, coloured by class (0 first, then 1).
seaborn.relplot(
    x='mean',
    y='std',
    hue='class',
    hue_order=[0, 1],
    data=df_analysis,
    aspect=1.61,
)
plot.show()

In [None]:
# Draw a reproducible random subset of the normal training images for clustering.
CLUSTER_SAMPLE_SEED = 42   # fixed seed -> the same images are drawn on every run
N_CLUSTER_IMAGES = 10000   # upper bound; fewer are used if the folder holds fewer files

# Fail early with a readable message instead of an opaque sampling error.
if not normal_train_img_fn:
    raise FileNotFoundError("No images found - check `dataset` and the mura_data/RGB/ folder layout.")

# sorted(): glob order depends on the filesystem, which would defeat the seed.
# min():    random.sample raises ValueError when asked for more items than exist.
images = random.Random(CLUSTER_SAMPLE_SEED).sample(
    sorted(normal_train_img_fn),
    min(N_CLUSTER_IMAGES, len(normal_train_img_fn)),
)

# Empty results table with the columns collected for each sampled image.
df = pd.DataFrame(columns=['image_path','mean','std'])
df

In [None]:
# Compute the pixel mean and standard deviation of every sampled image.
#
# Rows are gathered in a plain list and turned into a DataFrame once:
# DataFrame.append was removed in pandas 2.0 and re-copied the whole frame on
# every call. Rebuilding the frame from scratch also makes this cell safe to
# re-run (appending would have duplicated every row).
stat_rows = []
for img_path in images:
    image = cv2.imread(img_path)
    if image is None:
        # cv2.imread returns None (it does not raise) for unreadable files.
        raise FileNotFoundError(f"Could not read image: {img_path}")
    stat_rows.append({
        "image_path": img_path,
        "mean": image.mean(),
        "std": image.std(),
    })

# Passing `columns` keeps the expected layout even when no rows were collected.
df = pd.DataFrame(stat_rows, columns=['image_path', 'mean', 'std'])

In [None]:
# Peek at the first five rows of the per-image statistics table.
df.head(5)

In [None]:
# Scatter of per-image mean (x) against std (y); small red markers.
# The trailing semicolon suppresses the Axes repr in the cell output.
df.plot.scatter(
    x="mean",
    y="std",
    c="red",
    s=3,
);

In [None]:
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import DBSCAN
from sklearn.neighbors import NearestNeighbors

In [None]:
# StandardScaler standardises each feature to zero mean and unit variance.
# It does not remove outliers; it only puts `mean` and `std` on a comparable
# scale. The fitted scaler is kept in `std_slc`.

# Feature matrix: one row per image, columns = pixel mean and pixel std.
X = df.loc[:, ["mean", "std"]]

std_slc = StandardScaler()
X_std = std_slc.fit_transform(X)

In [None]:
# Nearest-neighbour distances used to pick DBSCAN's `eps`.
# They must be measured on the standardised features (X_std), because that is
# the space DBSCAN is fitted on; distances on the raw X are on a different
# scale and cannot be compared with eps.
neigh = NearestNeighbors(n_neighbors=3)
nbrs = neigh.fit(X_std)
# Querying with the training data itself: each point is normally returned as
# its own first neighbour (distance 0), followed by the closest other points.
distances, indices = nbrs.kneighbors(X_std)

In [None]:
# Sorted nearest-neighbour distance curve; the "elbow" is a common heuristic
# for choosing DBSCAN's eps.
# Column 0 is normally each point's distance to itself, so column 1 is used.
# A new name is used instead of overwriting `distances`, so this cell can be
# re-run without failing on an already-flattened array.
k_distances = np.sort(distances[:, 1])
plot.plot(k_distances)
plot.xlabel("Points sorted by distance")
plot.ylabel("Distance to nearest neighbour");

In [None]:
# Density-based clustering of the standardised (mean, std) features.
# eps=0.2 is the neighbourhood radius; every other DBSCAN setting is left at
# its default.
clt = DBSCAN(eps=0.2)
model = clt.fit(X_std)

# One cluster label per row of X_std.
labels = model.labels_
print(labels)

In [None]:
# Attach each image's cluster label to the stats table.
# The labels of the already-fitted model are reused instead of calling
# fit_predict, which would run the whole clustering a second time.
# Indexing by X.index ties every label to the row it was computed from, so
# the assignment stays correct even if `df` has been re-ordered since.
clusters = pd.Series(model.labels_, index=X.index, name="Cluster")
df["Cluster"] = clusters

In [None]:
# Order the rows by cluster label (ascending) in place, then display the table.
df.sort_values(by="Cluster", inplace=True)
df

In [None]:
# Scatter of mean vs. std, coloured by DBSCAN cluster label.
fig, ax = plot.subplots(figsize=(10, 10))
scatter = ax.scatter(df["mean"], df["std"], c=df["Cluster"], s=50)
ax.set(
    title="DBSCAN Clustering",
    xlabel="Mean",
    ylabel="Std",
)
# The colour bar maps marker colours back to cluster labels.
plot.colorbar(scatter)
plot.show()