In [None]:
import pandas as pd
import numpy as np
import os
from PIL import Image
import matplotlib.pyplot as plt
import matplotlib.patheffects as PathEffects
import pylab
import plotly.express as px
%matplotlib inline
import seaborn as sns
# Global seaborn look for every matplotlib figure drawn below:
# dark grid background, the "muted" colour cycle, and the "notebook"
# context with fonts scaled by 1.5 and thicker (2.5 pt) lines.
sns.set_style('darkgrid')
sns.set_palette('muted')
sns.set_context("notebook", font_scale=1.5,
                rc={"lines.linewidth": 2.5})

In [None]:
# Target image geometry used by prep_stage: height, width, channel count.
IMG_H, IMG_W, IMG_C = 128, 128, 3

In [None]:
def prep_stage(img):
    """Turn one decoded image into a flat float32 feature vector.

    Parameters
    ----------
    img : array-like or PIL.Image.Image
        Decoded pixels, e.g. ``np.array(Image.open(path))``.

    Returns
    -------
    numpy.ndarray
        1-D float32 array of length ``IMG_H * IMG_W * IMG_C`` with values
        divided by 255.

    Raises
    ------
    ValueError
        If ``IMG_C`` is not 1, 3 or 4 and the input needs converting.

    Notes
    -----
    Inputs that already have shape ``(IMG_H, IMG_W, IMG_C)`` are only
    rescaled. Anything else is spatially resized and channel-converted with
    Pillow. ``np.resize`` is deliberately not used: it repeats/truncates the
    flattened buffer instead of interpolating, which scrambles any image whose
    size or channel count differs from the target.
    Errors raised by Pillow for array dtypes it cannot wrap are propagated.
    """
    target_shape = (IMG_H, IMG_W, IMG_C)
    pixels = np.asarray(img)

    if pixels.shape != target_shape:
        mode = {1: "L", 3: "RGB", 4: "RGBA"}.get(IMG_C)
        if mode is None:
            raise ValueError(f"Unsupported channel count IMG_C={IMG_C}")

        if isinstance(img, Image.Image):
            pil_img = img
        else:
            # Pillow expects single-channel data as a 2-D array.
            if pixels.ndim == 3 and pixels.shape[2] == 1:
                pixels = pixels[:, :, 0]
            pil_img = Image.fromarray(pixels)

        # Pillow's size argument is (width, height).
        pil_img = pil_img.convert(mode).resize((IMG_W, IMG_H))
        pixels = np.asarray(pil_img).reshape(target_shape)

    # astype() copies, so the in-place division never touches the caller's data.
    pixels = pixels.astype('float32')
    pixels /= 255

    return pixels.ravel()
def create_dataset_PIL(img_folder, limit=100, ext=".png"):
    """Load images from class sub-folders into a feature matrix plus labels.

    Every sub-directory of ``img_folder`` is treated as one class; its name
    becomes the label of each image found inside it.

    Parameters
    ----------
    img_folder : str
        Directory whose sub-directories each hold the images of one class.
    limit : int, optional
        Stop reading a class once this many images were loaded from it.
    ext : str, optional
        File extension (with leading dot) a file must have to be loaded.
        The comparison is case-sensitive.

    Returns
    -------
    (numpy.ndarray, numpy.ndarray)
        The stacked outputs of ``prep_stage`` (one row per image) and the
        matching array of class-folder names. Both are empty when no file
        matches.
    """
    skipped_entries = {".DS_Store", ".ipynb_checkpoints"}
    img_data_array = []
    class_name = []

    # os.listdir returns entries in arbitrary order; sorting makes the subset
    # selected by `limit` (and the row order) the same on every run.
    for dir1 in sorted(os.listdir(img_folder)):
        class_dir = os.path.join(img_folder, dir1)
        # Plain files next to the class folders would make listdir() raise.
        if dir1 in skipped_entries or not os.path.isdir(class_dir):
            continue

        counter = 0
        for file in sorted(os.listdir(class_dir)):
            image_path = os.path.join(class_dir, file)
            _, file_extension = os.path.splitext(image_path)
            if file_extension != ext:
                continue

            # The context manager closes the file handle; np.array() copies
            # the pixel data out before that happens.
            with Image.open(image_path) as pil_image:
                image = np.array(pil_image)

            img_data_array.append(prep_stage(image))
            class_name.append(dir1)
            counter += 1
            if limit == counter:
                break

    return np.array(img_data_array), np.array(class_name)

In [None]:
# Read up to 1000 ".png" files (the loader's default extension) from each
# class sub-folder of img_folder.
img_folder = "mura_data/RGB/mura_march_clean/test_data"
PIL_img_data, class_name = create_dataset_PIL(img_folder, limit=1000)

In [None]:
# Sanity check: number of feature rows and number of labels (the loader
# appends to both lists together, so the two counts should match).
print(len(PIL_img_data), len(class_name))

In [None]:
# Map each class-folder name to its integer id (its position in `labels`).
labels = ["normal", "defect"]
target_dict = dict(zip(labels, range(len(labels))))
target_dict

In [None]:
# Encode every image's class-folder name as its integer id.
# A folder name missing from target_dict raises KeyError here.
target_val = np.array([target_dict[name] for name in class_name])
target_val.shape

In [None]:
# One row per loaded image; prep_stage flattens each image, so every row
# has IMG_H * IMG_W * IMG_C values.
PIL_img_data.shape

In [None]:
# Peek at the feature matrix (float32 values already divided by 255 in prep_stage).
PIL_img_data

In [None]:
from sklearn.manifold import TSNE
import time
# Fixed seed so the embedding is repeatable; reused by the 2-D run below.
RS = 123

# Fit a 3-component t-SNE on the flattened images and report the wall time.
time_start = time.time()
tsne_result = TSNE(n_components=3, random_state=RS).fit_transform(PIL_img_data)

print(f"t-SNE done! Time elapsed: {time.time()-time_start} seconds")

In [None]:
print(tsne_result.shape)

# Class ids are categories, not magnitudes. Plotly Express maps numeric
# `color` values to a continuous colour scale, so cast them to strings to get
# one discrete colour and one legend entry per class.
fig = px.scatter_3d(
    tsne_result, x=0, y=1, z=2,
    color=target_val.astype(str), labels={'color': 'classes'}
)
fig.update_traces(marker_size=8)
fig.show()
# NOTE(review): static export needs an image-export engine (e.g. kaleido)
# installed in the kernel environment -- confirm it is available.
fig.write_image("fig1.png")

In [None]:
# Restart the stopwatch: the following cell prints the time elapsed since
# `time_start`, which would otherwise still include the 3-D fit and plotting.
time_start = time.time()
tsne_result_2D = TSNE(n_components=2, random_state=RS).fit_transform(PIL_img_data)

# One row per image: class id plus its two embedding coordinates.
df = pd.DataFrame({
    "y": target_val,
    "comp-1": tsne_result_2D[:, 0],
    "comp-2": tsne_result_2D[:, 1],
})

In [None]:
# Elapsed time is measured from wherever `time_start` was last assigned.
print(f"t-SNE done! Time elapsed: {time.time()-time_start} seconds")
print(target_val)

# One "hls" colour per distinct class id.
class_palette = sns.color_palette("hls", len(np.unique(target_val)))
ax = sns.scatterplot(
    x="comp-1",
    y="comp-2",
    hue=df.y.tolist(),
    palette=class_palette,
    data=df,
)
# `sc` holds whatever `.set(...)` returns, not the Axes itself.
sc = ax.set(title="Mura data T-SNE projection")

# Save before show() so the written file contains the drawn figure.
plt.savefig('fig2.png')
plt.show()