In [None]:
# Import library yang diperlukan
import cv2
import numpy as np
import os
from skimage.feature import graycomatrix, graycoprops
import pandas as pd 
from umap import UMAP
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt

import plotly.graph_objects as go
import plotly.subplots as sp
import PIL.Image as pimage
from IPython.display import Image

# Pra proses

In [None]:
# Folder with the resized images (read for feature extraction) and folder with the raw images
# (read later to display one example per cluster).
folder_path = './resize_mleaves/'
raw_folder_path = './melon_leaves/'

# os.listdir() returns entries in arbitrary order and also lists sub-directories
# (e.g. Jupyter's .ipynb_checkpoints). Keep regular files only, sorted by name, so the
# preview below and the row order of every later table are the same on every run.
image_files = sorted(
    entry
    for entry in os.listdir(folder_path)
    if os.path.isfile(os.path.join(folder_path, entry))
)

# Preview one of the resized images
Image(filename=folder_path+image_files[5])

In [None]:
imgs_matrix = []  # preprocessed grey-level matrix of every successfully loaded image
imgs_label = []   # file name of each loaded image, in the same order as imgs_matrix
for filename in image_files:
    image = cv2.imread(os.path.join(folder_path, filename))
    if image is None:
        # cv2.imread() returns None (it does not raise) when a file cannot be read or decoded;
        # skip it so that imgs_matrix and imgs_label stay aligned.
        print(f"Skipping unreadable image: {filename}")
        continue
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # Keep only the central third of the image in both directions
    h, w = gray.shape
    ymin, ymax, xmin, xmax = h//3, h*2//3, w//3, w*2//3
    crop = gray[ymin:ymax, xmin:xmax]

    # Halve the resolution of the crop
    resize = cv2.resize(crop, (0,0), fx=0.5, fy=0.5)

    imgs_matrix.append(resize)
    imgs_label.append(filename)

In [None]:
# ----------------- calculate graycomatrix() & graycoprops() for angles 0, 45, 90, 135 ----------------------------------
# TODO: what happens if the GLCM parameters change (e.g. distance=4, another number of levels)?
#       Which values are commonly used?
def calc_glcm_all_agls(img, label, props, dists=(5,), agls=(0, np.pi/4, np.pi/2, 3*np.pi/4), lvl=256, sym=True, norm=True):
    """Build one feature row for an image: its label followed by its GLCM properties.

    Parameters
    ----------
    img : array passed unchanged to ``graycomatrix`` as the image.
    label : value stored as the first element of the returned row.
    props : iterable of property names, each passed to ``graycoprops``.
    dists : pixel-pair distances, forwarded as ``distances``.
    agls : pixel-pair angles in radians, forwarded as ``angles``.
    lvl, sym, norm : forwarded as ``levels``, ``symmetric`` and ``normed``.

    Returns
    -------
    list
        ``[label, v_0, v_1, ...]`` where the values are grouped by property (in the order
        of ``props``); inside a property they run over every distance and, for each
        distance, over every angle.
    """
    glcm = graycomatrix(
        img,
        distances=list(dists),
        angles=list(agls),
        levels=lvl,
        symmetric=sym,
        normed=norm
    )

    feature = [label]
    for prop in props:
        # graycoprops() returns one row per distance and one column per angle.
        # Flatten the whole array so that no distance is silently dropped; with a single
        # distance this is exactly the first (and only) row.
        feature.extend(np.ravel(graycoprops(glcm, prop)))

    return feature

In [None]:
# ----------------- call calc_glcm_all_agls() for all properties ----------------------------------
# GLCM properties computed for every image (names understood by graycoprops)
properties = ['dissimilarity', 'correlation', 'homogeneity', 'contrast', 'ASM', 'energy']

# One feature row per image: the label followed by the values of every property
glcm_all_agls = [
    calc_glcm_all_agls(matrix, file_name, props=properties)
    for matrix, file_name in zip(imgs_matrix, imgs_label)
]

In [None]:
# Build the pandas DataFrame that holds the GLCM features.
# Column layout mirrors the feature rows: the label first, then one column per
# (property, angle) pair, with the angle varying fastest.
angles = ['0', '45', '90', '135']
columns = ["label"] + [f"{name}_{ang}" for name in properties for ang in angles]

glcm_df = pd.DataFrame(glcm_all_agls, columns=columns)
glcm_df.head(15)

In [None]:
# Sanity check: (number of feature rows, 1 label column + number of GLCM feature columns)
glcm_df.shape

# Reduksi Dimensi


In [None]:
# Split the table: Y keeps the image label (file name), X keeps every GLCM feature column
Y = glcm_df['label']
X = glcm_df.drop(columns=['label'])

In [None]:
# UMAP reduction
# Project the GLCM feature vectors onto a 2D plane.
# Despite its name, X_scaled holds the UMAP embedding, not standardized features.
# NOTE(review): the GLCM properties are fed to UMAP without any standardization;
# confirm whether that is intended.
# random_state pins the otherwise stochastic embedding, so re-running the notebook
# reproduces the same coordinates (and therefore the same clusters downstream).
embedding = UMAP(random_state=42).fit_transform(X)
X_scaled = pd.DataFrame(embedding, columns=['feature-vector-1', 'feature-vector-2'])

In [None]:
# Scatter plot of the 2D embedding with Plotly
fig = go.Figure()

fig.add_trace(
    go.Scatter(
        x=X_scaled['feature-vector-1'],
        y=X_scaled['feature-vector-2'],
        mode='markers',
        marker=dict(size=10),
    )
)

# Same unit length on both axes. Anchoring y to x is sufficient: anchoring x to y as
# well creates a circular constraint, and Plotly ignores the redundant one.
fig.update_yaxes(scaleanchor="x", scaleratio=1)

# Title and axis labels so the figure can be read on its own
fig.update_layout(
    title="Scatter Plot",
    xaxis_title='feature-vector-1',
    yaxis_title='feature-vector-2',
)

fig.show()


# ELBOW Analysis 
Untuk jumlah klaster

In [None]:
# KMeans settings shared by every run of the elbow analysis
kmeans_kwargs = {
    "init": "random",
    "n_init": 10,
    "random_state": 1,
}

# Run the elbow analysis on the same 2D embedding that the final K-Means clusters.
# Measuring inertia on the raw GLCM table X would describe a different feature space
# than the one the chosen k is applied to. The columns are selected explicitly so the
# cell still works after 'label' / 'cluster' columns have been added to X_scaled.
elbow_features = X_scaled[['feature-vector-1', 'feature-vector-2']]

# SSE (inertia) for each candidate number of clusters
k_values = range(1, 11)
sse = []
for k in k_values:
    kmeans = KMeans(n_clusters=k, **kmeans_kwargs)
    kmeans.fit(elbow_features)
    sse.append(kmeans.inertia_)

# Visualize the results
elbow_fig, elbow_ax = plt.subplots()
elbow_ax.plot(k_values, sse, marker="o")
elbow_ax.set_xticks(k_values)
elbow_ax.set_xlabel("Number of Clusters")
elbow_ax.set_ylabel("SSE")
elbow_ax.set_title("Elbow analysis")
plt.show()

# K-MEANS

In [None]:
# Cluster the 2D embedding into 5 groups.
# - random_state and n_init are fixed so the cluster assignment is reproducible.
# - The two embedding columns are selected explicitly: a later cell adds 'label' and
#   'cluster' columns to X_scaled, so fitting on the whole frame fails when this cell
#   is re-run afterwards.
kmeans = KMeans(n_clusters=5, n_init=10, random_state=1)
cluster = kmeans.fit_predict(X_scaled[['feature-vector-1', 'feature-vector-2']])
centroid = kmeans.cluster_centers_

In [None]:
# Show the cluster index that K-Means assigned to each row of the embedding
cluster

In [None]:

# Scatter trace for the data points, colored by cluster index
scatter = go.Scatter(
    x=X_scaled['feature-vector-1'],
    y=X_scaled['feature-vector-2'],
    mode='markers',
    marker=dict(color=cluster, size=10),
    name='images',
)

# Scatter trace for the cluster centroids
centroid_scatter = go.Scatter(
    x=centroid[:, 0], y=centroid[:, 1], mode='markers', marker=dict(
            color="white",
            size=10,
            symbol='triangle-up',  # upward triangle marks a centroid
            line=dict(color="black", width=2)
        ),
    name='centroids',
)

# Create the figure with both traces
fig = go.Figure(data=[scatter, centroid_scatter])

# Equal aspect ratio: scaleratio only takes effect on an axis that is linked to
# another one through scaleanchor, so the y axis is anchored to the x axis.
fig.update_yaxes(scaleanchor="x", scaleratio=1)

# Show the plot
fig.show()

In [None]:
# Attach the image label (file name) and the cluster index to every embedding row
X_scaled = X_scaled.assign(label=Y, cluster=cluster)
X_scaled

In [None]:
# Pick one representative image per cluster: the first row of that cluster in X_scaled.
# (To pick a random representative instead, use cluster_data.sample(1, random_state=...).)
# Cluster ids are sorted so the subplot order is stable and matches the subplot titles.
cluster_ids = sorted(X_scaled['cluster'].unique())
image_paths = []
for k in cluster_ids:
    cluster_data = X_scaled[X_scaled['cluster'] == k]
    label = cluster_data['label'].iloc[0]
    # presumably the raw folder contains a file with the same name — verify
    image_paths.append(raw_folder_path + label)

# Size the grid from the number of clusters instead of hard-coding 2 x 3,
# so changing n_clusters does not break the figure.
n_cols = 3
n_rows = max(1, -(-len(image_paths) // n_cols))  # ceiling division
fig = sp.make_subplots(
    rows=n_rows,
    cols=n_cols,
    subplot_titles=[f"Cluster {k}" for k in cluster_ids],
)

# Add every image to its grid cell, filling the grid row by row
for i, image_path in enumerate(image_paths):
    # The context manager closes the file once the pixel data has been copied out
    with pimage.open(image_path) as img:
        pixels = np.asarray(img.convert("RGB"))
    row, col = divmod(i, n_cols)
    fig.add_trace(go.Image(z=pixels), row=row + 1, col=col + 1)

# Layout of the subplot grid (300 px per row keeps the original 600 px for two rows)
fig.update_layout(
    title='Grid Subplots of Images',
    showlegend=False,
    height=300 * n_rows,
    width=900
)

# Show the plot
fig.show()


# Notes
- Compare 4, 5, and 6 clusters: which one is best? Add the silhouette score or another evaluation method.
- Group the images by cluster (one folder per cluster) and ask for an expert's opinion.
- We need to evaluate performance with a metric, or have the results validated by an expert, rather than checking manually.
- Add an explanation/reasoning about GLCM, supported by scientific articles.
- Find other work on clustering/classification of Golden Melon leaves to compare with ours.

Categorization, Golden Melon Leaf

Evaluation methods that can be used:
- Silhouette Score
- Inertia (Within-Cluster Sum of Squares)
- Dunn Index

In [1]:
from sklearn.metrics import silhouette_samples, silhouette_score