In [1]:
# import library
import os, os.path
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import cv2
import csv
from scipy.stats import kurtosis, skew
import glob
from sklearn.preprocessing import MinMaxScaler
from skimage.feature import greycomatrix, greycoprops
from sklearn.cluster import KMeans
from collections import Counter
import math
import pandas as pd

In [2]:
import pickle

In [3]:
# kernel = np.ones((5,5),np.uint8)
scaler = MinMaxScaler()

In [4]:
# mengambil label gambar berdasar foldernya
labels = [os.path.dirname(file).split('\\')[1] for file in glob.glob("./downloads/*/*")]

In [5]:
# file_name = [[os.path.dirname(file).split('\\')[1], os.path.basename(file)] for file in glob.glob("./downloads/*/*")]
# new = [x for x in file_name]

# file_name

In [5]:
# read image

images = [cv2.imread(file) for file in glob.glob("./downloads/*/*")]

In [6]:
# fungsi ekstrak fitur color moment
def get_color_feature(img):
    color_feat = []
    for x in img:
        temp = [np.mean(x), np.std(x), skew(skew(x)), kurtosis(kurtosis(x))]
        for t in temp:
            color_feat.append(t)

    return color_feat

In [7]:
def get_color_hist_val(img):
    chan_hist = []
    for chan in img:
        hist = cv2.calcHist([chan],[0],None,[256],[0,256])
        norm_hist = cv2.normalize(hist, hist).flatten()
        res = [sum(norm_hist), np.mean(norm_hist), np.std(norm_hist)]
        for r in res:
            chan_hist.append(r)
            
    return chan_hist

In [272]:
# fungsi kalkulasi matriks glcm dengan sudut gambar 0, 45, 90, 135 derajat
# agls = , np.pi/4, np.pi/2, 3*np.pi/4
def calc_glcm_all_agls(img, label, props, dists=[5], agls=[0, np.pi/4, np.pi/2, 3*np.pi/4], lvl=256, sym=True, norm=True):
    
    glcm = greycomatrix(img, 
                        distances=dists, 
                        angles=agls, 
                        levels=lvl,
                        symmetric=sym, 
                        normed=norm)
    feature = []
    glcm_props = [propery for name in props for propery in greycoprops(glcm, name)[0]]
    for item in glcm_props:
            feature.append(item)
    feature.append(label) 
    
    return feature

In [273]:
# fungsi get glcm

def glcm_features(img, label="no label"):
    properties = ['dissimilarity', 'correlation', 'homogeneity', 'contrast', 'ASM', 'energy']
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    glcm_feat = calc_glcm_all_agls(gray, label, props=properties)
        
    return glcm_feat

In [274]:
def hist_equal(img):
    img_yuv = cv2.cvtColor(img, cv2.COLOR_BGR2YUV)

    # equalize the histogram of the Y channel
    img_yuv[:,:,0] = cv2.equalizeHist(img_yuv[:,:,0])

    # convert the YUV image back to RGB format
    img_output = cv2.cvtColor(img_yuv, cv2.COLOR_YUV2BGR)
    
    return img_output

In [479]:
# fungsi resize citra
def resize_image(img):
    resized = cv2.resize(img, (500, 500))
    
    return resized

In [480]:
# ekstraksi fitur dari citra
def extract_feature(images, labels):
    imgs_feat = []
    # i = 0
    for image, label in zip(images, labels):
        imgs = resize_image(image)
        img = hist_equal(imgs)
        glcm_feature = glcm_features(img, label)

        split_chans = cv2.split(img)
        color_feature = get_color_feature(split_chans)
        color_hist = get_color_hist_val(split_chans)
        imgs_feat.append(color_hist + color_feature + glcm_feature)
    #     i = i + 1
    
    return imgs_feat

In [481]:
img_train_feat = extract_feature(images, labels)

In [482]:
training_image_dataset = pickle.dumps(img_train_feat)

In [483]:
training_image_feature = pickle.loads(training_image_dataset)

In [484]:
len_feat = len(training_image_feature[0])

In [485]:
# minmax scaler
# scaler.fit(rounded_scale)
train_feat_only = [[x for x in y[:len_feat-1]] for y in training_image_feature]
scaled_feat = scaler.fit_transform(train_feat_only)
# pembulatan nilai feature
rounded_scale = [[round(x, 5) for x in y] for y in scaled_feat]

In [486]:
# pd.DataFrame(scaled_feat)

In [487]:
n = 100

In [488]:
# clustering k-means dengan jumlah cluster 50 dan iterasi maksimum 200
kmeans = KMeans(n_clusters=n, max_iter=200).fit(rounded_scale)

y = kmeans.predict(rounded_scale)

In [489]:
# menyatukan feature dengan label yang sesuai

with_nama_makanan = []
for i in range(len(rounded_scale)):
    label = [labels[i]]
    with_nama_makanan.append(rounded_scale[i] + label)
    
len(with_nama_makanan)

1499

In [490]:
# menyatukan citra dengan cluster hasil clustering k-means
with_label = list(zip(with_nama_makanan, y))

# mengurutkan hasil clustering
temp = sorted(with_label, key=lambda x: x[1])

# inisialisasi variabel baru untuk menyimpan data berdasar cluster
new = []

# pemisahan data untuk setiap cluster
for i in range(0, n):
    temps = []
    for x in temp:
        if i == x[1]:
            temps.append(x[0])
    new.append(temps)

In [491]:
# centroid dari hasil k-means clustering
centroids = kmeans.cluster_centers_

In [492]:
# fungsi euclidean distance
def euclidean_distance(input_data, dataset):
    distances = []
    for data in dataset:
        dist = math.sqrt(sum([(a - b) ** 2 for a, b in zip(input_data, data)]))
        distances.append(dist)
    
    return distances

In [493]:
# fungsi klasifikasi knn
def knn(k, datatest, datatrain, nearest_cluster):
    data = datatrain[nearest_cluster]
    
    data_without_label = [x[:len_feat-1] for x in data]
    label_only = [x[-1] for x in data]
    
    distance = euclidean_distance(datatest, data_without_label)
    zipped = list(zip(distance, label_only))
    temp = sorted(zipped, key=lambda x: x[0])
    
    nearest_neighbor = temp[:k]
    
    most_label = [x[1] for x in nearest_neighbor]
    
    return Counter(most_label).most_common(1)[0][0]

In [494]:
def normalize_test(data_train, data_test):
    data_train.append(data_test)
    scaled = scaler.fit_transform(data_train)
    
    return scaled[-1]

In [495]:
# file_name = [os.path.dirname(file).split('\\')[1] for file in glob.glob("./downloads/*/*")]

In [496]:
import timeit

In [497]:
# image untuk testing
# test_images = [cv2.imread(file) for file in glob.glob("./downloads/*/*")]
start = timeit.default_timer()
file_name = [os.path.dirname(file).split('\\')[1] for file in glob.glob("./downloads/*/*")]

the_ks = [3, 5, 15, 25, 50]
the_accs = []


pred_labels = []
for x in training_image_feature:
    test_feat_only = x[:len_feat-1]
    normed = normalize_test(train_feat_only, test_feat_only)
    cluster_distance_test = euclidean_distance(normed, centroids)
    nearest_cluster_test = cluster_distance_test.index(min(cluster_distance_test))
    # #     print(nearest_cluster_test)
    predicted_label = knn(3, normed, new, nearest_cluster_test)
    pred_labels.append(predicted_label)

i = 0
for act, pred in zip(file_name, pred_labels):
        if act.lower() == pred.lower():
            i = i+1

acc = i/len(pred_labels) * 100
    
print(acc)

stop = timeit.default_timer()

print('Time: ', stop - start)  

89.92661774516344
Time:  104.62105979999978


In [498]:
test_images = [cv2.imread(file) for file in glob.glob("./sampel gambar/*")]
feature_test = []

for x in test_images:
    imgs = resize_image(x)
    img = hist_equal(imgs)
#     img_conv = cv2.cvtColor(x, cv2.COLOR_BGR2RGB)
#     blurtest = cv2.GaussianBlur(x,(5,5),0)
    split_chans_test = cv2.split(img)
#     get_color_hist_val(split_chans_test) + 
    test_feat = get_color_hist_val(split_chans_test) + get_color_feature(split_chans_test) + glcm_features(img)
    feature_test.append(test_feat)

In [499]:
testing_image_dataset = pickle.dumps(feature_test)

In [500]:
testing_image_features = pickle.loads(testing_image_dataset)

In [501]:
start = timeit.default_timer()
fname = [os.path.basename(file).split('.')[0].lower() for file in glob.glob("./sampel gambar/*")]
# c = input()
tempus = []
for x in testing_image_features:

    test_feat_onlys = x[:len_feat-1]
    normed = normalize_test(train_feat_only, test_feat_onlys)
    cluster_distance_test = euclidean_distance(normed, centroids)
    nearest_cluster_test = cluster_distance_test.index(min(cluster_distance_test))
        # #     print(nearest_cluster_test)
    predicted_label = knn(3, normed, new, nearest_cluster_test)
    tempus.append(predicted_label.lower())
    
temp_count = 0
for a, b in list(zip(fname, tempus)):
    if a == b:
        temp_count += 1
        
print(temp_count/15 * 100)
print(list(zip(fname, tempus)))
stop = timeit.default_timer()

print("time: ", stop - start)

26.666666666666668
[('bakso', 'gado gado'), ('bika ambon', 'mie ayam'), ('cucur', 'kentang goreng'), ('gado-gado', 'serabi'), ('getuk', 'bakso'), ('kentang goreng', 'bika ambon'), ('martabak manis', 'martabak manis'), ('mie ayam', 'mie ayam'), ('nasi kuning', 'opor ayam'), ('onde-onde', 'mie ayam'), ('opor ayam', 'opor ayam'), ('pempek', 'cucur'), ('rendang daging', 'nasi kuning'), ('sate', 'getuk'), ('serabi', 'serabi')]
time:  1.044618600000831


In [502]:
# 7/15 * 100

In [503]:
# f = open('image_feature3.csv', 'w', newline='\n', encoding='utf8')

# with f:
#     writer = csv.writer(f)
#     writer.writerows(training_image_feature)