In [1]:
import os
import scipy
import numpy as np
import matplotlib.pyplot as plt

from skimage import feature
from skimage import io
from skimage import color
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import normalize
from sklearn import datasets
from sklearn import preprocessing

import warnings
warnings.filterwarnings('ignore')

In [2]:
def show(img):
    fig, ax = plt.subplots(1, 1, figsize=(8, 8))
    ax.imshow(img, cmap=plt.cm.gray)
    ax.set_axis_off()
    plt.show()

In [3]:
#selecionando as classes
MAIN_DIR = "frutas_dataset_train"
y = [name for name in os.listdir(MAIN_DIR) if os.path.isdir(os.path.join(MAIN_DIR, name))]
result = [os.path.join(dp, f) for dp, dn, filenames in os.walk(MAIN_DIR) for f in filenames if os.path.splitext(f)[1] == '.jpg']
#transformar as labels de string para ints
le = preprocessing.LabelEncoder()
encoded = le.fit_transform(y)

In [4]:
#qtde de arquivos por pasta
num_files = []
for i in y:
    DIR = "frutas_dataset_train/" + i + "/"
    num_files.append(len([name for name in os.listdir(DIR) if os.path.isfile(os.path.join(DIR, name))]))

X = np.zeros((len(result), 10)) #n_samples x n_features

In [5]:
#criando o target label == classes das frutas
k = []
for i in range(len(y)):
    for j in range(num_files[i]):
        k.append(encoded[i])

# Gerar a LBP das imagens

In [88]:
#criando a matriz geral
for i in range(len(result)):
    im = color.rgb2gray(io.imread(result[i]))
    lbp = feature.texture.local_binary_pattern(im, 8, 2, method='uniform')
    histogram = scipy.stats.itemfreq(lbp)
    x = histogram[:,1]
    norm_hist = normalize(x[:,np.newaxis], axis=0).ravel()
    X[i,:] = x

In [90]:
#salvando estado da matriz X e do vetor de target labels k
np.savetxt("extracted_features/x_lbp.txt", X, delimiter=";")
np.savetxt("extracted_features/k_lbp.txt", k, delimiter=";")

# Ler a LBP do txt

In [6]:
#recuperar estado
X = np.genfromtxt('extracted_features/x_lbp.txt', delimiter=';')
k = np.genfromtxt('extracted_features/k_lbp.txt', delimiter=';')

In [7]:
from sklearn.model_selection import train_test_split
X_train, X_test, k_train, k_test = train_test_split(X, k, test_size=0.5) #test size = 50%

In [8]:
knn = KNeighborsClassifier(n_neighbors=3)

In [9]:
knn.fit(X, k)

KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=None, n_neighbors=3, p=2,
           weights='uniform')

In [10]:
k_pred = knn.predict(X_test)

In [11]:
from sklearn import metrics
print("Taxa de acerto:", metrics.accuracy_score(k_test, k_pred))

Taxa de acerto: 0.9075829383886256


In [12]:
def calcula_lbp_hist(im):
    lbp = feature.texture.local_binary_pattern(im, 8, 2, method='uniform')
    histogram = scipy.stats.itemfreq(lbp)
    x = histogram[:,1]
    return x

In [13]:
def get_class(encoded, y, num):
    for i in range(len(encoded)):
        if(encoded[i] == num):
            break
    return y[i]

In [14]:
im = color.rgb2gray(io.imread("cashew_039.jpg"))
x = calcula_lbp_hist(im)
a = knn.predict(x.reshape(1,-1))
print(get_class(encoded, y, a))

cashew


In [15]:
im = color.rgb2gray(io.imread("watermelon_063.jpg"))
x = calcula_lbp_hist(im)
a = knn.predict(x.reshape(1,-1))
print(get_class(encoded, y, a))

watermelon


# Gerar a matriz de confusão

In [21]:
from sklearn.metrics import confusion_matrix

k_pred = knn.predict(X_test)
conf_matrix = confusion_matrix(k_test, k_pred)
conf_matrix = conf_matrix.astype('float') / conf_matrix.sum(axis=1)[:, np.newaxis]

In [22]:
np.savetxt("confusion_matrix_script/input.csv", conf_matrix, delimiter=';', fmt='%.3f')