In [168]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import cv2
import glob
import os

from skimage.feature import greycoprops as gprops
from skimage.feature import greycomatrix as gmatrix

from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, confusion_matrix  

print(os.listdir("./dataset/cell_images/"))

['.DS_Store', 'Parasitized', 'Uninfected']


In [53]:
path = os.getcwd() + '/dataset/cell_images/'
path_images = os.listdir("./dataset/cell_images/")
if '.DS_Store' in path_images:
    path_images = path_images[1:]
path_parasitized = path + path_images[0] + '/'
path_uninfected = path + path_images[1] + '/'

In [121]:
imagesParasitized = [cv2.imread(file, 0) for file in glob.glob(path_parasitized+"*.png")]

In [122]:
imagesUninfected  = [cv2.imread(file, 0) for file in glob.glob(path_uninfected+"*.png")]

In [142]:
dataParasitized = {'contrast' : [], 'dissimilarity' : [], 'homogeneity': [], \
                   'energy' : [], 'correlation' : [], 'ASM' : [], 'status': []}
for parasitized in imagesParasitized:
    # print(k)
    # dataImage = {}
    glcm = gmatrix(parasitized, [5], [0], 256, symmetric=True, normed=True)
    for feature in ['contrast', 'dissimilarity', 'homogeneity', 'energy', 'correlation', 'ASM', 'status']:
        if feature == 'status':
            dataParasitized[feature].append(1)
        else:
            dataParasitized[feature].append(gprops(glcm, prop=feature)[0][0])
        # o [0][0] para pegar sómente o dado fora da lista, tá retornando array([[2311.26818367]])
    # dataParasitized.append(dataImage)

In [143]:
#dataUninfected = []
dataUninfected = {'contrast' : [], 'dissimilarity' : [], 'homogeneity': [], \
                   'energy' : [], 'correlation' : [], 'ASM' : [], 'status' : []}
for uninfected in imagesUninfected:
    # dataImage = {}
    glcm = gmatrix(parasitized, [5], [0], 256, symmetric=True, normed=True)
    for feature in ['contrast', 'dissimilarity', 'homogeneity', 'energy', 'correlation', 'ASM', 'status']:
        #dataImage[feature] = gprops(glcm, prop=feature)
        if feature == 'status':
            dataUninfected[feature].append(0)
        else:
            dataUninfected[feature].append(gprops(glcm, prop=feature)[0][0])
    # dataUninfected.append(dataImage)

In [144]:
dfParasitized = pd.DataFrame(dataParasitized)
dfParasitized.head()

Unnamed: 0,contrast,dissimilarity,homogeneity,energy,correlation,ASM,status
0,2311.268184,17.324746,0.399396,0.20528,0.73425,0.04214,1
1,2096.609522,15.652454,0.381823,0.216699,0.766454,0.046959,1
2,2668.714404,17.056937,0.568375,0.413402,0.815043,0.170901,1
3,2018.325148,15.040112,0.419798,0.261523,0.805033,0.068394,1
4,2194.942252,17.002131,0.350344,0.180188,0.700745,0.032468,1


In [147]:
dfUnifected = pd.DataFrame(dataUninfected)
dfUnifected.head()

Unnamed: 0,contrast,dissimilarity,homogeneity,energy,correlation,ASM,status
0,2348.901139,15.30212,0.45979,0.208844,0.765417,0.043616,0
1,2348.901139,15.30212,0.45979,0.208844,0.765417,0.043616,0
2,2348.901139,15.30212,0.45979,0.208844,0.765417,0.043616,0
3,2348.901139,15.30212,0.45979,0.208844,0.765417,0.043616,0
4,2348.901139,15.30212,0.45979,0.208844,0.765417,0.043616,0


In [155]:
data = pd.concat([dfParasitized, dfUnifected])
data = shuffle(data) # ambaralhando os dados
data.head()

Unnamed: 0,contrast,dissimilarity,homogeneity,energy,correlation,ASM,status
10219,2348.901139,15.30212,0.45979,0.208844,0.765417,0.043616,0
4935,1788.441789,14.885941,0.359637,0.197944,0.7554,0.039182,1
4517,2348.901139,15.30212,0.45979,0.208844,0.765417,0.043616,0
2667,2316.037229,15.103701,0.488922,0.274354,0.809403,0.07527,1
7051,2883.731612,17.576556,0.543877,0.324564,0.785889,0.105342,1


In [230]:
X = data.iloc[:, :-1].values
y = data.iloc[:, 6].values

In [260]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30)

In [266]:
scaler = StandardScaler()
scaler.fit(X_train)

X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Knn

In [267]:
from sklearn.neighbors import KNeighborsClassifier

In [268]:
knn = KNeighborsClassifier(n_neighbors=15)

In [269]:
knn.fit(X_train, y_train)

KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
                     metric_params=None, n_jobs=None, n_neighbors=15, p=2,
                     weights='uniform')

In [270]:
y_pred = knn.predict(X_test)

In [271]:
print(confusion_matrix(y_test, y_pred), end='\n\n')
print(' Score '.center(60,'-'), end='\n\n\n')
print(classification_report(y_test, y_pred)) 

[[4122    0]
 [   3 4143]]

-------------------------- Score ---------------------------


              precision    recall  f1-score   support

           0       1.00      1.00      1.00      4122
           1       1.00      1.00      1.00      4146

    accuracy                           1.00      8268
   macro avg       1.00      1.00      1.00      8268
weighted avg       1.00      1.00      1.00      8268



# Random Florest

In [272]:
from sklearn.ensemble import RandomForestClassifier

In [273]:
n_tree = 100

In [274]:
rfc = RandomForestClassifier(n_estimators=n_tree)

In [275]:
rfc.fit(X_train, y_train)

RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
                       max_depth=None, max_features='auto', max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=100,
                       n_jobs=None, oob_score=False, random_state=None,
                       verbose=0, warm_start=False)

In [276]:
y_pred = rfc.predict(X_test)

In [277]:
print(confusion_matrix(y_test, y_pred), end='\n\n')
print(' Score '.center(60,'-'), end='\n\n\n')
print(classification_report(y_test, y_pred)) 

[[4122    0]
 [   1 4145]]

-------------------------- Score ---------------------------


              precision    recall  f1-score   support

           0       1.00      1.00      1.00      4122
           1       1.00      1.00      1.00      4146

    accuracy                           1.00      8268
   macro avg       1.00      1.00      1.00      8268
weighted avg       1.00      1.00      1.00      8268

