In [1]:
# importa os pacotes necessários
import numpy as np
import os, cv2, random
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline 


from sklearn import tree
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import train_test_split
import sklearn.metrics
from sklearn.metrics import classification_report
from sklearn.model_selection import  cross_val_score


from skimage.feature import greycomatrix, greycoprops
from skimage.feature import hog

In [2]:
def read_image(file_path, rows=128, cols=128, colors=True):
    img =  cv2.imread(file_path, cv2.IMREAD_COLOR if colors else cv2.IMREAD_GRAYSCALE)
    return cv2.resize(img, (rows, cols), interpolation=cv2.INTER_CUBIC)


def prep_data(images, rows=128, cols=128, colors=True):
    count = len(images)
    data = []

    for i, image_file in enumerate(images):
        image = read_image(image_file, rows=rows, cols=cols, colors=colors)
        data.append(image.T)
        if i%250 == 0: print('Processed {} of {}'.format(i, count))
    return data

# dois exemplos de descritores. Você deve criar outros mais robustos.
def image_to_feature_vector(image, size=(32, 32)):
    # resize the image to a fixed size, then flatten the image into
    # a list of raw pixel intensities
    return cv2.resize(image, size).flatten()

def extract_color_histogram(image, bins=(8, 8, 8)):     
    # extract a 3D color histogram from the HSV color space using
    # the supplied number of `bins` per channel
    #image = cv2.imread(image_file)        
    hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
    hist = cv2.calcHist([hsv], [0, 1, 2], None, bins,
        [0, 180, 0, 256, 0, 256])
    cv2.normalize(hist, hist)
    # return the flattened histogram as the feature vector
    return hist.flatten()

In [3]:
TRAIN_DIR = 'kaggle/cifar10/train/'
TRAIN_LBL = 'kaggle/cifar10/trainLabels.csv'

In [4]:
# Carregar nome das imagens existentes
train_imgs_name = [TRAIN_DIR+i for i in os.listdir(TRAIN_DIR)]
train_imgs = prep_data(train_imgs_name, rows=128, cols=128, colors=True)

Processed 0 of 50000
Processed 250 of 50000
Processed 500 of 50000
Processed 750 of 50000
Processed 1000 of 50000
Processed 1250 of 50000
Processed 1500 of 50000
Processed 1750 of 50000
Processed 2000 of 50000
Processed 2250 of 50000
Processed 2500 of 50000
Processed 2750 of 50000
Processed 3000 of 50000
Processed 3250 of 50000
Processed 3500 of 50000
Processed 3750 of 50000
Processed 4000 of 50000
Processed 4250 of 50000
Processed 4500 of 50000
Processed 4750 of 50000
Processed 5000 of 50000
Processed 5250 of 50000
Processed 5500 of 50000
Processed 5750 of 50000
Processed 6000 of 50000
Processed 6250 of 50000
Processed 6500 of 50000
Processed 6750 of 50000
Processed 7000 of 50000
Processed 7250 of 50000
Processed 7500 of 50000
Processed 7750 of 50000
Processed 8000 of 50000
Processed 8250 of 50000
Processed 8500 of 50000
Processed 8750 of 50000
Processed 9000 of 50000
Processed 9250 of 50000
Processed 9500 of 50000
Processed 9750 of 50000
Processed 10000 of 50000
Processed 10250 of 50

In [6]:
# Carregar relação número da imagem com o label
train_data = pd.read_csv(TRAIN_LBL)
print("Labels: [" + "] [".join(train_data.label.unique()) + ']')

Labels: [frog] [truck] [deer] [automobile] [bird] [horse] [ship] [cat] [dog] [airplane]


In [7]:
def one_hot(df, cols):
    """
    @param df pandas DataFrame
    @param cols a list of columns to encode 
    @return a DataFrame with one-hot encoding
    """
    df2 = pd.DataFrame()
    for each in cols:
        dummies = pd.get_dummies(df[each], prefix=each, drop_first=False)
        df2 = pd.concat([df, dummies], axis=1)
    return df2

df2 = one_hot(train_data,['label'])
del df2['label']
del df2['id']

print(df2.columns)

df2

Index(['label_airplane', 'label_automobile', 'label_bird', 'label_cat',
       'label_deer', 'label_dog', 'label_frog', 'label_horse', 'label_ship',
       'label_truck'],
      dtype='object')


Unnamed: 0,label_airplane,label_automobile,label_bird,label_cat,label_deer,label_dog,label_frog,label_horse,label_ship,label_truck
0,0,0,0,0,0,0,1,0,0,0
1,0,0,0,0,0,0,0,0,0,1
2,0,0,0,0,0,0,0,0,0,1
3,0,0,0,0,1,0,0,0,0,0
4,0,1,0,0,0,0,0,0,0,0
5,0,1,0,0,0,0,0,0,0,0
6,0,0,1,0,0,0,0,0,0,0
7,0,0,0,0,0,0,0,1,0,0
8,0,0,0,0,0,0,0,0,1,0
9,0,0,0,1,0,0,0,0,0,0


In [13]:
rawImages = []
descHist = []

count = len(train_imgs)

for i, image in enumerate(train_imgs):
    print(image)
    pixels = image_to_feature_vector(image)
    histogram = extract_color_histogram(image)
    
    rawImages.append(pixels)
    descHist.append(histogram)
        
    if i%1000 == 0: print('Processed {} of {}'.format(i, count))

[[[ 70  68  60 ... 117 120 120]
  [ 69  67  60 ... 116 119 120]
  [ 66  64  57 ... 111 115 116]
  ...
  [108 106  98 ...  69  76  77]
  [108 106  98 ...  67  74  75]
  [108 106  99 ...  67  73  75]]

 [[ 68  67  59 ... 145 146 146]
  [ 68  66  59 ... 144 145 146]
  [ 65  63  56 ... 141 143 143]
  ...
  [128 127 120 ...  90  97  99]
  [128 127 120 ...  86  93  94]
  [128 127 120 ...  85  92  93]]

 [[ 65  64  56 ... 178 178 178]
  [ 65  63  56 ... 178 177 177]
  [ 62  60  53 ... 176 176 176]
  ...
  [151 150 146 ... 121 128 130]
  [151 150 145 ... 117 124 125]
  [150 149 145 ... 116 123 124]]]


error: OpenCV(4.0.0) c:\projects\opencv-python\opencv\modules\imgproc\src\color.hpp:259: error: (-2:Unspecified error) in function '__cdecl cv::CvtHelper<struct cv::Set<3,4,-1>,struct cv::Set<3,-1,-1>,struct cv::Set<0,5,-1>,2>::CvtHelper(const class cv::_InputArray &,const class cv::_OutputArray &,int)'
> Invalid number of channels in input image:
>     'VScn::contains(scn)'
> where
>     'scn' is 128


In [None]:
#Avalia o primeiro descritor: as imagens raw

(X_train, X_test, y_train, y_test) = train_test_split(rawImages, df2, test_size=0.10, random_state=42)

classifiers = [
    KNeighborsClassifier(17),    
    DecisionTreeClassifier(),
    GaussianNB()]

for clf in classifiers:
    clf.fit(X_train, y_train)
    name = clf.__class__.__name__
    
    print("="*30)
    print(name)
    
    print('****Results****')
    train_predictions = clf.predict(X_test)
    print("did")
    acc = clf.score(X_test, y_test)
    print("accuracy: {:.2f}%".format(acc * 100))    

In [None]:
#Avalia o primeiro descritor: as imagens raw

(X_train, X_test, y_train, y_test) = train_test_split(rawImages, df2, test_size=0.10, random_state=42)

classifiers = [
    KNeighborsClassifier(17),    
    DecisionTreeClassifier(),
    GaussianNB()]

for clf in classifiers:
    clf.fit(X_train, y_train)
    name = clf.__class__.__name__
    
    print("="*30)
    print(name)
    
    print('****Results****')
    train_predictions = clf.predict(X_test)
    print("did")
    acc = clf.score(X_test, y_test)
    print("accuracy: {:.2f}%".format(acc * 100))    

In [None]:
#Avalia o segundo descritor: color histogram

(X_train, X_test, y_train, y_test) = train_test_split(descHist, labels, test_size=0.25, random_state=42)
classifiers = [KNeighborsClassifier(17), DecisionTreeClassifier(), GaussianNB()]

for clf in classifiers:
    clf.fit(X_train, y_train)
    name = clf.__class__.__name__
    
    print("="*30)
    print(name)
    
    print('****Results****')
    train_predictions = clf.predict(X_test)
    acc = clf.score(X_test, y_test)
    print("accuracy: {:.2f}%".format(acc * 100))   