In [None]:
# import the necessary packages
from sklearn.neighbors import KNeighborsClassifier
from imutils import paths
import numpy as np
import argparse
import imutils
import cv2
import os
import matplotlib.pyplot as plt
import pandas as pd
from tqdm import tqdm
import time

In [None]:
def image_to_feature_vector(image, size=(32, 32)):
	"""Flatten an image into a 1-D vector of raw pixel intensities.

	Args:
		image: Image array, e.g. as returned by ``cv2.imread``.
		size: Target size handed to ``cv2.resize`` before flattening.

	Returns:
		The resized image flattened to a one-dimensional array.

	Raises:
		ValueError: If ``image`` is ``None``, which is what ``cv2.imread``
			returns when a file is missing or cannot be decoded.
	"""
	# cv2.imread reports failure by returning None instead of raising, so
	# fail here with a clear message rather than a cryptic cv2.resize error
	if image is None:
		raise ValueError("image is None; check that the file exists and is a readable image")
	# resize the image to a fixed size, then flatten it so that every image
	# yields a vector of the same length
	return cv2.resize(image, size).flatten()

In [None]:
def extract_color_histogram(image, bins=(8, 8, 8)):
	"""Describe an image by a normalized 3D color histogram in HSV space.

	Args:
		image: BGR image array, e.g. as returned by ``cv2.imread``.
		bins: Number of histogram bins for the H, S and V channels.

	Returns:
		The normalized histogram flattened to a one-dimensional array.

	Raises:
		ValueError: If ``image`` is ``None``, which is what ``cv2.imread``
			returns when a file is missing or cannot be decoded.
	"""
	# cv2.imread reports failure by returning None instead of raising, so
	# fail here with a clear message rather than a cryptic cv2.cvtColor error
	if image is None:
		raise ValueError("image is None; check that the file exists and is a readable image")
	# extract a 3D color histogram from the HSV color space using
	# the supplied number of `bins` per channel; the ranges cover
	# hue in [0, 180) and saturation/value in [0, 256)
	hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
	hist = cv2.calcHist([hsv], [0, 1, 2], None, bins,
		[0, 180, 0, 256, 0, 256])
	# OpenCV 2.4.X: normalize returns the normalized histogram
	if imutils.is_cv2():
		hist = cv2.normalize(hist)
	# OpenCV 3 and later: normalize writes into the destination array
	# passed as the second argument, i.e. "in place"
	else:
		cv2.normalize(hist, hist)
	# return the flattened histogram as the feature vector
	return hist.flatten()

In [None]:
# Load the dataset index; later cells read its 'path', 'label' and 'set' columns.
df = pd.read_csv(filepath_or_buffer='../dataset/dataframes/10k.csv')

In [None]:
# Report how many rows of the index belong to each value of the 'set' column.
set_column = df['set']
for set_i in set_column.unique():
    n_rows = (set_column == set_i).sum()
    print(f"{set_i}: {n_rows}")

In [None]:
print("[INFO] describing images...")
# One accumulator list per value of the 'set' column for each kind of data:
# raw pixel vectors, color-histogram feature vectors, and class labels.
set_names = df['set'].unique()
images = {set_str: [] for set_str in set_names}
features = {set_str: [] for set_str in set_names}
labels = {set_str: [] for set_str in set_names}

In [None]:
# loop over the input images
print(len(df))
# start from empty accumulators so that re-running this cell neither
# duplicates samples nor fails on containers already converted to arrays
for set_str in df['set'].unique():
	images[set_str] = []
	features[set_str] = []
	labels[set_str] = []
# iterrows() has no length, so pass the row count explicitly to let tqdm
# display a total and an ETA
for i, row in tqdm(df.iterrows(), total=len(df)):
	# load the image from the row's 'path' column and take the class label
	# from its 'label' column
	image = cv2.imread(row['path'])
	# cv2.imread returns None instead of raising when the file is missing or
	# cannot be decoded; stop with the offending path instead of failing
	# later inside OpenCV
	if image is None:
		raise OSError(f"cv2.imread could not load image: {row['path']}")
	label = row['label']
	# extract raw pixel intensity "features", followed by a color
	# histogram to characterize the color distribution of the pixels
	# in the image
	pixels = image_to_feature_vector(image)
	hist = extract_color_histogram(image)
	# append to the raw images, features, and labels lists of the set
	# (the row's 'set' column) this image belongs to
	set_str = row['set']
	images[set_str].append(pixels)
	features[set_str].append(hist)
	labels[set_str].append(label)

In [None]:
# The labels below say "MB", so divide by 10**6 bytes; the previous divisor
# (1024 * 1000) mixed binary and decimal units and matched neither MB nor MiB.
BYTES_PER_MB = 1e6

# Convert each set's accumulated lists into NumPy arrays and report the memory
# taken by the pixel and histogram matrices.
for set_str in df['set'].unique():
    images[set_str] = np.array(images[set_str])
    features[set_str] = np.array(features[set_str])
    labels[set_str] = np.array(labels[set_str])

    print(set_str)
    print("[INFO] pixels matrix: {:.2f}MB".format(images[set_str].nbytes / BYTES_PER_MB))
    print("[INFO] features matrix: {:.2f}MB".format(features[set_str].nbytes / BYTES_PER_MB))

In [9]:
# Sweep k for a k-NN classifier on both input representations, holding out
# each set in turn as the test set and training on all the others.

# create the output directory up front so the first CSV write cannot fail
# after the model has already been fitted and scored
os.makedirs('results', exist_ok=True)

data = {'input_type': [], 'k': [], 'fit_time': [], 'eval_time': [], 'accuracy': [], 'test_set': []}
for k in tqdm((1, 2, 3, 4, 5, 10, 15, 20, 30, 40, 50)):
    print(f"k: {k}")
    for input_type, input_data in ('images', images), ('features', features):
        for set_str in df['set'].unique():
            # train on every set except the held-out one; `input_data` and
            # `labels` are iterated in the same key order, so rows of
            # train_X and train_y stay aligned
            train_X = np.concatenate([d for s, d in input_data.items() if s != set_str])
            train_y = np.concatenate([d for s, d in labels.items() if s != set_str])
            test_X = input_data[set_str]
            test_y = labels[set_str]

            model = KNeighborsClassifier(n_neighbors=k, n_jobs=2)

            # Fit the k-NN classifier; perf_counter is a monotonic,
            # high-resolution clock meant for timing short intervals.
            # The time is reported per training sample.
            t0 = time.perf_counter()
            model.fit(train_X, train_y)
            t1 = time.perf_counter()
            fit_time = (t1 - t0) / len(train_X)

            # Score on the held-out set; the time is reported per test sample
            t0 = time.perf_counter()
            acc = model.score(test_X, test_y)
            t1 = time.perf_counter()
            eval_time = (t1 - t0) / len(test_X)

            print(f"\tAccuracy: {acc}")
            print(f"\tFit Time: {fit_time}")
            print(f"\tEval Time: {eval_time}")

            data['input_type'].append(input_type)
            data['k'].append(k)
            data['fit_time'].append(fit_time)
            data['eval_time'].append(eval_time)
            data['accuracy'].append(acc)
            data['test_set'].append(set_str)

            # the CSV is rewritten after every run, so the results gathered
            # so far are on disk even if the sweep is interrupted
            result_df = pd.DataFrame.from_dict(data)
            result_df.to_csv('results/data_manual_cv.csv', index=False)

	Accuracy: 0.641
	Fit Time: 1.5726685523986817e-07
	Eval Time: 0.0009356241226196289
	Accuracy: 0.6585
	Fit Time: 4.3907761573791506e-07
	Eval Time: 0.00037012135982513426
	Accuracy: 0.6455
	Fit Time: 5.331039428710938e-07
	Eval Time: 0.00039397799968719484
	Accuracy: 0.635
	Fit Time: 4.3198466300964353e-07
	Eval Time: 0.00031164133548736573
	Accuracy: 0.6265
	Fit Time: 4.4330954551696777e-07
	Eval Time: 0.00030446982383728027


  9%|▉         | 1/11 [00:13<02:11, 13.15s/it]

	Accuracy: 0.633
	Fit Time: 4.303157329559326e-07
	Eval Time: 0.00029199624061584473
k: 2
	Accuracy: 0.62
	Fit Time: 1.6430020332336425e-07
	Eval Time: 0.0009867879152297974
	Accuracy: 0.6065
	Fit Time: 1.6814470291137696e-07
	Eval Time: 0.0009527803659439087
	Accuracy: 0.619
	Fit Time: 1.5801191329956054e-07
	Eval Time: 0.0010300974845886231
	Accuracy: 0.6125
	Fit Time: 1.652240753173828e-07
	Eval Time: 0.001026341676712036
	Accuracy: 0.6175
	Fit Time: 1.6301870346069335e-07
	Eval Time: 0.0009799071550369264
	Accuracy: 0.666
	Fit Time: 4.343390464782715e-07
	Eval Time: 0.0003208822011947632
	Accuracy: 0.6555
	Fit Time: 4.5123696327209473e-07
	Eval Time: 0.0003187791109085083
	Accuracy: 0.639
	Fit Time: 7.240772247314453e-07
	Eval Time: 0.00030015337467193605
	Accuracy: 0.6345
	Fit Time: 5.251169204711914e-07
	Eval Time: 0.0003059259653091431


 18%|█▊        | 2/11 [00:26<01:58, 13.16s/it]

	Accuracy: 0.64
	Fit Time: 4.456937313079834e-07
	Eval Time: 0.0003148980140686035
k: 3
	Accuracy: 0.647
	Fit Time: 1.583099365234375e-07
	Eval Time: 0.0009963974952697755
	Accuracy: 0.638
	Fit Time: 1.5735626220703126e-07
	Eval Time: 0.0009735426902770996
	Accuracy: 0.638
	Fit Time: 1.7049908638000489e-07
	Eval Time: 0.0010106818675994873
	Accuracy: 0.651
	Fit Time: 1.608729362487793e-07
	Eval Time: 0.0010384039878845214
	Accuracy: 0.6435
	Fit Time: 1.5398859977722167e-07
	Eval Time: 0.000982329249382019
	Accuracy: 0.6665
	Fit Time: 4.70280647277832e-07
	Eval Time: 0.0003426642417907715
	Accuracy: 0.6525
	Fit Time: 6.017088890075684e-07
	Eval Time: 0.00031383955478668214
	Accuracy: 0.651
	Fit Time: 4.730522632598877e-07
	Eval Time: 0.0003237628936767578
	Accuracy: 0.6405
	Fit Time: 4.3633580207824707e-07
	Eval Time: 0.0003452653884887695


 27%|██▋       | 3/11 [00:39<01:46, 13.28s/it]

	Accuracy: 0.63
	Fit Time: 4.348456859588623e-07
	Eval Time: 0.00033441245555877687
k: 4
	Accuracy: 0.6305
	Fit Time: 1.6823410987854003e-07
	Eval Time: 0.0010574716329574585
	Accuracy: 0.6265
	Fit Time: 1.6519427299499512e-07
	Eval Time: 0.0009385727643966675
	Accuracy: 0.634
	Fit Time: 1.652836799621582e-07
	Eval Time: 0.001008738398551941
	Accuracy: 0.624
	Fit Time: 1.697838306427002e-07
	Eval Time: 0.001086194634437561
	Accuracy: 0.6385
	Fit Time: 1.7279386520385742e-07
	Eval Time: 0.001232542872428894
	Accuracy: 0.672
	Fit Time: 4.881024360656738e-07
	Eval Time: 0.00041732704639434816
	Accuracy: 0.662
	Fit Time: 5.107820034027099e-07
	Eval Time: 0.00039538955688476563
	Accuracy: 0.6525
	Fit Time: 4.653036594390869e-07
	Eval Time: 0.00035090017318725584
	Accuracy: 0.644
	Fit Time: 4.284083843231201e-07
	Eval Time: 0.00033902597427368166


 36%|███▋      | 4/11 [00:54<01:36, 13.73s/it]

	Accuracy: 0.634
	Fit Time: 4.423856735229492e-07
	Eval Time: 0.000340546727180481
k: 5
	Accuracy: 0.649
	Fit Time: 1.576244831085205e-07
	Eval Time: 0.0010387455224990846
	Accuracy: 0.6435
	Fit Time: 1.6194581985473633e-07
	Eval Time: 0.001063275694847107
	Accuracy: 0.644
	Fit Time: 1.576840877532959e-07
	Eval Time: 0.0010318591594696044
	Accuracy: 0.6475
	Fit Time: 1.7699599266052246e-07
	Eval Time: 0.0010836174488067626
	Accuracy: 0.652
	Fit Time: 1.6051530838012696e-07
	Eval Time: 0.0011091678142547607
	Accuracy: 0.682
	Fit Time: 1.1378228664398194e-06
	Eval Time: 0.00039435279369354247
	Accuracy: 0.663
	Fit Time: 7.753372192382812e-07
	Eval Time: 0.0003930515050888062
	Accuracy: 0.6485
	Fit Time: 7.344186305999756e-07
	Eval Time: 0.0003701739311218262
	Accuracy: 0.647
	Fit Time: 4.3520331382751464e-07
	Eval Time: 0.00039420449733734134


 45%|████▌     | 5/11 [01:08<01:24, 14.08s/it]

	Accuracy: 0.632
	Fit Time: 5.804598331451416e-07
	Eval Time: 0.00040538454055786134
k: 10
	Accuracy: 0.639
	Fit Time: 2.522170543670654e-07
	Eval Time: 0.0011190981864929199
	Accuracy: 0.6335
	Fit Time: 1.6865134239196777e-07
	Eval Time: 0.0010718958377838135
	Accuracy: 0.6495
	Fit Time: 1.6137957572937012e-07
	Eval Time: 0.0011427910327911376
	Accuracy: 0.64
	Fit Time: 1.53958797454834e-07
	Eval Time: 0.001031708002090454
	Accuracy: 0.6525
	Fit Time: 1.7353892326354982e-07
	Eval Time: 0.0010530166625976561
	Accuracy: 0.6805
	Fit Time: 4.792511463165283e-07
	Eval Time: 0.00043998336791992187
	Accuracy: 0.664
	Fit Time: 5.361735820770264e-07
	Eval Time: 0.00034629178047180175
	Accuracy: 0.665
	Fit Time: 5.384385585784912e-07
	Eval Time: 0.00034641945362091063
	Accuracy: 0.6425
	Fit Time: 5.59687614440918e-07
	Eval Time: 0.00036211156845092774


 55%|█████▍    | 6/11 [01:23<01:11, 14.29s/it]

	Accuracy: 0.638
	Fit Time: 6.165504455566406e-07
	Eval Time: 0.00038988208770751956
k: 15
	Accuracy: 0.6395
	Fit Time: 2.0834803581237792e-07
	Eval Time: 0.0010618399381637574
	Accuracy: 0.6315
	Fit Time: 1.9860267639160155e-07
	Eval Time: 0.001101470708847046
	Accuracy: 0.6595
	Fit Time: 1.9112229347229005e-07
	Eval Time: 0.0011186177730560303
	Accuracy: 0.6575
	Fit Time: 1.6549229621887207e-07
	Eval Time: 0.0010907630920410157
	Accuracy: 0.651
	Fit Time: 1.838207244873047e-07
	Eval Time: 0.0010739783048629762
	Accuracy: 0.666
	Fit Time: 6.106793880462646e-07
	Eval Time: 0.0003920656442642212
	Accuracy: 0.658
	Fit Time: 4.347264766693115e-07
	Eval Time: 0.00035981738567352297
	Accuracy: 0.6725
	Fit Time: 5.352497100830078e-07
	Eval Time: 0.00037131309509277346
	Accuracy: 0.647
	Fit Time: 4.3708086013793946e-07
	Eval Time: 0.0004032251834869385


 64%|██████▎   | 7/11 [01:38<00:57, 14.46s/it]

	Accuracy: 0.6325
	Fit Time: 5.623996257781982e-07
	Eval Time: 0.00037646079063415527
k: 20
	Accuracy: 0.6245
	Fit Time: 1.8540024757385254e-07
	Eval Time: 0.0010338393449783325
	Accuracy: 0.626
	Fit Time: 1.6838312149047853e-07
	Eval Time: 0.0012957878112792968
	Accuracy: 0.667
	Fit Time: 2.863407135009766e-07
	Eval Time: 0.0010951204299926758
	Accuracy: 0.651
	Fit Time: 1.7088651657104492e-07
	Eval Time: 0.001246752381324768
	Accuracy: 0.6475
	Fit Time: 2.8502941131591797e-07
	Eval Time: 0.0013419078588485717
	Accuracy: 0.675
	Fit Time: 4.679858684539795e-07
	Eval Time: 0.000389945387840271
	Accuracy: 0.653
	Fit Time: 4.3907761573791506e-07
	Eval Time: 0.0004484677314758301
	Accuracy: 0.6675
	Fit Time: 4.94241714477539e-07
	Eval Time: 0.00040049803256988527
	Accuracy: 0.6455
	Fit Time: 4.3055415153503416e-07
	Eval Time: 0.00038032400608062746


 73%|███████▎  | 8/11 [01:54<00:44, 15.00s/it]

	Accuracy: 0.6325
	Fit Time: 4.999637603759766e-07
	Eval Time: 0.0003846970796585083
k: 30
	Accuracy: 0.622
	Fit Time: 2.906322479248047e-07
	Eval Time: 0.0011015058755874634
	Accuracy: 0.625
	Fit Time: 1.729726791381836e-07
	Eval Time: 0.0010996322631835938
	Accuracy: 0.6715
	Fit Time: 1.6945600509643555e-07
	Eval Time: 0.0010521622896194458
	Accuracy: 0.653
	Fit Time: 2.594888210296631e-07
	Eval Time: 0.0011008943319320678
	Accuracy: 0.646
	Fit Time: 1.6194581985473633e-07
	Eval Time: 0.0010949071645736694
	Accuracy: 0.6725
	Fit Time: 5.233883857727051e-07
	Eval Time: 0.00042945361137390137
	Accuracy: 0.6585
	Fit Time: 4.4125318527221677e-07
	Eval Time: 0.0003857724666595459
	Accuracy: 0.6515
	Fit Time: 5.343854427337647e-07
	Eval Time: 0.00046073079109191893
	Accuracy: 0.649
	Fit Time: 6.16908073425293e-07
	Eval Time: 0.000388993501663208


 82%|████████▏ | 9/11 [02:09<00:30, 15.04s/it]

	Accuracy: 0.626
	Fit Time: 5.356669425964356e-07
	Eval Time: 0.0003912428617477417
k: 40
	Accuracy: 0.6295
	Fit Time: 1.7461180686950684e-07
	Eval Time: 0.0010449644327163697
	Accuracy: 0.62
	Fit Time: 1.8534064292907716e-07
	Eval Time: 0.0010368508100509644
	Accuracy: 0.676
	Fit Time: 1.868009567260742e-07
	Eval Time: 0.0010305608510971069
	Accuracy: 0.648
	Fit Time: 2.2614002227783203e-07
	Eval Time: 0.0011403380632400513
	Accuracy: 0.6345
	Fit Time: 1.8021464347839355e-07
	Eval Time: 0.0011761146783828736
	Accuracy: 0.666
	Fit Time: 5.713403224945068e-07
	Eval Time: 0.0003891690969467163
	Accuracy: 0.663
	Fit Time: 6.83605670928955e-07
	Eval Time: 0.0004732702970504761
	Accuracy: 0.664
	Fit Time: 5.610883235931397e-07
	Eval Time: 0.0004732705354690552
	Accuracy: 0.643
	Fit Time: 5.382299423217773e-07
	Eval Time: 0.0003617619276046753


 91%|█████████ | 10/11 [02:24<00:15, 15.06s/it]

	Accuracy: 0.62
	Fit Time: 5.28186559677124e-07
	Eval Time: 0.000369623064994812
k: 50
	Accuracy: 0.624
	Fit Time: 1.8015503883361816e-07
	Eval Time: 0.0010609769821166993
	Accuracy: 0.618
	Fit Time: 1.7446279525756836e-07
	Eval Time: 0.0011195944547653198
	Accuracy: 0.673
	Fit Time: 1.7145276069641114e-07
	Eval Time: 0.0010836321115493775
	Accuracy: 0.641
	Fit Time: 1.557767391204834e-07
	Eval Time: 0.0011775192022323608
	Accuracy: 0.623
	Fit Time: 1.939535140991211e-07
	Eval Time: 0.001097322940826416
	Accuracy: 0.6715
	Fit Time: 5.181729793548584e-07
	Eval Time: 0.000486885666847229
	Accuracy: 0.656
	Fit Time: 5.594491958618164e-07
	Eval Time: 0.0004905900955200196
	Accuracy: 0.6515
	Fit Time: 5.314350128173828e-07
	Eval Time: 0.000404451847076416
	Accuracy: 0.644
	Fit Time: 5.265474319458008e-07
	Eval Time: 0.00035755693912506106


100%|██████████| 11/11 [02:40<00:00, 14.56s/it]

	Accuracy: 0.6295
	Fit Time: 5.300641059875489e-07
	Eval Time: 0.0003580529689788818



