In [1]:
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from imutils import paths
import numpy as np
import argparse
import imutils
import cv2
import os
import matplotlib.pyplot as plt
import pandas as pd
from tqdm import tqdm
import time

In [2]:
def image_to_feature_vector(image, size=(32, 32)):
	# resize the image to a fixed size, then flatten the image into
	# a list of raw pixel intensities
	return cv2.resize(image, size).flatten()

In [3]:
def extract_color_histogram(image, bins=(8, 8, 8)):
	# extract a 3D color histogram from the HSV color space using
	# the supplied number of `bins` per channel
	hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
	hist = cv2.calcHist([hsv], [0, 1, 2], None, bins,
		[0, 180, 0, 256, 0, 256])
	# handle normalizing the histogram if we are using OpenCV 2.4.X
	if imutils.is_cv2():
		hist = cv2.normalize(hist)
	# otherwise, perform "in place" normalization in OpenCV 3 (I
	# personally hate the way this is done
	else:
		cv2.normalize(hist, hist)
	# return the flattened histogram as the feature vector
	return hist.flatten()

In [4]:
df_dict = {
    'train': pd.read_csv('/home/stevie/datasets/chess_vision/256x256/dataframes/train.csv'),
    'test': pd.read_csv('/home/stevie/datasets/chess_vision/256x256/dataframes/test.csv'),
}

In [5]:
df = pd.concat(df_dict.values())

In [6]:
df['set'].unique()

array(['set_2', 'set_3', 'set_4', 'set_1'], dtype=object)

In [7]:
# grab the list of images that we'll be describing
print("[INFO] describing images...")
# initialize the raw pixel intensities matrix, the features matrix,
# and labels list
images = {}
features = {}
labels = {}
for set_str in df['set'].unique():
    images[set_str] = []
    features[set_str] = []
    labels[set_str] = []

[INFO] describing images...


In [8]:
print(len(df))
n = 10
new_df = pd.DataFrame()
df['combo'] = df.apply(lambda row: f"{row['set']}_{row['label']}", axis=1)
for combo in df['combo'].unique():
    selection = df[df['combo'] == combo]
    new_df = pd.concat([new_df, selection.sample(n=n)])
df = new_df
print(len(df))

54600
1280


In [9]:
# loop over the input images
print(len(df))
for i, row in tqdm(df.iterrows()):
	# load the image and extract the class label (assuming that our
	# path as the format: /path/to/dataset/{class}.{image_num}.jpg
	image = cv2.imread(row['path'])
	label = row['label']
	# extract raw pixel intensity "features", followed by a color
	# histogram to characterize the color distribution of the pixels
	# in the image
	pixels = image_to_feature_vector(image)
	hist = extract_color_histogram(image)
	# update the raw images, features, and labels matricies,
	# respectively
	set_str = row['set']
	images[set_str].append(pixels)
	features[set_str].append(hist)
	labels[set_str].append(label)

1280


1280it [00:01, 738.64it/s]


In [10]:
for set_str in df['set'].unique():
    images[set_str] = np.array(images[set_str])
    features[set_str] = np.array(features[set_str])
    labels[set_str] = np.array(labels[set_str])

    print(set_str)
    print("[INFO] pixels matrix: {:.2f}MB".format(images[set_str].nbytes / (1024 * 1000.0)))
    print("[INFO] features matrix: {:.2f}MB".format(features[set_str].nbytes / (1024 * 1000.0)))

set_2
[INFO] pixels matrix: 0.96MB
[INFO] features matrix: 0.64MB
set_3
[INFO] pixels matrix: 0.96MB
[INFO] features matrix: 0.64MB
set_4
[INFO] pixels matrix: 0.96MB
[INFO] features matrix: 0.64MB
set_1
[INFO] pixels matrix: 0.96MB
[INFO] features matrix: 0.64MB


In [12]:
data = {'input_type': [], 'n': [], 'fit_time': [], 'eval_time': [], 'accuracy': [], 'test_set': []}
for n in tqdm((1, 2, 3, 4, 5, 10, 15, 20, 30, 40)):
    print(f"n: {n}")
    for input_type, input_data in ('images', images), ('features', features):
        for set_str in df['set'].unique():
            train_X = np.concatenate([d for s, d in input_data.items() if s != set_str])
            train_y = np.concatenate([d for s, d in labels.items() if s != set_str])
            test_X = input_data[set_str]
            test_y = labels[set_str]

            model = AdaBoostClassifier(base_estimator=DecisionTreeClassifier(criterion='gini', max_depth=20), n_estimators=n, random_state=0)

            t0 = time.time()
            model.fit(train_X, train_y)
            t1 = time.time()
            fit_time = (t1 - t0) / len(train_X)

            #Predict the response for test dataset
            t0 = time.time()
            acc = model.score(test_X, test_y)
            t1 = time.time()
            eval_time = (t1 - t0) / len(test_X)

            print(f"\tAccuracy: {acc}")
            print(f"\tFit Time: {fit_time}")
            print(f"\tEval Time: {eval_time}")

            data['input_type'].append(input_type)
            data['n'].append(n)
            data['fit_time'].append(fit_time)
            data['eval_time'].append(eval_time)
            data['accuracy'].append(acc)
            data['test_set'].append(set_str)

            result_df = pd.DataFrame.from_dict(data)
            result_df.to_csv('results/data_manual_cv.csv', index=False)

  0%|          | 0/10 [00:00<?, ?it/s]

n: 1
	Accuracy: 0.053125
	Fit Time: 0.0033222009738286336
	Eval Time: 4.550069570541382e-06
	Accuracy: 0.028125
	Fit Time: 0.003240975985924403
	Eval Time: 4.135072231292725e-06
	Accuracy: 0.034375
	Fit Time: 0.002997180571158727
	Eval Time: 4.202872514724732e-06
	Accuracy: 0.0625
	Fit Time: 0.003111796826124191
	Eval Time: 5.038827657699585e-06
	Accuracy: 0.071875
	Fit Time: 0.00017397676904996236
	Eval Time: 2.4303793907165526e-06
	Accuracy: 0.03125
	Fit Time: 0.0001990668475627899
	Eval Time: 2.1889805793762208e-06
	Accuracy: 0.03125
	Fit Time: 0.00015595207611719767
	Eval Time: 2.2903084754943846e-06


 10%|█         | 1/10 [00:12<01:55, 12.87s/it]

	Accuracy: 0.0375
	Fit Time: 0.00018030678232510883
	Eval Time: 2.150237560272217e-06
n: 2
	Accuracy: 0.04375
	Fit Time: 0.006505124519268672
	Eval Time: 6.6481530666351315e-06
	Accuracy: 0.03125
	Fit Time: 0.006378007680177688
	Eval Time: 6.385147571563721e-06
	Accuracy: 0.0375
	Fit Time: 0.0059282844265302025
	Eval Time: 6.103515625e-06
	Accuracy: 0.034375
	Fit Time: 0.006227557609478632
	Eval Time: 6.1124563217163084e-06
	Accuracy: 0.075
	Fit Time: 0.0003804070254166921
	Eval Time: 3.016740083694458e-06
	Accuracy: 0.034375
	Fit Time: 0.0004309174915154775
	Eval Time: 2.8729438781738283e-06
	Accuracy: 0.021875
	Fit Time: 0.0003482287128766378
	Eval Time: 2.9340386390686036e-06


 20%|██        | 2/10 [00:38<02:42, 20.34s/it]

	Accuracy: 0.03125
	Fit Time: 0.0004102398951848348
	Eval Time: 2.9623508453369142e-06
n: 3
	Accuracy: 0.04375
	Fit Time: 0.010453630238771439
	Eval Time: 9.173154830932618e-06
	Accuracy: 0.021875
	Fit Time: 0.010635919868946075
	Eval Time: 1.136884093284607e-05
	Accuracy: 0.04375
	Fit Time: 0.008841178317864736
	Eval Time: 1.2790411710739136e-05
	Accuracy: 0.04375
	Fit Time: 0.009748672445615133
	Eval Time: 1.0428577661514283e-05
	Accuracy: 0.071875
	Fit Time: 0.0006051721672217051
	Eval Time: 4.212558269500732e-06
	Accuracy: 0.040625
	Fit Time: 0.0006492813428243001
	Eval Time: 3.983825445175171e-06
	Accuracy: 0.021875
	Fit Time: 0.0005659726758797963
	Eval Time: 3.851950168609619e-06


 30%|███       | 3/10 [01:18<03:26, 29.55s/it]

	Accuracy: 0.025
	Fit Time: 0.000643072525660197
	Eval Time: 4.297494888305664e-06
n: 4
	Accuracy: 0.059375
	Fit Time: 0.012804331382115682
	Eval Time: 1.0988861322402955e-05
	Accuracy: 0.025
	Fit Time: 0.013403187443812688
	Eval Time: 2.2994726896286012e-05
	Accuracy: 0.059375
	Fit Time: 0.013636250793933869
	Eval Time: 1.4007836580276489e-05
	Accuracy: 0.046875
	Fit Time: 0.013038543860117595
	Eval Time: 1.0419636964797974e-05
	Accuracy: 0.053125
	Fit Time: 0.0008802428841590881
	Eval Time: 4.649162292480469e-06
	Accuracy: 0.0625
	Fit Time: 0.0009128173192342122
	Eval Time: 4.53069806098938e-06
	Accuracy: 0.028125
	Fit Time: 0.0008066256841023763
	Eval Time: 4.637986421585083e-06


 40%|████      | 4/10 [02:13<03:56, 39.35s/it]

	Accuracy: 0.03125
	Fit Time: 0.0009332592288653056
	Eval Time: 4.738569259643555e-06
n: 5
	Accuracy: 0.071875
	Fit Time: 0.016708536197741826
	Eval Time: 1.9624829292297363e-05
	Accuracy: 0.03125
	Fit Time: 0.015975062797466914
	Eval Time: 1.3368576765060424e-05
	Accuracy: 0.046875
	Fit Time: 0.014911227921644846
	Eval Time: 1.2075155973434449e-05
	Accuracy: 0.04375
	Fit Time: 0.01524228552977244
	Eval Time: 1.2074410915374755e-05
	Accuracy: 0.0375
	Fit Time: 0.001058781643708547
	Eval Time: 5.523860454559326e-06
	Accuracy: 0.046875
	Fit Time: 0.00113340492049853
	Eval Time: 5.60954213142395e-06
	Accuracy: 0.03125
	Fit Time: 0.0009920897583166758
	Eval Time: 5.245208740234375e-06


 50%|█████     | 5/10 [03:17<04:02, 48.43s/it]

	Accuracy: 0.034375
	Fit Time: 0.00112651859720548
	Eval Time: 5.5730342864990234e-06
n: 10
	Accuracy: 0.040625
	Fit Time: 0.0318296159307162
	Eval Time: 2.802908420562744e-05
	Accuracy: 0.0375
	Fit Time: 0.031116638084252676
	Eval Time: 2.709031105041504e-05
	Accuracy: 0.053125
	Fit Time: 0.03033429111043612
	Eval Time: 2.398267388343811e-05
	Accuracy: 0.071875
	Fit Time: 0.030174546192089716
	Eval Time: 2.2796541452407836e-05
	Accuracy: 0.03125
	Fit Time: 0.0022219441831111907
	Eval Time: 9.565800428390504e-06
	Accuracy: 0.046875
	Fit Time: 0.002253365268309911
	Eval Time: 9.275227785110474e-06
	Accuracy: 0.0375
	Fit Time: 0.0019738649328549704
	Eval Time: 9.742379188537597e-06


 60%|██████    | 6/10 [05:24<05:00, 75.12s/it]

	Accuracy: 0.046875
	Fit Time: 0.0022149413824081423
	Eval Time: 9.388476610183715e-06
n: 15
	Accuracy: 0.040625
	Fit Time: 0.04727797110875447
	Eval Time: 3.939792513847351e-05
	Accuracy: 0.028125
	Fit Time: 0.04608006204168002
	Eval Time: 3.343448042869568e-05
	Accuracy: 0.059375
	Fit Time: 0.04463533908128738
	Eval Time: 3.2798200845718386e-05
	Accuracy: 0.06875
	Fit Time: 0.043845184892416
	Eval Time: 3.2831728458404544e-05
	Accuracy: 0.046875
	Fit Time: 0.003166938324769338
	Eval Time: 1.3271719217300415e-05
	Accuracy: 0.046875
	Fit Time: 0.0033232202132542927
	Eval Time: 1.2990087270736694e-05
	Accuracy: 0.028125
	Fit Time: 0.002943528691927592
	Eval Time: 1.3007223606109619e-05


 70%|███████   | 7/10 [08:31<05:34, 111.64s/it]

	Accuracy: 0.053125
	Fit Time: 0.003236050655444463
	Eval Time: 1.3622641563415528e-05
n: 20
	Accuracy: 0.0375
	Fit Time: 0.06081546122829119
	Eval Time: 4.476457834243774e-05
	Accuracy: 0.03125
	Fit Time: 0.061627726753552756
	Eval Time: 4.631355404853821e-05
	Accuracy: 0.053125
	Fit Time: 0.05808847397565842
	Eval Time: 4.575103521347046e-05
	Accuracy: 0.078125
	Fit Time: 0.058320926626523335
	Eval Time: 4.266351461410522e-05
	Accuracy: 0.059375
	Fit Time: 0.0041880461076895395
	Eval Time: 1.7078220844268797e-05
	Accuracy: 0.034375
	Fit Time: 0.004378109673659007
	Eval Time: 1.779422163963318e-05
	Accuracy: 0.0375
	Fit Time: 0.003962472826242447
	Eval Time: 1.695379614830017e-05


 80%|████████  | 8/10 [12:37<05:08, 154.30s/it]

	Accuracy: 0.0375
	Fit Time: 0.0043859260777632395
	Eval Time: 1.6989558935165407e-05
n: 30
	Accuracy: 0.034375
	Fit Time: 0.08999915147821108
	Eval Time: 6.694942712783813e-05
	Accuracy: 0.034375
	Fit Time: 0.0893882691860199
	Eval Time: 6.68257474899292e-05
	Accuracy: 0.05625
	Fit Time: 0.08675497770309448
	Eval Time: 6.143972277641296e-05
	Accuracy: 0.065625
	Fit Time: 0.08692016750574112
	Eval Time: 6.460845470428467e-05
	Accuracy: 0.04375
	Fit Time: 0.006169505914052327
	Eval Time: 2.4884194135665894e-05
	Accuracy: 0.053125
	Fit Time: 0.0063654599090417225
	Eval Time: 2.46196985244751e-05
	Accuracy: 0.04375
	Fit Time: 0.005887951950232188
	Eval Time: 2.563297748565674e-05


 90%|█████████ | 9/10 [18:40<03:39, 219.57s/it]

	Accuracy: 0.03125
	Fit Time: 0.006582763542731603
	Eval Time: 2.4990737438201903e-05
n: 40
	Accuracy: 0.053125
	Fit Time: 0.11931518862644831
	Eval Time: 8.51854681968689e-05
	Accuracy: 0.05625
	Fit Time: 0.11828819687167803
	Eval Time: 8.533895015716553e-05
	Accuracy: 0.05625
	Fit Time: 0.11508521934350331
	Eval Time: 8.403882384300232e-05
	Accuracy: 0.0625
	Fit Time: 0.11544565583268801
	Eval Time: 8.515715599060059e-05
	Accuracy: 0.028125
	Fit Time: 0.008207898338635762
	Eval Time: 3.3190101385116576e-05
	Accuracy: 0.046875
	Fit Time: 0.008373567461967468
	Eval Time: 3.205239772796631e-05
	Accuracy: 0.040625
	Fit Time: 0.007746419062217077
	Eval Time: 3.147423267364502e-05


100%|██████████| 10/10 [26:41<00:00, 160.18s/it]

	Accuracy: 0.03125
	Fit Time: 0.008844535052776336
	Eval Time: 3.235936164855957e-05



