In [1]:
from sklearn.svm import SVC
from imutils import paths
import numpy as np
import argparse
import imutils
import cv2
import os
import matplotlib.pyplot as plt
import pandas as pd
from tqdm import tqdm
import time

In [2]:
def image_to_feature_vector(image, size=(32, 32)):
	# resize the image to a fixed size, then flatten the image into
	# a list of raw pixel intensities
	return cv2.resize(image, size).flatten()

In [3]:
def extract_color_histogram(image, bins=(8, 8, 8)):
	# extract a 3D color histogram from the HSV color space using
	# the supplied number of `bins` per channel
	hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
	hist = cv2.calcHist([hsv], [0, 1, 2], None, bins,
		[0, 180, 0, 256, 0, 256])
	# handle normalizing the histogram if we are using OpenCV 2.4.X
	if imutils.is_cv2():
		hist = cv2.normalize(hist)
	# otherwise, perform "in place" normalization in OpenCV 3 (I
	# personally hate the way this is done
	else:
		cv2.normalize(hist, hist)
	# return the flattened histogram as the feature vector
	return hist.flatten()

In [4]:
df_dict = {
    'train': pd.read_csv('/home/stevie/datasets/chess_vision/256x256/dataframes/train.csv'),
    'test': pd.read_csv('/home/stevie/datasets/chess_vision/256x256/dataframes/test.csv'),
}

In [5]:
df = pd.concat(df_dict.values())

In [6]:
df['set'].unique()

array(['set_2', 'set_3', 'set_4', 'set_1'], dtype=object)

In [7]:
# grab the list of images that we'll be describing
print("[INFO] describing images...")
# initialize the raw pixel intensities matrix, the features matrix,
# and labels list
images = {}
features = {}
labels = {}
for set_str in df['set'].unique():
    images[set_str] = []
    features[set_str] = []
    labels[set_str] = []

[INFO] describing images...


In [8]:
print(len(df))
n = 10
new_df = pd.DataFrame()
df['combo'] = df.apply(lambda row: f"{row['set']}_{row['label']}", axis=1)
for combo in df['combo'].unique():
    selection = df[df['combo'] == combo]
    new_df = pd.concat([new_df, selection.sample(n=10)])
df = new_df
print(len(df))

54600
1280


In [9]:
# loop over the input images
print(len(df))
for i, row in tqdm(df.iterrows()):
	# load the image and extract the class label (assuming that our
	# path as the format: /path/to/dataset/{class}.{image_num}.jpg
	image = cv2.imread(row['path'])
	label = row['label']
	# extract raw pixel intensity "features", followed by a color
	# histogram to characterize the color distribution of the pixels
	# in the image
	pixels = image_to_feature_vector(image)
	hist = extract_color_histogram(image)
	# update the raw images, features, and labels matricies,
	# respectively
	set_str = row['set']
	images[set_str].append(pixels)
	features[set_str].append(hist)
	labels[set_str].append(label)

1280


1280it [00:01, 751.49it/s]


In [10]:
for set_str in df['set'].unique():
    images[set_str] = np.array(images[set_str])
    features[set_str] = np.array(features[set_str])
    labels[set_str] = np.array(labels[set_str])

    print(set_str)
    print("[INFO] pixels matrix: {:.2f}MB".format(images[set_str].nbytes / (1024 * 1000.0)))
    print("[INFO] features matrix: {:.2f}MB".format(features[set_str].nbytes / (1024 * 1000.0)))

set_2
[INFO] pixels matrix: 0.96MB
[INFO] features matrix: 0.64MB
set_3
[INFO] pixels matrix: 0.96MB
[INFO] features matrix: 0.64MB
set_4
[INFO] pixels matrix: 0.96MB
[INFO] features matrix: 0.64MB
set_1
[INFO] pixels matrix: 0.96MB
[INFO] features matrix: 0.64MB


In [11]:
data = {'input_type': [], 'kernel': [], 'fit_time': [], 'eval_time': [], 'accuracy': [], 'test_set': []}
for kernel in tqdm(('linear', 'poly_2', 'poly_3')):
    print(f"kernel: {kernel}")
    for input_type, input_data in ('images', images), ('features', features):
        for set_str in df['set'].unique():
            train_X = np.concatenate([d for s, d in input_data.items() if s != set_str])
            train_y = np.concatenate([d for s, d in labels.items() if s != set_str])
            test_X = input_data[set_str]
            test_y = labels[set_str]

            if 'poly' in kernel:
                model = SVC(kernel='poly', C=1, random_state=0, degree=int(kernel[-1]))
            else:
                model = SVC(kernel=kernel, C=1, random_state=0)

            t0 = time.time()
            model.fit(train_X, train_y)
            t1 = time.time()
            fit_time = (t1 - t0) / len(train_X)

            #Predict the response for test dataset
            t0 = time.time()
            acc = model.score(test_X, test_y)
            t1 = time.time()
            eval_time = (t1 - t0) / len(test_X)

            print(f"\tAccuracy: {acc}")
            print(f"\tFit Time: {fit_time}")
            print(f"\tEval Time: {eval_time}")

            data['input_type'].append(input_type)
            data['kernel'].append(kernel)
            data['fit_time'].append(fit_time)
            data['eval_time'].append(eval_time)
            data['accuracy'].append(acc)
            data['test_set'].append(set_str)

            result_df = pd.DataFrame.from_dict(data)
            result_df.to_csv('results/data_manual_cv.csv', index=False)

  0%|          | 0/3 [00:00<?, ?it/s]

kernel: linear
	Accuracy: 0.065625
	Fit Time: 0.0009967073798179626
	Eval Time: 0.0017169304192066193
	Accuracy: 0.053125
	Fit Time: 0.0009050428867340088
	Eval Time: 0.0016151495277881623
	Accuracy: 0.059375
	Fit Time: 0.000894999752442042
	Eval Time: 0.001610824465751648
	Accuracy: 0.046875
	Fit Time: 0.0008896102507909138
	Eval Time: 0.0016224384307861328
	Accuracy: 0.04375
	Fit Time: 0.00015477960308392844
	Eval Time: 0.00010523870587348938
	Accuracy: 0.025
	Fit Time: 0.00015436137715975443
	Eval Time: 0.00010835453867912293
	Accuracy: 0.053125
	Fit Time: 0.00015372062722841898
	Eval Time: 0.00011002346873283387


 33%|███▎      | 1/3 [00:06<00:12,  6.39s/it]

	Accuracy: 0.034375
	Fit Time: 0.00015488316615422566
	Eval Time: 0.00010831505060195922
kernel: poly_2
	Accuracy: 0.071875
	Fit Time: 0.0009201059738794963
	Eval Time: 0.0015964820981025697
	Accuracy: 0.04375
	Fit Time: 0.0009015460809071858
	Eval Time: 0.0016444876790046691
	Accuracy: 0.053125
	Fit Time: 0.0009273956219355265
	Eval Time: 0.001591505855321884
	Accuracy: 0.046875
	Fit Time: 0.000928284227848053
	Eval Time: 0.0017476923763751983
	Accuracy: 0.034375
	Fit Time: 0.00015959888696670532
	Eval Time: 0.00010571926832199096
	Accuracy: 0.025
	Fit Time: 0.00015921567877133687
	Eval Time: 0.00010874569416046143
	Accuracy: 0.04375
	Fit Time: 0.00016060918569564818
	Eval Time: 0.00012751668691635132


 67%|██████▋   | 2/3 [00:12<00:06,  6.40s/it]

	Accuracy: 0.040625
	Fit Time: 0.00015893702705701193
	Eval Time: 0.00010628774762153626
kernel: poly_3
	Accuracy: 0.071875
	Fit Time: 0.0010006030400594075
	Eval Time: 0.0016622960567474366
	Accuracy: 0.040625
	Fit Time: 0.000986680140097936
	Eval Time: 0.0016948238015174865
	Accuracy: 0.05
	Fit Time: 0.0009096764028072357
	Eval Time: 0.0016739308834075929
	Accuracy: 0.05625
	Fit Time: 0.000903525451819102
	Eval Time: 0.001663103699684143
	Accuracy: 0.028125
	Fit Time: 0.00015906840562820434
	Eval Time: 0.00010581612586975097
	Accuracy: 0.028125
	Fit Time: 0.00015994161367416382
	Eval Time: 0.00011280179023742676
	Accuracy: 0.04375
	Fit Time: 0.00015705029169718425
	Eval Time: 0.00011049583554267883


100%|██████████| 3/3 [00:19<00:00,  6.45s/it]

	Accuracy: 0.034375
	Fit Time: 0.0001567848026752472
	Eval Time: 0.00011159256100654603



