In [None]:
# Load libraries
import pandas as pd
import keras.utils as image # pip install keras tensorflow
from sklearn.tree import DecisionTreeClassifier # Import Decision Tree Classifier
from sklearn.model_selection import train_test_split # Import train_test_split function
from sklearn import metrics #Import scikit-learn metrics module for accuracy calculation
from matplotlib.image import imread

import numpy as np
import cv2 as cv
import matplotlib.pyplot as plt
from pathlib import Path

# For visualizing decision tree
# pip install graphviz pydotplus

import pydotplus 
from IPython.display import Image
from sklearn.tree import export_graphviz
from six import StringIO  



# Keras emits a very large number of warnings, so suppress them.
# NOTE: filterwarnings('ignore') silences ALL Python warnings raised after this
# line (from every library, not only Keras) for the rest of the kernel session.
import warnings
warnings.filterwarnings('ignore')


# Simple confirmation that every import in this cell resolved.
print("Imports successful")

In [None]:
# Load the card-image dataset, preprocess it, and split it into train / validation / test sets.
#
# Every image is converted to grayscale and resized to 250x250, so each sample
# carries 250 * 250 = 62_500 features (one intensity per pixel, scaled to [0, 1]).
#
# Expected layout: one sub-folder of `imageset2` per class. The class id of an
# image is the index of its folder in alphabetical order (0, 1, ...).
# NOTE(review): later cells assume 0 = "3 of Spades" and 1 = "King of Clubs";
# confirm that the folder names sort in that order.

IMAGE_SIZE = (250, 250)

image_dir = Path("imageset2")
# iterdir() yields entries in arbitrary, OS-dependent order; sort so that the
# folder -> label mapping and the seeded split below are reproducible.
folders = sorted(directory for directory in image_dir.iterdir() if directory.is_dir())
categories = [fo.name for fo in folders]

count = 0          # number of images actually loaded
train_img = []     # preprocessed images, each of shape (250, 250, 1)
train_labels = []  # class id of each image, aligned with train_img
for i, direc in enumerate(folders):
    for file in sorted(direc.iterdir()):
        if not file.is_file():
            continue
        # cv.imread returns pixels in BGR order, which is what COLOR_BGR2GRAY
        # expects, and it is the same reader the custom-test cell uses, so
        # training and inference images are preprocessed identically.
        img = cv.imread(str(file))
        if img is None:
            # cv.imread signals "not a readable image" (e.g. .DS_Store) with None.
            print(f"Skipping unreadable file: {file}")
            continue
        count += 1
        img = cv.cvtColor(img, cv.COLOR_BGR2GRAY) # Convert to grayscale
        img_pred = cv.resize(img, IMAGE_SIZE, interpolation=cv.INTER_AREA)
        img_pred = image.img_to_array(img_pred)
        img_pred = img_pred / 255  # scale 8-bit intensities to [0, 1]
        train_img.append(img_pred)
        # Derive the label from the folder instead of hard-coding per-class counts.
        train_labels.append(i)

if not train_img:
    raise ValueError(f"No readable images found under {image_dir}")

X = np.array(train_img)
# Float labels (0.0, 1.0, ...) keep the dtype the hard-coded zeros/ones array had.
y = np.array(train_labels, dtype=float)

# Generate training, validation and test data using sklearn's built-in method.
# random_state seeds the shuffling so the splits are deterministic between runs.
# 80% train; the remaining 20% is halved into validation and test sets.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=69, test_size=0.2)
X_val, X_test, y_val, y_test = train_test_split(X_test, y_test, random_state=69, test_size=0.5)

In [None]:
# Train a decision tree on the flattened training images and report test accuracy.

# Create Decision Tree classifier object.
# scikit-learn randomly permutes the features at every split, so without a
# fixed seed two runs on the same data can produce different trees (and
# different accuracies). Use the same seed as the train/test split.
clf = DecisionTreeClassifier(random_state=69)

# The image array is 4-D, e.g. (72, 250, 250, 1) for the grayscale training
# set, but the tree needs a 2-D (n_samples, n_features) matrix.
# Flatten every image into one feature row; using -1 lets NumPy infer the
# feature count, so this works for any image size or channel count.
X_train_reshaped = X_train.reshape((X_train.shape[0], -1))

print(f"{X_train.shape} compressed into: {X_train_reshaped.shape}")

# Train Decision Tree classifier
clf = clf.fit(X_train_reshaped, y_train)

# Apply the same flattening to the test data
X_test_reshaped = X_test.reshape((X_test.shape[0], -1))

# Predict the response for test dataset
y_pred = clf.predict(X_test_reshaped)

# Model accuracy: how often is the classifier correct?
print(f"Accuracy: {round(metrics.accuracy_score(y_test, y_pred)*100, 2)}%")

In [None]:
# Render the fitted tree: export_graphviz writes a Graphviz "dot" description of
# the classifier into an in-memory buffer, and pydotplus turns that description
# into a PNG that is both saved to disk and shown inline in the notebook.
dot_data = StringIO()
export_graphviz(
    clf,
    out_file=dot_data,
    class_names=['3 of Spades', 'King of Clubs'],  # display names for the tree's classes
    filled=True,
    rounded=True,
    special_characters=True,
)
graph = pydotplus.graph_from_dot_data(dot_data.getvalue())

# Keep a copy on disk, then display the image as the cell output.
graph.write_png('decisionTrees.png')
Image(graph.create_png())

In [None]:
# Evaluate the trained tree on hand-picked photos that are not part of the dataset.


def load_card_image(path, size=(250, 250)):
    """Load one image file and turn it into a single flattened feature row.

    Applies the same grayscale -> resize -> scale-to-[0, 1] steps as the
    dataset-loading cell.

    Args:
        path: Location of the image file.
        size: (width, height) the image is resized to.

    Returns:
        Array of shape (1, width * height) holding the scaled grayscale pixels.

    Raises:
        FileNotFoundError: If the file is missing or cannot be decoded
            (cv.imread returns None in both cases).
    """
    img = cv.imread(str(path))
    if img is None:
        raise FileNotFoundError(f"Could not read image: {path}")
    gray = cv.cvtColor(img, cv.COLOR_BGR2GRAY)  # Convert to grayscale
    resized = cv.resize(gray, size, interpolation=cv.INTER_AREA)
    pixels = image.img_to_array(resized) / 255
    return np.reshape(pixels, (1, pixels.shape[0] * pixels.shape[1]))


# Class ids follow the training labels: 0 = 3 of Spades, 1 = King of Clubs.
labels = ["3 of Spades", "King of Clubs"]

# (file, true class id) for every custom test image.
custom_samples = [
    ('Testset/IMG_6056.jpg', 1),  # King of Clubs test image
    ('Testset/IMG_6051.jpg', 0),  # 3 of Spades test image
]

X_test_custom = np.array([load_card_image(path) for path, _ in custom_samples])
# The ground truth must follow the order of the images above (King first, then 3).
y_test_custom = [label for _, label in custom_samples]

# Test your implementation:

# Collapse (n_images, 1, n_features) into the 2-D matrix the tree expects.
nsamples = X_test_custom.shape[0]
X_test_custom_reshaped = X_test_custom.reshape((nsamples, -1))

y_pred_test = clf.predict(X_test_custom_reshaped)

# Show each individual prediction next to its expected card.
for (path, expected), predicted in zip(custom_samples, y_pred_test):
    print(f"{path}: predicted {labels[int(predicted)]}, expected {labels[expected]}")

print(f"Accuracy: {round(metrics.accuracy_score(y_test_custom, y_pred_test)*100, 2)}%")
