In [1]:
#### Dependencies

from anytree import Node, NodeMixin, RenderTree
from PIL import Image
from skimage.feature import greycomatrix, greycoprops

import glob
import math
import matplotlib.pyplot as plt
import os
import random


In [3]:
#### Global variables

model = []
class_names = []
map_8bit_to_3bit = [i // 32 for i in range(256)]


In [4]:
#### Classes

class MyBaseClass(object):
    foo = 4
    
class TreeNode(MyBaseClass, NodeMixin):
    def __init__(self, subset, purity, glcm_index=0, glcm_median=0, dominant_class='', parent=None, children=None):
        super(TreeNode, self).__init__()
        self.subset = subset
        self.purity = purity
        self.glcm_index = glcm_index
        self.glcm_median = glcm_median
        self.dominant_class = dominant_class
        if(not children == None):
            self.children = children
            

In [17]:
#### Functions

def load_img(img_path):
    return Image.open(img_path).convert('L')

def get_img_size(img):
    return img.size

def print_img(img):
    plt.imshow(img, cmap='gray')
    
def get_resized_img(img, dimension):
    return img.resize(dimension)

def get_img_colors(img):
    return list(img.getdata())

def get_3bit_img_colors(img):
    img_colors = get_img_colors(img)
    
    loop_count = 0
    for img_color in img_colors:
        img_colors[loop_count] = map_8bit_to_3bit[img_color]
        
        loop_count += 1
        
    return img_colors

def get_img_matrix(img_colors):
    img_matrix = []
    
    loop_count = 0
    img_square_dimension = int(math.sqrt(len(img_colors)))
    
    for row in range(img_square_dimension):
        temp_row = []
        for col in range(img_square_dimension):
            temp_row.append(img_colors[loop_count])
            
            loop_count += 1
        img_matrix.append(temp_row)
        
    return img_matrix

def get_img_features(img):
    img_3bit_colors = get_3bit_img_colors(img)
    img_matrix = get_img_matrix(img_3bit_colors)
    
    glcm_matrix = greycomatrix(img_matrix, distances=[1], angles=[0], levels=12, symmetric=False, normed=False)
    glcm_components = ['contrast', 'correlation', 'energy', 'homogeneity', 'ASM', 'dissimilarity']
    
    img_features = []
    for glcm_component in glcm_components:
        img_features.append(greycoprops(glcm_matrix, glcm_component)[0][0])
        
    return tuple(img_features)

# Modelling

def init_model():
    global model
    global class_names
    
    model = []
    class_names = []
    
def load_class_names(training_path):
    global class_names
    
    class_names = [class_name for class_name in os.listdir(training_path)]
            
def insert_img_features_into_model(class_name, img_features):
    global model
    
    row_tuple = (class_name,) + img_features
    model.append(row_tuple)
        
def load_preprocessed_img(img_path, dimension=(128, 128)):
    img = load_img(img_path)
    img = get_resized_img(img, dimension)
    
    return img

def train(training_path, img_type='*.jpg'):
    print('Training...')
    for class_name in class_names:
        img_paths = glob.glob(training_path + class_name + '/' + img_type)
        
        for img_path in img_paths:            
            img = load_preprocessed_img(img_path)
            img_features = get_img_features(img)

            insert_img_features_into_model(class_name, img_features)
            
    print('--> Done')
    
# Decision Tree Stuffs

def get_splitted_model(sample_rate=0.8):
    sample_model = []
    test_sample_model = []
    
    for row in model:
        random_splitter = random.uniform(0, 1)
        
        if(random_splitter <= sample_rate):
            sample_model.append(row)
        else:
            test_sample_model.append(row)
            
    return sample_model, test_sample_model


def get_subset_purity(subset, minimum_purity=0.8):
    if(len(subset) > 0):       
        total_class = len(class_names)
        total_class_score = len(subset)
        subset_class_scores = [0 for i in range(total_class)]
        
        for row in subset:
            row_class_name = row[0]
            subset_class_scores[class_names.index(row_class_name)] += 1
            
        subset_purity = 0
        for class_index in range(total_class):
            subset_purity += (subset_class_scores[class_index] / total_class_score) ** 2
            
        dominant_class = ''
        if(subset_purity >= minimum_purity):
            maximum_class_scores = max(subset_class_scores)
            dominant_class_index = subset_class_scores.index(maximum_class_scores)
            dominant_class = class_names[dominant_class_index]
            
        return (subset_purity, dominant_class)
    else:
        return (0, '')
    
def get_tree_seed(subset, minimum_purity=0.8):
    tree = TreeNode(subset, get_subset_purity(subset, minimum_purity)[0])
    
    return tree

def get_decision_tree(decision_tree, minimum_purity=0.8, n_branch=2, glcm_length=6):
    if(decision_tree.purity >= minimum_purity or len(decision_tree.subset) == 0):
        return decision_tree
    else:
        subset = decision_tree.subset
        
        branch_future_subset_purities = [[glcm_index, 0] for glcm_index in range(glcm_length)]
        branch_glcm_medians           = [[0 for glcm_index in range(glcm_length)] for i in range(n_branch)]
        branch_subsets                = [[[] for glcm_index in range(glcm_length)] for i in range(n_branch)]
        branch_purities               = [[0 for glcm_index in range(glcm_length)] for i in range(n_branch)]
        branch_dominants              = [['' for glcm_index in range(glcm_length)] for i in range(n_branch)]
        
        for glcm_index in range(glcm_length):
            sorted_subset = sorted(subset, key=lambda x: x[glcm_index + 1])
            sorted_subset_length = len(sorted_subset)
            
            if(sorted_subset_length < n_branch):
                n_branch = sorted_subset_length
                
            row_per_branch = int(sorted_subset_length / n_branch)
            branch_constraint = [row_per_branch * i for i in range(n_branch + 1)]
            branch_constraint[-1] = sorted_subset_length
            
            for branch_index in range(n_branch):
                branch_subsets[branch_index][glcm_index] = sorted_subset[branch_constraint[branch_index]:branch_constraint[branch_index + 1]]
                
                branch_glcm_medians[branch_index][glcm_index] = branch_subsets[branch_index][glcm_index][-1][glcm_index + 1]
                branch_purity_and_dominant = get_subset_purity(branch_subsets[branch_index][glcm_index], minimum_purity)
                branch_purities[branch_index][glcm_index] = branch_purity_and_dominant[0]
                branch_dominants[branch_index][glcm_index] = branch_purity_and_dominant[1]
                
                if(branch_purities[branch_index][glcm_index] >= minimum_purity):
                    branch_future_subset_purities[glcm_index][1] += 1
                    
        sorted_branch_future_subset_purities = sorted(branch_future_subset_purities, key= lambda x: -x[1])
        glcm_index = sorted_branch_future_subset_purities[0][0]
            
        for branch_index in range(n_branch):
            branch_subset = branch_subsets[branch_index][glcm_index]
            branch_purity = branch_purities[branch_index][glcm_index]
            branch_glcm_median = branch_glcm_medians[branch_index][glcm_index]
            branch_dominant = branch_dominants[branch_index][glcm_index]
                
            decision_tree_branch = TreeNode(branch_subset, branch_purity, glcm_index, branch_glcm_median, branch_dominant)
            decision_tree_branch = get_decision_tree(decision_tree_branch, minimum_purity, n_branch)
                
            if(not decision_tree_branch == None):
                decision_tree_branch.parent = decision_tree
            
        return decision_tree
    
def print_decision_tree(decision_tree):
    print(RenderTree(decision_tree))
    
def get_img_features_class(img_features, decision_tree, minimum_purity=0.8):
    temp_decision_tree = decision_tree
    
    while(temp_decision_tree.purity < minimum_purity and not temp_decision_tree.children == ()):
        childrens = temp_decision_tree.children
        total_childs = len(childrens)
        
        is_glcm_median_not_available = True
        child_index = 0
        for children in childrens:
            glcm_index = children.glcm_index
            glcm_median = children.glcm_median
            
            if(img_features[glcm_index] <= glcm_median):
                is_glcm_median_not_available = False
                break
                
            child_index += 1
                
        if(child_index >= total_childs):
            temp_decision_tree = childrens[-1]
        else:
            temp_decision_tree = childrens[child_index]
            
    img_class_name = temp_decision_tree.dominant_class
    
    return img_class_name
    
def get_img_class(img, decision_tree, minimum_purity=0.8):
    img_features = get_img_features(img)
            
    img_class_name = get_img_features_class(img_features, decision_tree, minimum_purity)
    
    return img_class_name

def test(decision_tree, test_sample_model, minimum_purity=0.8):
    print('Testing...')
    total_correct_answer = 0
    total_guess = 0

    for row in test_sample_model:
        expected_img_class_name = row[0]
        test_img_features = row[1:]
        test_img_class_name = get_img_features_class(test_img_features, decision_tree, minimum_purity)
            
        if(expected_img_class_name == test_img_class_name):
            total_correct_answer += 1
                
        total_guess += 1
            
    accuracy = (total_correct_answer / total_guess) * 100
    
    print('--> Done, accuracy = ' + str(accuracy) + ' %')
    
    return accuracy
        

def validate(validation_path, decision_tree, minimum_purity=0.8, img_type='*.jpg'):
    print('Validating...')
    total_correct_answer = 0
    total_guess = 0
    
    for class_name in class_names:
        img_paths = glob.glob(validation_path + class_name + '/' + img_type)
        for img_path in img_paths:
            img = load_preprocessed_img(img_path)
            img_class_name = get_img_class(img, decision_tree, minimum_purity)
            
            if(class_name == img_class_name):
                total_correct_answer += 1
                
            total_guess += 1
            
    print('--> Done')
    
    accuracy = (total_correct_answer / total_guess) * 100
    
    return accuracy


In [18]:
#### Main

# Defining paths
root_path       = './'
training_path   = root_path + 'training/'
validation_path = root_path + 'validation/'
test_path       = root_path + 'test/'

# Props
epochs = 5
sample_rate = 0.80
img_type = '*.jpg'
n_branch = 4
minimum_purity = 0.95

# Init
init_model()
load_class_names(training_path)

train(training_path, img_type)
sample_model = []
test_sample_model = []
for epoch in range(epochs):
    print('Epoch ' + str(epoch + 1))
    new_sample_model, new_test_sample_model = get_splitted_model(sample_rate)
    sample_model += new_sample_model
    test_sample_model += new_test_sample_model
    dtree_seed = get_tree_seed(sample_model, minimum_purity)
    dtree = get_decision_tree(dtree_seed, minimum_purity, n_branch)
    test(dtree, test_sample_model, minimum_purity)
    print()

model_accuracy = validate(validation_path, dtree, minimum_purity, img_type)

print(model_accuracy)

Training...
--> Done
Epoch 1
Testing...
--> Done, accuracy = 91.30434782608695 %

Epoch 2
Testing...
--> Done, accuracy = 98.0 %

Epoch 3
Testing...
--> Done, accuracy = 100.0 %

Epoch 4
Testing...
--> Done, accuracy = 100.0 %

Epoch 5
Testing...
--> Done, accuracy = 100.0 %

Validating...
--> Done
91.015625


In [19]:
print_decision_tree(dtree)

<__main__.TreeNode object at 0x00000296915FA088>
├── <__main__.TreeNode object at 0x0000029691483288>
│   ├── <__main__.TreeNode object at 0x000002969041A448>
│   │   ├── <__main__.TreeNode object at 0x00000296915117C8>
│   │   │   ├── <__main__.TreeNode object at 0x0000029690304048>
│   │   │   ├── <__main__.TreeNode object at 0x0000029690304A88>
│   │   │   ├── <__main__.TreeNode object at 0x0000029690304B48>
│   │   │   └── <__main__.TreeNode object at 0x0000029690304C08>
│   │   │       ├── <__main__.TreeNode object at 0x000002969149CBC8>
│   │   │       ├── <__main__.TreeNode object at 0x000002969149CB08>
│   │   │       ├── <__main__.TreeNode object at 0x000002969149C048>
│   │   │       └── <__main__.TreeNode object at 0x0000029691541088>
│   │   ├── <__main__.TreeNode object at 0x0000029690417C08>
│   │   ├── <__main__.TreeNode object at 0x0000029691496548>
│   │   └── <__main__.TreeNode object at 0x00000296914967C8>
│   ├── <__main__.TreeNode object at 0x00000296903117C8>
│   

In [20]:
img = load_img(test_path + 'cap.jpg')
print(get_img_class(img, dtree, minimum_purity))
# test correct

cap


In [21]:
img = load_img(test_path + 'cap2.jpg')
print(get_img_class(img, dtree, minimum_purity))
# test false

tulis


In [22]:
img = load_img(test_path + 'tulis.jpg')
print(get_img_class(img, dtree, minimum_purity))
# test correct

tulis
