<a href="https://colab.research.google.com/github/nisanuro/CNG483-Project1/blob/master/CNG483_Project1_1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
import os
import numpy as np
import matplotlib.pyplot as plt
import cv2
from collections import Counter
from sklearn.neighbors import KNeighborsClassifier
from sklearn  import preprocessing, metrics
from google.colab import output
from google.colab import drive
import concurrent.futures
drive.mount('/content/drive')

### **Reading images**

In [0]:
def read_images(path):
    """Load every decodable image found directly inside a directory.

    Parameters
    ----------
    path : str
        Directory whose entries are handed to ``cv2.imread``.

    Returns
    -------
    list
        One RGB image array per entry OpenCV could decode, in sorted
        filename order. Entries that cannot be decoded are skipped.
    """
    images = []

    # Sort so the result order does not depend on the filesystem listing order.
    for filename in sorted(os.listdir(path)):
        # Build the full path from the `path` argument of this function.
        img = cv2.imread(os.path.join(path, filename), cv2.IMREAD_COLOR)
        if img is None:
            # imread signals "could not decode" with None; converting None
            # would raise, so such entries are left out.
            continue
        # imread delivers BGR channel order; store images as RGB.
        images.append(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))

    return images

### **Histogram for single channel**

In [0]:
def histogram(image, b):
    """Count intensities into ``b`` equal-width bins covering [0, 256).

    Parameters
    ----------
    image : array-like
        Single-channel intensity values; any shape, flattened before counting.
    b : int
        Number of bins (must be at least 1).

    Returns
    -------
    numpy.ndarray
        int64 array of length ``b``; entry ``i`` is the number of values in
        ``[i * 256 / b, (i + 1) * 256 / b)``.

    Raises
    ------
    ValueError
        If ``b`` is smaller than 1.
    """
    b = int(b)
    if b < 1:
        raise ValueError("b must be a positive number of bins")

    values = np.asarray(image).ravel()
    # Every one of the b bins is kept, and bin counts that do not divide 256
    # evenly are supported because the edges are spread uniformly over the range.
    counts, _ = np.histogram(values, bins=b, range=(0, 256))

    return counts.astype('int64')
  

### **Color histogram**

In [0]:
def color_histogram(img, b):
    """Combine the per-channel histograms of a 3-channel image into one vector.

    Each of the first three channel planes is binned with ``histogram(plane, b)``.
    The result has one entry for every (first, second, third) bin triple, and
    that entry is the sum of the three corresponding bin counts.

    NOTE(review): the entries are sums of per-channel counts, not joint
    pixel counts per colour cell - confirm this is the intended feature.

    Parameters
    ----------
    img : numpy.ndarray
        Image that ``cv2.split`` can separate into at least three planes.
    b : int
        Number of bins per channel.

    Returns
    -------
    numpy.ndarray
        1-D array with ``b ** 3`` entries.
    """
    # zip stops after the three names, so only the first three planes are binned.
    per_channel = dict(zip("bgr", (histogram(plane, b) for plane in cv2.split(img))))
    first = per_channel["b"]
    second = per_channel["g"]
    third = per_channel["r"]

    # Element [j, i, k] is second[j] + first[i] + third[k]; flattening in C
    # order gives the layout: second-channel bin slowest, third-channel fastest.
    combined = second[:, None, None] + first[None, :, None] + third[None, None, :]

    return np.array(combined.ravel())

### **Dividing image into grid**

In [0]:
def split_image_into_grids(image, grid):
    """Cut an image into ``grid`` x ``grid`` equally sized tiles.

    Tiles are ``height // grid`` rows by ``width // grid`` columns. When the
    image size is not a multiple of ``grid`` the leftover rows/columns at the
    bottom/right edge are dropped so that all tiles share one shape and can be
    stacked into a single array.

    Parameters
    ----------
    image : numpy.ndarray
        Array whose first two axes are height and width.
    grid : int
        Number of tiles along each axis.

    Returns
    -------
    numpy.ndarray
        Stacked tiles, ordered row by row (top-left first); the first axis has
        length ``grid * grid``.

    Raises
    ------
    ValueError
        If ``grid`` is smaller than 1 or larger than the image height or width.
    """
    grid = int(grid)
    height = image.shape[0]
    width = image.shape[1]

    if grid < 1 or grid > min(height, width):
        raise ValueError("grid must be between 1 and the smaller image dimension")

    tile_h = height // grid
    tile_w = width // grid

    # Iterate over tile indices rather than pixel offsets so exactly
    # grid * grid tiles are produced regardless of divisibility.
    tiles = [
        image[row * tile_h:(row + 1) * tile_h, col * tile_w:(col + 1) * tile_w]
        for row in range(grid)
        for col in range(grid)
    ]

    return np.array(tiles)
    

### **Creating dataset from files**

In [0]:
def create_dataset(path):
    """Load a labelled image set laid out as one sub-directory per class.

    Every ``.jpg`` file inside each immediate sub-directory of ``path`` is read
    with OpenCV and converted to RGB. The sub-directory name is used as the
    label. Files OpenCV cannot decode are skipped together with their label,
    so the two returned lists always have the same length and line up
    index by index.

    Parameters
    ----------
    path : str
        Root directory containing one folder per class.

    Returns
    -------
    tuple(list, list)
        ``(images, labels)``: RGB image arrays and the matching class names.
    """
    print("Loading dataset")
    images = []
    labels = []

    # Sorted so the sample order is the same on every run and filesystem.
    with os.scandir(path) as itr:
        class_names = sorted(entry.name for entry in itr if entry.is_dir())

    c = 0
    for class_name in class_names:
        class_dir = os.path.join(path, class_name)
        for filename in sorted(os.listdir(class_dir)):
            # Progress counter covers every directory entry, not only images.
            c += 1
            print("data: ", c)
            if not filename.endswith(".jpg"):
                continue
            img = cv2.imread(os.path.join(class_dir, filename))
            if img is None:
                # Unreadable file: skip it without recording a label, which
                # would otherwise shift every later label by one position.
                continue
            images.append(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
            labels.append(class_name)

    return images, labels


### **Creating (Image,Label) tuple**

In [0]:
def create_tuples(images, labels):
    """Pair each image with the label at the same position.

    Pairing stops at the end of the shorter input.

    Returns
    -------
    list
        ``(image, label)`` tuples in input order.
    """
    return list(zip(images, labels))

### **Feature extraction**

In [0]:
def threaded_feature_extraction(imageslabels, grid, bin, isRGB):
    """Run ``feature_extract_single`` over all samples on a thread pool.

    Results are collected in the order the samples were given, so the output
    is the same on every run regardless of which worker finishes first.

    Parameters
    ----------
    imageslabels : iterable
        ``(image, label)`` pairs.
    grid : int
        Forwarded to ``feature_extract_single``.
    bin : int
        Number of histogram bins, forwarded to ``feature_extract_single``.
        (The name shadows the builtin; it is kept so keyword callers still work.)
    isRGB : bool
        Forwarded to ``feature_extract_single``.

    Returns
    -------
    tuple(list, list)
        ``(features, labels)`` aligned index by index, in input order.
    """
    print("feature extraction")
    imageF = []
    imageL = []

    with concurrent.futures.ThreadPoolExecutor() as executor:
        # Submit everything up front so the pool can work in parallel ...
        futures = [
            executor.submit(feature_extract_single, imagelabel, grid, bin, isRGB)
            for imagelabel in imageslabels
        ]
        # ... then read the results back in submission order.
        for counter, future in enumerate(futures, start=1):
            feature, label = future.result()
            print("IMAGE : ", counter)
            imageF.append(feature)
            imageL.append(label)

    return imageF, imageL

def feature_extract_single(imagelabel, grid, b, isRGB):
    """Turn one ``(image, label)`` pair into an L1-normalised histogram feature.

    The image is resized to 256x256 and split into ``grid`` x ``grid`` tiles.
    One histogram is computed per tile, and all tile histograms are
    concatenated and L1-normalised as a single vector.

    Parameters
    ----------
    imagelabel : tuple
        ``(image, label)``. The image is expected in RGB channel order, which
        is what ``create_dataset`` returns.
    grid : int
        Number of tiles along each image axis.
    b : int
        Number of histogram bins.
    isRGB : bool
        True for per-tile colour histograms, False for grayscale histograms.

    Returns
    -------
    tuple
        ``(feature_vector, label)`` where ``feature_vector`` is a 1-D array.
    """
    image = imagelabel[0]
    label = imagelabel[1]

    # The image is already RGB, so no channel swap is applied here; swapping
    # again would hand BGR data to the RGB->gray conversion below and apply
    # the red and blue luminance weights to the wrong channels.
    resized = cv2.resize(image, (256, 256))
    tiles = split_image_into_grids(resized, grid)

    grid_hists = []
    for tile in tiles:
        if isRGB:
            grid_hists.append(color_histogram(tile, b))
        else:
            gray = cv2.cvtColor(tile, cv2.COLOR_RGB2GRAY)
            grid_hists.append(histogram(gray, b))

    # Concatenate all tile histograms into one row vector; normalize() works
    # on 2-D input, hence the (1, n) reshape before and the flatten after.
    grids_flat = np.array(grid_hists).flatten()
    grids_flat = grids_flat.reshape(1, len(grids_flat))
    normalized = preprocessing.normalize(grids_flat, norm='l1', axis=1, copy=False)
    normalized = np.array(normalized).flatten()

    return (normalized, label)

    


### **KNN classification**

In [0]:
def classification(trainI, trainL, testI, testL, k):
    """Fit a k-nearest-neighbours classifier and print its accuracy.

    Parameters
    ----------
    trainI, trainL
        Feature vectors and labels the model is fitted on.
    testI, testL
        Feature vectors and labels the fitted model is scored on.
    k : int
        Number of neighbours.

    Returns
    -------
    KNeighborsClassifier
        The fitted model.
    """
    print("classification")

    knn_options = {
        "n_neighbors": k,
        "metric": 'euclidean',
        "algorithm": 'auto',
        "n_jobs": -1,
    }
    model = KNeighborsClassifier(**knn_options)
    model.fit(trainI, trainL)

    # score() yields a fraction; it is printed as a percentage.
    print("Accuracy: ", model.score(testI, testL)*100)

    return model

### **Main**

In [0]:
# Entry cell: load the three dataset splits from the mounted Google Drive.
if __name__ == "__main__":
    
    # Each directory is passed to create_dataset, which reads one
    # sub-directory per class.
    test_path = "/content/drive/My Drive/CNG483-Project 1/TestSet"
    train_path = "/content/drive/My Drive/CNG483-Project 1/TrainingSet"
    validation_path = "/content/drive/My Drive/CNG483-Project 1/ValidationSet"

    
    # At this point the *I variables hold raw RGB images and the *L
    # variables hold the matching class labels.
    testI, testL = create_dataset(test_path)
    trainI, trainL = create_dataset(train_path)
    validationI, validationL = create_dataset(validation_path)    

In [0]:
    # Values listed for each experiment parameter:
    # grid --> 1, 2, 4
    # bins --> 1, 128, 256
    # k --> 1, 5, 10
    # isRGB --> True, False
    # NOTE(review): bins is set to 40 below, which is not one of the values
    # listed above - confirm which one is intended.
    
    grid = 4
    bins = 40
    isRGB = True
    k = 5

    # Images and labels are sent as (image, label) tuples so each label stays
    # attached to its image through the threaded extraction.
    # NOTE: these assignments rebind trainI/validationI/testI from raw images
    # to feature vectors, so re-running this cell requires re-running the
    # loading cell first.
    trainI, trainL = threaded_feature_extraction(create_tuples(trainI, trainL), grid, bins, isRGB)
    validationI, validationL = threaded_feature_extraction(create_tuples(validationI, validationL), grid, bins, isRGB)
    testI, testL = threaded_feature_extraction(create_tuples(testI, testL), grid, bins, isRGB)

In [0]:
    # Fit on the training features and print accuracy on the validation split.
    model = classification(trainI, trainL, validationI, validationL, k)


classification


In [0]:
    # Score the model fitted above on the test split and print it as a percentage.
    acc = model.score(testI, testL)
    print("Accuracy: ", acc*100)

    # Play a short beep in the browser (Colab) to signal that the run finished.
    output.eval_js('new Audio("https://upload.wikimedia.org/wikipedia/commons/0/05/Beep-09.ogg").play()')