<a href="https://colab.research.google.com/github/nisanuro/CNG483-Project1/blob/master/CNG483_Project1_1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import os
import numpy as np
import matplotlib.pyplot as plt
import cv2
import pandas as pd
from collections import Counter
from sklearn.neighbors import KNeighborsClassifier
from sklearn  import preprocessing, metrics
from google.colab import output
from google.colab import drive
import concurrent.futures
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


### **Reading images**

In [0]:
def read_images(path):
    images = []

    for filename in os.listdir(path):
        img=cv2.imread(os.path.join(image_path, filename),cv2.IMREAD_COLOR)
        img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
        images.append(img)

    return images

In [0]:
def histogram(image,b):
    bins = np.array([i for i in range(0, 256, int(256/b))])
    image = np.array(image).flatten()
    inds = np.digitize(image,bins, right=False)
    temp = np.array([(inds[i]-1) for i in range(0, len(inds))])
    count = np.bincount(temp)
    
    hist = np.array([count[i] for i in range(0, len(count), int(256/b))])
    if len(hist) != b:
        z = np.zeros(b-len(hist)).astype('int64')
        hist = np.concatenate((hist, z))

    #print("hist: ",  len(hist), "hist shape: " , hist.shape)
    #count = count[::int(256/b)]
    #print(count)
    #print(bins)
    return hist
  

In [0]:
def color_histogram(img, b):
    chans = cv2.split(img)
    colors = ("b", "g", "r")

    features = []
   
    for (chan, color) in zip(chans, colors):
        if color == 'b':
            bf = histogram(chan, b)
        elif color == 'g':
            gf = histogram(chan, b)
        else:
            rf = histogram(chan, b)
    """
    # combination of blue and red channel bins
    temp = np.transpose([np.tile(bf, len(gf)), np.repeat(gf, len(bf))])
    
    blue_green_hist = []    
    for i in temp:
        blue_green_hist.append(sum(i))

    # same operation with red channel bins
    temp = np.transpose([np.tile(rf, len(blue_green_hist)), np.repeat(blue_green_hist, len(rf))])
    """
    #print("bf:  ", bf.shape)
    #print(bf)
    temp = np.vstack(np.meshgrid(bf,gf,rf)).reshape(3,-1).T
    #temp = np.vstack((ndmesh(bf,gf,rf))).reshape(3,-1).T 
    #print("temp:", temp.shape)
    #print(temp)
    #color_hist = []    
    color_hist = temp.sum(axis=1)
    #for i in temp:
    #    color_hist.append(sum(i))
    color_hist = np.array(color_hist)
    #print("color hist: ", color_hist.shape)
    
    return color_hist

In [0]:
def split_image_into_grids(image, grid):

    grids = []
    
    height=image.shape[0]
    width=image.shape[1]

    M = int(np.round(height/grid))
    N = int(np.round(width/grid))
    
    for y in range(0, height, M):
        for x in range(0, width, N):
            y1 = y + M
            x1 = x + N
            grids.append(image[y:y+M, x:x+N])
            
            
    #print ("GGGGGGGG: ", len(grids))
    #cv2.rectangle(image, (x, y), (x1, y1), (0, 255, 0))

    #os.chdir("/content/drive/My Drive/SaveDeneme")
    #cv2.imwrite("im" + str(i) + '_' + str(x) + '_' + str(y)+".png",tiles)
    '''
    plt.figure()
    plt.imshow(grid) 
    plt.show()''' 
    #print("split grid: ",np.array(grids).shape)
    return np.array(grids)

     

In [0]:
def create_dataset(path):
    print("Loading dataset")
    class_paths = []
    images = []
    labels = []

    with os.scandir(path) as itr: 
        for subdir in itr : 
            if subdir.is_dir():
                p = path + '/' + subdir.name
                class_paths.append(p)
    c = 0
    for p in class_paths:
        #print(p)
        for filename in os.listdir(p):
            c += 1
            print("data: ", c)
            if filename.endswith(".jpg"):
                isNumpy = False
                labels.append(os.path.basename(os.path.normpath(p))) 
                img = cv2.imread(os.path.join(p, filename))
                if img is not None:
                    images.append(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
                
    return images, labels


In [0]:
def threaded_feature_extraction(imageslabels, grid, bin, isRGB, datasetType):
    print("feature extraction")
    features = []
    with concurrent.futures.ThreadPoolExecutor() as executor:
        futuretoimage = {executor.submit(feature_extract_single, imagelabel, grid, bin, isRGB): imagelabel for imagelabel in imageslabels}
        counter = 0
        for future in concurrent.futures.as_completed(futuretoimage):
            counter += 1        
            print("IMAGE : ", counter)

            features.append(future.result())

    return features

def feature_extract_single(imagelabel, grid, b, isRGB):
    image = imagelabel[0]
    label = imagelabel[1]
    #print(label)
    i = cv2.resize(image, (256, 256))
    i = cv2.cvtColor(i, cv2.COLOR_BGR2RGB)
    grid_hists = []
    grids = split_image_into_grids(i, grid)
    #print("grids: ")
    #print(len(grids))
    for i in grids:
        if(isRGB):
            grid_hists.append(color_histogram(i, b))
            #g = color_histogram(i, b)
            #print("GRID: ", g.shape)
        else:
            i = cv2.cvtColor(i, cv2.COLOR_RGB2GRAY)
            grid_hists.append(histogram(i, b))
    
    # Concatenation
    grids_flat = np.array(grid_hists).flatten()
    #print("grids_flat: ",grids_flat, grids_flat.shape)
    '''
    # Sum
    if grid != 1:
        grids_flat = [ sum(x) for x in zip(*grid_hists) ]
        grids_flat = np.array(grids_flat).flatten()
        #print("grids: ", len(grids_flat))
    
    else:
        grids_flat = np.array(grid_hists).flatten()
        #print("grids: ", len(grid_hists))
    '''

    grids_flat = grids_flat.reshape(1, len(grids_flat))
    normalized = preprocessing.normalize(grids_flat, norm='l1', axis=1, copy=False)
    #normalized = np.array(normalized, dtype = 'float32').flatten()
    #print("dtype: ", np.array(normalized).flatten().dtype)  
    normalized = np.array(normalized).flatten()    
    #print("normalized: ",normalized, normalized.shape)
    return (normalized, label)

    


In [0]:
def classification(trainI, trainL, testI, testL, k):
    print("classification")
    #np.random.seed(42)

    model = KNeighborsClassifier(n_neighbors=k, metric='euclidean', algorithm='brute', n_jobs=-1)
    model.fit(trainI, trainL)
    acc = model.score(testI, testL)
    print("Accuracy: ", acc*100)

    return model

In [9]:
if __name__ == "__main__":
    
    test_path = "/content/drive/My Drive/CNG483-Project 1/TestSet"
    train_path = "/content/drive/My Drive/CNG483-Project 1/TrainingSet"
    validation_path = "/content/drive/My Drive/CNG483-Project 1/ValidationSet"

    
    testI, testL = create_dataset(test_path)
    trainI, trainL = create_dataset(train_path)
    validationI, validationL = create_dataset(validation_path)
    

    

Loading dataset
data:  1
data:  2
data:  3
data:  4
data:  5
data:  6
data:  7
data:  8
data:  9
data:  10
data:  11
data:  12
data:  13
data:  14
data:  15
data:  16
data:  17
data:  18
data:  19
data:  20
data:  21
data:  22
data:  23
data:  24
data:  25
data:  26
data:  27
data:  28
data:  29
data:  30
data:  31
data:  32
data:  33
data:  34
data:  35
data:  36
data:  37
data:  38
data:  39
data:  40
data:  41
data:  42
data:  43
data:  44
data:  45
data:  46
data:  47
data:  48
data:  49
data:  50
data:  51
data:  52
data:  53
data:  54
data:  55
data:  56
data:  57
data:  58
data:  59
data:  60
data:  61
data:  62
data:  63
data:  64
data:  65
data:  66
data:  67
data:  68
data:  69
data:  70
data:  71
data:  72
data:  73
data:  74
data:  75
data:  76
data:  77
data:  78
data:  79
data:  80
data:  81
data:  82
data:  83
data:  84
data:  85
data:  86
data:  87
data:  88
data:  89
data:  90
data:  91
data:  92
data:  93
data:  94
data:  95
data:  96
data:  97
data:  98
data:  99
dat

In [10]:
    # grid --> 1, 2, 4
    # bins --> 1, 128, 256
    # k --> 1, 5, 10
    # isRGB --> True, False
    
    grid = 4
    bins = 32
    isRGB = True

    imagelabels = []
    for i,l in zip(trainI, trainL):
        imagelabels.append((i,l))
    trainI_f = threaded_feature_extraction(imagelabels, grid, bins, isRGB, 0)

    trainF = []
    trainL = []
    for (i,l) in trainI_f:
        trainF.append(i)
        trainL.append(l)

    imagelabels = []
    for i,l in zip(validationI, validationL):
        imagelabels.append((i,l))
    
    validationI_f = threaded_feature_extraction(imagelabels, grid, bins, isRGB, 1)
    validationF = []
    validationL = []
    for (i,l) in validationI_f:      
        validationF.append(i)
        validationL.append(l)

    imagelabels = []
    for i,l in zip(testI, testL):
        imagelabels.append((i,l))
    
    testI_f = threaded_feature_extraction(imagelabels, grid, bins, isRGB, 2)
    testF = []
    testL = []
    for (i,l) in testI_f:      
        testF.append(i)
        testL.append(l)


feature extraction
IMAGE :  1
IMAGE :  2
IMAGE :  3
IMAGE :  4
IMAGE :  5
IMAGE :  6
IMAGE :  7
IMAGE :  8
IMAGE :  9
IMAGE :  10
IMAGE :  11
IMAGE :  12
IMAGE :  13
IMAGE :  14
IMAGE :  15
IMAGE :  16
IMAGE :  17
IMAGE :  18
IMAGE :  19
IMAGE :  20
IMAGE :  21
IMAGE :  22
IMAGE :  23
IMAGE :  24
IMAGE :  25
IMAGE :  26
IMAGE :  27
IMAGE :  28
IMAGE :  29
IMAGE :  30
IMAGE :  31
IMAGE :  32
IMAGE :  33
IMAGE :  34
IMAGE :  35
IMAGE :  36
IMAGE :  37
IMAGE :  38
IMAGE :  39
IMAGE :  40
IMAGE :  41
IMAGE :  42
IMAGE :  43
IMAGE :  44
IMAGE :  45
IMAGE :  46
IMAGE :  47
IMAGE :  48
IMAGE :  49
IMAGE :  50
IMAGE :  51
IMAGE :  52
IMAGE :  53
IMAGE :  54
IMAGE :  55
IMAGE :  56
IMAGE :  57
IMAGE :  58
IMAGE :  59
IMAGE :  60
IMAGE :  61
IMAGE :  62
IMAGE :  63
IMAGE :  64
IMAGE :  65
IMAGE :  66
IMAGE :  67
IMAGE :  68
IMAGE :  69
IMAGE :  70
IMAGE :  71
IMAGE :  72
IMAGE :  73
IMAGE :  74
IMAGE :  75
IMAGE :  76
IMAGE :  77
IMAGE :  78
IMAGE :  79
IMAGE :  80
IMAGE :  81
IMAGE :  82
IMAGE 

In [14]:
    from sklearn.decomposition import PCA
    pca = PCA(.99)
    pca.fit(trainF)
    print(pca.n_components_)
    train_img = pca.transform(trainF)
    test_img = pca.transform(validationF)

159


In [12]:
np.array(trainF).shape

(787, 524288)

In [13]:
    model = classification(train_img, trainL, test_img, validationL,5)


classification
Accuracy:  39.04761904761905


In [18]:
    acc = model.score(testF, testL)
    print("Accuracy: ", acc*100)

    output.eval_js('new Audio("https://upload.wikimedia.org/wikipedia/commons/0/05/Beep-09.ogg").play()')

Accuracy:  46.666666666666664


In [21]:
    from sklearn.svm import SVC
    clf = SVC(C=1, degree=3, gamma="scale", coef0=0.0, shrinking=True, 
          probability=True, tol=0.001, cache_size=200, class_weight="balanced",
          max_iter=-1, decision_function_shape="ovr", random_state = 0)
    clf.fit(trainF, trainL)
    acc = clf.score(testF, testL)
    print("Accuracy: ", acc*100)

Accuracy:  42.857142857142854
