<a href="https://colab.research.google.com/github/nisanuro/CNG483-Project1/blob/master/CNG483_Project1_1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
import os
import numpy as np
import matplotlib.pyplot as plt
import cv2
import pandas as pd
from collections import Counter
from sklearn.neighbors import KNeighborsClassifier
from sklearn  import preprocessing
from google.colab import output
from google.colab import drive
import concurrent.futures
drive.mount('/content/drive')

In [0]:
def read_images(path):
    images = []

    for filename in os.listdir(path):
        img=cv2.imread(os.path.join(image_path, filename),cv2.IMREAD_COLOR)
        img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
        images.append(img)

    return images

In [0]:
"""
def histogram(image, b):
    bins = []
    width = int(256/b)

    for low in range(0, b * width, width):
        bins.append((low, low+width))

    hist = np.zeros(b).astype('int32')
    image = np.array(image).flatten()

    for value in image:
        for i in range(0, len(bins)):
            if (bins[i][0] <= value < bins[i][1]):
                hist[i] += 1
    
    return hist
"""

def histogram(image,b):
    bins = np.array([i for i in range(0, 256, int(256/b))])
    image = np.array(image).flatten()
    inds = np.digitize(image,bins, right=False)
    temp = np.array([(inds[i]-1) for i in range(0, len(inds))])
    count = np.bincount(temp)
    
    hist = np.array([count[i] for i in range(0, len(count), int(256/b))])
    if len(hist) != b:
        z = np.zeros(b-len(hist)).astype('int64')
        hist = np.concatenate((hist, z))

    #print("hist: ",  len(hist), "hist shape: " , hist.shape)
    #count = count[::int(256/b)]
    #print(count)
    #print(bins)
    return hist
  

In [0]:
def ndmesh(*xi,**kwargs):
    if len(xi) < 2:
        msg = 'meshgrid() takes 2 or more arguments (%d given)' % int(len(xi) > 0)
        raise ValueError(msg)

    args = np.atleast_1d(*xi)
    ndim = len(args)
    copy_ = kwargs.get('copy', True)

    s0 = (1,) * ndim
    output = [x.reshape(s0[:i] + (-1,) + s0[i + 1::]) for i, x in enumerate(args)]

    shape = [x.size for x in output]

    # Return the full N-D matrix (not only the 1-D vector)
    if copy_:
        mult_fact = np.ones(shape, dtype=int)
        return [x * mult_fact for x in output]
    else:
        return np.broadcast_arrays(*output)

In [0]:
def color_histogram(img, b):
    chans = cv2.split(img)
    colors = ("b", "g", "r")

    features = []
   
    for (chan, color) in zip(chans, colors):
        if color == 'b':
            bf = histogram(chan, b)
        elif color == 'g':
            gf = histogram(chan, b)
        else:
            rf = histogram(chan, b)
    """
    # combination of blue and red channel bins
    temp = np.transpose([np.tile(bf, len(gf)), np.repeat(gf, len(bf))])
    
    blue_green_hist = []    
    for i in temp:
        blue_green_hist.append(sum(i))

    # same operation with red channel bins
    temp = np.transpose([np.tile(rf, len(blue_green_hist)), np.repeat(blue_green_hist, len(rf))])
    """
    #print("bf:  ", bf.shape)
    #print(bf)
    temp = np.vstack((ndmesh(bf,gf,rf))).reshape(3,-1).T 
    #print("temp:", temp.shape)
    #print(temp)
    #color_hist = []    
    color_hist = temp.sum(axis=1)
    #for i in temp:
    #    color_hist.append(sum(i))
    color_hist = np.array(color_hist)
    #print("color hist: ", color_hist.shape)
    
    return color_hist

In [0]:
def split_image_into_grids(image, grid):

    grids = []

    height=image.shape[0]
    width=image.shape[1]

    M = height//grid
    N = width//grid

    for y in range(0, height, M):
        for x in range(0, width, N):
            y1 = y + M
            x1 = x + N
            grids.append(image[y:y+M, x:x+N])


    #cv2.rectangle(image, (x, y), (x1, y1), (0, 255, 0))

    #os.chdir("/content/drive/My Drive/SaveDeneme")
    #cv2.imwrite("im" + str(i) + '_' + str(x) + '_' + str(y)+".png",tiles)
    '''
    plt.figure()
    plt.imshow(grid) 
    plt.show()''' 

    return np.array(grids)

     

In [0]:
def create_dataset(path):
    print("Loading dataset")
    class_paths = []
    images = []
    
    numpy = np.empty((0,3), int)
    labels = []

    with os.scandir(path) as itr: 
        for subdir in itr : 
            if subdir.is_dir():
                p = path + '/' + subdir.name
                class_paths.append(p)
    c = 0
    isNumpy = False
    for p in class_paths:
        #print(p)
        for filename in os.listdir(p):
            c += 1
            print("data: ", c)
            if filename.endswith(".jpg"):
                isNumpy = False
                labels.append(os.path.basename(os.path.normpath(p))) 
                img = cv2.imread(os.path.join(p, filename))
                if img is not None:
                    images.append(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
                 
            elif filename.endswith(".npy"):
                isNumpy = True
                labels.append(os.path.basename(os.path.normpath(p))) 
                img = np.load(os.path.join(p, filename))
                images.append(img)

    if isNumpy:
        return np.array(images, dtype='float64'), labels
    else:
        return images, labels


In [0]:
def threaded_feature_extraction(imageslabels, grid, bin, isRGB, datasetType):
    print("feature extraction")
    features = []
    with concurrent.futures.ThreadPoolExecutor() as executor:
        futuretoimage = {executor.submit(feature_extract_single, imagelabel, grid, bin, isRGB): imagelabel for imagelabel in imageslabels}
        counter = 0
        for future in concurrent.futures.as_completed(futuretoimage):
            counter += 1
            
            if datasetType == 0:    # Training Set
                path = '/content/CNG483-Project/Features/TrainingSet' + '/' + future.result()[1]
            elif datasetType == 1:  # Validation Set
                path = '/content/CNG483-Project/Features/ValidationSet' + '/' + future.result()[1]
            elif datasetType == 2:  # Test Set
                path = '/content/CNG483-Project/Features/TestSet' + '/' + future.result()[1]                    
            
            if not os.path.exists(path):
                try: 
                    os.makedirs(path, exist_ok = True) 
                    print("Directory created successfully:  ", path) 
                except OSError as error: 
                    print("Directory can not be created")
            else:
                print("Directory already exists:   ", path)
            path =  path + '/' + str(counter)
            
        
            print("IMAGE : ", counter)
            #np.save(path, np.array(future.result()[0]))
            features.append(future.result())
    return features

def feature_extract_single(imagelabel, grid, b, isRGB):
    image = imagelabel[0]
    label = imagelabel[1]
    #print(label)
    i = cv2.resize(image, (256, 256))
    i = cv2.cvtColor(i, cv2.COLOR_BGR2RGB)
    grid_hists = []
    grids = split_image_into_grids(i, grid)
    #print("grids: ")
    #print(len(grids))
    for i in grids:
        if(isRGB):
            grid_hists.append(color_histogram(i, b))
        else:
            i = cv2.cvtColor(i, cv2.COLOR_RGB2GRAY)
            grid_hists.append(histogram(i, b))
    '''
    # Concatenation
    grids_flat = np.array(grid_hists).flatten()
    '''

    # Sum
    if grid != 1:
        grids_flat = [ sum(x) for x in zip(*grid_hists) ]
        grids_flat = np.array(grids_flat).flatten()
        #print("grids: ", len(grids_flat))
        
    else:
        grids_flat = np.array(grid_hists).flatten()
        #print("grids: ", len(grid_hists))
    

    grids_flat = grids_flat.reshape(1, len(grids_flat))
    normalized = preprocessing.normalize(grids_flat, norm='l1', axis=0, copy=False)
    print("dtype: ", np.array(normalized).flatten().dtype)
    #normalized = np.array(normalized, dtype = 'float32').flatten()
    #print("dtype: ", np.array(normalized).flatten().dtype)      
    print("normalized: ", np.array(normalized).flatten(), np.array(normalized).flatten().shape)
    return (np.array(normalized).flatten(), label)

    


In [0]:
"""def feature_extraction(images, grid, bin, isRGB):
    print("feature extraction")
    features = []
    size = len(images)
    for i, j in zip(images, range(size)):
        print(j)
        i = cv2.resize(i, (256, 256))
        i = cv2.cvtColor(i, cv2.COLOR_BGR2RGB)
        grids = split_image_into_grids(i, grid)

        grid_hists = []

        for i in grids:
            if(isRGB):
                grid_hists.append(color_histogram(i, bin))
            else:
                grid_hists.append(histogram(i, bin))
        
        # Concatenation
        grids_flat = np.array(grid_hists).flatten()
        
        '''
        # Sum
        grids_flat = np.array(grid_hists)
        grids_flat = np.sum(grids_flat, axis=0)
        '''
            
        features.append(grids_flat)
        
    return features"""


In [0]:
def classification(trainI, trainL, testI, testL, k):
    print("classification")
    model = KNeighborsClassifier(n_neighbors=k, metric='euclidean', algorithm='brute', n_jobs=-1)
    model.fit(trainI, trainL)
    acc = model.score(testI, testL)
    print(acc)

In [11]:
if __name__ == "__main__":
    
    test_path = "/content/drive/My Drive/CNG483-Project 1/TestSet"
    train_path = "/content/drive/My Drive/CNG483-Project 1/TrainingSet"
    validation_path = "/content/drive/My Drive/CNG483-Project 1/ValidationSet"

    
    # testI, testL = create_dataset(test_path)
    trainI, trainL = create_dataset(train_path)
    validationI, validationL = create_dataset(validation_path)
    
    # grid --> 1, 2, 4
    # bins --> 1, 128, 256
    # k --> 1, 5, 10
    # isRGB --> True, False
    '''
    grid = 1
    bins = 128
    isRGB = True
    k = 10
    
    imagelabels = []
    for i,l in zip(trainI, trainL):
        imagelabels.append((i,l))
    threaded_feature_extraction(imagelabels, grid, bins, isRGB, 0)

    imagelabels = []
    for i,l in zip(validationI, validationL):
        imagelabels.append((i,l))
    
    threaded_feature_extraction(imagelabels, grid, bins, isRGB, 1)

    imagelabels = []
    '''

data:  152
data:  153
data:  154
data:  155
data:  156
data:  157
data:  158
data:  159
data:  160
data:  161
data:  162
data:  163
data:  164
data:  165
data:  166
data:  167
data:  168
data:  169
data:  170
data:  171
data:  172
data:  173
data:  174
data:  175
data:  176
data:  177
data:  178
data:  179
data:  180
data:  181
data:  182
data:  183
data:  184
data:  185
data:  186
data:  187
data:  188
data:  189
data:  190
data:  191
data:  192
data:  193
data:  194
data:  195
data:  196
data:  197
data:  198
data:  199
data:  200
data:  201
data:  202
data:  203
data:  204
data:  205
data:  206
data:  207
data:  208
data:  209
data:  210
data:  211
data:  212
data:  213
data:  214
data:  215
data:  216
data:  217
data:  218
data:  219
data:  220
data:  221
data:  222
data:  223
data:  224
data:  225
data:  226
data:  227
data:  228
data:  229
data:  230
data:  231
data:  232
data:  233
data:  234
data:  235
data:  236
data:  237
data:  238
data:  239
data:  240
data:  241
data:  242

In [12]:
    grid = 1
    bins = 128
    isRGB = True
    k = 10
    
    imagelabels = []
    for i,l in zip(trainI, trainL):
        imagelabels.append((i,l))
    trainI_f = threaded_feature_extraction(imagelabels, grid, bins, isRGB, 0)
    #trainI_f = threaded_feature_extraction(trainI, grid, bins, isRGB)
    trainf = []
    trainl = []
    for (i,l) in trainI_f:
        trainf.append(i)
        trainl.append(l)

    imagelabels = []
    for i,l in zip(validationI, validationL):
        imagelabels.append((i,l))
    
    validationI_f = threaded_feature_extraction(imagelabels, grid, bins, isRGB, 1)
    validationf = []
    validationl = []
    for (i,l) in validationI_f:      
        validationf.append(i)
        validationl.append(l)

    #validationI_f = feature_extraction(validationI, grid, bins, isRGB)



feature extraction
dtype:  float64
normalized:  [0. 0. 0. ... 0. 0. 0.] (2097152,)
dtype:  float64
normalized:  dtype:  float64
Directory already exists:    /content/CNG483-Project/Features/TrainingSet/flamingo
IMAGE :  1
[1. 1. 1. ... 0. 0. 0.] (2097152,)
normalized: Directory already exists:    /content/CNG483-Project/Features/TrainingSet/flamingo
IMAGE :  2
 dtype: [1. 1. 1. ... 0. 0. 0.]dtype:  float64
 float64dtype:  float64
dtype:  float64

 (2097152,)
Directory already exists:    /content/CNG483-Project/Features/TrainingSet/flamingo
IMAGE :  3dtype:  float64

normalized:  [1. 1. 1. ... 0. 0. 0.] (2097152,)
normalized:  [1. 1. 1. ... 0. 0. 0.] (2097152,)
normalized:  [1. 1. 1. ... 0. 0. 0.] (2097152,)
dtype:  Directory already exists:    /content/CNG483-Project/Features/TrainingSet/flamingo
IMAGE : normalized:  float64 4
Directory already exists:    /content/CNG483-Project/Features/TrainingSet/flamingo
IMAGE :  5

[1. 1. 1. ... 0. 0. 0.]Directory already exists:    /content/CNG48

In [0]:
#trainl = np.array(trainl).reshape(1, len(trainl))
#validationl = np.array(validationl).reshape(1, len(validationl))

In [0]:
#trainf = np.array(trainf, dtype='float64')

In [15]:
   '''
    trainf = np.array(trainf, dtype='float64')
    trainl = np.array(trainl)
    validationf = np.array(validationf, dtype='float64')
    validationl = np.array(validationl)
    '''

"\n trainf = np.array(trainf, dtype='float64')\n trainl = np.array(trainl)\n validationf = np.array(validationf, dtype='float64')\n validationl = np.array(validationl)\n "

In [16]:
    '''
    #test_path = "/content/drive/My Drive/CNG483-Project 1/TestSet"
    train_path = "/content/CNG483-Project/Features/TrainingSet"
    validation_path = "/content/CNG483-Project/Features/ValidationSet"

    
    # testI, testL = create_dataset(test_path)
    trainF, trainL = create_dataset(train_path)
    validationF, validationL = create_dataset(validation_path)
    '''

'\n#test_path = "/content/drive/My Drive/CNG483-Project 1/TestSet"\ntrain_path = "/content/CNG483-Project/Features/TrainingSet"\nvalidation_path = "/content/CNG483-Project/Features/ValidationSet"\n\n\n# testI, testL = create_dataset(test_path)\ntrainF, trainL = create_dataset(train_path)\nvalidationF, validationL = create_dataset(validation_path)\n'

In [17]:
    '''
    print(trainF.dtype)
    print(validationF.dtype)
    print(trainL.dtype)
    print(validationL.dtype)
    '''

'\nprint(trainF.dtype)\nprint(validationF.dtype)\nprint(trainL.dtype)\nprint(validationL.dtype)\n'

In [0]:
    classification(trainf, trainl, validationf, validationl, 5)
    output.eval_js('new Audio("https://upload.wikimedia.org/wikipedia/commons/0/05/Beep-09.ogg").play()')

classification
