In [1]:
import pandas as pd
import numpy as np
import cv2
from matplotlib import pyplot as plt
%matplotlib inline

In [2]:
import time
from sklearn.linear_model import SGDClassifier
from sklearn.linear_model import Perceptron
from sklearn.naive_bayes import MultinomialNB
from xgboost import XGBClassifier
from sklearn.ensemble import RandomForestClassifier

# 讀檔

# Feature extraction 

In [3]:
# Shared HOG descriptor used by feature_extraction on 64x64 grayscale images.
# Geometry: 64x64 window, 16x16 blocks stepped by 16 px (so blocks do not
# overlap), 8x8 cells and 8 orientation bins, i.e. 4*4 blocks x 4 cells x 8 bins
# = 512 values per window.
hog = cv2.HOGDescriptor(
    _winSize=(64, 64),
    _blockSize=(16, 16),
    _blockStride=(16, 16),
    _cellSize=(8, 8),
    _nbins=8,
)

def feature_extraction(img_path, color_hist = True,  Edge_Detection = True, HOG_Descriptor=True):
    """Build a 1-D feature vector for a single image file.

    The image is loaded with OpenCV and converted to grayscale. Up to three
    feature groups are then computed and concatenated in this fixed order:

    1. intensity histogram of the grayscale image (256 bins over [0, 256)),
    2. Canny edge map of the image resized to 16x16, after a 3x3 Gaussian blur,
    3. HOG descriptor of the image resized to 64x64, computed with the
       module-level ``hog`` object.

    Parameters
    ----------
    img_path : str
        Path of the image to load.
    color_hist : bool, default True
        Include the grayscale intensity histogram.
    Edge_Detection : bool, default True
        Include the flattened Canny edge map.
    HOG_Descriptor : bool, default True
        Include the flattened HOG descriptor.

    Returns
    -------
    numpy.ndarray
        1-D array holding the enabled feature groups back to back. It is empty
        when every group is disabled.

    Raises
    ------
    OSError
        If OpenCV cannot load ``img_path`` (missing file or undecodable image).
    """
    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread reports failure by returning None rather than raising;
        # without this check cvtColor fails with an opaque OpenCV assertion.
        raise OSError(f"Could not read image {img_path!r} (missing file or unsupported format)")
    img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    # Disabled groups stay as empty arrays so the final concatenation works
    # for every combination of flags.
    img_hist_flatten, img_edges_flatten, img_hog_flatten = np.array([]), np.array([]), np.array([])

    # Grayscale intensity histogram
    if color_hist:
        img_hist = cv2.calcHist([img_gray],[0],None,[256],[0,256])
        img_hist_flatten = img_hist.flatten()

    # Edge detection
    if Edge_Detection:
        img_small = cv2.resize(img_gray, (16,16))
        # Blur the image for better edge detection
        img_blur = cv2.GaussianBlur(img_small, (3,3), 0)
        img_edges = cv2.Canny(image=img_blur, threshold1=100, threshold2=200)
        img_edges_flatten = img_edges.flatten()

    # HOG descriptor
    if HOG_Descriptor:
        img_resize = cv2.resize(img_gray, (64,64))
        img_hog = hog.compute(img_resize)
        img_hog_flatten = img_hog.flatten()

    img_fea = np.concatenate((img_hist_flatten, img_edges_flatten, img_hog_flatten), axis=None)

    return img_fea

# evaluation_function

In [4]:
def evaluate(name, predict_proba, y_label, classes=None):
    """Compute Top-1 and Top-5 accuracy from per-class scores.

    Parameters
    ----------
    name : str
        Name of the data split (e.g. "val" or "test"). It is not used in the
        computation and is kept only so existing callers keep working.
    predict_proba : array-like of shape (n_samples, n_classes)
        Score or probability of every class, one row per sample.
    y_label : array-like of shape (n_samples,)
        Ground-truth label of every sample.
    classes : array-like of shape (n_classes,), optional
        Label that each column of ``predict_proba`` stands for (for
        scikit-learn estimators this is ``estimator.classes_``). When omitted,
        the column index itself is taken as the label.

    Returns
    -------
    tuple of (str, str)
        Top-1 and Top-5 accuracy, each rounded to 4 decimals and converted to
        a string. With fewer than 5 classes every class is among the top 5,
        so the Top-5 accuracy is 1.0 whenever all labels are known classes.

    Raises
    ------
    ValueError
        If ``predict_proba`` is not a non-empty 2-D array, if its row count
        differs from the number of labels, or if ``classes`` does not have one
        entry per column.
    """
    proba = np.asarray(predict_proba)
    truth = np.asarray(y_label).ravel()

    if proba.ndim != 2:
        raise ValueError("predict_proba must be 2-D (n_samples, n_classes)")
    n_samples, n_classes = proba.shape
    if n_samples == 0 or n_classes == 0:
        raise ValueError("predict_proba must contain at least one sample and one class")
    if truth.shape[0] != n_samples:
        raise ValueError("y_label and predict_proba must have the same number of samples")

    # Top-1: column with the highest score in every row.
    top1_pred = np.argmax(proba, axis=1)

    # Top-5: argpartition puts the k largest entries of each row in the last
    # k positions (in arbitrary order). k is clamped because argpartition
    # raises when asked for more entries than there are classes.
    k = min(5, n_classes)
    topk_pred = np.argpartition(proba, -k, axis=1)[:, -k:]

    if classes is not None:
        class_labels = np.asarray(classes)
        if class_labels.shape[0] != n_classes:
            raise ValueError("classes must have one entry per column of predict_proba")
        # Translate column indices into the labels they represent.
        top1_pred = class_labels[top1_pred]
        topk_pred = class_labels[topk_pred]

    top1_hits = top1_pred == truth
    topk_hits = (topk_pred == truth[:, None]).any(axis=1)

    top1 = str(round(float(np.mean(top1_hits)), 4))
    top5 = str(round(float(np.mean(topk_hits)), 4))

    return top1, top5
    

# Image Classification pipeline

In [None]:
 # Each tuple switches the feature groups on/off in the order
 # (color_hist, Edge_Detection, HOG_Descriptor).
grouping = [(True,False,False),(False,True,False),(False,False,True),(True,True,False),(True,False,True),(False,True,True),(True,True,True)]

# The second-stage prediction results are written to this text file.
path = 'output.txt'


def _load_split(list_file):
    """Read a space-delimited, header-less list file into an (image, label) DataFrame."""
    split = pd.read_csv(list_file, delimiter = " ", header=None)
    split.columns = ['image','label']
    return split


def _build_xy(split, flags):
    """Return (X, y) for one split, with features from feature_extraction(path, *flags)."""
    X = np.array([feature_extraction(img_path, *flags) for img_path in split['image']])
    y = np.array(split['label'])
    return X, y


def _fit_and_report(out, model, fit_line, X_train, y_train, X_val, y_val, X_test, y_test):
    """Fit `model`, then write its fit time and val/test Top-1/Top-5 accuracy to `out`.

    `fit_line` is the fit-time line with one `{}` placeholder for the rounded
    number of seconds. Only the call to `fit` is timed.
    """
    start = time.time()
    model.fit(X_train, y_train)
    fit_time = time.time() - start

    predict_proba_val = model.predict_proba(X_val)
    predict_proba_test = model.predict_proba(X_test)
    out.write(fit_line.format(round(fit_time,2)))

    for name, proba, y_true in (("val", predict_proba_val, y_val),
                                ("test", predict_proba_test, y_test)):
        out.write(name + "\n")
        top1, top5 = evaluate(name, proba, y_true)
        out.write(f'Top-1 Accuracy on the {name} data is ' + top1 + "\n")
        out.write(f'Top-5 Accuracy on the {name} data is ' + top5 + "\n")

    out.write("\n")


# The list files do not depend on the feature combination, so read them once.
# The frames are left untouched; features go into separate X_* arrays.
train = _load_split('train.txt')
val = _load_split('val.txt')
test = _load_split('test.txt')

# The context manager closes (and flushes) the report even if feature
# extraction or training raises part-way through.
with open(path, 'w') as f:
    # Evaluate every combination of feature groups.
    for flags in grouping:

        # feature extraction / model inputs
        X_train, y_train = _build_xy(train, flags)
        X_val, y_val = _build_xy(val, flags)
        X_test, y_test = _build_xy(test, flags)
        splits = (X_train, y_train, X_val, y_val, X_test, y_test)

        # start training models and predicting labels
        f.write(f'color_hist={str(flags[0])}, Edge_Detection={str(flags[1])}, HOG_Descriptor={str(flags[2])} \n')
        f.write("\n")

        # MultinomialNB
        clf = MultinomialNB()
        _fit_and_report(f, clf, 'fit time of MultinomialNB is {}s\n', *splits)

        # xgboost
        xgb = XGBClassifier()
        _fit_and_report(f, xgb, 'fit time of xgboost is {}s\n', *splits)

        # RandomForest
        RF_clf = RandomForestClassifier(n_estimators = 100,
                                        random_state=0,
                                        min_samples_leaf = 4,)
        _fit_and_report(f, RF_clf, 'fit time of Random Forest is {}s  \n', *splits)

        # Two extra blank lines separate the report of one feature combination
        # from the next.
        f.write("\n")
        f.write("\n")
