In [1]:
from sklearn.neighbors import KNeighborsClassifier
import sklearn
import numpy as np
import PIL
import cv2
import os
import matplotlib.pyplot as plt
import pandas as pd
import time
from skimage import feature as ft

# Function define

In [2]:
def img_comp(graphA, graphB):
    return np.sum(np.sqrt(np.sum(np.power(graphA - graphB, 2), axis=1)))

def get_img_folder(path):
    img_list = []
    file_list = []
    for filename in os.listdir(path):
        if not filename.endswith('.jpg') and not filename.endswith('.jpeg') and not filename.endswith('.png'):
            continue
        image = cv2.resize(cv2.imread(os.path.join(path, filename)), (256, 256), interpolation=cv2.INTER_AREA)
        # image = np.multiply(image, 1/255)
        img_list.append(image)
        file_list.append(filename)
    return img_list, file_list

def get_img_folder_gray(path):
    img_list = []
    file_list = []
    for filename in os.listdir(path):
        if not filename.endswith('.jpg') and not filename.endswith('.jpeg') and not filename.endswith('.png'):
            continue
        image = cv2.resize(cv2.imread(os.path.join(path, filename), 0), (256, 256), interpolation=cv2.INTER_AREA)
        image = np.multiply(image, 1/255)
        img_list.append(image)
        file_list.append(filename)
    return img_list, file_list

def extract_hog_features(X):
    image_descriptors = []
    for i in range(len(X)): 
        print(f"{i}/{len(X)}")               
        fd, _ = ft.hog(X[i], orientations=9, pixels_per_cell=(16, 16), cells_per_block=(16, 16),
                            block_norm='L2-Hys', transform_sqrt=True, visualize=True)                 
        image_descriptors.append(fd)                                           
    return image_descriptors    

def extract_sift_features(X):
    image_descriptors = []
    det = ft.SIFT()
    for i in range(len(X)): 
        print(f"{i}/{len(X)}")               
        fd = det.detect_and_extract(X[i]) 
        image_descriptors.append(fd)                                           
    return image_descriptors   

def extract_hog_features_single(X):
    image_descriptors_single = []
    print(len(X))               
    fd, _ = ft.hog(X, orientations=9, pixels_per_cell=(16, 16), cells_per_block=(16, 16),
                            block_norm='L2-Hys', transform_sqrt=True, visualize=True)
    image_descriptors_single.append(fd)
    return image_descriptors_single

def BGR2GRAY(img_list):
    return list(map(lambda x:cv2.cvtColor(x, cv2.COLOR_BGR2GRAY), img_list))

def normalize(img_list):
    return list(map(lambda x:np.multiply(x, 1/255), img_list))

def calchist(img_list):
    img_list_hist = []
    for i in img_list:
        his = cv2.calcHist([i], [0], None, [256], [0, 256])
        his = np.concatenate((his, cv2.calcHist([i], [1], None, [256], [0, 256])), 0)
        his = np.concatenate((his, cv2.calcHist([i], [2], None, [256], [0, 256])), 0)
        his = his.flatten()
        img_list_hist.append(his)
    return img_list_hist

def gamma_correction(img_list):
    gamma_corrected = []
    for img in img_list:
        mean_intensity = np.mean(img)
        gamma = np.log(0.5) / np.log(mean_intensity / 255)
        gamma_corrected.append(np.power(img, gamma))
    return gamma_corrected


# Trainset import

In [3]:
man_train_path = './train/manmade_training'
man_train_list, man_train_file = get_img_folder(man_train_path)
nat_train_path = './train/natural_training'
nat_train_list, nat_train_file = get_img_folder(nat_train_path)

# Testset import

In [4]:
man_test_path = './test/manmade_test'
man_test_list, man_test_file = get_img_folder(man_test_path)
nat_test_path = './test/natural_test'
nat_test_list, nat_test_file = get_img_folder(nat_test_path)

In [5]:
# man_test_list[0]

In [6]:
trainset = gamma_correction(BGR2GRAY(man_train_list + nat_train_list))
lable = [0] * len(man_train_list) + [1] * len(nat_train_list)
X_train = extract_hog_features(trainset)
Y_train = lable
X_test = extract_hog_features(gamma_correction(BGR2GRAY(man_test_list + nat_test_list)))
Y_test = [0] * len(man_test_list) + [1] * len(nat_test_list)

0/1000
1/1000
2/1000
3/1000
4/1000
5/1000
6/1000
7/1000
8/1000
9/1000
10/1000
11/1000
12/1000
13/1000
14/1000
15/1000
16/1000
17/1000
18/1000
19/1000
20/1000
21/1000
22/1000
23/1000
24/1000
25/1000
26/1000
27/1000
28/1000
29/1000
30/1000
31/1000
32/1000
33/1000
34/1000
35/1000
36/1000
37/1000
38/1000
39/1000
40/1000
41/1000
42/1000
43/1000
44/1000
45/1000
46/1000
47/1000
48/1000
49/1000
50/1000
51/1000
52/1000
53/1000
54/1000
55/1000
56/1000
57/1000
58/1000
59/1000
60/1000
61/1000
62/1000
63/1000
64/1000
65/1000
66/1000
67/1000
68/1000
69/1000
70/1000
71/1000
72/1000
73/1000
74/1000
75/1000
76/1000
77/1000
78/1000
79/1000
80/1000
81/1000
82/1000
83/1000
84/1000
85/1000
86/1000
87/1000
88/1000
89/1000
90/1000
91/1000
92/1000
93/1000
94/1000
95/1000
96/1000
97/1000
98/1000
99/1000
100/1000
101/1000
102/1000
103/1000
104/1000
105/1000
106/1000
107/1000
108/1000
109/1000
110/1000
111/1000
112/1000
113/1000
114/1000
115/1000
116/1000
117/1000
118/1000
119/1000
120/1000
121/1000
122/1000
123

In [7]:
knn = KNeighborsClassifier(n_neighbors=5, metric="manhattan")
time_start = time.time()
knn.fit(X_train,Y_train)
time_end = time.time()
print('time cost', time_end-time_start,'s')

time cost 0.010931730270385742 s


In [8]:
Y_predict = knn.predict(X_test)
print('测试数据集得分：{:.2f}'.format(knn.score(X_test, Y_test)))

测试数据集得分：0.81


In [9]:
from sklearn.metrics import accuracy_score, classification_report
accuracy = accuracy_score(Y_test, Y_predict)
report = classification_report(Y_test, Y_predict)
print(f"Accuracy: {accuracy:.2f}")
print(f"Report:\n{report}")

Accuracy: 0.81
Report:
              precision    recall  f1-score   support

           0       0.92      0.68      0.78       250
           1       0.74      0.94      0.83       250

    accuracy                           0.81       500
   macro avg       0.83      0.81      0.80       500
weighted avg       0.83      0.81      0.80       500



In [10]:

svm =sklearn.svm.SVC(C=2, kernel='rbf', probability=True)
svm.fit(X_train, Y_train)

In [11]:
Y_predict = svm.predict(X_test)
correctness = 0
for i in range(len(Y_predict)):
    if Y_predict[i] == Y_test[i]:
        correctness += 1
print('测试数据集得分：{:.2f}'.format(correctness / len(Y_predict)))
accuracy = accuracy_score(Y_test, Y_predict)
report = classification_report(Y_test, Y_predict)
print(f"Accuracy: {accuracy:.2f}")
print(f"Report:\n{report}")

测试数据集得分：0.89
Accuracy: 0.89
Report:
              precision    recall  f1-score   support

           0       0.89      0.88      0.88       250
           1       0.88      0.90      0.89       250

    accuracy                           0.89       500
   macro avg       0.89      0.89      0.89       500
weighted avg       0.89      0.89      0.89       500



In [12]:
logi = sklearn.linear_model.LogisticRegression(C=2, solver='lbfgs', multi_class='multinomial')
logi.fit(X_train, Y_train)

In [13]:
Y_predict = logi.predict(X_test)
correctness = 0
for i in range(len(Y_predict)):
    if Y_predict[i] == Y_test[i]:
        correctness += 1
print('测试数据集得分：{:.2f}'.format(correctness / len(Y_predict)))
accuracy = accuracy_score(Y_test, Y_predict)
report = classification_report(Y_test, Y_predict)
print(f"Accuracy: {accuracy:.2f}")
print(f"Report:\n{report}")

测试数据集得分：0.85
Accuracy: 0.85
Report:
              precision    recall  f1-score   support

           0       0.85      0.86      0.85       250
           1       0.86      0.84      0.85       250

    accuracy                           0.85       500
   macro avg       0.85      0.85      0.85       500
weighted avg       0.85      0.85      0.85       500



In [14]:
from sklearn import tree
rfc = sklearn.tree.DecisionTreeClassifier()
rfc.fit(X_train, Y_train)

In [15]:
Y_predict = rfc.predict(X_test)
correctness = 0
for i in range(len(Y_predict)):
    if Y_predict[i] == Y_test[i]:
        correctness += 1
print('测试数据集得分：{:.2f}'.format(correctness / len(Y_predict)))
accuracy = accuracy_score(Y_test, Y_predict)
report = classification_report(Y_test, Y_predict)
print(f"Accuracy: {accuracy:.2f}")
print(f"Report:\n{report}")

测试数据集得分：0.69
Accuracy: 0.69
Report:
              precision    recall  f1-score   support

           0       0.69      0.68      0.69       250
           1       0.69      0.70      0.69       250

    accuracy                           0.69       500
   macro avg       0.69      0.69      0.69       500
weighted avg       0.69      0.69      0.69       500



In [33]:
sift = cv2.Feature2D()
kp,des = sift.detectAndCompute(trainset[0], None)
kp_image = cv2.drawKeypoints(trainset[0], kp, None)

error: Unknown C++ exception from OpenCV code

In [17]:
# from skimage import feature as ft
# trainset = BGR2GRAY(man_train_list + nat_train_list)
# lable = [0] * len(man_train_list) + [1] * len(nat_train_list)
# X3_train = extract_sift_features(trainset)
# Y3_train = lable
# X3_test = extract_sift_features(BGR2GRAY(man_test_list + nat_test_list))
# Y3_test = [0] * len(man_test_list) + [1] * len(nat_test_list)