In [1]:
import os
import cv2
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn import metrics
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.cluster import MiniBatchKMeans
from skimage.feature import local_binary_pattern


# ---- Notebook-wide configuration -------------------------------------------
dataset_path = 'Dataset'   # dataset root folder; not referenced by the cells visible here
RANDOMSEED = 1277          # single seed reused for sampling, splitting and model fitting
N_CLUSTERS = 100           # default number of k-means clusters (visual words)
# Default cap on how many pooled descriptor rows are sampled to fit the vocabulary.
# NOTE(review): 125 samples for 100 clusters is very few — confirm this is intended.
MAX_SAMPLES = 125
# Class names handed to the classification report as `target_names`.
# Presumably ordered to match the sorted values of the `code` column — verify.
label_list = [
    "Agriculture", "Airport", "Beach", "City", "Desert", "Forest",
    "Grassland", "Highway", "Lake", "Mountain", "Parking", "Port",
    "Railway", "Residential", "River"
]
# Derived from label_list so the class count cannot drift from the name list.
num_type = len(label_list)

In [2]:
# Directory holding the cached feature pickles. It can be overridden with the
# FEATURE_CACHE_DIR environment variable so the notebook is not tied to one
# machine; the default keeps the original location.
FEATURE_CACHE_DIR = os.environ.get("FEATURE_CACHE_DIR", "D:/temp")

# NOTE: unpickling can execute arbitrary code — only load pickle files that
# were produced by this project.
code = pd.read_pickle(os.path.join(FEATURE_CACHE_DIR, "index_df_code.pkl"))
sift_features = pd.read_pickle(os.path.join(FEATURE_CACHE_DIR, "index_df_sift_features.pkl"))

# pd.concat(axis=1) aligns rows on the index; mismatched indexes would silently
# produce NaN-padded rows, so fail loudly instead.
if not code.index.equals(sift_features.index):
    raise ValueError(
        "code and sift_features pickles have different indexes; "
        "cannot combine them column-wise"
    )

# One table with the class code and the SIFT features side by side.
index_df = pd.concat([code, sift_features], axis=1)

In [3]:
def create_visual_vocabulary(descriptors_list, n_clusters=N_CLUSTERS, max_samples=MAX_SAMPLES):
    """Fit a k-means visual vocabulary on descriptors pooled from many entries.

    Parameters
    ----------
    descriptors_list : iterable
        Each entry is an array of descriptors (one descriptor per row). Entries
        that are ``None`` or empty are skipped.
    n_clusters : int
        Number of clusters (visual words) to fit.
    max_samples : int
        Maximum number of pooled descriptor rows used for fitting. When more
        rows are available, a random subset of this size is drawn without
        replacement.

    Returns
    -------
    MiniBatchKMeans
        The fitted clustering model.

    Raises
    ------
    ValueError
        If no entry contributes any descriptor.
    """
    # Skip None as well as empty entries, matching what extract_bow_features accepts.
    usable = [d for d in descriptors_list if d is not None and len(d) > 0]
    if not usable:
        raise ValueError("no descriptors available to build the visual vocabulary")
    all_descriptors = np.vstack(usable)

    if len(all_descriptors) > max_samples:
        # A private RandomState seeded with RANDOMSEED draws the same subset as
        # seeding the global generator would, without resetting NumPy's global
        # random state as a side effect.
        rng = np.random.RandomState(RANDOMSEED)
        keep = rng.choice(len(all_descriptors), max_samples, replace=False)
        all_descriptors = all_descriptors[keep]

    # use MiniBatchKMeans
    kmeans = MiniBatchKMeans(n_clusters=n_clusters, random_state=RANDOMSEED)
    kmeans.fit(all_descriptors)
    return kmeans

In [4]:
def extract_bow_features(descriptors_list, kmeans):
    """Encode each entry's descriptors as a normalized bag-of-words histogram.

    Parameters
    ----------
    descriptors_list : sequence
        One entry per sample. Each entry is an array of descriptors accepted by
        ``kmeans.predict``, or ``None`` / an empty array when the sample has no
        descriptors.
    kmeans : object
        Fitted clustering model exposing ``n_clusters`` and ``predict``.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(len(descriptors_list), kmeans.n_clusters)`` and dtype
        float32. Each row is the cluster-assignment histogram divided by its
        sum (plus 1e-7); samples without descriptors yield an all-zero row.
    """
    n_clusters = kmeans.n_clusters
    total = len(descriptors_list)
    # Pre-allocating fixes the output shape and dtype up front, including for
    # empty input and for samples that have no descriptors.
    features = np.zeros((total, n_clusters), dtype=np.float32)

    for i, descriptors in enumerate(descriptors_list):
        print(f"take BOW features:{i+1}/{total}",end='\r')

        if descriptors is None or len(descriptors) == 0:
            continue  # row stays all-zero

        labels = kmeans.predict(descriptors)
        # minlength pads the counts up to n_clusters bins; the slice keeps the
        # original's defensive truncation to exactly n_clusters bins.
        hist = np.bincount(labels, minlength=n_clusters)[:n_clusters].astype(np.float32)
        hist /= hist.sum() + 1e-7  # epsilon guards against division by zero
        features[i] = hist

    return features


In [5]:
totsize=len(index_df)
print(totsize)
train_size=int(totsize*0.8)
test_size=int(totsize*0.2)

# Equal per-class quotas: the overall 80% / 20% budgets divided by the class count.
train_per_code=train_size//num_type
test_per_code=test_size//num_type

# Collect the per-class pieces and concatenate once at the end: growing a
# DataFrame with pd.concat inside the loop re-copies all earlier rows each time.
train_parts = []
test_parts = []
# The loop variable is deliberately not named `code`, so the `code` object
# loaded earlier in the notebook is not overwritten.
for class_code in index_df['code'].unique():
    # All rows belonging to the current class
    sample_code = index_df[index_df['code'] == class_code]
    # Integer sizes ask train_test_split for absolute sample counts.
    train_samples, test_samples = train_test_split(
        sample_code,
        test_size=test_per_code,
        train_size=train_per_code,
        random_state=RANDOMSEED,
        shuffle=True
    )
    train_parts.append(train_samples)
    test_parts.append(test_samples)

# pd.concat rejects an empty list, so fall back to an empty frame when there are no classes.
train_df = pd.concat(train_parts, ignore_index=True) if train_parts else pd.DataFrame()
test_df = pd.concat(test_parts, ignore_index=True) if test_parts else pd.DataFrame()

print('finish')

12000
finish


In [6]:
def printresult_pro(y_true, y_pred, y_proba, label_names=None, top_n=3):
    """Print overall and per-class classification metrics for one set of predictions.

    Parameters
    ----------
    y_true : array-like
        Ground-truth labels.
    y_pred : array-like
        Predicted labels.
    y_proba : array-like
        Accepted for interface compatibility; not used by this function.
    label_names : list of str, optional
        Forwarded to ``classification_report`` as ``target_names``.
    top_n : int
        Accepted for interface compatibility; not used by this function.

    Returns
    -------
    None
        Everything is written to stdout.
    """
    # 1. Aggregate scores and the confusion matrix
    print("Evaluation Metrics:")
    accuracy = metrics.accuracy_score(y_true, y_pred)
    print("Accuracy Score:", accuracy)
    macro_recall = metrics.recall_score(y_true, y_pred, average='macro')
    print("Recall Score (macro):", macro_recall)
    macro_f1 = metrics.f1_score(y_true, y_pred, average='macro')
    print("F1 Score (macro):", macro_f1)
    print("Confusion Matrix:")
    confusion = metrics.confusion_matrix(y_true, y_pred)
    print(confusion)

    # 2. Precision / recall / F1 score for every class
    print("\nPer-Class Performance:")
    print(
        metrics.classification_report(
            y_true,
            y_pred,
            target_names=label_names,
            digits=3,
        )
    )



KNN-sift_features

In [7]:
# Fit the visual vocabulary on a 30% subsample of the training rows' SIFT
# descriptors; the 70% "test" part returned by train_test_split is discarded.
train_sift_sample, _ = train_test_split(
    train_df['sift_features'],
    test_size=0.7,
    random_state=RANDOMSEED,
)
kmeans = create_visual_vocabulary(train_sift_sample)
print("Visual vocabulary created.")

# SIFT feature columns of the complete training and test sets
full_train_sift, test_sift_features = train_df['sift_features'], test_df['sift_features']

# Turn the SIFT descriptors into Bag of Words (BoW) vectors — training set first, then test set
X_train_bow, X_test_bow = (
    extract_bow_features(sift_column, kmeans)
    for sift_column in (full_train_sift, test_sift_features)
)

# Class labels as NumPy arrays
y_train, y_test = (np.array(frame['code']) for frame in (train_df, test_df))


[WinError 2] 系统找不到指定的文件。
  File "C:\Users\lele1\anaconda3\envs\comp9417\Lib\site-packages\joblib\externals\loky\backend\context.py", line 257, in _count_physical_cores
    cpu_info = subprocess.run(
               ^^^^^^^^^^^^^^^
  File "C:\Users\lele1\anaconda3\envs\comp9417\Lib\subprocess.py", line 550, in run
    with Popen(*popenargs, **kwargs) as process:
         ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\lele1\anaconda3\envs\comp9417\Lib\subprocess.py", line 1028, in __init__
    self._execute_child(args, executable, preexec_fn, close_fds,
  File "C:\Users\lele1\anaconda3\envs\comp9417\Lib\subprocess.py", line 1540, in _execute_child
    hp, ht, pid, tid = _winapi.CreateProcess(executable, args,
                       ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^


Visual vocabulary created.
take BOW features:2400/2400

In [8]:
# 3-nearest-neighbour classifier on the BoW vectors; fit() returns the estimator itself.
knn_clf_bow = KNeighborsClassifier(n_neighbors=3).fit(X_train_bow, y_train)

# Predicted labels, then per-class probabilities, for the test set
knn_clf_bow_predict_y = knn_clf_bow.predict(X_test_bow)
knn_clf_bow_proba_y = knn_clf_bow.predict_proba(X_test_bow)



In [9]:
# Test-set report for the KNN classifier on SIFT BoW features
print("KNN-sift_features:")
printresult_pro(
    y_true=y_test,
    y_pred=knn_clf_bow_predict_y,
    y_proba=knn_clf_bow_proba_y,
    label_names=label_list,
)


KNN-sift_features:
Evaluation Metrics:
Accuracy Score: 0.46291666666666664
Recall Score (macro): 0.4629166666666667
F1 Score (macro): 0.45564962084396504
Confusion Matrix:
[[ 91  11   5   8   3   0   0  14  10   1   1   0   5   5   6]
 [  9  86   1  22   0   1   0  16   3   1   1   1  16   2   1]
 [ 16  11  51  17   4   3   1  13  11  12   1   5   3   4   8]
 [  3  15   0 112   0   0   0   4   2   2   0   3  11   5   3]
 [ 27   6  20   6  53   8  12   3   4  13   3   0   1   1   3]
 [  0   1   2   0   3 136   6   2   1   4   0   0   1   2   2]
 [ 21   9  13   4  17  20  45   3   4   4   7   2   0   6   5]
 [  9  23   7  15   3   0   0  63   3   2   2   2  23   3   5]
 [ 18  22  18  16   5   4   5  11  27  14   2   0   2   9   7]
 [  4   3   9  14   7   1   2   4  11  88   0   1   4   3   9]
 [  0  18   0   7   6   2   1   5   4   0 103   0   6   5   3]
 [  5  32   3  35   1   0   0   7   1   3   1  57  13   0   2]
 [  1  25   2  21   0   0   1  17   0   2   0   3  87   0   1]
 [  2   7

In [10]:
# RBF-kernel SVM on the BoW vectors; probability=True is needed for predict_proba below.
svm_rbf_sift = SVC(
    kernel='rbf',
    random_state=RANDOMSEED,
    probability=True,
).fit(X_train_bow, y_train)

# Predicted labels, then per-class probabilities, for the test set
svm_rbf_sift_predict = svm_rbf_sift.predict(X_test_bow)
svm_rbf_sift_proba = svm_rbf_sift.predict_proba(X_test_bow)


In [11]:
# Test-set report for the RBF SVM on SIFT BoW features
print("SVM with SIFT (BOW features):")
printresult_pro(
    y_true=y_test,
    y_pred=svm_rbf_sift_predict,
    y_proba=svm_rbf_sift_proba,
    label_names=label_list,
)


SVM with SIFT (BOW features):
Evaluation Metrics:
Accuracy Score: 0.59875
Recall Score (macro): 0.5987500000000001
F1 Score (macro): 0.5911951651059845
Confusion Matrix:
[[103   2   6   2   9   0   9   5   2   0   1   1   4   6  10]
 [ 10  83   1  10   1   2   0  11   3   1   3   7  18   4   6]
 [  3   3  85   3   9   4   5  10   8   9   1   8   1   1  10]
 [  0  11   3 105   1   1   0   4   0   1   2  13   8   9   2]
 [  6   1   5   1  83   7  26   0   4  17   0   2   1   1   6]
 [  0   0   1   0   5 135   7   2   2   2   0   0   0   3   3]
 [ 12   3   4   0  19  11  78   4   9   2   3   0   2   6   7]
 [  1   9   1   2   4   0   1 101   7   1   3   2  11   8   9]
 [  8   5  17   0   5  13   6  13  36  14   1   4   0  12  26]
 [  1   0   6   5   8   4   1   0   6 116   0   1   0   2  10]
 [  0   1   0   1   0   0   1  10   1   1 137   1   4   1   2]
 [  1  16   1  24   0   1   0   2   1   2   3  94   9   0   6]
 [  1  16   0   9   0   0   0  19   0   2   1   9 102   0   1]
 [  3   2  

In [12]:
# Random forest with 100 trees on the BoW vectors; fit() returns the estimator itself.
rf_clf_bow = RandomForestClassifier(
    n_estimators=100,
    random_state=RANDOMSEED,
).fit(X_train_bow, y_train)

# Predicted labels, then per-class probabilities, for the test set
rf_clf_bow_predict = rf_clf_bow.predict(X_test_bow)
rf_clf_bow_prob = rf_clf_bow.predict_proba(X_test_bow)


In [13]:
# Test-set report for the random forest on SIFT BoW features
print("Random Forest with SIFT (BOW features):")
printresult_pro(
    y_true=y_test,
    y_pred=rf_clf_bow_predict,
    y_proba=rf_clf_bow_prob,
    label_names=label_list,
)


Random Forest with SIFT (BOW features):
Evaluation Metrics:
Accuracy Score: 0.5833333333333334
Recall Score (macro): 0.5833333333333334
F1 Score (macro): 0.5687366879972123
Confusion Matrix:
[[ 96   9   6   3  10   0   7   8   6   1   1   2   1   6   4]
 [  7  79   0  12   2   0   0  19   5   3   2  11  15   4   1]
 [  3   2  89   4   7   1   8  11  11   7   2   6   0   4   5]
 [  1   5   0 106   1   1   0   4   0   1   1  15  12  12   1]
 [  3   0   5   0  87   6  23   3   8  15   1   3   0   1   5]
 [  1   0   0   0   4 135  10   1   2   3   0   0   0   3   1]
 [ 13   1   7   0  21   7  87   0   2   1   6   0   1   9   5]
 [  3   9   4   3   8   1   0  97   3   3   4   2  11  10   2]
 [ 11  10  25   2   5  12   6  13  22  22   4   3   0  13  12]
 [  3   0   6   4   6   6   3   0   4 110   0   1   1   9   7]
 [  0   3   0   1   1   1   3   5   0   0 134   2   5   3   2]
 [  3  12   1  18   1   1   0   4   1   2   3  98  10   0   6]
 [  1   7   0   9   0   0   0  19   0   2   0  18 103