In [1]:
import numpy as np
import scipy as sp
import sklearn as skl
import skimage as ski
import os
from glob import glob
from tqdm import tqdm
from pathlib import Path
import matplotlib.pyplot as plt

import seaborn as sns

In [3]:
dataset = Path("Y:/kau/диплом/data/dataset")
workdir = Path("Y:/kau/диплом/data/run-classification")

In [4]:
feature_paths = list(dataset.glob('hist_*')) + list(dataset.glob('reb_hist_*'))
feature_names = [ fp.stem for fp in feature_paths]
feature_names

['hist_R1_P8_d',
 'hist_R1_P8_u',
 'hist_R2_P12_d',
 'hist_R2_P12_u',
 'hist_R3_P24_u',
 'hist_R5_P36_u',
 'reb_hist_R1_P8_d',
 'reb_hist_R2_P12_d']

In [5]:
features = [np.load(fp) for fp in feature_paths]

In [6]:
labels = np.load(dataset / 'labels.npy')
y_set, y_tex = labels.T
unique_sets = np.unique(y_set)
unique_texs = np.unique(y_tex)
unique_sets, unique_texs

(array([1, 2, 3]), array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13]))

In [12]:
A = np.array([10,10,10,10,10,10,10])
B = np.array([10,10,20,20,10,20,10])


array([0.05921336, 0.05921336])

In [13]:
def build_classifier_klmean(X, y, unique_y):
    X_cl = np.array([ X[y == k] for k in unique_y ])
    means = X_cl.mean(axis=1)
    
    def classify(x):
        # x is a single vector
        distances = sp.stats.entropy(x, means, axis=1)
        return unique_y[np.argmin(distances)]

    return classify

In [15]:
X = features[0]

X_train, X_test, y_train, y_test = skl.model_selection.train_test_split(X, y_tex, test_size=0.4, random_state=2222, stratify=y_tex)
X_train.shape, y_train.shape

((1170, 256), (1170,))

In [39]:
for f,fn in zip(features, feature_names):
    X_train, X_test, y_train, y_test = skl.model_selection.train_test_split(f, y_tex, test_size=0.4, stratify=y_tex)
    C = build_classifier_klmean(X_train, y_train, unique_texs)
    y_pred = np.array([ C(x) for x in X_test ])
    confusion = skl.metrics.confusion_matrix(y_test, y_pred)
    precision = skl.metrics.precision_score(y_test, y_pred, average='macro')

    fig = plt.figure()
    sns.heatmap(confusion, annot=confusion, xticklabels=unique_texs, yticklabels=unique_texs)
    fig.savefig(f'plots/confusion/{fn}.png')
    plt.close()

    print(f"{fn} - precision {precision:.2f}")

hist_R1_P8_d - precision 0.81
hist_R1_P8_u - precision 0.50
hist_R2_P12_d - precision 0.59
hist_R2_P12_u - precision 0.55
hist_R3_P24_u - precision 0.60
hist_R5_P36_u - precision 0.61
reb_hist_R1_P8_d - precision 0.80
reb_hist_R2_P12_d - precision 0.81


In [None]:
s = 1
for f,fn in zip(features, feature_names):
    f = f[y_set == s]
    y = y_tex[y_set == s]

    X_train, X_test, y_train, y_test = skl.model_selection.train_test_split(f, y, test_size=0.4, stratify=y)
    C = build_classifier_klmean(X_train, y_train, unique_texs)
    y_pred = np.array([ C(x) for x in X_test ])
    confusion = skl.metrics.confusion_matrix(y_test, y_pred)
    precision = skl.metrics.precision_score(y_test, y_pred, average='macro')

    fig = plt.figure()
    sns.heatmap(confusion, annot=confusion, xticklabels=unique_texs, yticklabels=unique_texs)
    fig.savefig(f'plots/confusion/subset{s}-{fn}.png')
    plt.close()

    print(f"{fn} - subset {s} - precision {precision:.2f}")

hist_R1_P8_d - subset 1 - precision 0.79
hist_R1_P8_u - subset 1 - precision 0.88


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


hist_R2_P12_d - subset 1 - precision 0.00
hist_R2_P12_u - subset 1 - precision 0.86
hist_R3_P24_u - subset 1 - precision 0.92
hist_R5_P36_u - subset 1 - precision 0.89
reb_hist_R1_P8_d - subset 1 - precision 0.94
reb_hist_R2_P12_d - subset 1 - precision 0.98


In [61]:
for f,fn in zip(features, feature_names):
    X_train = f[y_set == 1]
    y_train = y_tex[y_set == 1]
    X_test = f[y_set == 2]
    y_test = y_tex[y_set == 2]

    # select random subset of an image
    X_train_1, _, y_train_1, _ = skl.model_selection.train_test_split(X_train, y_train, test_size=0.4, stratify=y_train)
    
    
    C = build_classifier_klmean(X_train_1, y_train_1, unique_texs)
    y_pred = np.array([ C(x) for x in X_test ])
    confusion = skl.metrics.confusion_matrix(y_test, y_pred)
    precision = skl.metrics.precision_score(y_test, y_pred, average='macro', zero_division=0.0)

    fig = plt.figure()
    sns.heatmap(confusion, annot=confusion, xticklabels=unique_texs, yticklabels=unique_texs)
    fig.savefig(f'plots/confusion/sep-{fn}.png')
    plt.close()

    print(f"0.6 train 1 test 2 - {fn} - precision {precision:.2f}")

0.6 train 1 test 2 - hist_R1_P8_d - precision 0.86
0.6 train 1 test 2 - hist_R1_P8_u - precision 0.85
0.6 train 1 test 2 - hist_R2_P12_d - precision 0.26
0.6 train 1 test 2 - hist_R2_P12_u - precision 0.85
0.6 train 1 test 2 - hist_R3_P24_u - precision 0.87
0.6 train 1 test 2 - hist_R5_P36_u - precision 0.89
0.6 train 1 test 2 - reb_hist_R1_P8_d - precision 0.95
0.6 train 1 test 2 - reb_hist_R2_P12_d - precision 0.97


In [62]:
for f,fn in zip(features, feature_names):
    X_train = f[y_set == 1]
    y_train = y_tex[y_set == 1]
    X_test = f[y_set == 3]
    y_test = y_tex[y_set == 3]

    # select random subset of an image
    X_train, _, y_train, _ = skl.model_selection.train_test_split(X_train, y_train, test_size=0.4, stratify=y_train)
    
    C = build_classifier_klmean(X_train, y_train, unique_texs)
    y_pred = np.array([ C(x) for x in X_test ])
    confusion = skl.metrics.confusion_matrix(y_test, y_pred)
    precision = skl.metrics.precision_score(y_test, y_pred, average='macro', zero_division=0.0)

    fig = plt.figure()
    sns.heatmap(confusion, annot=confusion, xticklabels=unique_texs, yticklabels=unique_texs)
    fig.savefig(f'plots/confusion/sep-{fn}.png')
    plt.close()

    print(f"0.6 train 1 test 3 - {fn} - precision {precision:.2f}")

0.6 train 1 test 3 - hist_R1_P8_d - precision 0.23
0.6 train 1 test 3 - hist_R1_P8_u - precision 0.07
0.6 train 1 test 3 - hist_R2_P12_d - precision 0.00
0.6 train 1 test 3 - hist_R2_P12_u - precision 0.13
0.6 train 1 test 3 - hist_R3_P24_u - precision 0.07
0.6 train 1 test 3 - hist_R5_P36_u - precision 0.09
0.6 train 1 test 3 - reb_hist_R1_P8_d - precision 0.15
0.6 train 1 test 3 - reb_hist_R2_P12_d - precision 0.36


# KNN

In [44]:
def build_classifier_knn(X, y, unique_y):
    knn = skl.neighbors.KNeighborsClassifier(n_neighbors=3, metric='l2')
    knn.fit(X, y)

    def classify(x):
        # x is an arraylike
        return knn.predict(x)

    return classify

In [45]:
for f,fn in zip(features, feature_names):
    X_train, X_test, y_train, y_test = skl.model_selection.train_test_split(f, y_tex, test_size=0.4, stratify=y_tex)
    C = build_classifier_knn(X_train, y_train, unique_texs)
    y_pred = C(X_test)
    confusion = skl.metrics.confusion_matrix(y_test, y_pred)
    precision = skl.metrics.precision_score(y_test, y_pred, average='macro')

    fig = plt.figure()
    sns.heatmap(confusion, annot=confusion, xticklabels=unique_texs, yticklabels=unique_texs)
    fig.savefig(f'plots/confusion/knn-{fn}.png')
    plt.close()

    print(f"{fn} - precision {precision:.2f}")

hist_R1_P8_d - precision 0.98
hist_R1_P8_u - precision 0.95
hist_R2_P12_d - precision 0.98
hist_R2_P12_u - precision 0.98
hist_R3_P24_u - precision 0.97
hist_R5_P36_u - precision 0.93
reb_hist_R1_P8_d - precision 0.97
reb_hist_R2_P12_d - precision 0.97


In [43]:
s = 1
for f,fn in zip(features, feature_names):
    f = f[y_set == s]
    y = y_tex[y_set == s]

    X_train, X_test, y_train, y_test = skl.model_selection.train_test_split(f, y, test_size=0.4, stratify=y)
    C = build_classifier_knn(X_train, y_train, unique_texs)
    y_pred = C(X_test)
    confusion = skl.metrics.confusion_matrix(y_test, y_pred)
    precision = skl.metrics.precision_score(y_test, y_pred, average='macro')

    fig = plt.figure()
    sns.heatmap(confusion, annot=confusion, xticklabels=unique_texs, yticklabels=unique_texs)
    fig.savefig(f'plots/confusion/knn-subset{s}-{fn}.png')
    plt.close()

    print(f"{fn} - subset {s} - precision {precision:.2f}")

hist_R1_P8_d - subset 1 - precision 0.97
hist_R1_P8_u - subset 1 - precision 0.93
hist_R2_P12_d - subset 1 - precision 0.98
hist_R2_P12_u - subset 1 - precision 0.93
hist_R3_P24_u - subset 1 - precision 0.95
hist_R5_P36_u - subset 1 - precision 0.91
reb_hist_R1_P8_d - subset 1 - precision 0.97
reb_hist_R2_P12_d - subset 1 - precision 0.97


In [69]:
for f,fn in zip(features, feature_names):
    X_train = f[y_set == 1]
    y_train = y_tex[y_set == 1]
    X_test = f[y_set == 2]
    y_test = y_tex[y_set == 2]
    
    # select random subset of an image
    X_train, _, y_train, _ = skl.model_selection.train_test_split(X_train, y_train, test_size=0.4, stratify=y_train)

    C = build_classifier_knn(X_train, y_train, unique_texs)
    y_pred = C(X_test)
    confusion = skl.metrics.confusion_matrix(y_test, y_pred)
    precision = skl.metrics.precision_score(y_test, y_pred, average='macro')

    fig = plt.figure()
    sns.heatmap(confusion, annot=confusion, xticklabels=unique_texs, yticklabels=unique_texs)
    fig.savefig(f'plots/confusion/knn-{fn}.png')
    plt.close()

    print(f"0.6 KNN train 1 test 2 - {fn} - precision {precision:.2f}")

0.6 KNN train 1 test 2 - hist_R1_P8_d - precision 0.98
0.6 KNN train 1 test 2 - hist_R1_P8_u - precision 0.91
0.6 KNN train 1 test 2 - hist_R2_P12_d - precision 0.99
0.6 KNN train 1 test 2 - hist_R2_P12_u - precision 0.96
0.6 KNN train 1 test 2 - hist_R3_P24_u - precision 0.91
0.6 KNN train 1 test 2 - hist_R5_P36_u - precision 0.75
0.6 KNN train 1 test 2 - reb_hist_R1_P8_d - precision 0.99
0.6 KNN train 1 test 2 - reb_hist_R2_P12_d - precision 0.99


In [70]:
for f,fn in zip(features, feature_names):
    X_train = f[y_set == 1]
    y_train = y_tex[y_set == 1]
    X_test = f[y_set == 3]
    y_test = y_tex[y_set == 3]

    # select random subset of an image
    X_train, _, y_train, _ = skl.model_selection.train_test_split(X_train, y_train, test_size=0.4, stratify=y_train)

    C = build_classifier_knn(X_train, y_train, unique_texs)
    y_pred = C(X_test)
    confusion = skl.metrics.confusion_matrix(y_test, y_pred)
    precision = skl.metrics.precision_score(y_test, y_pred, average='macro', zero_division=0.0)

    fig = plt.figure()
    sns.heatmap(confusion, annot=confusion, xticklabels=unique_texs, yticklabels=unique_texs)
    fig.savefig(f'plots/confusion/knn-{fn}.png')
    plt.close()

    print(f"0.6 KNN train 1 test 3 - {fn} - precision {precision:.2f}")

0.6 KNN train 1 test 3 - hist_R1_P8_d - precision 0.24
0.6 KNN train 1 test 3 - hist_R1_P8_u - precision 0.19
0.6 KNN train 1 test 3 - hist_R2_P12_d - precision 0.20
0.6 KNN train 1 test 3 - hist_R2_P12_u - precision 0.16
0.6 KNN train 1 test 3 - hist_R3_P24_u - precision 0.06
0.6 KNN train 1 test 3 - hist_R5_P36_u - precision 0.14
0.6 KNN train 1 test 3 - reb_hist_R1_P8_d - precision 0.31
0.6 KNN train 1 test 3 - reb_hist_R2_P12_d - precision 0.27


In [55]:
1/13

0.07692307692307693