In [1]:
import matplotlib.pyplot as plt
import os
import numpy as np
import seaborn as sns
import plotly.graph_objects as go
import plotly.express as px
import pandas as pd
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import warnings
warnings.filterwarnings("ignore")

!pip install metric-learn

Collecting metric-learn
  Downloading metric_learn-0.7.0-py2.py3-none-any.whl (67 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/67.8 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m67.8/67.8 kB[0m [31m2.6 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: metric-learn
Successfully installed metric-learn-0.7.0


In [4]:
import scipy.io

# Read the 'paviaU' array from PaviaU.mat; the path is relative to the
# notebook's working directory.
pavia_mat = scipy.io.loadmat('PaviaU.mat')
data = pavia_mat['paviaU']

# Read the matching 'paviaU_gt' label array from PaviaU_gt.mat.
pavia_gt_mat = scipy.io.loadmat('PaviaU_gt.mat')
gt = pavia_gt_mat['paviaU_gt']


In [5]:
def applyPCA(X, numComponents=15, random_state=42):
    """Reduce the last axis of a 3-D array with whitened PCA.

    The input is flattened to ``(X.shape[0] * X.shape[1], X.shape[2])``, a
    ``PCA(whiten=True)`` model is fitted on every row, and the projected rows
    are folded back onto the original first two axes.

    Parameters
    ----------
    X : array-like with exactly three dimensions
        Data whose last axis is to be reduced.
    numComponents : int, float, str or None, default=15
        Forwarded unchanged to ``PCA(n_components=...)``.
    random_state : int, RandomState instance or None, default=42
        Forwarded to ``PCA`` so repeated runs give the same projection when
        scikit-learn selects a randomized solver. Pass ``None`` to leave the
        solver unseeded.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(X.shape[0], X.shape[1], k)`` where ``k`` is the
        number of components PCA actually produced.

    Raises
    ------
    ValueError
        If ``X`` does not have exactly three dimensions.
    """
    X = np.asarray(X)
    if X.ndim != 3:
        raise ValueError(
            "applyPCA expects a 3-D array, got an array with shape {}".format(X.shape)
        )

    rows, cols, depth = X.shape
    flat = X.reshape(-1, depth)

    pca = PCA(n_components=numComponents, whiten=True, random_state=random_state)
    reduced = pca.fit_transform(flat)

    # Take the width from the PCA output instead of from `numComponents`:
    # the two differ when n_components is a variance fraction, 'mle' or None.
    return reduced.reshape(rows, cols, reduced.shape[1])

In [6]:
# Replace `data` with its PCA-reduced version (applyPCA's default number of
# components is used).
# NOTE(review): `data` is overwritten here, so re-running this cell without
# reloading feeds the already-reduced array back into applyPCA.
data = applyPCA(data)

# Flatten the first two axes: one DataFrame row per (row, col) position and
# one column per component, with the flattened labels appended as 'class'.
n_positions = data.shape[0] * data.shape[1]
df = pd.DataFrame(data.reshape(n_positions, -1))
df['class'] = gt.ravel()

# Keep only rows whose label is non-zero (presumably 0 marks unlabeled
# positions -- confirm against the dataset description).
labeled = df[df['class'] != 0]

# Every column except the trailing 'class' column is a feature.
X = labeled.iloc[:, :-1].values
y = labeled.iloc[:, -1].values

In [7]:
from sklearn.model_selection import KFold
from sklearn.neighbors import KNeighborsClassifier
from metric_learn import LMNN

# 5-fold cross-validation of an LMNN -> k-NN pipeline on (X, y).
# Both estimators are created once and re-fitted inside every fold, so after
# the loop `lmnn` and `knn` hold the fit from the final fold.
lmnn = LMNN(n_neighbors=5)
knn = KNeighborsClassifier(n_neighbors=5)
kf = KFold(n_splits=5, shuffle=True, random_state=42)

acc_list = []

for f_index, (train_index, test_index) in enumerate(kf.split(X), start=1):
    print(f"Fold {f_index} -> Train: {len(train_index)}, Test: {len(test_index)}")

    X_train, y_train = X[train_index], y[train_index]
    X_test, y_test = X[test_index], y[test_index]

    # Learn the metric on the training split only, then fit k-NN in the
    # transformed space.
    lmnn.fit(X_train, y_train)
    train_embedded = lmnn.transform(X_train)
    knn.fit(train_embedded, y_train)

    # The held-out split is mapped through the same learned transform before
    # scoring.
    test_embedded = lmnn.transform(X_test)
    lmnn_acc = knn.score(test_embedded, y_test)
    print('lmnn accuracy  {}'.format(lmnn_acc))

    acc_list.append(lmnn_acc)

# Mean and standard deviation of the per-fold accuracies (the printed labels
# are in Turkish).
print("Ortalaması Başarı:", np.mean(acc_list))
print("Standart sapma:", np.std(acc_list))

Fold 1 -> Train: 34220, Test: 8556
lmnn accuracy  0.9114071996259935
Fold 2 -> Train: 34221, Test: 8555
lmnn accuracy  0.9042665108123904
Fold 3 -> Train: 34221, Test: 8555
lmnn accuracy  0.902863822326125
Fold 4 -> Train: 34221, Test: 8555
lmnn accuracy  0.9037989479836352
Fold 5 -> Train: 34221, Test: 8555
lmnn accuracy  0.8994739918176505
Ortalaması Başarı: 0.904362094513159
Standart sapma: 0.0039012342615313543


In [8]:
import joblib

# File names for the serialized estimators, relative to the working directory.
lmnn_path = 's_lmnn_pu_model.pkl'
knn_path = 's_lmnn_pu_knn_model.pkl'

# Write the current `lmnn` and `knn` objects to disk.
joblib.dump(lmnn, lmnn_path)
joblib.dump(knn, knn_path)

# Read them straight back, so the predictions below come from the serialized
# copies rather than the in-memory objects.
lmnn_loaded = joblib.load(lmnn_path)
knn_loaded = joblib.load(knn_path)

# Transform every row of X with the reloaded metric, then classify it.
lmnn_data = lmnn_loaded.transform(X)
pred = knn_loaded.predict(lmnn_data)

In [9]:
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score
from sklearn.metrics import cohen_kappa_score

# scikit-learn classification metrics take the reference labels first and the
# predictions second. Accuracy, Cohen's kappa and macro F1 are symmetric in
# their two arguments, so the printed values are unchanged, but keeping the
# documented order means an asymmetric metric (precision, recall, confusion
# matrix) added here later will not silently come out transposed.
# NOTE(review): `pred` is computed elsewhere in this notebook for every row
# of X; if the model that produced it was fitted on part of X, these are not
# held-out scores -- confirm before reporting them.
print("accuracy:", accuracy_score(y, pred))
print("cohen kappa:", cohen_kappa_score(y, pred))
print("f1 score:", f1_score(y, pred, average='macro'))

accuracy: 0.9297970824761549
cohen kappa: 0.9059681067333695
f1 score: 0.9114392597329668
