In [1]:
import matplotlib.pyplot as plt
import os
import numpy as np
import seaborn as sns
import plotly.graph_objects as go
import plotly.express as px
import pandas as pd
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import warnings
warnings.filterwarnings("ignore")

!pip install metric-learn

Collecting metric-learn
  Downloading metric_learn-0.7.0-py2.py3-none-any.whl (67 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m67.8/67.8 kB[0m [31m752.2 kB/s[0m eta [36m0:00:00[0m
Installing collected packages: metric-learn
Successfully installed metric-learn-0.7.0


In [2]:
import scipy.io
data = scipy.io.loadmat('PaviaU.mat')['paviaU']
gt = scipy.io.loadmat('PaviaU_gt.mat')['paviaU_gt']

In [3]:
def applyPCA(X, numComponents=15):
    newX = np.reshape(X, (-1, X.shape[2]))
    pca = PCA(n_components=numComponents, whiten=True)
    newX = pca.fit_transform(newX)
    newX = np.reshape(newX, (X.shape[0],X.shape[1], numComponents))
    return newX

In [4]:
data=applyPCA(data)

df = pd.DataFrame(data.reshape(data.shape[0]*data.shape[1], -1))
df['class'] = gt.ravel()

X = df[df['class']!=0].iloc[:, :-1].values

y = df[df['class']!=0].iloc[:, -1].values

In [5]:
from sklearn.model_selection import KFold
from sklearn.neighbors import KNeighborsClassifier
from metric_learn import NCA


X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, random_state=42)

nca = NCA(max_iter=100,verbose=True)
knn = KNeighborsClassifier(n_neighbors=5)

nca.fit(X_train, y_train)


knn.fit(nca.transform(X_train), y_train)


nca_acc = knn.score(nca.transform(X_test), y_test)
print('NCA accuracy:', nca_acc)

[NCA]
[NCA]  Iteration      Objective Value    Time(s)
[NCA] ------------------------------------------
[NCA]          0         1.523361e+04      20.94
[NCA]          1         1.734358e+04      21.03
[NCA]          2         1.818307e+04      20.94
[NCA]          3         1.896725e+04      20.96
[NCA]          4         1.945852e+04      21.74
[NCA]          5         1.953792e+04      22.54
[NCA]          6         1.987527e+04      22.82
[NCA]          7         1.998528e+04      23.09
[NCA]          8         2.005278e+04      24.17
[NCA]          9         2.012382e+04      29.24
[NCA]         10         2.018186e+04      31.84
[NCA]         11         2.020861e+04      32.70
[NCA]         12         2.023520e+04      32.99
[NCA]         13         2.025487e+04      33.94
[NCA]         14         2.027102e+04      34.61
[NCA]         15         2.029475e+04      35.37
[NCA]         16         2.031346e+04      35.30
[NCA]         17         2.033446e+04      34.77
[NCA]         

In [6]:
import joblib

joblib.dump(nca, 's_nca_pu_model.pkl')
joblib.dump(knn,'s_nca_pu_knn_model.pkl')

nca_loaded = joblib.load('s_nca_pu_model.pkl')
knn_loaded =joblib.load('s_nca_pu_knn_model.pkl')

nca_data=nca_loaded.transform(X)
pred=knn_loaded.predict(nca_data)

In [7]:
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score
from sklearn.metrics import cohen_kappa_score

print("accuracy:",accuracy_score(pred,y))
print("cohen kappa:", cohen_kappa_score(pred,y))
print("f1 score:",f1_score(pred,y,average='macro'))

accuracy: 0.9593229848513185
cohen kappa: 0.9458435319864029
f1 score: 0.9489982765462867
