In [8]:
import matplotlib.pyplot as plt
import os
import numpy as np
import seaborn as sns
import plotly.graph_objects as go
import plotly.express as px
import pandas as pd
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import warnings
warnings.filterwarnings("ignore")

!pip install metric-learn



In [9]:
import scipy.io
# Read the two MATLAB files from the working directory and keep one named
# variable from each: `data` is 'paviaU' from PaviaU.mat, `gt` is 'paviaU_gt'
# from PaviaU_gt.mat. Downstream cells index `data` with three axes and use
# `gt` as the per-pixel class label.
pavia_mat = scipy.io.loadmat('PaviaU.mat')
data = pavia_mat['paviaU']

pavia_gt_mat = scipy.io.loadmat('PaviaU_gt.mat')
gt = pavia_gt_mat['paviaU_gt']

In [10]:
def applyPCA(X, numComponents=15):
    """Reduce the last axis of a 3-D array with whitened PCA.

    The first two axes are flattened so that every ``X[i, j, :]`` vector
    becomes one row, a PCA with ``whiten=True`` is fitted on all of those
    rows, and the projected rows are folded back onto the original grid.

    Parameters
    ----------
    X : array with at least three axes
        Input whose third axis (``X.shape[2]``) holds the features to reduce.
    numComponents : int, default 15
        Number of principal components to keep.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(X.shape[0], X.shape[1], numComponents)``.
    """
    # One row per (i, j) position, one column per original feature.
    flat_pixels = X.reshape(-1, X.shape[2])

    reducer = PCA(n_components=numComponents, whiten=True)
    projected = reducer.fit_transform(flat_pixels)

    # Restore the two leading axes; only the feature axis has changed size.
    return projected.reshape(X.shape[0], X.shape[1], numComponents)

In [11]:
# Gotcha: `data` is rebound to its PCA projection, so running this cell a
# second time would apply PCA to the already reduced array.
data = applyPCA(data)

# One row per pixel (row-major order), one column per retained component,
# with the matching ground-truth label appended as the last column.
n_pixels = data.shape[0] * data.shape[1]
df = pd.DataFrame(data.reshape(n_pixels, -1))
df['class'] = gt.ravel()

# Keep only rows whose label is non-zero (presumably 0 marks unlabeled
# pixels - confirm against the dataset description).
labeled = df[df['class'] != 0]

X = labeled.iloc[:, :-1].values  # every column except the trailing 'class'
y = labeled.iloc[:, -1].values   # the trailing 'class' column

In [12]:
from sklearn.model_selection import KFold
from sklearn.neighbors import KNeighborsClassifier
from metric_learn import LFDA
from sklearn.impute import SimpleImputer

# 5-fold cross-validation of an LFDA -> k-NN pipeline on the samples in X / y.
acc_list = []  # held-out accuracy of each fold, in fold order
kf = KFold(n_splits=5, shuffle=True, random_state=42)  # seeded shuffle: same folds on every run

# The same two estimator objects are fitted again in every fold, so after the
# loop `lfda` and `knn` hold the fit obtained on the final fold's training split.
lfda = LFDA(k=2)
knn = KNeighborsClassifier(n_neighbors=5)

for fold_index, (train_index, test_index) in enumerate(kf.split(X), start=1):
    print(f"Fold {fold_index} -> Train: {len(train_index)}, Test: {len(test_index)}")

    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]

    # Learn the metric on the training split only, then fit k-NN in the
    # transformed space.
    lfda.fit(X_train, y_train)
    knn.fit(lfda.transform(X_train), y_train)

    # Score on the held-out split, projected with the metric learned above.
    lfda_acc = knn.score(lfda.transform(X_test), y_test)
    print('lfda accuracy {}'.format(lfda_acc))

    acc_list.append(lfda_acc)

# Summarise the folds: the cross-validated estimate is the mean of the per-fold
# accuracies, and the standard deviation shows how much it varies between folds.
print('mean lfda accuracy {} (std {})'.format(np.mean(acc_list), np.std(acc_list)))

Fold 1 -> Train: 34220, Test: 8556
lfda accuracy 0.937120149602618
Fold 2 -> Train: 34221, Test: 8555
lfda accuracy 0.9316189362945646
Fold 3 -> Train: 34221, Test: 8555
lfda accuracy 0.9305669199298656
Fold 4 -> Train: 34221, Test: 8555
lfda accuracy 0.9292811221507891
Fold 5 -> Train: 34221, Test: 8555
lfda accuracy 0.9260081823495032


In [13]:
import joblib

# Write the metric learner and the classifier to disk, read them back, and use
# the reloaded copies to label every sample in X.
# NOTE(review): `lfda` and `knn` are whatever the cross-validation loop left
# behind, i.e. the fit from its last fold, and X contains the samples that fold
# trained on - so `pred` is not a held-out prediction.
lfda_path, knn_path = 's_lfda_pu_model.pkl', 's_lfda_pu_knn_model.pkl'

joblib.dump(lfda, lfda_path)
joblib.dump(knn, knn_path)

lfda_loaded = joblib.load(lfda_path)
knn_loaded = joblib.load(knn_path)

# Project X with the reloaded metric, then classify in that space.
lfda_data = lfda_loaded.transform(X)
pred = knn_loaded.predict(lfda_data)

In [14]:
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score
from sklearn.metrics import cohen_kappa_score

# scikit-learn metric functions take (y_true, y_pred): ground truth first,
# predictions second. These three metrics are symmetric in their arguments,
# so the printed values are the same as with the arguments swapped.
# NOTE(review): `pred` was computed on all of X, which includes the samples
# the saved estimators were fitted on; these scores are therefore not a
# held-out estimate - the per-fold accuracies are.
print("accuracy:", accuracy_score(y, pred))
print("cohen kappa:", cohen_kappa_score(y, pred))
print("f1 score:", f1_score(y, pred, average='macro'))

accuracy: 0.9455302038526276
cohen kappa: 0.9272740423976582
f1 score: 0.932146047984101
