In [3]:
import scipy.io
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.model_selection import GridSearchCV
from sklearn.discriminant_analysis import StandardScaler
from sklearn.decomposition import PCA

In [4]:
pavia_mat = scipy.io.loadmat("Pavia/Pavia.mat")
pavia_gt_mat = scipy.io.loadmat("Pavia/Pavia_gt.mat")

FileNotFoundError: [Errno 2] No such file or directory: 'Pavia/Pavia.mat'

In [None]:
pavia = pavia_mat["pavia"]
pavia.shape

(1096, 715, 102)

In [None]:
pavia[:, :, 0]

array([[ 854,  527,  374, ...,  367,  261, 1059],
       [1060,  707,  143, ...,  465,  884,  756],
       [ 532,  523,  816, ...,  408,  393,  798],
       ...,
       [ 689,  497,  947, ...,  812,  840,  187],
       [ 895,  211,  971, ...,  802,  328,  897],
       [ 610,  961,  443, ...,  592,  406,  631]], dtype=uint16)

In [None]:
pavia_gt = pavia_gt_mat["pavia_gt"]
pavia_gt.shape

(1096, 715)

In [None]:
# count the number of repeat elements in the ground truth
unique, counts = np.unique(pavia_gt, return_counts=True)
dict(zip(unique, counts))

{0: 635488,
 1: 65971,
 2: 7598,
 3: 3090,
 4: 2685,
 5: 6584,
 6: 9248,
 7: 7287,
 8: 42826,
 9: 2863}

## lets select 1 Vs 8

In [None]:
df = pd.DataFrame(pavia.reshape(-1, 102))
df.head(2)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,92,93,94,95,96,97,98,99,100,101
0,854,601,350,266,138,118,178,194,257,269,...,3752,3759,3773,3779,3752,3690,3671,3664,3636,3643
1,527,642,575,294,123,168,207,154,209,299,...,3907,3873,3902,3921,3861,3854,3882,3834,3725,3768


In [None]:
labels = np.zeros(pavia.shape[0] * pavia.shape[1])
for i in range(pavia.shape[0]):
    for j in range(pavia.shape[1]):
        label = pavia_gt[i, j]
        labels[i * pavia.shape[1] + j] = label
df["label"] = labels
df.head(2)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,93,94,95,96,97,98,99,100,101,label
0,854,601,350,266,138,118,178,194,257,269,...,3759,3773,3779,3752,3690,3671,3664,3636,3643,0.0
1,527,642,575,294,123,168,207,154,209,299,...,3873,3902,3921,3861,3854,3882,3834,3725,3768,0.0


## Classify by SVM

In [None]:
# balance data
df_1 = df[df["label"] == 1].sample(2000)
df_8 = df[df["label"] == 8].sample(2000)
df2 = pd.concat([df_1, df_8])
df2.head(2)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,93,94,95,96,97,98,99,100,101,label
60508,344,278,270,231,108,136,221,250,254,295,...,40,19,0,17,29,21,26,34,31,1.0
73342,276,127,36,40,0,70,227,244,278,314,...,32,41,47,27,25,47,47,41,40,1.0


In [None]:
def SVM(df):
    # Split the data into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(
        df.drop("label", axis=1), df["label"], test_size=0.2, random_state=42
    )

    # Preprocess the data by scaling it
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # Apply PCA
    pca = PCA(n_components=0.95).fit(X_train_scaled)
    X_train_pca = pca.transform(X_train_scaled)
    X_test_pca = pca.transform(X_test_scaled)
    # Initialize the SVM classifier
    # defining parameter range
    param_grid = {
        "C": [1e-4, 1e-2, 1, 1e2, 1e4],
        "gamma": [1e-6, 1e-3, 1, 1e3, 1e6],
        "kernel": ["rbf", "linear"],
    }

    grid = GridSearchCV(SVC(), param_grid, cv=5)
    # fitting the model for grid search
    grid.fit(X_train_pca, y_train)
    print("Best Parameters :", grid.best_params_)

    # Make predictions on the testing set
    y_pred = grid.predict(X_test_pca)

    # Evaluate the accuracy of the classifier
    accuracy = accuracy_score(y_test, y_pred)
    return "{:.2f}%".format(accuracy * 100)

In [None]:
SVM(df2)

Best Parameters : {'C': 0.01, 'gamma': 1e-06, 'kernel': 'linear'}


'99.50%'

## Use neighbors data too

In [None]:
df3 = df.drop("label", axis=1)
df3

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,92,93,94,95,96,97,98,99,100,101
0,854,601,350,266,138,118,178,194,257,269,...,3752,3759,3773,3779,3752,3690,3671,3664,3636,3643
1,527,642,575,294,123,168,207,154,209,299,...,3907,3873,3902,3921,3861,3854,3882,3834,3725,3768
2,374,322,179,87,169,268,360,339,286,309,...,4404,4443,4472,4428,4353,4306,4284,4318,4311,4321
3,706,520,560,572,425,243,271,272,258,276,...,3992,3972,4006,4032,3975,3946,3954,3944,3936,3939
4,1120,1027,592,414,407,463,417,365,332,334,...,4555,4502,4485,4479,4445,4364,4290,4268,4235,4272
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
783635,361,505,389,230,239,336,345,317,327,333,...,2765,2770,2768,2743,2709,2675,2657,2640,2548,2498
783636,311,165,251,148,133,110,40,140,279,307,...,2182,2168,2155,2146,2155,2194,2197,2152,2051,2006
783637,592,589,659,540,492,466,318,242,241,246,...,2707,2726,2731,2769,2785,2753,2735,2688,2584,2541
783638,406,416,395,453,391,322,348,403,413,409,...,2789,2820,2847,2841,2846,2873,2905,2849,2714,2707


In [None]:
df4 = pd.DataFrame(columns=[i for i in range(9 * pavia.shape[2])])
df4

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,908,909,910,911,912,913,914,915,916,917


In [None]:
data_1_indexes = list(df[df["label"] == 1].index)
selected_1_indexes = list(df[df["label"] == 1].sample(2000).index)
data_8_indexes = list(df[df["label"] == 8].index)
selected_8_indexes = list(df[df["label"] == 8].sample(2000).index)

In [None]:
zeroes = np.zeros(pavia.shape[2])
for i in range(pavia.shape[0]):
    for j in range(pavia.shape[1]):
        if i
        indexes = []
        indexes.append((i-1)*pavia.shape[1]+j-1)
        indexes.append((i)*pavia.shape[1]-1+j)
        indexes.append((i-1)*pavia.shape[1]+j+1)
        indexes.append(i*pavia.shape[1]+j-1)
        indexes.append(i*pavia.shape[1]+j)
        indexes.append(i*pavia.shape[1]+j+1)
        indexes.append((i+1)*pavia.shape[1]+j-1)
        indexes.append((i+1)*pavia.shape[1]+j)
        indexes.append((i+1)*pavia.shape[1]+j+1)
        print(indexes)