In [20]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
import time
from utils import somCluster
from sklearn.cluster import KMeans
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.decomposition import KernelPCA

startTime = time.time()

# Fixed seed: the ARPACK eigen-solver used by KernelPCA below starts from a
# random vector, so without a seed the reduced features differ between runs.
SEED = 42

# Loading Dataset and preprocessing

x = pd.read_csv('DataSet/indianPines_X.csv')
y = pd.read_csv('DataSet/indianPines_Y.csv')
print('Initial Data: ', x.shape[0])

# Rescale every input column to the [0, 1] range before the kernel projection.
sc = MinMaxScaler(feature_range=(0, 1))
x = sc.fit_transform(x)

# Number of distinct labels in the 'class' column (the column the later cells
# select rows by).
nClasses = y['class'].nunique()

# Dimensionality Reduction
x = KernelPCA(n_components=15, eigen_solver='arpack', n_jobs=-1,
              random_state=SEED).fit_transform(x)
print('Reduction Done')

# Width of the reduced feature matrix and the integer column names used when
# it is wrapped in a DataFrame later on.
nFeatures = x.shape[1]
columns = list(range(nFeatures))

Initial Data:  21025
Reduction Done


In [21]:
# Splitting the data set and SOM
#
# For every class label present in y['class'], select that class's rows of x,
# pass them through somCluster, and remember the label for each returned row.
# The per-class results are collected in lists and joined once at the end
# instead of re-copying the growing arrays on every iteration.

# Iterate over the labels that actually occur, so the cell does not depend on
# the labels being exactly 0..nClasses-1.
classLabels = np.sort(y['class'].dropna().unique())

reducedParts = []
labelParts = []
for i in classLabels:
    classMask = (y['class'] == i).to_numpy()
    tp = somCluster(x[classMask], nFeatures)
    reducedParts.append(tp)
    labelParts.append(np.full(len(tp), i))

# X: rows kept by somCluster for all classes; Y: matching class label per row.
X = np.concatenate(reducedParts, axis=0)
Y = np.concatenate(labelParts, axis=0)

In [27]:

# Dataset without Outliers
X1 = pd.DataFrame(X, columns=columns)
Y1 = pd.DataFrame(Y, columns=['class'])
# Shape after Removing Outliers
print('After Outlier Reduction: ', X.shape[0])

# K-MEANS HOMOGENEOUS ##########################################################
# Repeatedly split the labelled data with k-means until every cluster holds a
# single class; each such cluster is then replaced by its mean row (features
# followed by the class value), which becomes one training sample.

SHUFFLE_SEED = 42  # fixed so the row shuffle below is repeatable

df = pd.concat([X1, Y1], axis=1, join='inner')
df = df.sample(frac=1, random_state=SHUFFLE_SEED).reset_index(drop=True)
queue = [df]
final = []
while queue:
    cluster = queue.pop(0)
    if cluster.empty:
        # Nothing to summarise or split.
        continue

    classes = list(cluster['class'].value_counts().keys())
    if len(classes) == 1:  # If homogeneous
        final.append(list(cluster.mean()))
        continue

    # Seed k-means with one centroid per class present in this cluster. The
    # 'class' column is part of both the centroids and the clustered data.
    classCentroid = [list(cluster[cluster['class'] == i].mean()) for i in classes]
    kMeans = KMeans(n_clusters=len(classes), init=np.array(classCentroid), n_init=1, max_iter=500)
    label = kMeans.fit_predict(cluster)

    # Keep only non-empty partitions: an empty one would make the next KMeans
    # call fail because it would be asked for zero clusters.
    parts = [cluster[label == i] for i in range(len(classes))]
    parts = [part for part in parts if len(part) > 0]

    if len(parts) < 2:
        # k-means put every row in one partition, so re-queueing it would
        # repeat this step forever. Split directly by class instead; each
        # piece is homogeneous and will be finalised on its next visit.
        parts = [cluster[cluster['class'] == i] for i in classes]

    queue.extend(parts)

# Final Reduced Dataset
print('Final Training Dataset length: ', len(final))
final = np.array(final)

After Outlier Reduction:  19553
Final Training Dataset length:  49


In [33]:
from sklearn.neighbors import KNeighborsClassifier

# Training SVM
# NOTE: the names and the printed label say "SVM", but the model fitted here
# is a 6-nearest-neighbours classifier. Alternatives that were tried and left
# disabled: training on the full data (x, np.ravel(y)) and
# SVC(kernel='rbf', gamma='scale', cache_size=2000, decision_function_shape='ovo').

# Every row of `final` is a cluster mean: feature columns first, class last.
x_train, y_train = final[:, :-1], final[:, -1]

# fit() returns the estimator itself, so construction and training are chained.
SVM = KNeighborsClassifier(n_neighbors=6).fit(x_train, y_train)

# Score the reduced training set against the full reduced data set `x`.
yPredict = SVM.predict(x)
ac_svm = 100 * accuracy_score(y, yPredict)
print('SVM Accuracy', ac_svm)

# Wall-clock seconds since startTime was set in the first cell.
print('Runtime:', time.time() - startTime)

SVM Accuracy 41.835909631391196
Runtime: 471.10844922065735
