# Classification

This page shows how I classified the Raman spectral data after denoising.

In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import normalize

## Import pre-processed data



In [None]:
# Load the denoised spectra; index_col=0 uses the first CSV column as the row index.
# The path string is a placeholder — replace it with the location of the pre-processed file.
all_denoised_data = pd.read_csv('___________', index_col=0)
# Bare last expression so the notebook renders the loaded frame.
all_denoised_data

In [None]:
# Separate the label column from the spectral feature columns.
Y = all_denoised_data['diagnosis']
X = all_denoised_data.drop(columns='diagnosis')

# Two-step rescaling: sklearn's `normalize` (default arguments), followed by
# standardisation of every feature to zero mean and a standard deviation of 1.
# NOTE(review): the scaler is fitted on the full data set, i.e. before the
# train/test split further down — confirm this is intended.
X_normalized = normalize(X)
X = StandardScaler().fit_transform(X_normalized)

## Classification

In [None]:
from sklearn.decomposition import PCA
# Fit PCA on the scaled spectra and keep both the projected samples and the
# explained-variance ratio of each component.
# `n_components` is a placeholder — fill in the number of components to keep.
pca1 = PCA(n_components=________)
trained_pca = pca1.fit_transform(X)  # samples projected onto the fitted components
var1 = pca1.explained_variance_ratio_  # one ratio per component, plotted in the next cell


In [None]:
# Scree plot: variance explained by each principal component and the running total.
# explained_variance_ratio_ holds fractions, so scale by 100 to match the "%" labels.
var1_pct = np.asarray(var1) * 100
# Principal components are numbered from 1, not from the array index 0.
pc_numbers = np.arange(1, len(var1_pct) + 1)

fig, ax = plt.subplots()
ax.plot(pc_numbers, var1_pct, label="Explained Variance %")
ax.plot(pc_numbers, np.cumsum(var1_pct), label = 'Cumulative variance %')
ax.set_xlabel("PC number")
ax.set_ylabel("Explained variance (%)")
ax.set_title('Spectral data')

# Use the Axes method so the legend is attached to this figure explicitly.
ax.legend()
plt.show()

In [None]:
# Wrap the PCA scores in a DataFrame; later cells address the components as columns 0, 1, 2.
pca_result = pd.DataFrame(data=trained_pca)
# Placeholder — assign the class label of each sample; the 3D plot below filters this column on 'N' and 'P'.
pca_result['Cluster']=________

In [None]:
# plot PCA in 2D
# The x and y arguments are placeholders — fill in the two component columns to plot.
# Points are coloured by the 'Cluster' column.
p = sns.scatterplot(x=_________, y=_________, hue=pca_result['Cluster'])

In [None]:
# plot PCA in 3D
# Axes3D is not referenced directly. NOTE(review): older matplotlib releases need
# this import to register the '3d' projection — confirm before removing it.
from mpl_toolkits.mplot3d import Axes3D  # noqa: F401
import matplotlib as mpl

# This changes the global default, so figures created afterwards use it too.
mpl.rcParams['figure.figsize'] = 10, 8
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')

# Split the PCA scores by class label.
pca_neg = pca_result.loc[pca_result['Cluster']=='N']
pca_pos = pca_result.loc[pca_result['Cluster']=='P']

# One scatter per class over the first three components, labelled so the
# legend identifies which cloud is which.
for class_scores, class_label in ((pca_neg, 'N'), (pca_pos, 'P')):
    ax.scatter(class_scores[0], class_scores[1], class_scores[2], label=class_label)

# label the axes
ax.set_xlabel("PC1")
ax.set_ylabel("PC2")
ax.set_zlabel("PC3")
ax.set_title("PCA on the depparaffined Raman Spectral data set")
ax.legend(title='Cluster')
# Fixed camera angle so the figure looks the same on every run.
ax.view_init(elev=20., azim=-45)

plt.show()

In [None]:
from sklearn.preprocessing import LabelEncoder

# Features: every column of the PCA frame except the class label.
x = pca_result.drop(columns=['Cluster'])

# Targets: encode the categorical diagnosis values as numeric class codes.
labelencoder_Y = LabelEncoder().fit(Y)
y = labelencoder_Y.transform(Y)

In [None]:
# split train data and test data
from sklearn.model_selection import train_test_split
# `test_size` is a placeholder — fill in the share of samples held out for testing.
# random_state=0 fixes the shuffle so the split is reproducible.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = __________, random_state=0)
# Display the training-set dimensions as the cell output.
x_train.shape

#### Support Vector Machine

In [None]:
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV

# Hyper-parameter grid for the SVM. Both lists are placeholders — fill in the
# candidate values of C and gamma to search over.
param_grid = {'C': [________________], 'gamma': [__________________]}

# Grid search over param_grid; `kernel` is a placeholder for the SVC kernel name.
# verbose=3 asks the search to report its progress while fitting.
grid = GridSearchCV(estimator=SVC(kernel=__________), param_grid=param_grid, verbose=3)

# Fit the model for grid search; the two arguments are placeholders for the
# training features and training labels.
grid.fit(________, _________)


In [None]:
# Report the outcome of the grid search: first the best parameter
# combination, then the estimator configured with it.
for search_result in (grid.best_params_, grid.best_estimator_):
    print(search_result)

In [None]:
# Predict with the tuned model; the argument is a placeholder for the feature set to classify.
grid_pred = grid.predict(________)

In [None]:
# Print the score of the tuned model; the arguments are placeholders for the
# features and the true labels to evaluate on.
print(grid.score(_______, _______))

In [None]:
pip install mlxtend 

In [None]:
# plot confusion matrix
from sklearn.metrics import confusion_matrix
from mlxtend.plotting import plot_confusion_matrix
# The two arguments are placeholders for the true labels and the predicted labels.
matrix = confusion_matrix(_______, ________)
fig, ax = plot_confusion_matrix(conf_mat=matrix)
plt.show()

|             | Predicted: Neg | Predicted: Pos |
|-------------|-------------------|-----------------------|
| Actual: Neg | True Negative (TN) | False Positive (FP)  |
| Actual: Pos | False Negative (FN) | True Positive (TP)   |

The predicted classes are represented in the columns of the matrix, whereas the actual classes are in the rows of the matrix. We then have four cases:

* True Negative (TN): the classifier predicted negative and the sample was actually not infected with CWD.
* True Positive (TP): the classifier predicted positive and the sample was actually infected with CWD.
* False Negative (FN): the classifier predicted negative but the sample was actually infected with CWD.
* False Positive (FP): the classifier predicted positive but the sample was actually not infected with CWD.

#### Neural Network

In [None]:
pip install keras

In [None]:
pip install tensorflow

In [None]:
import keras
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasClassifier
import pandas as pd

In [None]:
# create model
def create_model():
    """Build and compile the binary-classification network.

    The width and ``input_dim`` of the first Dense layer are placeholders, as
    are the two blank lines that follow it (presumably further layers —
    confirm). The output layer is a single sigmoid unit, and the model is
    compiled with binary cross-entropy loss, the Adam optimizer and accuracy
    as the reported metric.

    Returns:
        The compiled ``Sequential`` model.
    """
    model = Sequential()
    # Input layer: fill in the number of units and the number of input features.
    model.add(Dense(_____, input_dim=______, activation='relu'))
    _________________
    _________________
    # Single sigmoid output for the two-class problem.
    model.add(Dense(1, activation='sigmoid'))
    model.compile(loss='binary_crossentropy',optimizer='adam', metrics=['accuracy'])
    return model


In [None]:
# Wrap the model factory so the network is driven through the fit / score / predict calls below.
model = KerasClassifier(build_fn=create_model, verbose=0)
# All arguments are placeholders: training features and labels, batch size,
# number of epochs, and the (features, labels) pair used for validation.
model.fit(_______, _______, batch_size=_____, epochs=_____, validation_data=(______, ______))

In [None]:
# evaluate the keras model
# The two arguments are placeholders for the features and the true labels to score against.
score = model.score(_________, _________)
print(score)

In [None]:
# plot confusion matrix
# Placeholder — the feature set to predict on.
y_pred=model.predict(________)
# Placeholders — the true labels and the predicted labels.
# confusion_matrix and plot_confusion_matrix come from the imports in the SVM confusion-matrix cell above.
matrix = confusion_matrix(_______, _______)
fig, ax = plot_confusion_matrix(conf_mat=matrix)
plt.show()