jupyter | ||||||||||||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
LDA is a widely used dimensionality reduction technique built on Fisher’s linear discriminant.
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import pandas as pd
import seaborn as sns
from sklearn.preprocessing import MinMaxScaler
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
# Load the Leptograpsus rock-crab measurements and replace the terse CSV
# column codes with readable names.
raw_data = pd.read_csv('data/A_multivariate_study_of_variation_in_two_species_of_rock_crab_of_genus_Leptograpsus.csv')
data = raw_data.rename(columns={
    'sp': 'Species',
    'sex': 'Sex',
    'index': 'Index',
    'FL': 'Frontal Lobe',
    'RW': 'Rear Width',
    'CL': 'Carapace Midline',
    'CW': 'Maximum Width',
    'BD': 'Body Depth',
})

# Expand the single-letter category codes into full labels.
data['Species'] = data['Species'].map({'B': 'Blue', 'O': 'Orange'})
data['Sex'] = data['Sex'].map({'M': 'Male', 'F': 'Female'})

# The five measurement columns used as features by the later cells.
data_columns = [
    'Frontal Lobe',
    'Rear Width',
    'Carapace Midline',
    'Maximum Width',
    'Body Depth',
]

# generate a class variable for all 4 classes
# (species label concatenated with sex label, e.g. 'BlueMale')
data['Class'] = data.Species + data.Sex
print(data['Class'].value_counts())
data.head(5)
# Min-max scale the measurement columns on a copy, so the unscaled
# frame `data` stays available; non-measurement columns are carried over as-is.
data_norm = data.copy()
data_norm[data_columns] = MinMaxScaler().fit_transform(X=data_norm[data_columns])
data_norm.describe().transpose()
# Fit LDA with two discriminant components on the normalised measurements,
# supervised by the 'Class' label, and keep each projected axis as a column.
no_components = 2
lda = LinearDiscriminantAnalysis(n_components=no_components)
data_lda = lda.fit_transform(
    X=data_norm[data_columns].to_numpy(),
    y=data_norm['Class'],
)
data_norm['LDA1'] = data_lda[:, 0]
data_norm['LDA2'] = data_lda[:, 1]
data_norm.head(1)
| | Species | Sex | Index | Frontal Lobe | Rear Width | Carapace Midline | Maximum Width | Body Depth | Class | LDA1 | LDA2 |
---|---|---|---|---|---|---|---|---|---|---|---|
0 | Blue | Male | 1 | 0.056604 | 0.014599 | 0.042553 | 0.050667 | 0.058065 | BlueMale | 1.538869 | -0.808137 |
# Plot the first two discriminant axes against each other, coloured by class.
fig = plt.figure(figsize=(10, 8))
sns.scatterplot(
    data=data_norm,
    x='LDA1',
    y='LDA2',
    hue='Class',
    ax=fig.gca(),
)
# Refit LDA with three discriminant components on the same features and
# labels; LDA1/LDA2 are overwritten and LDA3 is added.
no_components = 3
lda = LinearDiscriminantAnalysis(n_components=no_components)
data_lda = lda.fit_transform(
    X=data_norm[data_columns].to_numpy(),
    y=data_norm['Class'],
)
data_norm['LDA1'] = data_lda[:, 0]
data_norm['LDA2'] = data_lda[:, 1]
data_norm['LDA3'] = data_lda[:, 2]
data_norm.head(1)
| | Species | Sex | Index | Frontal Lobe | Rear Width | Carapace Midline | Maximum Width | Body Depth | Class | LDA1 | LDA2 | LDA3 |
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | Blue | Male | 1 | 0.056604 | 0.014599 | 0.042553 | 0.050667 | 0.058065 | BlueMale | 1.538869 | -0.808137 | 1.18642 |
# Hex colour used for each class label in the 3D scatter plot.
# NOTE: the hue does not always match the species in the label
# (e.g. 'BlueFemale' is drawn in orange).
class_colours = {
    'BlueMale': '#0027c4',  # blue
    'BlueFemale': '#f18b0a',  # orange
    'OrangeMale': '#0af10a',  # green
    'OrangeFemale': '#ff1500',  # red
}
# One colour per row, looked up from that row's class label.
colours = data_norm['Class'].apply(lambda label: class_colours[label])

# The three discriminant axes become the plot coordinates.
x = data_norm.LDA1
y = data_norm.LDA2
z = data_norm.LDA3

fig = plt.figure(figsize=(10, 10))
# Create the 3D axes first and title *that* axes. Calling plt.title() on a
# figure with no axes makes pyplot create an implicit 2D axes, which would
# then be drawn (frame and ticks) behind the 3D plot.
ax = fig.add_subplot(projection='3d')
ax.set_title('Linear Discriminant Analysis')
ax.scatter(xs=x, ys=y, zs=z, s=50, c=colours)