In [672]:
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

In [673]:
X = np.array([[4,2],[2,4],[2,3],[3,6],[4,4],[9,10],[6,8],[9,5],[8,7],[10,8]])
X

array([[ 4,  2],
       [ 2,  4],
       [ 2,  3],
       [ 3,  6],
       [ 4,  4],
       [ 9, 10],
       [ 6,  8],
       [ 9,  5],
       [ 8,  7],
       [10,  8]])

In [674]:
y = np.array([1,1,1,1,1,2,2,2,2,2])
y

array([1, 1, 1, 1, 1, 2, 2, 2, 2, 2])

In [675]:
lda = LinearDiscriminantAnalysis(n_components=1)
lda.fit(X, y)

LinearDiscriminantAnalysis(n_components=1, priors=None, shrinkage=None,
              solver='svd', store_covariance=False, tol=0.0001)

In [676]:
lda.transform(X)

array([[-2.34990154],
       [-3.09778091],
       [-3.41522611],
       [-1.77150562],
       [-1.71501114],
       [ 3.64658447],
       [ 0.93753942],
       [ 2.05935848],
       [ 2.00286399],
       [ 3.70307896]])

In [677]:
lda.get_params()

{'n_components': 1,
 'priors': None,
 'shrinkage': None,
 'solver': 'svd',
 'store_covariance': False,
 'tol': 0.0001}

In [678]:
lda.predict([[4,2]])

array([1])

In [679]:
lda.predict(X)

array([1, 1, 1, 1, 1, 2, 2, 2, 2, 2])

In [680]:
lda.explained_variance_ratio_

array([ 1.])

# Example with wine quality prediction using LDA

In [681]:
from sklearn.datasets import load_wine

In [682]:
data = load_wine()

In [683]:
# data.head()

In [684]:
X_wine = pd.DataFrame(data.data, columns=data.feature_names)
X_wine.head()

Unnamed: 0,alcohol,malic_acid,ash,alcalinity_of_ash,magnesium,total_phenols,flavanoids,nonflavanoid_phenols,proanthocyanins,color_intensity,hue,od280/od315_of_diluted_wines,proline
0,14.23,1.71,2.43,15.6,127.0,2.8,3.06,0.28,2.29,5.64,1.04,3.92,1065.0
1,13.2,1.78,2.14,11.2,100.0,2.65,2.76,0.26,1.28,4.38,1.05,3.4,1050.0
2,13.16,2.36,2.67,18.6,101.0,2.8,3.24,0.3,2.81,5.68,1.03,3.17,1185.0
3,14.37,1.95,2.5,16.8,113.0,3.85,3.49,0.24,2.18,7.8,0.86,3.45,1480.0
4,13.24,2.59,2.87,21.0,118.0,2.8,2.69,0.39,1.82,4.32,1.04,2.93,735.0


In [685]:
X_wine.shape

(178, 13)

In [686]:
y = data.target
y

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])

In [687]:
wine_lda= LinearDiscriminantAnalysis(n_components=2)

In [688]:
X_lda = wine_lda.fit_transform(X_wine, y)

In [689]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, mean_absolute_error, accuracy_score

## without LDA

In [690]:
#X_train, X_test, y_train, y_test = train_test_split(X_wine, y, random_state=1)

## with LDA

In [691]:
X_train, X_test, y_train, y_test = train_test_split(X_lda, y, random_state=1)

In [692]:
dec_tree = DecisionTreeClassifier()
dec_tree.fit(X_train, y_train)

DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=None,
            max_features=None, max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, presort=False, random_state=None,
            splitter='best')

In [693]:
y_pred = dec_tree.predict(X_test)
y_pred

array([2, 1, 0, 1, 0, 2, 1, 0, 2, 1, 0, 0, 1, 0, 1, 1, 2, 0, 1, 0, 0, 1, 2,
       1, 0, 2, 0, 0, 0, 2, 1, 2, 2, 0, 1, 1, 1, 1, 1, 0, 0, 1, 2, 0, 0])

In [694]:
y_test

array([2, 1, 0, 1, 0, 2, 1, 0, 2, 1, 0, 0, 1, 0, 1, 1, 2, 0, 1, 0, 0, 1, 2,
       1, 0, 2, 0, 0, 0, 2, 1, 2, 2, 0, 1, 1, 1, 1, 1, 0, 0, 1, 2, 0, 0])

In [695]:
confusion_matrix(y_test, y_pred)

array([[18,  0,  0],
       [ 0, 17,  0],
       [ 0,  0, 10]])

In [696]:
mean_absolute_error(y_test, y_pred)

0.0

In [697]:
ac = accuracy_score(y_test, y_pred)

In [None]:
print("Final Accuracy : ",ac * 100, '%')

('Final Accuracy : ', 100.0, '%')


In [None]:
sns.pairplot(X_wine, palette="husl")

In [None]:
load_iris = sns.load_dataset('iris')

In [None]:
sns.pairplot(load_iris, hue="species")