In [1]:
#Importing the required libraries
import pandas as pd
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from itertools import combinations

In [2]:
# Load the dataset
df = pd.read_csv("iris.csv",names=["Sepal Length","Sepal Width","Petal Length", "Petal Width", "Class"])

In [3]:
df

Unnamed: 0,Sepal Length,Sepal Width,Petal Length,Petal Width,Class
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,Iris-virginica
146,6.3,2.5,5.0,1.9,Iris-virginica
147,6.5,3.0,5.2,2.0,Iris-virginica
148,6.2,3.4,5.4,2.3,Iris-virginica


In [4]:
#Shape of the dataset
df.shape

(150, 5)

In [5]:
# Check and Display for missing values
missing_values = df.isnull().sum()
print("Missing Values:\n",missing_values)
#There is no missing values

Missing Values:
 Sepal Length    0
Sepal Width     0
Petal Length    0
Petal Width     0
Class           0
dtype: int64


In [6]:
#Feature variables (Sepal Length, Sepal Width, Petal Length and Petal Width)
X = df.drop("Class", axis=1)

In [7]:
X

Unnamed: 0,Sepal Length,Sepal Width,Petal Length,Petal Width
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2
...,...,...,...,...
145,6.7,3.0,5.2,2.3
146,6.3,2.5,5.0,1.9
147,6.5,3.0,5.2,2.0
148,6.2,3.4,5.4,2.3


In [8]:
#Target variable (Class)
y = df["Class"]

In [9]:
y

0         Iris-setosa
1         Iris-setosa
2         Iris-setosa
3         Iris-setosa
4         Iris-setosa
            ...      
145    Iris-virginica
146    Iris-virginica
147    Iris-virginica
148    Iris-virginica
149    Iris-virginica
Name: Class, Length: 150, dtype: object

In [10]:
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [11]:
# Initialize Linear Discriminant Analysis
lda = LinearDiscriminantAnalysis()

In [12]:
# Iterate over each pair of classes
class_pairs = list(combinations(set(y_train), 2))
for class_pair in class_pairs:
    # Filter the data for the current pair of classes
    X_pair = X_train[(y_train == class_pair[0]) | (y_train == class_pair[1])]
    y_pair = y_train[(y_train == class_pair[0]) | (y_train == class_pair[1])]
    
    # Train the LDA model
    lda.fit(X_pair, y_pair)
    
    # Predict on the test set
    y_pred = lda.predict(X_test)
    
    # Calculate accuracy
    accuracy = accuracy_score(y_test, y_pred)
    
    # Report the results
    print(f"Accuracy for classes {class_pair}: {accuracy}")

Accuracy for classes ('Iris-setosa', 'Iris-versicolor'): 0.7111111111111111
Accuracy for classes ('Iris-setosa', 'Iris-virginica'): 0.7111111111111111
Accuracy for classes ('Iris-versicolor', 'Iris-virginica'): 0.5555555555555556


In [13]:
# Calculating the class means, variance ratio and coefficients
for i in range(len(set(y))):
    for j in range(i + 1, len(set(y))):
        class_1 = list(set(y))[i]
        class_2 = list(set(y))[j]
        
        # Filter data for the current pair of classes
        X_pair = X[(y == class_1) | (y == class_2)]
        y_pair = y[(y == class_1) | (y == class_2)]
        
        # Fit LDA
        lda.fit(X_pair, y_pair)
        
        # Report the results
        print(f"Pair: {class_1} vs {class_2}")
        print(f"Class means: {lda.means_}")
        print(f"Explained variance ratio: {lda.explained_variance_ratio_}")
        print("Coefficients:")
        for class_, coef in zip(lda.classes_, lda.coef_):
            print(f"{class_}: {coef}")
        print()

Pair: Iris-setosa vs Iris-versicolor
Class means: [[5.006 3.418 1.464 0.244]
 [5.936 2.77  4.26  1.326]]
Explained variance ratio: [1.]
Coefficients:
Iris-setosa: [ -2.8464883  -18.41369035  21.23175795  32.58985887]

Pair: Iris-setosa vs Iris-virginica
Class means: [[5.006 3.418 1.464 0.244]
 [6.588 2.974 5.552 2.026]]
Explained variance ratio: [1.]
Coefficients:
Iris-setosa: [-15.68234004 -12.32047312  36.37394104  37.31684345]

Pair: Iris-versicolor vs Iris-virginica
Class means: [[5.936 2.77  4.26  1.326]
 [6.588 2.974 5.552 2.026]]
Explained variance ratio: [1.]
Coefficients:
Iris-versicolor: [-3.55630269 -5.57862064  6.97012768 12.38604115]

