In [11]:
import numpy as np
import pandas as pd

In [16]:
data=pd.read_csv('iris.csv')

In [17]:
data.drop('Id',axis=1,inplace=True)

In [45]:
data.head()

Unnamed: 0,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa


In [18]:
data.describe()

Unnamed: 0,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm
count,150.0,150.0,150.0,150.0
mean,5.843333,3.054,3.758667,1.198667
std,0.828066,0.433594,1.76442,0.763161
min,4.3,2.0,1.0,0.1
25%,5.1,2.8,1.6,0.3
50%,5.8,3.0,4.35,1.3
75%,6.4,3.3,5.1,1.8
max,7.9,4.4,6.9,2.5


In [19]:
data['Species'].value_counts()

Species
Iris-setosa        50
Iris-versicolor    50
Iris-virginica     50
Name: count, dtype: int64

In [20]:
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split


In [21]:
X = data.drop(columns=['Species'])
y = data['Species']
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42, stratify=y)

In [32]:
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report

In [33]:
models = {
    'Logistic Regression': LogisticRegression(max_iter=1000),
    'Decision Tree': DecisionTreeClassifier(),
    'Random Forest': RandomForestClassifier(),
    'SVM': SVC()
}

In [34]:
def evaluate_models(X_train, X_test, title=""):
    results = {}
    for name, model in models.items():
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)
        results[name] = {
            "Accuracy": accuracy_score(y_test, y_pred),
            "Precision": precision_score(y_test, y_pred, average='weighted'),
            "Recall": recall_score(y_test, y_pred, average='weighted'),
            "F1-Score": f1_score(y_test, y_pred, average='weighted'),
            "Confusion Matrix": confusion_matrix(y_test, y_pred)
        }
        print(f"\n--- {title} | {name} ---")
        print(classification_report(y_test, y_pred))
    return results

In [35]:
results_original = evaluate_models(X_train, X_test, "Original Data")


--- Original Data | Logistic Regression ---
                 precision    recall  f1-score   support

    Iris-setosa       1.00      1.00      1.00        10
Iris-versicolor       0.90      0.90      0.90        10
 Iris-virginica       0.90      0.90      0.90        10

       accuracy                           0.93        30
      macro avg       0.93      0.93      0.93        30
   weighted avg       0.93      0.93      0.93        30


--- Original Data | Decision Tree ---
                 precision    recall  f1-score   support

    Iris-setosa       1.00      1.00      1.00        10
Iris-versicolor       0.90      0.90      0.90        10
 Iris-virginica       0.90      0.90      0.90        10

       accuracy                           0.93        30
      macro avg       0.93      0.93      0.93        30
   weighted avg       0.93      0.93      0.93        30


--- Original Data | Random Forest ---
                 precision    recall  f1-score   support

    Iris-setosa

In [37]:
from sklearn.decomposition import PCA
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA

In [38]:
pca = PCA(n_components=2)
X_train_pca = pca.fit_transform(X_train)
X_test_pca = pca.transform(X_test)
results_pca = evaluate_models(X_train_pca, X_test_pca, "PCA Reduced Data")


--- PCA Reduced Data | Logistic Regression ---
                 precision    recall  f1-score   support

    Iris-setosa       1.00      1.00      1.00        10
Iris-versicolor       0.82      0.90      0.86        10
 Iris-virginica       0.89      0.80      0.84        10

       accuracy                           0.90        30
      macro avg       0.90      0.90      0.90        30
   weighted avg       0.90      0.90      0.90        30


--- PCA Reduced Data | Decision Tree ---
                 precision    recall  f1-score   support

    Iris-setosa       1.00      1.00      1.00        10
Iris-versicolor       0.80      0.80      0.80        10
 Iris-virginica       0.80      0.80      0.80        10

       accuracy                           0.87        30
      macro avg       0.87      0.87      0.87        30
   weighted avg       0.87      0.87      0.87        30


--- PCA Reduced Data | Random Forest ---
                 precision    recall  f1-score   support

    Ir

In [40]:
lda = LDA(n_components=2)
X_train_lda = lda.fit_transform(X_train, y_train)
X_test_lda = lda.transform(X_test)
results_lda = evaluate_models(X_train_lda, X_test_lda, "LDA Reduced Data")


--- LDA Reduced Data | Logistic Regression ---
                 precision    recall  f1-score   support

    Iris-setosa       1.00      1.00      1.00        10
Iris-versicolor       1.00      1.00      1.00        10
 Iris-virginica       1.00      1.00      1.00        10

       accuracy                           1.00        30
      macro avg       1.00      1.00      1.00        30
   weighted avg       1.00      1.00      1.00        30


--- LDA Reduced Data | Decision Tree ---
                 precision    recall  f1-score   support

    Iris-setosa       1.00      1.00      1.00        10
Iris-versicolor       1.00      1.00      1.00        10
 Iris-virginica       1.00      1.00      1.00        10

       accuracy                           1.00        30
      macro avg       1.00      1.00      1.00        30
   weighted avg       1.00      1.00      1.00        30


--- LDA Reduced Data | Random Forest ---
                 precision    recall  f1-score   support

    Ir

In [41]:
def summarize_results(results_dict, title):
    df = pd.DataFrame.from_dict(results_dict, orient='index')
    return df.drop("Confusion Matrix", axis=1)

In [42]:
summary_original = summarize_results(results_original, "Original")
summary_pca = summarize_results(results_pca, "PCA")
summary_lda = summarize_results(results_lda, "LDA")

In [43]:
comparison_df = pd.concat([
    summary_original.add_suffix(" (Original)"),
    summary_pca.add_suffix(" (PCA)"),
    summary_lda.add_suffix(" (LDA)")
], axis=1)

In [44]:
print("\n=== Combined Comparison ===")
print(comparison_df.round(3))


=== Combined Comparison ===
                     Accuracy (Original)  Precision (Original)  \
Logistic Regression                0.933                 0.933   
Decision Tree                      0.933                 0.933   
Random Forest                      0.900                 0.902   
SVM                                0.967                 0.970   

                     Recall (Original)  F1-Score (Original)  Accuracy (PCA)  \
Logistic Regression              0.933                0.933           0.900   
Decision Tree                    0.933                0.933           0.867   
Random Forest                    0.900                0.900           0.900   
SVM                              0.967                0.967           0.900   

                     Precision (PCA)  Recall (PCA)  F1-Score (PCA)  \
Logistic Regression            0.902         0.900           0.900   
Decision Tree                  0.867         0.867           0.867   
Random Forest                  0.9