## Problem 1

In [None]:
import numpy as np
import pandas as pd
from pathlib import Path

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis, QuadraticDiscriminantAnalysis
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report




In [2]:
# Column names for the wine data file: the class label comes first, followed
# by the 13 measured attributes, in the order they appear in each row.
columns = ["Class"] + [
    "Alcohol",
    "Malic acid",
    "Ash",
    "Alcalinity of ash",
    "Magnesium",
    "Total phenols",
    "Flavanoids",
    "Nonflavanoid phenols",
    "Proanthocyanins",
    "Color intensity",
    "Hue",
    "OD280/OD315 of diluted wines",
    "Proline",
]

# Location of the raw data, relative to the notebook's working directory.
data_path = Path("./wine/wine.data")

# header=None: the first line of the file is read as data, so the column
# names are supplied explicitly through `names`.
df = pd.read_csv(data_path, names=columns, header=None)
df.head()

Unnamed: 0,Class,Alcohol,Malic acid,Ash,Alcalinity of ash,Magnesium,Total phenols,Flavanoids,Nonflavanoid phenols,Proanthocyanins,Color intensity,Hue,OD280/OD315 of diluted wines,Proline
0,1,14.23,1.71,2.43,15.6,127,2.8,3.06,0.28,2.29,5.64,1.04,3.92,1065
1,1,13.2,1.78,2.14,11.2,100,2.65,2.76,0.26,1.28,4.38,1.05,3.4,1050
2,1,13.16,2.36,2.67,18.6,101,2.8,3.24,0.3,2.81,5.68,1.03,3.17,1185
3,1,14.37,1.95,2.5,16.8,113,3.85,3.49,0.24,2.18,7.8,0.86,3.45,1480
4,1,13.24,2.59,2.87,21.0,118,2.8,2.69,0.39,1.82,4.32,1.04,2.93,735


In [3]:
# Target is the "Class" column; predictors are every other column of df.
y = df["Class"]
X = df.drop("Class", axis=1)

# 70/30 split, stratified on the class label so each split keeps the class
# proportions; the fixed random_state makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    stratify=y,
    test_size=0.30,
    random_state=598,
)

## Part A:

In [9]:
# Build the three classifiers compared in this problem.
lda = LinearDiscriminantAnalysis()
qda = QuadraticDiscriminantAnalysis()

# `multi_class` is intentionally not passed: the argument is deprecated since
# scikit-learn 1.5, and with the lbfgs solver on a multiclass target the
# default (scikit-learn >= 0.22) already fits a multinomial (softmax) model,
# so the fitted model is the same without the deprecation warning.
# NOTE(review): the features are passed in unscaled (StandardScaler is
# imported but unused), which is presumably why max_iter is set so high;
# consider a scaling Pipeline if convergence warnings appear.
mlog = LogisticRegression(solver="lbfgs", max_iter=5000, random_state=598)

# Display name -> estimator, in the order the results are reported.
models = {
    "LDA": lda,
    "QDA": qda,
    "Multinomial Logistic": mlog,
}

# Fit every model on the same training split.
for model in models.values():
    model.fit(X_train, y_train)


## Part B:

In [10]:
# One record per fitted model: accuracy on the training split and on the
# held-out test split.
rows = [
    {
        "Model": label,
        "Train Accuracy": accuracy_score(y_train, clf.predict(X_train)),
        "Test Accuracy": accuracy_score(y_test, clf.predict(X_test)),
    }
    for label, clf in models.items()
]

# Build the summary table once from the collected records.
acc_df = pd.DataFrame(rows)
acc_df


Unnamed: 0,Model,Train Accuracy,Test Accuracy
0,LDA,1.0,1.0
1,QDA,1.0,1.0
2,Multinomial Logistic,1.0,0.962963


In [7]:
# Class labels, in the order used for the confusion-matrix rows and columns.
labels = [1, 2, 3]


def confusion_frame(model, X_part, y_part):
    """Return the confusion matrix of `model` on (X_part, y_part) as a DataFrame.

    Rows are the true classes and columns the predicted classes, both
    ordered according to `labels`.
    """
    counts = confusion_matrix(y_part, model.predict(X_part), labels=labels)
    return pd.DataFrame(
        counts,
        index=[f"True_{c}" for c in labels],
        columns=[f"Pred_{c}" for c in labels],
    )


# Show the train and test confusion matrices for every fitted model.
for name, model in models.items():
    print(f"\n=== {name} ===")

    cm_train = confusion_frame(model, X_train, y_train)
    print("\nTrain:")
    display(cm_train)

    cm_test = confusion_frame(model, X_test, y_test)
    print("Test:")
    display(cm_test)



=== LDA ===

Train:


Unnamed: 0,Pred_1,Pred_2,Pred_3
True_1,41,0,0
True_2,0,50,0
True_3,0,0,33


Test:


Unnamed: 0,Pred_1,Pred_2,Pred_3
True_1,18,0,0
True_2,0,21,0
True_3,0,0,15



=== QDA ===

Train:


Unnamed: 0,Pred_1,Pred_2,Pred_3
True_1,41,0,0
True_2,0,50,0
True_3,0,0,33


Test:


Unnamed: 0,Pred_1,Pred_2,Pred_3
True_1,18,0,0
True_2,0,21,0
True_3,0,0,15



=== Multinomial Logistic ===

Train:


Unnamed: 0,Pred_1,Pred_2,Pred_3
True_1,40,1,0
True_2,0,50,0
True_3,0,0,33


Test:


Unnamed: 0,Pred_1,Pred_2,Pred_3
True_1,17,1,0
True_2,0,20,1
True_3,0,0,15


## Part C:

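All three models (LDA, QDA, and multinomial logistic regression) performed extremely well on the Wine dataset. According to the accuracy table, LDA and QDA both achieved 100% accuracy on the training and test data, while logistic regression reached 100% on training and about 96.3% (52 of 54) on testing. This suggests that the three wine classes are highly separable in these features. On the test set, the two logistic-regression errors were one Class 1 wine predicted as Class 2 and one Class 2 wine predicted as Class 3; every true Class 3 wine was classified correctly by all models. Note that the logistic-regression training confusion matrix shown above records one Class 1 wine predicted as Class 2 (123 of 124 correct, about 99.2%), which disagrees with the 100% training accuracy in the table; the confusion-matrix cell (In [7]) was last executed before the model-fitting cell (In [9]), so the notebook should be re-run from top to bottom to reconcile the two outputs. Overall, LDA and QDA performed best, with no misclassifications on either split.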