# In [None]:
# analysis.ipynb - the notebook code is presented here in .py format

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import confusion_matrix, classification_report
from knn import KNN  # kendi yazdığımız sınıf

# Load the dataset.
# The file is read with header=None, so the column labels are supplied here:
# the class label first, followed by the thirteen feature columns.
column_names = [
    'Class',
    'Alcohol',
    'Malic acid',
    'Ash',
    'Alcalinity of ash',
    'Magnesium',
    'Total phenols',
    'Flavanoids',
    'Nonflavanoid phenols',
    'Proanthocyanins',
    'Color intensity',
    'Hue',
    'OD280/OD315 of diluted wines',
    'Proline',
]

file_path = "wine.data"  # relative path: resolved against the working directory

df = pd.read_csv(file_path, names=column_names, header=None)

# Visualization: seaborn pair plot of a hand-picked subset of features,
# colored by class label, with KDE curves on the diagonal.
selected_features = ['Alcohol', 'Flavanoids', 'Color intensity', 'Hue', 'Proline']
pairplot_columns = [*selected_features, 'Class']  # 'Class' is needed for hue
sns.pairplot(df[pairplot_columns], diag_kind="kde", hue="Class")
# y=1.02 nudges the figure title slightly above the top edge of the grid.
plt.suptitle("Selected Feature Distributions", y=1.02)
plt.show()

# Preprocessing
# Separate the feature columns from the class label.
X = df.drop("Class", axis=1)
y = df["Class"]

# Split BEFORE scaling. The scaler must learn its per-feature min/max from the
# training rows only; fitting it on the full dataset lets statistics of the
# held-out rows leak into the training features and makes the reported test
# accuracy optimistic.
# stratify=y keeps the class proportions equal in both splits and the fixed
# random_state makes the split reproducible.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

scaler = MinMaxScaler()
# Fit on the training rows only, then apply the same transform to the test rows.
X_train = scaler.fit_transform(X_train_raw)
# Test values may fall slightly outside [0, 1] when a held-out row exceeds the
# training range; that is expected and is what a truly unseen sample would do.
X_test = scaler.transform(X_test_raw)

# Kept so the name remains available to any code that referenced it: the full
# feature matrix scaled with the training-set statistics. Do not train on it.
X_scaled = scaler.transform(X)

# Test-set accuracy as a function of k, measured for each distance metric.
ks = list(range(1, 22, 2))  # odd values 1, 3, ..., 21
metrics = ["euclidean", "manhattan", "minkowski"]
# NOTE(review): no Minkowski order is passed to KNN below, so "minkowski" uses
# whatever default the KNN class defines; the plot legend in this file labels
# it "p=3" - confirm against knn.py.
results = {name: [] for name in metrics}  # metric name -> accuracies, in ks order

for metric in metrics:
    accs = results[metric]
    tag = metric.upper()
    for k in ks:
        model = KNN(k=k, distance_metric=metric)
        model.fit(X_train, y_train)
        preds = model.predict(X_test)
        # Fraction of test samples whose predicted label equals the true label.
        acc = np.mean(preds == y_test)
        accs.append(acc)
        print(f"[{tag}] k={k} -> Acc: {acc:.3f}")

# Line chart of accuracy against k, one curve per distance metric.
display_labels = {
    "euclidean": "Euclidean",
    "manhattan": "Manhattan",
    "minkowski": "Minkowski (p=3)",
}

plt.figure(figsize=(10, 6))
plt.title("Accuracy vs K for Different Distance Metrics")
plt.xlabel("K Value")
plt.ylabel("Accuracy")
plt.grid(True)
for metric in metrics:
    legend_name = display_labels[metric]
    plt.plot(ks, results[metric], label=legend_name, marker='o')
# The legend is created after the curves exist so that it picks up their labels.
plt.legend()
plt.tight_layout()
plt.show()

# Confusion matrix and per-class classification report, plotted as heatmaps
# for each distance metric at a single fixed k.
best_k = 5  # NOTE(review): hard-coded, not derived from `results` - confirm

# Take the class labels from the data instead of hard-coding them, and pass
# them to confusion_matrix explicitly so the matrix rows/columns and the tick
# labels are guaranteed to be in the same order and of the same length.
class_labels = np.unique(y).tolist()

for metric in metrics:
    print(f"\n>>> Metric: {metric.upper()} | K = {best_k}")
    model = KNN(k=best_k, distance_metric=metric)
    model.fit(X_train, y_train)
    preds = model.predict(X_test)

    cm = confusion_matrix(y_test, preds, labels=class_labels)
    report = classification_report(y_test, preds, output_dict=True)
    # Keep only the per-class rows; the aggregate rows are dropped.
    report_df = pd.DataFrame(report).transpose().drop(['accuracy', 'macro avg', 'weighted avg'])

    # Confusion Matrix Plot (rows: true class, columns: predicted class)
    plt.figure(figsize=(5, 4))
    sns.heatmap(cm, annot=True, fmt="d", cmap="Blues",
                xticklabels=class_labels, yticklabels=class_labels)
    plt.title(f"Confusion Matrix ({metric.title()})")
    plt.xlabel("Predicted")
    plt.ylabel("True")
    plt.tight_layout()
    plt.show()

    # Classification Report Plot
    # 'support' is a sample count, not a 0-1 score, so it is left out of the
    # heatmap; dropping it by name avoids relying on column position.
    plt.figure(figsize=(8, 3))
    sns.heatmap(report_df.drop(columns="support"), annot=True, cmap="YlGnBu", fmt=".2f")
    plt.title(f"Classification Report ({metric.title()})")
    plt.tight_layout()
    plt.show()
