# In [None]:
import numpy as np
from PIL import Image
import os
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.tree import plot_tree

# Root directory of the dataset. It is expected to contain one sub-directory
# per class; the sub-directory name is used as the class label and its '.bmp'
# files are the samples of that class.
data_folder = '/Users/mustafacayli/Desktop/veri madenciligi/DM_dataset_hw1'

# Every image is brought to this (width, height) so that all samples produce
# feature vectors of the same length.
image_size = (28, 28)

all_images = []  # one 2-D pixel array per image, in loading order
labels = []      # class label (sub-directory name) for each entry of all_images

# sorted() makes the sample order -- and therefore the seeded train/test split
# further down -- independent of the order in which the OS lists the files.
for folder_name in sorted(os.listdir(data_folder)):
    class_dir = os.path.join(data_folder, folder_name)
    # Only directories hold classes; this also skips stray files such as
    # '.DS_Store' that would otherwise make os.listdir() fail.
    if not os.path.isdir(class_dir):
        continue
    for image_file in sorted(os.listdir(class_dir)):
        if not image_file.endswith('.bmp'):
            continue
        # The context manager releases the file handle as soon as the pixels
        # have been copied into the array.
        with Image.open(os.path.join(class_dir, image_file)) as img:
            # Convert to single-channel grayscale first so colour and
            # grayscale files give the same number of features, and keep the
            # array 2-D: flattening is done once, on the stacked array below,
            # which preserves the row-major pixel layout of each image.
            pixels = np.array(img.convert('L').resize(image_size))
        all_images.append(pixels)
        labels.append(folder_name)

if not all_images:
    # Fail with an explicit message instead of an opaque shape-unpacking error.
    raise ValueError(f"No .bmp images found under {data_folder!r}")

# Stack into (num_samples, height, width), then flatten every image into one
# feature row of height * width pixel intensities.
X = np.array(all_images)
num_samples, height, width = X.shape
X = X.reshape(num_samples, height * width)

y = np.array(labels)

# Hold out 20% of the samples for evaluation; the fixed seed keeps the
# partition identical from run to run for a given sample order.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Learn the per-feature minimum and maximum from the training rows only, then
# apply that same mapping to both partitions, so no statistic of the test set
# influences the preprocessing.
scaler = MinMaxScaler().fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

# Hyper-parameter grid for k-nearest neighbours: every value of k is combined
# with every distance metric, and each combination is fitted and evaluated.
k_values = [3, 5, 7]
metrics = ['euclidean', 'manhattan', 'chebyshev']

for k in k_values:
    for metric in metrics:
        knn = KNeighborsClassifier(n_neighbors=k, metric=metric)
        knn.fit(X_train, y_train)
        try:
            y_pred = knn.predict(X_test)
        except ValueError as e:
            # Report the failing configuration and move on to the next one
            # instead of aborting the whole grid.
            print(f"KNN (k={k}, metric={metric}) - Error: {e}")
            continue

        accuracy = accuracy_score(y_test, y_pred)
        print(f"KNN (k={k}, metric={metric}) - Accuracy: {accuracy}")

        # Sorted set of labels occurring in the truth or the predictions.
        # Passing it explicitly guarantees that the matrix rows/columns and
        # the tick labels drawn on the heatmap refer to the same classes.
        class_labels = list(np.unique(np.concatenate((y_test, y_pred))))
        cm = confusion_matrix(y_test, y_pred, labels=class_labels)

        plt.figure(figsize=(8, 6))
        sns.heatmap(
            cm,
            annot=True,
            cmap='Blues',
            fmt='d',  # cell values are integer counts
            xticklabels=class_labels,
            yticklabels=class_labels,
        )
        plt.title(f"Confusion Matrix - KNN (k={k}, metric={metric})")
        plt.xlabel('Predicted Labels')
        plt.ylabel('True Labels')
        plt.show()



# Fit a decision tree with the estimator's default settings on the scaled
# training data and draw the resulting tree structure.
dt = DecisionTreeClassifier()
dt.fit(X_train, y_train)

plt.figure(figsize=(12, 8))
plot_tree(
    dt,
    filled=True,
    # Features have no names of their own, so each one is labelled with its
    # column index in the feature matrix.
    feature_names=[str(i) for i in range(X_train.shape[1])],
    # Hand over a plain list of str: some scikit-learn releases validate this
    # argument as a list and reject the ndarray stored in ``dt.classes_``.
    class_names=[str(c) for c in dt.classes_],
)
plt.title('Decision Tree')
plt.show()

# Remaining classifiers. Apart from the split criterion of the two decision
# trees, every estimator is created with its default hyper-parameters.
dt_entropy = DecisionTreeClassifier(criterion='entropy')
dt_gini = DecisionTreeClassifier(criterion='gini')
nb = GaussianNB()
svm = SVC()

# (display name, estimator) pairs; the list order is both the fitting order
# and the order in which the results are reported.
classifiers = [
    ('Decision Tree (Entropy)', dt_entropy),
    ('Decision Tree (Gini)', dt_gini),
    ('Naive Bayes', nb),
    ('SVM', svm),
]

# (display name, scoring function, extra keyword arguments). Precision, recall
# and F1 are averaged over the classes weighted by each class's support.
scorers = [
    ('Accuracy', accuracy_score, {}),
    ('Precision', precision_score, {'average': 'weighted'}),
    ('Recall', recall_score, {'average': 'weighted'}),
    ('F1 Score', f1_score, {'average': 'weighted'}),
]

# Fit every model on the training data and score its test-set predictions.
# All scores are collected before anything is printed so the report comes out
# as one uninterrupted listing.
predictions = {}  # display name -> predicted labels for X_test
results = {}      # display name -> list of (score name, value)
for model_name, model in classifiers:
    model.fit(X_train, y_train)
    predictions[model_name] = model.predict(X_test)
    results[model_name] = [
        (score_name, score(y_test, predictions[model_name], **kwargs))
        for score_name, score, kwargs in scorers
    ]

for model_name, _ in classifiers:
    for score_name, value in results[model_name]:
        print(f"{model_name} - {score_name}:", value)