In [None]:
### 1. Baseline KNN (no feature selection)###

from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns

# Baseline: fit a KNN classifier on the full feature set once per distance
# metric, print the test-set scores and draw the confusion matrix.
# X_train, y_train, X_test and y_test must already exist from earlier cells.
distance_metrics = ['euclidean', 'cosine']
k = 5  # number of neighbours used for every metric in this cell

for metric in distance_metrics:
    print(f"\n🔍 Testing metric: {metric}")
    try:
        knn = KNeighborsClassifier(n_neighbors=k, metric=metric)
        knn.fit(X_train, y_train)
        y_pred = knn.predict(X_test)

        # Precision/recall/F1 keep scikit-learn's default averaging.
        # zero_division=0 yields the same value as the default but without the
        # UndefinedMetricWarning, matching the calls in section 3.
        # NOTE(review): section 3 reports average="weighted" scores, so these
        # numbers are not directly comparable with it - confirm which
        # averaging is intended for the comparison.
        accuracy = accuracy_score(y_test, y_pred)
        precision = precision_score(y_test, y_pred, zero_division=0)
        recall = recall_score(y_test, y_pred, zero_division=0)
        f1 = f1_score(y_test, y_pred, zero_division=0)

        print(f"Accuracy:  {accuracy:.4f}")
        print(f"Precision: {precision:.4f}")
        print(f"Recall:    {recall:.4f}")
        print(f"F1 Score:  {f1:.4f}")

        # Confusion matrix as a small annotated heatmap
        # (rows = actual labels, columns = predicted labels).
        cm = confusion_matrix(y_test, y_pred)
        fig, ax = plt.subplots(figsize=(2.5, 2.5))
        sns.heatmap(cm, annot=True, fmt='g', cmap="Blues", ax=ax)
        ax.set_title(f"Confusion Matrix - {metric}")
        ax.set_xlabel("Predicted")
        ax.set_ylabel("Actual")
        fig.tight_layout()
        plt.show()
        plt.close(fig)  # release the figure so repeated runs do not pile up

    except ValueError as e:
        # Best effort: report the failure for this metric and move on to the next.
        print(f"❌ Error with metric '{metric}': {e}")

In [None]:
### 2. Baseline KNN (no feature selection, different k) ###
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
import pandas as pd

# Baseline grid: evaluate KNN on the full feature set for every combination of
# neighbour count and distance metric, collect the scores in a table and plot
# them against k.
# X_train, y_train, X_test and y_test must already exist from earlier cells.

# Distance metrics to compare
distance_metrics = ['euclidean', 'cosine']
# Neighbour counts to compare
k_values = [3, 5, 7, 9, 11]

# Column-oriented result store; one entry per successfully evaluated (k, metric)
results = {
    'k': [],
    'Metric': [],
    'Accuracy': [],
    'Precision': [],
    'Recall': [],
    'F1 Score': []
}

for k in k_values:
    for metric in distance_metrics:
        try:
            knn = KNeighborsClassifier(n_neighbors=k, metric=metric)
            knn.fit(X_train, y_train)
            y_pred = knn.predict(X_test)

            # Compute the whole row before touching `results`: if any score
            # raises ValueError, nothing is appended and all columns keep the
            # same length, so pd.DataFrame(results) below still works.
            # zero_division=0 yields the same value as the default but without
            # the UndefinedMetricWarning, matching the calls in section 3.
            # NOTE(review): section 3 uses average="weighted"; these scores use
            # scikit-learn's default averaging - confirm which one is intended.
            row = {
                'k': k,
                'Metric': metric,
                'Accuracy': accuracy_score(y_test, y_pred),
                'Precision': precision_score(y_test, y_pred, zero_division=0),
                'Recall': recall_score(y_test, y_pred, zero_division=0),
                'F1 Score': f1_score(y_test, y_pred, zero_division=0),
            }
            cm = confusion_matrix(y_test, y_pred)

            for column, value in row.items():
                results[column].append(value)

            # Confusion matrix for this (k, metric) combination
            fig, ax = plt.subplots(figsize=(2.5, 2.5))
            sns.heatmap(cm, annot=True, fmt='g', cmap="Blues", ax=ax)
            ax.set_title(f"Confusion Matrix - k={k}, metric={metric}")
            ax.set_xlabel("Predicted")
            ax.set_ylabel("Actual")
            fig.tight_layout()
            plt.show()
            plt.close(fig)  # ten figures are drawn in this loop; free each one

        except ValueError as e:
            # Best effort: report the failing combination and continue.
            print(f"❌ Error with metric '{metric}': {e}")

# Collected scores as a table
df_results = pd.DataFrame(results)
print(df_results)

# Comparison chart: one line per (score, distance metric) pair over k
fig, ax = plt.subplots(figsize=(10, 6))
for score_name in ['Accuracy', 'Precision', 'Recall', 'F1 Score']:
    for dist in distance_metrics:
        subset = df_results[df_results['Metric'] == dist]
        ax.plot(subset['k'], subset[score_name], marker='o', label=f"{score_name} - {dist}")

ax.set_title('KNN-Leistung mit verschiedenen Distanzmetriken und k-Werten')
ax.set_xlabel('k (Anzahl der Nachbarn)')
ax.set_ylabel('Bewertung')
# The y-axis is fixed to this window; scores below 0.5 fall outside the visible range.
ax.set_ylim(0.5, 1.05)
ax.legend()
ax.grid(True)
plt.show()



In [None]:
### 3. KNN with Mutual Information Feature Selection ###
from sklearn.feature_selection import SelectKBest, mutual_info_classif
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd

# KNN with mutual-information feature selection: for every combination of
# neighbour count and distance metric, fit a scale -> select -> classify
# pipeline on the training data, score it on the test data and draw the
# confusion matrix.
# X_train, y_train, X_test and y_test must already exist from earlier cells.
#
# NOTE(review): this pipeline standardises the features and reports
# average="weighted" scores, while the baseline cells fit KNN directly on
# X_train and use scikit-learn's default averaging. Unless X_train was already
# scaled upstream, differences to the baseline are not attributable to feature
# selection alone - confirm before comparing the tables.

# Neighbour counts and distance metrics to compare
k_values = [3, 5, 7, 9, 11]
metrics = ["euclidean", "cosine"]

N_SELECTED_FEATURES = 10  # number of features kept by SelectKBest
MI_RANDOM_STATE = 42      # seed for the mutual-information estimate


def seeded_mutual_info(X, y):
    """Score features with mutual_info_classif using a fixed random seed.

    mutual_info_classif draws random numbers internally, so without a seed
    the selected features can differ between pipeline fits and between
    notebook runs. Fixing the seed makes every (k, metric) pipeline in this
    cell select features from the same scores.

    Args:
        X: Feature matrix handed over by SelectKBest.
        y: Target labels handed over by SelectKBest.

    Returns:
        The per-feature mutual-information scores from mutual_info_classif.
    """
    return mutual_info_classif(X, y, random_state=MI_RANDOM_STATE)


# Column-oriented result store; one entry per (k, metric) combination.
# NOTE(review): the key "F1-Score" differs from "F1 Score" used in section 2.
results = {
    "k": [],
    "Metric": [],
    "Accuracy": [],
    "Precision": [],
    "Recall": [],
    "F1-Score": []
}

for k in k_values:
    for metric in metrics:
        pipe_kbest = Pipeline([
            ("scaler", StandardScaler()),
            ("kbest", SelectKBest(score_func=seeded_mutual_info, k=N_SELECTED_FEATURES)),
            ("knn", KNeighborsClassifier(n_neighbors=k, metric=metric))
        ])

        # Train (scaler and selector are fitted on the training split only)
        pipe_kbest.fit(X_train, y_train)

        # Predict
        y_pred = pipe_kbest.predict(X_test)

        # Weighted-average scores; zero_division=0 reports 0 instead of
        # warning when a class is never predicted.
        acc = accuracy_score(y_test, y_pred)
        prec = precision_score(y_test, y_pred, average="weighted", zero_division=0)
        rec = recall_score(y_test, y_pred, average="weighted", zero_division=0)
        f1 = f1_score(y_test, y_pred, average="weighted", zero_division=0)

        results["k"].append(k)
        results["Metric"].append(metric)
        results["Accuracy"].append(acc)
        results["Precision"].append(prec)
        results["Recall"].append(rec)
        results["F1-Score"].append(f1)

        # Confusion matrix for this (k, metric) combination
        cm = confusion_matrix(y_test, y_pred)
        fig, ax = plt.subplots(figsize=(3, 3))
        sns.heatmap(cm, annot=True, fmt='g', cmap="Blues", ax=ax)
        ax.set_xlabel("Predicted")
        ax.set_ylabel("Actual")
        ax.set_title(f"Confusion Matrix - k={k}, metric={metric}")
        fig.tight_layout()
        plt.show()
        plt.close(fig)  # ten figures are drawn in this loop; free each one

# Collected scores as a table
df_results = pd.DataFrame(results)
print(df_results)
