In [113]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score
from sklearn.preprocessing import StandardScaler
import pandas as pd

In [114]:
def classifier_comparison(X, y, multiclass=False, red_perf=False, random_state=None):
    """Fit KNN and LDA on a single train/test split and tabulate their test scores.

    Parameters
    ----------
    X : array-like or DataFrame
        Feature matrix, passed unchanged to ``train_test_split``.
    y : array-like or Series
        Target labels, passed unchanged to ``train_test_split``.
    multiclass : bool, default False
        When True, precision and recall are computed with
        ``average='weighted'``. When False, scikit-learn's default averaging
        (binary) is used.
    red_perf : bool, default False
        Accepted for backward compatibility; this function does not read it.
        NOTE(review): intended meaning is not visible here - confirm with the
        author or remove once callers stop passing it.
    random_state : int, RandomState instance or None, default None
        Forwarded to ``train_test_split``. Pass an int to make the split (and
        therefore the reported scores) reproducible. None keeps the previous
        behaviour of drawing a fresh random split on every call.

    Returns
    -------
    pandas.DataFrame
        One row per classifier ("KNN", "LDA") with the columns
        "Accuracy", "Precision" and "Recall", all measured on the test split.
    """
    x_train, x_test, y_train, y_test = train_test_split(
        X, y, random_state=random_state
    )

    # Label -> estimator. The row labels of the result are taken from these
    # keys, so adding a classifier here cannot leave the index out of sync.
    classifiers = {
        "KNN": KNeighborsClassifier(),
        "LDA": LinearDiscriminantAnalysis(),
    }

    # Weighted precision/recall for multiclass targets; otherwise fall back to
    # the metric functions' default averaging.
    average_kwargs = {"average": "weighted"} if multiclass else {}

    results = []
    for classifier in classifiers.values():
        classifier.fit(x_train, y_train)
        predictions = classifier.predict(x_test)
        results.append([
            accuracy_score(y_test, predictions),
            precision_score(y_test, predictions, **average_kwargs),
            recall_score(y_test, predictions, **average_kwargs),
        ])

    return pd.DataFrame(
        results,
        index=list(classifiers),
        columns=["Accuracy", "Precision", "Recall"],
    )

In [115]:
# Iris: features and labels are read off the object returned by load_iris().
iris = load_iris()
X_iris = iris.data
y_iris = iris.target

# Heart table: the "condition" column is the label, all remaining columns are features.
heart = pd.read_csv("../Datasets/heart_cleveland_upload.csv")
X_heart = heart.drop(columns=["condition"])
y_heart = heart["condition"]

# Star table: the "class" column is the label, all remaining columns are features.
stars = pd.read_csv("../Datasets/star_classification.csv")
X_stars = stars.drop(columns=["class"])
y_stars = stars["class"]

In [130]:
# multiclass=True switches precision and recall to weighted averaging.
print(classifier_comparison(X_iris, y_iris, multiclass=True))

     Accuracy  Precision    Recall
KNN  0.947368   0.955466  0.947368
LDA  0.947368   0.947368  0.947368


In [118]:
# NOTE(review): red_perf=True is passed here, but classifier_comparison never reads that flag.
print(classifier_comparison(X_stars, y_stars, multiclass=True, red_perf=True))

     Accuracy  Precision   Recall
KNN   0.70860   0.699967  0.70860
LDA   0.84996   0.853551  0.84996


In [134]:
# multiclass=False (the default): precision and recall use the metrics' default averaging.
print(classifier_comparison(X_heart, y_heart, multiclass=False))

     Accuracy  Precision    Recall
KNN  0.746667   0.722222  0.742857
LDA  0.853333   0.900000  0.771429
