In [6]:
import time
from pathlib import Path

import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import GridSearchCV
from sklearn.neighbors import KNeighborsClassifier

In [7]:
def load_emnist_data(data_dir="data"):
    """Read the EMNIST balanced train and test CSV files into DataFrames.

    Parameters
    ----------
    data_dir : str or os.PathLike, default "data"
        Directory containing ``emnist-balanced-train.csv`` and
        ``emnist-balanced-test.csv``. The default matches the previously
        hard-coded relative location.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame)
        The training frame followed by the test frame, exactly as parsed by
        ``pandas.read_csv`` with its default options.

    Raises
    ------
    FileNotFoundError
        Propagated from ``pandas.read_csv`` when either file is missing.
    """
    base_dir = Path(data_dir)
    df_emnist_train = pd.read_csv(base_dir / "emnist-balanced-train.csv")
    df_emnist_test = pd.read_csv(base_dir / "emnist-balanced-test.csv")
    return df_emnist_train, df_emnist_test

# Testar vilket antal träd som ger bäst träffsäkerhet.

In [8]:
def train_RF(n_estimators_options=(50, 100, 200), random_state=42):
    """Fit one random forest per candidate tree count and return the most accurate one.

    For every value in ``n_estimators_options`` a ``RandomForestClassifier`` is
    trained on the EMNIST training frame, scored on the test frame, and its
    accuracy and timings are printed. The fitted model with the highest test
    accuracy is returned; on ties the earliest candidate wins.

    NOTE(review): the winning model is chosen by its accuracy on the test set,
    so the reported "best" accuracy is an optimistic estimate. Consider
    selecting on a held-out validation split or with cross-validation.

    Parameters
    ----------
    n_estimators_options : iterable of int, default (50, 100, 200)
        Candidate numbers of trees to evaluate, in order.
    random_state : int or None, default 42
        Seed passed to every forest so repeated runs give the same result.
        Pass ``None`` to get unseeded (non-reproducible) forests.

    Returns
    -------
    RandomForestClassifier
        The fitted model with the highest test accuracy.

    Raises
    ------
    ValueError
        If ``n_estimators_options`` is empty.
    """
    candidates = list(n_estimators_options)
    if not candidates:
        raise ValueError("n_estimators_options must contain at least one value")

    df_emnist_train, df_emnist_test = load_emnist_data()

    X_train = df_emnist_train.drop(columns=['label'])
    y_train = df_emnist_train['label']
    X_test = df_emnist_test.drop(columns=['label'])
    y_test = df_emnist_test['label']

    # Start below any achievable accuracy so the first candidate is always
    # recorded, even if it scores exactly 0.
    best_accuracy = -1.0
    best_n_estimators = None
    best_rf_model = None

    for n_estimators in candidates:
        # Timer 1: model construction + fitting. perf_counter is monotonic
        # and intended for measuring elapsed intervals.
        fit_start = time.perf_counter()
        rf = RandomForestClassifier(n_estimators=n_estimators, random_state=random_state)
        rf.fit(X_train, y_train)
        training_time = time.perf_counter() - fit_start

        # Timer 2: prediction only; scoring is kept outside the timed region
        # so it does not inflate the reported prediction time.
        pred_start = time.perf_counter()
        y_pred = rf.predict(X_test)
        pred_time = time.perf_counter() - pred_start

        accuracy = accuracy_score(y_test, y_pred)
        total_time = training_time + pred_time

        print(f"RF Model with n_estimators = {n_estimators}, Accuracy: {accuracy:.3f}")
        print(f'Training Time: {training_time:.2f} seconds')
        print(f'Prediction Time: {pred_time:.2f} seconds')
        print(f'Total Time: {total_time:.2f} seconds')
        print('--------------------------------------------------------------------')

        # Strict comparison: an equally accurate later candidate does not
        # replace an earlier one.
        if accuracy > best_accuracy:
            best_accuracy = accuracy
            best_n_estimators = n_estimators
            best_rf_model = rf

    print(f"Best RF Model n_estimators: {best_n_estimators}")
    print(f"Best RF Model Accuracy: {best_accuracy:.3f}")

    return best_rf_model

# Testar vilket värde på k som ger bäst träffsäkerhet.

In [9]:
def train_KNN(n_neighbors_options=(3, 5, 7, 9)):
    """Fit one k-nearest-neighbours model per candidate k and return the most accurate one.

    For every value in ``n_neighbors_options`` a ``KNeighborsClassifier`` is
    fitted on the EMNIST training frame, scored on the test frame, and its
    accuracy and timings are printed. The fitted model with the highest test
    accuracy is returned; on ties the earliest candidate wins.

    NOTE(review): the winning model is chosen by its accuracy on the test set,
    so the reported "best" accuracy is an optimistic estimate. Consider
    selecting on a held-out validation split or with cross-validation.

    Parameters
    ----------
    n_neighbors_options : iterable of int, default (3, 5, 7, 9)
        Candidate neighbour counts to evaluate, in order.

    Returns
    -------
    KNeighborsClassifier
        The fitted model with the highest test accuracy.

    Raises
    ------
    ValueError
        If ``n_neighbors_options`` is empty.
    """
    candidates = list(n_neighbors_options)
    if not candidates:
        raise ValueError("n_neighbors_options must contain at least one value")

    df_emnist_train, df_emnist_test = load_emnist_data()

    X_train = df_emnist_train.drop(columns=['label'])
    y_train = df_emnist_train['label']
    X_test = df_emnist_test.drop(columns=['label'])
    y_test = df_emnist_test['label']

    # Start below any achievable accuracy so the first candidate is always
    # recorded, even if it scores exactly 0.
    best_accuracy = -1.0
    best_n_neighbors = None
    best_knn_model = None

    for n_neighbors in candidates:
        # Timer 1: model construction + fitting. perf_counter is monotonic
        # and intended for measuring elapsed intervals.
        fit_start = time.perf_counter()
        knn = KNeighborsClassifier(n_neighbors=n_neighbors)
        knn.fit(X_train, y_train)
        training_time = time.perf_counter() - fit_start

        # Timer 2: prediction only; scoring is kept outside the timed region
        # so it does not inflate the reported prediction time.
        pred_start = time.perf_counter()
        y_pred = knn.predict(X_test)
        pred_time = time.perf_counter() - pred_start

        accuracy = accuracy_score(y_test, y_pred)
        total_time = training_time + pred_time

        print(f"KNN Model with n_neighbors = {n_neighbors}, Accuracy: {accuracy:.3f}")
        print(f'Training Time: {training_time:.2f} seconds')
        print(f'Prediction Time: {pred_time:.2f} seconds')
        print(f'Total Time: {total_time:.2f} seconds')
        print('--------------------------------------------------------------------')

        # Strict comparison: an equally accurate later candidate does not
        # replace an earlier one.
        if accuracy > best_accuracy:
            best_accuracy = accuracy
            best_n_neighbors = n_neighbors
            best_knn_model = knn

    print(f"Best KNN Model n_neighbors: {best_n_neighbors}")
    print(f"Best KNN Model Accuracy: {best_accuracy:.3f}")

    return best_knn_model

In [10]:
# Run the random-forest sweep; train_RF() prints per-candidate accuracy and
# timings and returns the fitted model with the highest test accuracy.
# The timed steps of the recorded run below add up to roughly 7 minutes.
best_rf_model = train_RF()

RF Model with n_estimators = 50, Accuracy: 0.803
Training Time: 56.85 seconds
Prediction Time: 0.50 seconds
Total Time: 57.34 seconds
--------------------------------------------------------------------
RF Model with n_estimators = 100, Accuracy: 0.813
Training Time: 117.38 seconds
Prediction Time: 1.09 seconds
Total Time: 118.47 seconds
--------------------------------------------------------------------
RF Model with n_estimators = 200, Accuracy: 0.817
Training Time: 235.60 seconds
Prediction Time: 2.16 seconds
Total Time: 237.76 seconds
--------------------------------------------------------------------
Best RF Model n_estimators: 200
Best RF Model Accuracy: 0.817


In [11]:
# Run the k-nearest-neighbours sweep; train_KNN() prints per-candidate accuracy
# and timings and returns the fitted model with the highest test accuracy.
# The timed steps of the recorded run below add up to roughly 3 minutes.
best_knn_model = train_KNN()

KNN Model with n_neighbors = 3, Accuracy: 0.780
Training Time: 0.25 seconds
Prediction Time: 41.21 seconds
Total Time: 41.46 seconds
--------------------------------------------------------------------
KNN Model with n_neighbors = 5, Accuracy: 0.785
Training Time: 0.30 seconds
Prediction Time: 50.57 seconds
Total Time: 50.87 seconds
--------------------------------------------------------------------
KNN Model with n_neighbors = 7, Accuracy: 0.784
Training Time: 0.27 seconds
Prediction Time: 49.54 seconds
Total Time: 49.82 seconds
--------------------------------------------------------------------
KNN Model with n_neighbors = 9, Accuracy: 0.782
Training Time: 0.31 seconds
Prediction Time: 48.94 seconds
Total Time: 49.26 seconds
--------------------------------------------------------------------
Best KNN Model n_neighbors: 5
Best KNN Model Accuracy: 0.785
