# Evaluating LabelAId with Project Sidewalk Data

This notebook evaluates the LabelAId model on Project Sidewalk data. LabelAId is compared with baseline models, including XGBoost, Random Forest, and Logistic Regression. The evaluation metrics are accuracy, precision, recall, and F1 score.

In [1]:
import math
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.model_selection import cross_validate

from sklearn.linear_model import LogisticRegression
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import RandomForestClassifier

from sklearn.model_selection import train_test_split
from sklearn import metrics # Import train_test_split function
from sklearn.metrics import accuracy_score, precision_score, recall_score
from sklearn.preprocessing import MinMaxScaler
from sklearn.utils import resample

from imblearn.under_sampling import RandomUnderSampler

import matplotlib.pyplot as plt

import xgboost as xgb

In [None]:
# Inspect the class balance of the Seattle test set: number of rows per label type.
# The frame is bound to `df_test` (not `df_train`) because the CSV being read is the
# test set; the following cell loads both `df_train` and `df_test` itself, so nothing
# downstream depends on the name used here.
df_test = pd.read_csv('data/test_set_seattle_encoded.csv')

# value_counts() orders the label types from most to least frequent.
label_counts = df_test['label_type'].value_counts()

print(label_counts)

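Add 10/20/50/100/200 rows of each `label_type` from the test set to the training data (half verified, half not verified), then evaluate on the remaining test rows.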

In [None]:
# Few-shot evaluation: train on the class-balanced label-model outputs plus a small
# number of rows sampled from the test set, then score on the test rows that were
# NOT sampled. Repeated for several sample sizes and seeds; one result row per run.

# Inputs (file contents are inferred from the file names only).
df_train = pd.read_csv("data/labelModel_outputs_seattle_encoded.csv")
df_test = pd.read_csv('data/test_set_seattle_encoded.csv')
feature_cols = ['label_CurbRamp', 'label_NoCurbRamp', 'label_NoSidewalk', 'label_Obstacle', 'label_SurfaceProblem',
                'severity', 'zoom', 'clustered', 'distance_to_road', 'distance_to_intersection', 'tag', 'description',
                'way_living_street', 'way_primary', 'way_residential', 'way_secondary', 'way_tertiary', 'way_trunk', 'way_unclassified']

LABEL = 'verified'

# Balance the training frame: keep every LABEL == 0 row and an equally sized random
# subset of the LABEL == 1 rows.
# NOTE(review): this assumes LABEL == 1 is the larger class; `resample(replace=False)`
# raises if it has fewer rows than the LABEL == 0 class.
df_majority = df_train[df_train[LABEL] == 1]
df_minority = df_train[df_train[LABEL] == 0]

df_majority_downsampled = resample(df_majority,
                                   replace=False,                # sample without replacement
                                   n_samples=len(df_minority),   # match the other class
                                   random_state=14)              # reproducible results

df_downsampled = pd.concat([df_majority_downsampled, df_minority])


def _sample_per_label_type(df, n, seed):
    """Draw `n` rows from every `label_type` group of `df`, keeping the original index.

    Every group is sampled with the same `seed`. The original index is preserved so
    the caller can remove exactly these rows from the source frame by index label.
    Groups are concatenated in sorted `label_type` order. `DataFrame.sample` raises
    ValueError if a group holds fewer than `n` rows.
    """
    parts = [group.sample(n=n, random_state=seed) for _, group in df.groupby('label_type')]
    # No groups at all (empty input): return an empty frame with the same columns.
    return pd.concat(parts) if parts else df.iloc[:0]


# `n` rows are drawn per label type from the verified AND from the not-verified test
# rows, so each label type contributes 2 * n rows (10/20/50/100/200) to training.
samples = [5, 10, 25, 50, 100]
random_states = [14, 48, 69]
results = []

# Loop-invariant splits of the test set by ground-truth label.
df_test_verified = df_test[df_test[LABEL] == 1]
df_test_not_verified = df_test[df_test[LABEL] == 0]

for n in samples:
    for random_state in random_states:
        train_df_verified = _sample_per_label_type(df_test_verified, n, random_state)
        train_df_not_verified = _sample_per_label_type(df_test_not_verified, n, random_state)

        # Test rows moved into the training set for this run.
        df_temp = pd.concat([train_df_verified, train_df_not_verified])
        train_df = pd.concat([df_temp, df_downsampled])

        # Hold out exactly the sampled rows, identified by index label. Matching on row
        # content (concat + drop_duplicates(keep=False)) would also discard test rows
        # that merely duplicate each other, and would miss sampled rows whose columns
        # changed during sampling.
        test_df = df_test.drop(index=df_temp.index)
        assert len(test_df) == len(df_test) - len(df_temp), "sampled rows were not removed exactly once"

        X_train = train_df[feature_cols]
        y_train = train_df[LABEL]

        X_test = test_df[feature_cols]
        y_test = test_df[LABEL]

        # Baselines: swap in one of the commented classifiers to evaluate it instead.
        # clf = LogisticRegression(C= 0.1, penalty="l1", solver="liblinear").fit(X_train, y_train)
        # clf = MLPClassifier(hidden_layer_sizes=(24, 10), max_iter=100, alpha=1e-5, activation='relu', solver='adam', random_state=14, early_stopping=True).fit(X_train, y_train)
        # clf = RandomForestClassifier(n_estimators= 4, max_depth=5, min_samples_leaf=4, min_samples_split=10).fit(X_train, y_train)
        clf = xgb.XGBClassifier(n_estimators=5, max_depth=5, learning_rate=0.1, random_state=14).fit(X_train, y_train)
        y_pred = clf.predict(X_test)

        # 'weighted' averages the per-class scores, weighting each class by its support.
        accuracy = accuracy_score(y_test, y_pred)
        precision = precision_score(y_test, y_pred, average='weighted')
        recall = recall_score(y_test, y_pred, average='weighted')
        results.append([n, random_state, accuracy, precision, recall])

# One row per (sample size, seed) run.
df_results = pd.DataFrame(results, columns=['Samples', 'random_state', 'Accuracy', 'Precision', 'Recall'])