In [35]:
import import_files as MrtRecommendationDependencies
from ridership_transformer import RidershipTransformer
from ridership_cleaner import RidershipCleaner
import pandas as pd
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
sns.set_style("whitegrid")
sns.set_context("poster")

%matplotlib inline

from sklearn.decomposition import PCA
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier,AdaBoostClassifier
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, Normalizer
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from sklearn.metrics import classification_report, make_scorer, f1_score, recall_score, precision_score, accuracy_score

In [41]:
file = MrtRecommendationDependencies.get_dataset_path("trip_classes.csv")
df = pd.read_csv(file)
df.head()

Unnamed: 0,num_train_running,num_train_operational,headway,entry,exit,temp,humidity,rain_1h,rain_3h,hour_of_day,day_of_week,recommendation
0,12.0,13.0,8.5,2410.0,3426.0,26.41,87,0.0,0.0,19,6,take with caution
1,8.0,8.0,8.0,2752.0,2.0,31.29,60,0.0,0.0,5,0,avoid
2,7.0,7.0,8.0,2773.0,0.0,30.09,66,0.0,0.0,5,1,avoid
3,15.0,15.0,7.0,2678.0,4303.0,27.23,84,0.0,0.0,18,0,take
4,9.0,9.0,8.0,3013.0,3.0,31.24,60,0.0,0.0,5,2,avoid


In [42]:
X = df.drop(['recommendation', 'hour_of_day', 'day_of_week'], axis=1)
Y = df.recommendation

In [49]:
def metrics_change(metric):
    scoring = {'accuracy': make_scorer(accuracy_score),
           'precision': make_scorer(precision_score, average = 'macro'),
           'recall': make_scorer(recall_score, average = 'macro'),
           'f1_macro': make_scorer(f1_score, average = 'macro'),
           'f1_weighted': make_scorer(f1_score, average = 'weighted')}
    pipeline = Pipeline([('scaler', StandardScaler()), ('pca', PCA(n_components=2)), ('classifier', RandomForestClassifier())])
    param_grid_pipe = {#'classifier': [RandomForestClassifier(), AdaBoostClassifier(), GradientBoostingClassifier()],
                    'classifier__n_estimators': [*range(50,61)],
                   'classifier__random_state': [3], 
                   'classifier__criterion': ['gini', 'entropy'], 
                    'pca__n_components': [2,0.99]
                  }
    grid_search = GridSearchCV(pipeline,param_grid=param_grid_pipe,cv=5,scoring=scoring[metric])
    grid_search.fit(X,Y)
    print(f"Results for {metric}")
    print(grid_search.best_estimator_)
    print(grid_search.best_score_)

In [50]:
metrics_change("accuracy")
metrics_change("precision")
metrics_change("recall")
metrics_change("f1_macro")
metrics_change("f1_weighted")

Results for accuracy
Pipeline(steps=[('scaler', StandardScaler()), ('pca', PCA(n_components=2)),
                ('classifier',
                 RandomForestClassifier(n_estimators=56, random_state=3))])
0.9906401440763097
Results for precision
Pipeline(steps=[('scaler', StandardScaler()), ('pca', PCA(n_components=2)),
                ('classifier',
                 RandomForestClassifier(n_estimators=56, random_state=3))])
0.990650992121296
Results for recall
Pipeline(steps=[('scaler', StandardScaler()), ('pca', PCA(n_components=0.99)),
                ('classifier',
                 RandomForestClassifier(criterion='entropy', n_estimators=55,
                                        random_state=3))])
0.988612499841194
Results for f1_macro
Pipeline(steps=[('scaler', StandardScaler()), ('pca', PCA(n_components=2)),
                ('classifier',
                 RandomForestClassifier(n_estimators=56, random_state=3))])
0.9894046917718751
Results for f1_weighted
Pipeline(steps=[('scale

In [None]:
import pandas as pd
import import_files as MrtRecommendationDependencies
from util import Util
from datetime import datetime
import numpy as np
from recommender import Recommender
from trip_classifier import TripClassifier

In [None]:
station = 'north'
triptime = datetime(2020,9,5,14)

In [None]:
model = Recommender()
pred = model.predict(station,datetime)