In [1]:
import numpy as np
import pandas as pd
import joblib
import sklearn

In [2]:
def create_features(data, lag=7, lag_cols=("total_count",)):
    """Build calendar and lag features from timestamped count records.

    Parameters
    ----------
    data : list of dict, dict of columns, or pandas.DataFrame
        Anything ``pd.DataFrame`` accepts. Must contain a ``Timestamp``
        column parseable by ``pd.DatetimeIndex`` and, when ``lag >= 1``,
        every column named in ``lag_cols``. The input is never modified.
    lag : int, default 7
        Number of lagged copies to create per lag column (1..lag rows back).
        A value below 1 produces no lag columns.
    lag_cols : str or iterable of str, default ("total_count",)
        Column(s) to lag.

    Returns
    -------
    pandas.DataFrame
        A new frame holding the input columns followed by ``year``, ``month``,
        ``day``, ``day_of_week``, ``season``, ``term`` and one
        ``{col}_{n}_days_ago`` column per lag step and lag column.

    Raises
    ------
    KeyError
        If ``Timestamp`` or a requested lag column is missing from ``data``.

    Notes
    -----
    Lags are positional (``Series.shift``): rows are used in the order given
    and are neither sorted nor grouped here, so the ``*_days_ago`` names are
    only accurate when rows are consecutive days in chronological order.
    """

    def get_season(month):
        """Return the season label for a month number."""
        if month in (12, 1, 2):
            return "Winter"
        if month in (9, 10, 11):
            return "Autumn"
        if month in (6, 7, 8):
            return "Summer"
        # Everything else (months 3-5, and any unmatched value) is Spring.
        return "Spring"

    def get_school_term(month):
        """Return the school-term label for a month number."""
        if month in (5, 6, 7, 8):
            return "Summer_term"
        if month in (9, 10, 11, 12):
            return "Winter_term"
        # Everything else (months 1-4, and any unmatched value) is Spring term.
        return "Spring_term"

    # Accept a bare column name as well as an iterable of names.
    lag_cols = [lag_cols] if isinstance(lag_cols, str) else list(lag_cols)

    frame = pd.DataFrame(data)

    # Fail early with a message that names what is missing.
    required = ["Timestamp"] + (lag_cols if lag >= 1 else [])
    missing = [col for col in required if col not in frame.columns]
    if missing:
        raise KeyError(
            f"create_features: input is missing required column(s): {missing}"
        )

    # Parse the timestamps once and reuse the index for every calendar field.
    timestamps = pd.DatetimeIndex(frame["Timestamp"])

    # .assign returns a new frame, so a caller-supplied DataFrame stays intact.
    features = frame.assign(
        year=timestamps.year,
        month=timestamps.month,
        day=timestamps.day,
        day_of_week=timestamps.day_name(),
    )
    features = features.assign(season=features["month"].apply(get_season))
    features = features.assign(term=features["month"].apply(get_school_term))

    # Lag step is the outer loop so columns come out grouped by step:
    # every column 1 row back, then every column 2 rows back, and so on.
    lagged = {
        f"{col}_{n}_days_ago": features[col].shift(n)
        for n in range(1, lag + 1)
        for col in lag_cols
    }
    return features.assign(**lagged)

In [3]:
# Eight consecutive daily counts for the Thunderbird parkade, expanded into
# one record per day.
new_data = [
    {"Timestamp": day, "total_count": count, "parkade": "Thunderbird"}
    for day, count in [
        ("2023-02-03", 1233),
        ("2023-02-04", 2341),
        ("2023-02-05", 2413),
        ("2023-02-06", 3145),
        ("2023-02-07", 1546),
        ("2023-02-08", 1752),
        ("2023-02-09", 1123),
        ("2023-02-10", 2341),
    ]
]

# Derive the calendar and lag feature columns from the raw records.
new_df = create_features(new_data)

In [4]:
# Restore the previously saved model object from disk.
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code -- only load artifacts that come from a trusted source.
# NOTE(review): the path is relative, so this depends on the notebook's working
# directory; presumably the artifact also needs library versions compatible
# with those it was saved under -- confirm.
model = joblib.load("saved_model/tree_model.joblib") # load the saved model

In [5]:
# Predict for every row of new_df. The lag columns come from shift(1..7), so
# with only 8 input rows just the last row (2023-02-10) has a complete lag
# history; all earlier rows carry NaN in at least one lag column.
# NOTE(review): how the saved model treats those NaNs (and the raw
# Timestamp / total_count columns that are passed along) is not visible here --
# confirm before relying on the first seven predictions.
model.predict(new_df)

array([ 495.65530692,  479.05973695, 1120.922655  , 1009.70573053,
       1324.07234449, 1188.42088359, 1122.50319497, 1009.97926391])