### Model training and testing

In [None]:
import os
import warnings
from math import sqrt

import numpy as np
import pandas as pd
import pins
import pyodbc
from dotenv import load_dotenv
from rsconnect.api import RSConnectServer
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from vetiver import VetiverModel, deploy_rsconnect, pin_read_write

In [None]:
# NOTE: this suppresses every warning category for the rest of the session
warnings.filterwarnings('ignore')
# pull settings from a .env file; override=True lets them replace values
# that are already present in the process environment
load_dotenv(override=True)

# Connect server address and API key come from the environment
rsc_server = os.environ.get("CONNECT_SERVER")
rsc_key = os.environ.get("CONNECT_API_KEY")
connect_server = RSConnectServer(
    url=rsc_server,
    api_key=rsc_key,
)

## Raw data

Read in the raw data from the database.

In [None]:
# read table built by R ETL process in the database
# the query keeps only rows whose date is among the 12 most recent distinct dates
sql = "select * from bike_model_data where date in (select distinct date from bike_model_data order by date desc limit 12);"

connection = pyodbc.connect('DSN=Content DB')
try:
    all_days = pd.read_sql_query(sql, connection)
finally:
    # release the database handle even when the query raises
    connection.close()

# newest dates first, with a fresh 0..n-1 index
all_days = all_days.sort_values(by='date', ascending=False).reset_index(drop=True)
all_days

## Data processing

Clean and transform the data.

In [None]:
def feature_split(df):
    '''Separate a prepared frame into model inputs and the target.

    Returns a tuple ``(X, y)``: ``X`` is ``df`` without the ``n_bikes``,
    ``date``, ``lat`` and ``lon`` columns, and ``y`` is a one-column
    DataFrame holding ``n_bikes``. ``df`` itself is not modified.
    '''
    non_feature_columns = ['n_bikes', 'date', 'lat', 'lon']
    target = df[['n_bikes']]
    features = df.drop(columns=non_feature_columns)
    return features, target

def add_dow_as_int(df):
    '''One-hot encode the day of the week.

    Works on a copy, so the frame passed in is left untouched (the previous
    version overwrote the caller's ``date`` column in place).

    Parameters:
        df: DataFrame with a ``date`` column parseable by ``pd.to_datetime``
            and a ``dow`` column of day-of-week labels.

    Returns:
        A new DataFrame where ``date`` is converted to datetime, ``dow`` is
        removed, and one indicator column is appended for each distinct
        ``dow`` value present in ``df``.
    '''
    data = df.copy()
    data['date'] = pd.to_datetime(data['date'])
    # one indicator column per day label that actually occurs in the data
    one_hot = pd.get_dummies(data['dow'])
    return data.drop('dow', axis=1).join(one_hot)

def add_missing_dow(df):
    '''Make sure every weekday indicator column exists.

    Weekday columns absent from ``df`` are added with the value ``False``.
    The result contains exactly the base columns followed by the seven
    weekday columns, Monday through Sunday, in that order; any other column
    is dropped. ``df`` itself is not modified.
    '''
    base_columns = ['n_bikes', 'id', 'date', 'hour', 'month', 'lat', 'lon']
    weekdays = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']

    result = df.copy()
    # everything that is not a base column counts as an already-present label
    present = set(result.columns.drop(base_columns))
    for day in weekdays:
        if day not in present:
            result[day] = False

    # fixed column order: base columns first, then Monday..Sunday
    return result[base_columns + weekdays]

def preprocess_data(df: pd.DataFrame):
    '''Run the preprocessing steps on a copy of ``df`` and return the result.

    The copy is passed through ``add_dow_as_int`` and then
    ``add_missing_dow``.
    '''
    encoded = add_dow_as_int(df.copy())
    return add_missing_dow(encoded)

In [None]:
# divide in training and testing datasets
all_dates = all_days.date.unique()
testing_dates = all_dates[0:3]
training_dates = all_dates[3:]

In [None]:
# preprocess the rows whose date is one of the testing dates
df_testing = preprocess_data(
    all_days.loc[all_days['date'].isin(testing_dates)]
)

# features / target for evaluation
X_test, y_test = feature_split(df_testing)

display(X_test)
display(y_test)

In [None]:
# preprocess the rows whose date is one of the training dates
df_training = preprocess_data(
    all_days.loc[all_days['date'].isin(training_dates)]
)

# features / target for fitting
X_train, y_train = feature_split(df_training)

display(X_train)
display(y_train)

In [None]:
# list the Python type of every feature column label
list(map(type, X_train.columns.to_list()))

## Train model

In [None]:
# build a random forest model
regressor = RandomForestRegressor(n_estimators=100, random_state=0, n_jobs=-1)
regressor.fit(X_train, y_train)


In [None]:
# test the random forest model
# predict on the DataFrame itself rather than X_test.values: the model is
# fitted on a DataFrame, so keeping the column labels lets scikit-learn
# check that the test features line up with the training features
y_pred = regressor.predict(X_test)

# compare predictions with the observed values: root mean squared error
test_mse = mean_squared_error(y_test, y_pred)
rmse = sqrt(test_mse)
print(f'RMSE: {rmse:f}')

## Deploy model

Deploy the model with vetiver.

In [None]:
# convert the random forest model into a vetiver model
v = VetiverModel(
    regressor,
    "gagan/bikeshare-rf-python",
    save_ptype=True, 
    ptype_data=
    X_train
)

In [None]:
# show the vetiver model object as the cell output
v

In [None]:
# create a board on RStudio Connect
board = pins.board_rsconnect(
    server_url="https://colorado.posit.co/rsc",
    allow_pickle_read=True
)

In [None]:
# write the vetiver model as pin to RStudio Connect
pin_read_write.vetiver_pin_write(
    board,
    v
)

In [None]:
# use Vetiver provided RStudio Connect deployment function 
# to deploy the model as a FASTApi
deploy_rsconnect(
    connect_server=connect_server,
    board=board,
    pin_name="gagan/bikeshare-rf-python",
    #python=".bike_predict_python/bin/python",
    title = "Random Forest model for Bikeshare Python"
)