In [2]:
%load_ext autoreload
%autoreload 2

from markovBike.forecast.forecast import calculate_daily_rides, get_daily_rides_with_weather, forecast_number_users
from markovBike.data_source.source import get_stations_data, database_queries
from markovBike.data_source.preprocess import preprocess_trips_data

import pandas as pd
import os

2023-02-22 15:25:27.293630: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [3]:
# Number of trips requested from the database for this run.
n_trips = 500

# Read the key up front and fail with a clear message: `os.environ.get`
# returns None when the variable is missing, and that None would otherwise be
# passed straight into get_daily_rides_with_weather.
weather_api_key = os.environ.get('API_KEY')
if not weather_api_key:
    raise RuntimeError(
        "Environment variable API_KEY is not set; "
        "it is passed to get_daily_rides_with_weather.")

trips_raw = get_stations_data(database_queries(n_trips)['trips'], verbose=True)

# Calendar date of each trip start; used as the merge key below.
trips_raw['date'] = pd.to_datetime(trips_raw['starttime']).dt.date

daily_rides = calculate_daily_rides(trips_raw)

# Attach the per-date ride information to every trip row.
merged_df = trips_raw.merge(daily_rides, on='date')

weather_dataframe = get_daily_rides_with_weather(merged_df, weather_api_key)

Bike station table with shape (500, 15). Columns are: 

tripduration                             Int64
starttime                  datetime64[ns, UTC]
stoptime                   datetime64[ns, UTC]
start_station_id                         Int64
start_station_name                      object
start_station_latitude                 float64
start_station_longitude                float64
end_station_id                           Int64
end_station_name                        object
end_station_latitude                   float64
end_station_longitude                  float64
bikeid                                   Int64
usertype                                object
birth_year                               Int64
gender                                  object
dtype: object

Weather for 2014-03-15 retrieved

In [4]:
# Preview the first three rows of the weather-enriched trips table.
weather_dataframe.head(n=3)

Unnamed: 0,tripduration,starttime,stoptime,start_station_id,start_station_name,start_station_latitude,start_station_longitude,end_station_id,end_station_name,end_station_latitude,...,bikeid,usertype,birth_year,gender,date,daily_n_rides,temperature,description,humidity,wind_speed
0,107,2014-06-06 13:02:01+00:00,2014-06-06 13:03:48+00:00,520,W 52 St & 5 Ave,40.759923,-73.976485,520,W 52 St & 5 Ave,40.759923,...,16692,Subscriber,1977.0,male,2014-06-06,1,41.36,clear sky,48,9.22
1,1105,2013-08-24 17:03:52+00:00,2013-08-24 17:22:17+00:00,520,W 52 St & 5 Ave,40.759923,-73.976485,520,W 52 St & 5 Ave,40.759923,...,15352,Customer,,unknown,2013-08-24,2,41.36,clear sky,48,9.22
2,545,2013-08-24 18:31:38+00:00,2013-08-24 18:40:43+00:00,520,W 52 St & 5 Ave,40.759923,-73.976485,469,Broadway & W 53 St,40.763441,...,16195,Subscriber,1981.0,male,2013-08-24,2,41.27,clear sky,48,9.22


In [5]:
# NumPy dtype names treated as numeric when splitting the columns.
numerics = ['int16', 'int32', 'int64', 'float16', 'float32', 'float64']

# Identifier columns, named once so both selections below stay in sync.
station_id_columns = ['start_station_id', 'end_station_id']
identifier_columns = station_id_columns + ['bikeid']

# Numeric-dtype columns with the identifier columns removed.
numerical_features = (
    weather_dataframe
    .select_dtypes(include=numerics)
    .columns
    .drop(identifier_columns)
)

# Object-dtype columns without the station names, plus the station ids.
# 'bikeid' ends up in neither the numerical nor the categorical group.
categorical_features = (
    weather_dataframe
    .select_dtypes(include='object')
    .columns
    .drop(['start_station_name', 'end_station_name'])
    .append(pd.Index(station_id_columns))
)

boolean_features = weather_dataframe.select_dtypes(include='bool').columns

In [6]:
# Run the project preprocessing helper on the weather-enriched trips, using
# the three column groups built earlier; `index` and `drops` stay None.
preprocess_options = dict(
    index=None,
    numerical_features=numerical_features,
    categorical_features=categorical_features,
    boolean_features=boolean_features,
    drops=None,
    verbose=True,
)
weather_dataframe_preproc = preprocess_trips_data(weather_dataframe,
                                                  **preprocess_options)


 ğŸ“¶ [36mColumns['tripduration', 'starttime', 'stoptime', 'start_station_id', 'start_station_name', 'start_station_latitude', 'start_station_longitude', 'end_station_id', 'end_station_name', 'end_station_latitude', 'end_station_longitude', 'bikeid', 'usertype', 'birth_year', 'gender', 'date', 'daily_n_rides', 'temperature', 'description', 'humidity', 'wind_speed'][0m

 ğŸ“¶ [36mNumericals['tripduration', 'start_station_latitude', 'start_station_longitude', 'end_station_latitude', 'end_station_longitude', 'birth_year', 'daily_n_rides', 'temperature', 'humidity', 'wind_speed'][0m

 ğŸ“¶ [36mCategoricals['usertype', 'gender', 'date', 'description', 'start_station_id', 'end_station_id', 'bikeid'][0m

 ğŸ“¶ [36mBooleans[][0m

 ğŸ“¶ [36mArray shaped (500, 918)[0m

 ğŸ“¶ [36mColumns names are 918: ['tripduration', 'start_station_latitude', 'start_station_longitude', 'end_station_latitude', 'end_station_longitude', 'birth_year', 'daily_n_rides', 'temperature', 'humidity', 'wind_sp



In [7]:
# The preprocessing result is indexed here as [0] = values and [1] = column
# names. Because the same name is rebound to the resulting DataFrame, a plain
# re-run of this cell would try `[0]` on a DataFrame and fail, so the
# conversion only happens while the name still holds the raw result.
if not isinstance(weather_dataframe_preproc, pd.DataFrame):
    preproc_values = weather_dataframe_preproc[0]
    preproc_columns = weather_dataframe_preproc[1]
    # dropna() removes every row with a missing value in any column.
    weather_dataframe_preproc = pd.DataFrame(
        preproc_values, columns=preproc_columns).dropna()


In [8]:
# Predictor columns and target column handed to the forecasting helper.
feature_columns = [
    'temperature',
    'humidity',
    'wind_speed',
    'description_clear sky',
]
target_column = 'daily_n_rides'

# The helper's result is unpacked into the model and its error metric.
model, mae = forecast_number_users(weather_dataframe_preproc,
                                   X=feature_columns,
                                   y=target_column,
                                   test_size=0.2,
                                   lstm_units=64,
                                   lstm_epochs=50,
                                   verbose=0)


 ğŸ“¶ [36mX Train shape is: (331, 4)[0m

 ğŸ“¶ [36mY train shape is: (331,)[0m

 ğŸ“¶ [36mX Test shape is: (83, 4)[0m

 ğŸ“¶ [36mY Test shape is: (83,)[0m


2023-02-22 15:35:25.194655: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.




In [9]:
# Display the error metric returned by forecast_number_users alongside the
# model (presumably mean absolute error, going by the name — confirm in the
# helper).
mae

0.7134184791363677