In [87]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from skopt import BayesSearchCV
from sklearn.preprocessing import StandardScaler, MinMaxScaler, RobustScaler, MaxAbsScaler, QuantileTransformer

import warnings
warnings.filterwarnings("ignore")

In [2]:
# Load the availability readings and collapse them to one row per (id, hour):
# every time_stamp is rounded to the nearest hour, then 'bikes' and
# 'bike_stands' are averaged over the readings that share an id and hour.
df_bikes = pd.read_csv('../availability.csv')
# Lowercase 'h' is the hourly alias; the uppercase 'H' spelling is deprecated
# since pandas 2.2 and only kept working through a FutureWarning.
df_bikes['time_stamp'] = pd.to_datetime(df_bikes['time_stamp']).dt.round('h')
df_bikes = df_bikes.groupby(['id', 'time_stamp']).agg({'bikes': 'mean', 'bike_stands': 'mean'}).reset_index()

In [3]:
def round_nearest(value):
    """Round a number to the nearest integer, with exact halves rounding up.

    Unlike the built-in ``round`` (which rounds halves to the nearest even
    integer), ``x.5`` always goes to ``x + 1``.

    Args:
        value: A real number (int or float).

    Returns:
        int: The integer nearest to ``value``; ties go toward positive infinity.
    """
    # Take the floor rather than int(value): int() truncates toward zero, which
    # made negative inputs round the wrong way (e.g. -1.7 came out as -1).
    lower = int(value // 1)
    return lower + 1 if value - lower >= 0.5 else lower

In [4]:
# The hourly aggregation averaged 'bikes' and 'bike_stands', so both columns
# can hold fractions; convert them back to whole numbers (halves round up).
# round_nearest is passed directly: wrapping it in a lambda only adds an extra
# function call per row.
df_bikes['bikes'] = df_bikes['bikes'].apply(round_nearest)
df_bikes['bike_stands'] = df_bikes['bike_stands'].apply(round_nearest)

In [5]:
# Preview the first five rows of the hourly per-id table.
df_bikes.head(n=5)

Unnamed: 0,id,time_stamp,bikes,bike_stands
0,1,2024-01-01 00:00:00,23,8
1,1,2024-01-01 01:00:00,23,8
2,1,2024-01-01 02:00:00,23,8
3,1,2024-01-01 03:00:00,23,8
4,1,2024-01-01 04:00:00,23,8


In [6]:
# Number of (id, hour) rows in the aggregated bikes table.
df_bikes.shape[0]

164160

In [7]:
# Load the weather observations and round their timestamps to the nearest hour
# so they share the same hourly key as df_bikes['time_stamp'].
df_weather = pd.read_csv('../hourlyWeather.csv')
# Lowercase 'h' is the hourly alias; the uppercase 'H' spelling is deprecated
# since pandas 2.2 and only kept working through a FutureWarning.
df_weather['time_stamp'] = pd.to_datetime(df_weather['time_stamp']).dt.round('h')

In [8]:
# Preview the first five weather rows.
df_weather.head(n=5)

Unnamed: 0,time_stamp,temp_c,feelslike_c,wind_kph,humidity,precip_mm,gust_kph,wind_degree,pressure_mb,cloud,uv
0,2024-01-01 00:00:00,5.8,1.1,30.1,84.33,0.078,50.0,257,994.4,64.9,0
1,2024-01-01 01:00:00,5.0,0.1,29.9,86.73,0.0,68.8,257,994.8,25.0,0
2,2024-01-01 02:00:00,4.8,-0.1,28.5,83.05,0.0,48.2,257,995.5,17.2,0
3,2024-01-01 03:00:00,4.5,0.0,24.0,83.87,0.0,42.5,254,996.4,17.3,0
4,2024-01-01 04:00:00,4.7,0.5,22.3,82.67,0.0,61.6,240,997.2,17.2,0


In [9]:
# Sanity check: weather rows multiplied by a fixed count of 114.
# NOTE(review): 114 is presumably the number of distinct ids in df_bikes, in
# which case this product should equal len(df_bikes) — confirm, and consider
# deriving it from the data instead of hard-coding it.
df_weather.shape[0] * 114

164160

In [10]:
# Attach the weather columns to every bike row that shares the same hour.
# how='inner' keeps only the hours present in both tables.
# validate='many_to_one' makes pandas raise a MergeError if df_weather ever
# holds two rows for the same hour, instead of silently duplicating bike rows.
df = pd.merge(df_bikes, df_weather, on='time_stamp', how='inner', validate='many_to_one')
# No extra pd.to_datetime() pass is needed: both inputs already carry datetime
# time_stamp columns (each was built with pd.to_datetime(...).dt.round).
df.head()

Unnamed: 0,id,time_stamp,bikes,bike_stands,temp_c,feelslike_c,wind_kph,humidity,precip_mm,gust_kph,wind_degree,pressure_mb,cloud,uv
0,1,2024-01-01 00:00:00,23,8,5.8,1.1,30.1,84.33,0.078,50.0,257,994.4,64.9,0
1,1,2024-01-01 01:00:00,23,8,5.0,0.1,29.9,86.73,0.0,68.8,257,994.8,25.0,0
2,1,2024-01-01 02:00:00,23,8,4.8,-0.1,28.5,83.05,0.0,48.2,257,995.5,17.2,0
3,1,2024-01-01 03:00:00,23,8,4.5,0.0,24.0,83.87,0.0,42.5,254,996.4,17.3,0
4,1,2024-01-01 04:00:00,23,8,4.7,0.5,22.3,82.67,0.0,61.6,240,997.2,17.2,0


In [11]:
# Feature matrix: the hour (as whole seconds since the Unix epoch), the id and
# the weather columns. Target matrix: the two count columns.
#
# .assign() builds a new frame, so nothing is written into a slice of df
# (the previous column assignment on the slice raised SettingWithCopyWarning).
# Subtracting the epoch and floor-dividing by one second gives int64 seconds
# whatever the datetime resolution (ns/us/s) and platform integer width, unlike
# .astype(int) / 10**9, which assumes nanoseconds and a 64-bit int.
# Assumes time_stamp is timezone-naive, as the displayed values suggest.
X = df[['time_stamp', 'id', 'temp_c', 'feelslike_c', 'wind_kph', 'humidity', 'precip_mm', 'gust_kph', 'wind_degree', 'pressure_mb', 'cloud', 'uv']].assign(
    time_stamp=lambda features: (features['time_stamp'] - pd.Timestamp('1970-01-01')) // pd.Timedelta(seconds=1)
)
y = df[['bikes', 'bike_stands']]

In [12]:
# Preview the first five feature rows (time_stamp is now epoch seconds).
X.head(n=5)

Unnamed: 0,time_stamp,id,temp_c,feelslike_c,wind_kph,humidity,precip_mm,gust_kph,wind_degree,pressure_mb,cloud,uv
0,1704067200,1,5.8,1.1,30.1,84.33,0.078,50.0,257,994.4,64.9,0
1,1704070800,1,5.0,0.1,29.9,86.73,0.0,68.8,257,994.8,25.0,0
2,1704074400,1,4.8,-0.1,28.5,83.05,0.0,48.2,257,995.5,17.2,0
3,1704078000,1,4.5,0.0,24.0,83.87,0.0,42.5,254,996.4,17.3,0
4,1704081600,1,4.7,0.5,22.3,82.67,0.0,61.6,240,997.2,17.2,0
