In [233]:
import pandas as pd
import numpy as np
import datetime

In [234]:
weather = pd.read_csv('data/weather.csv')
train = pd.read_csv('data/train_processed.csv')
test = pd.read_csv('data/test_processed.csv')

In [235]:
train["timestamp"] = pd.to_datetime(train["timestamp"])
test["timestamp"] = pd.to_datetime(test["timestamp"])
weather.columns, weather.shape

(Index(['name', 'datetime', 'temp', 'feelslike', 'dew', 'humidity', 'precip',
        'precipprob', 'preciptype', 'snow', 'snowdepth', 'windgust',
        'windspeed', 'winddir', 'sealevelpressure', 'cloudcover', 'visibility',
        'solarradiation', 'solarenergy', 'uvindex', 'severerisk', 'conditions',
        'icon', 'stations'],
       dtype='object'),
 (1488, 24))

In [236]:
type(weather["temp"][0])

numpy.float64

In [237]:
weather["datetime"] = pd.to_datetime(weather["datetime"])

In [238]:
unique_conditions = weather["conditions"].unique()
unique_conditions

array(['Rain, Partially cloudy', 'Partially cloudy', 'Rain, Overcast',
       'Overcast', 'Clear', 'Rain', 'Rain, Fog'], dtype=object)

In [239]:
def get_weather_row(weather_df, bike_timestamp):
    timestamp = np.datetime64(bike_timestamp)
    bike_datetime = timestamp.astype(datetime.datetime)
    
    # Get row with the same month, day, hour
    row = weather_df.loc[abs(weather_df["datetime"] - bike_datetime) < datetime.timedelta(hours=1)]
    if len(row) > 1:
        row = row.iloc[0]
    return row


In [240]:
def get_preciptype(weather_df, bike_timestamp):
    row = get_weather_row(weather_df, bike_timestamp)
    precip = row["preciptype"]
    if type(precip) == str and precip == "rain":
        return 1
    else:
        return 0

In [241]:
def get_conditions(weather_df, bike_timestamp):
    # 1 means bad weather, 0 means good weather
    
    row = get_weather_row(weather_df, bike_timestamp)
    conditions = row["conditions"]
    
    if type(conditions) != str:
        return 0
    
    
    if conditions == "Rain, Partially cloudy" or conditions == "Rain" or conditions == "Rain, Fog":
        return 1
    else:
        return 0

In [250]:
def get_temp(weather_df, bike_timestamp):
    row = get_weather_row(weather_df, bike_timestamp)
    
    temp = row["temp"] 
    
    if type(temp) != np.float64:
        return np.mean(weather_df["temp"]) // 10

    return temp // 10

In [245]:
row = get_weather_row(weather, train["timestamp"][17])
precip = get_preciptype(weather, train["timestamp"][17])
precip

0

In [246]:
# test["rain"] = test["timestamp"].apply(lambda x: get_preciptype(weather, x))
# train["rain"] = train["timestamp"].apply(lambda x: get_preciptype(weather, x))

In [247]:
# test["rain_condition"] = test["timestamp"].apply(lambda x: get_conditions(weather, x))
# train["rain_condition"] = train["timestamp"].apply(lambda x: get_conditions(weather, x))

In [251]:
test["temp"] = test["timestamp"].apply(lambda x: get_temp(weather, x))
train["temp"] = train["timestamp"].apply(lambda x: get_temp(weather, x))

In [252]:
train.head()

Unnamed: 0,timestamp,PREŠERNOV TRG-PETKOVŠKOVO NABREŽJE,POGAČARJEV TRG-TRŽNICA,KONGRESNI TRG-ŠUBIČEVA ULICA,CANKARJEVA UL.-NAMA,BREG,GRUDNOVO NABREŽJE-KARLOVŠKA C.,MIKLOŠIČEV PARK,BAVARSKI DVOR,TRG OF-KOLODVORSKA UL.,...,hour_19,hour_20,hour_21,hour_22,hour_23,is_weekend,is_night,rain,rain_condition,temp
0,2022-08-02 15:00:00,19,8,11,18,14,8,17,4,18,...,0,0,0,0,0,0,0,0,0,1.0
1,2022-08-02 15:05:00,16,7,10,15,15,9,16,4,12,...,0,0,0,0,0,0,0,0,0,2.0
2,2022-08-02 15:10:00,16,8,8,9,14,8,13,6,15,...,0,0,0,0,0,0,0,0,0,2.0
3,2022-08-02 15:15:00,19,8,8,8,14,7,13,6,17,...,0,0,0,0,0,0,0,0,0,2.0
4,2022-08-02 15:25:00,18,5,5,1,14,4,12,4,18,...,0,0,0,0,0,0,0,0,0,2.0


In [230]:
train.to_csv("data/train_processed.csv", index=False)
test.to_csv("data/test_processed.csv", index=False)

##### Get school holidays, one-hot encode days of week