In [1]:
from sklearn.pipeline import Pipeline

from src.base.column_name import RentDataCN, TimeDataCN, WeatherDataCN
from src.base.regression_model_base import RegressionModelBase
from src.repository.rent_data_loader import RentDataLoader
from src.repository.weather_data_loader import WeatherDataLoader
from src.transform.transformer.column_renamer import ColumnRenamer
from src.transform.transformer.data_concater import DataConcater
from src.transform.transformer.datetime_to_category import DatetimeToCategory
from src.transform.transformer.location_column_extender import LocationColumnExtender
from src.transform.transformer.custom_one_hot_encoder import CustomOneHotEncoder
from src.transform.transformer.weather_aggregator import WeatherAggregator
from src.transform.transformer.weather_column_extender import WeatherColumnExtender
from src.transform.transformer.string_to_datetime_converter import StringToDatetimeConverter
from src.transform.transformer.weather_data_preprocessor import WeatherDataPreprocessor

In [16]:
data_loader = RentDataLoader()
weather_data_loader = WeatherDataLoader()

weather_pipline = Pipeline([
    ('data_concatenate', DataConcater(data_category='weather')),
    ('renamer', ColumnRenamer()),
    ('str2datetime', StringToDatetimeConverter(data_category='weather')),
    ('preprocessor', WeatherDataPreprocessor())
])

weather_data = weather_pipline.fit_transform(weather_data_loader.all_data)

pipline = Pipeline([
    ('data_concatenate', DataConcater()),
    ('renamer', ColumnRenamer()),
    ('str2datatime', StringToDatetimeConverter(data_category='rent', per_hour=True)),
    ('location_extender', LocationColumnExtender(year="2021", only_rent_location=True)),
    ('weather_extender', WeatherColumnExtender(preprocessed_data=weather_data)),
    ('datetime2category', DatetimeToCategory()),
    ('aggregate', WeatherAggregator()),
])

In [17]:
processed_data = pipline.fit_transform(data_loader.all_data)

In [18]:
processed_data

Unnamed: 0,rent_station,month,day,hour,weekday,precipitation,sunshine_duration,time_category,rent_count,rainfall
0,92.0,1,1,5,4,0.0,0.0,nighttime,6,non_rain
1,152.0,1,1,5,4,0.0,0.0,nighttime,7,non_rain
2,133.0,1,1,5,4,0.0,0.0,nighttime,8,non_rain
4,46.0,1,1,5,4,0.0,0.0,nighttime,12,non_rain
5,39.0,1,1,5,4,0.0,0.0,nighttime,1,non_rain
...,...,...,...,...,...,...,...,...,...,...
4038522,251.0,9,2,23,4,0.0,0.0,nighttime,1,non_rain
4038552,123.0,9,3,6,5,0.0,0.0,nighttime,1,non_rain
4038563,103.0,9,3,9,5,0.0,0.9,morning_peak,1,non_rain
4038564,133.0,9,3,9,5,0.0,0.9,morning_peak,1,non_rain


In [19]:
processed_data[processed_data['rainfall'] == 1]['rent_count'].mean()

KeyError: 'rain'

In [None]:
processed_data[processed_data['rain'] == 0]['rent_count'].mean()

In [None]:
processed_data[processed_data['rain'] == 1]