In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Enumerate everything under the Kaggle input mount and print one full path per line.
input_file_paths = (
    os.path.join(root, name)
    for root, _subdirs, names in os.walk('/kaggle/input')
    for name in names
)
for file_path in input_file_paths:
    print(file_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# Library

In [None]:
import os
import sys
import numpy as np
import pandas as pd

import matplotlib as mpl
# Applied before pyplot is imported on the next line.
# NOTE(review): presumably raises the Agg backend's path chunk size so that very long
# line plots render without overflow errors — confirm against the matplotlib rcParams docs.
mpl.rcParams['agg.path.chunksize'] = 10000
import matplotlib.pyplot as plt

import datetime

from IPython.display import display
# Notebook-wide pandas display settings: show up to 500 rows and 200 columns,
# allow cell text up to 10000 characters, and render floats with two decimals.
display_options = {
    'display.max_rows': 500,
    'display.max_columns': 200,
    "display.max_colwidth": 10000,
    "display.float_format": lambda x: "%.2f" % x,
}
for option_name, option_value in display_options.items():
    pd.set_option(option_name, option_value)
    
# Default font size for every matplotlib figure drawn in this notebook.
plt.rcParams.update({'font.size': 15})

# Load CSV data

In [None]:
# Read the competition's training and test tables into DataFrames.
df_train, df_test = (
    pd.read_csv(csv_path)
    for csv_path in (
        "/kaggle/input/bike-sharing-demand/train.csv",
        "/kaggle/input/bike-sharing-demand/test.csv",
    )
)

In [None]:
# Report (rows, columns) for the training frame, then for the test frame.
for frame in (df_train, df_test):
    print(frame.shape)

# 前処理

## datetimeについて

In [None]:
# Convert the raw 'datetime' column into pandas timestamps so date parts can be extracted.
df_train['datetime'] = df_train['datetime'].pipe(pd.to_datetime)

In [None]:
# Derive string-valued calendar features ('%Y', '%m', '%d', '%H') from the parsed timestamp.
# The vectorised ``.dt.strftime`` accessor yields the same strings as a per-row
# ``apply(lambda x: x.strftime(...))`` without one Python-level call per row.
# Requires 'datetime' to already hold datetime64 values (parsed in the cell above).
train_dt = df_train['datetime'].dt
df_train['year'] = train_dt.strftime('%Y')
df_train['month'] = train_dt.strftime('%m')
df_train['day'] = train_dt.strftime('%d')
df_train['hour'] = train_dt.strftime('%H')

In [None]:
# Preview the first rows to check the parsed 'datetime' and the derived year/month/day/hour columns.
df_train.head()

## 不要な列削除

In [None]:
# (rows, columns) of the training frame before the column drop in the next cell.
df_train.shape

In [None]:
# Remove columns that are not used as model inputs.
# Note: re-running this cell on the already-reduced frame raises KeyError.
unused_columns = ['atemp', 'casual', 'registered']
df_train = df_train.drop(columns=unused_columns)
# Disabled alternative kept for reference: also dropping 'datetime', 'month' and 'day'.
#df_train = df_train.drop(['datetime', 'month', 'day'], axis=1)

In [None]:
# (rows, columns) of the training frame after the column drop in the previous cell.
df_train.shape

## 学習準備

In [None]:
# Separate the feature columns from the regression target ('count').
X_train = df_train.drop(columns='count')
y_train = df_train['count'].copy()  # target vector, copied so it is independent of df_train

In [None]:
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.impute import SimpleImputer

# Preprocessing for the numeric columns: a single standardisation step.
# Disabled steps kept for reference (author's notes translated):
#   ('imputer', SimpleImputer(strategy='constant', fill_value=0))  -> no missing values this time
#   ('attribs_adder', CombinedAttributesAdder())                   -> adding this would show feature importance
numeric_steps = [('std_scaler', StandardScaler())]
num_pipeline = Pipeline(steps=numeric_steps)

In [None]:
from sklearn.compose import ColumnTransformer

# Columns routed to each preprocessing branch. Columns not listed here (e.g. 'datetime',
# 'month', 'day') are not passed on, because ColumnTransformer is built with its default remainder.
num_feats = [
    'temp',
    'humidity',
    'windspeed',
]
cat_feats = [
    'season',
    'workingday',
    'holiday',
    'weather',
    'year',
    'hour',
]

# Numeric columns are standardised via num_pipeline; categorical columns are one-hot encoded.
column_branches = [
    ('num', num_pipeline, num_feats),
    ('cat', OneHotEncoder(), cat_feats),
]
full_pipeline = ColumnTransformer(transformers=column_branches)

In [None]:
# Fit the scaler and one-hot encoder on the training features and transform them in the same call.
X_train_prepared = full_pipeline.fit_transform(X_train)

In [None]:
# Recover the column names of the prepared matrix: the numeric feature names followed by the
# one-hot column names, in the same order as the transformers listed in ``full_pipeline``.
# ``OneHotEncoder.get_feature_names`` was removed in scikit-learn 1.2, so use
# ``get_feature_names_out`` and fall back to the old method only on releases that lack it.
# Passing ``cat_feats`` produces readable names such as 'season_1' instead of 'x0_1'.
cat_encoder = full_pipeline.named_transformers_['cat']
if hasattr(cat_encoder, 'get_feature_names_out'):
    encoded_cols = cat_encoder.get_feature_names_out(cat_feats).tolist()
else:
    encoded_cols = cat_encoder.get_feature_names(cat_feats).tolist()
cols_prepared = num_feats + encoded_cols

# Modeling 

## DecisionTreeRegressor

In [None]:
from sklearn.tree import DecisionTreeRegressor

# Fix the seed so the fitted tree (and therefore the submission file) is reproducible:
# without ``random_state`` the tree can differ between runs, because candidate features
# are randomly permuted at each split and ties between equally good splits are broken at random.
tree_reg = DecisionTreeRegressor(random_state=42)
tree_reg.fit(X_train_prepared, y_train)

In [None]:
from sklearn.metrics import mean_squared_log_error

# Score the fitted tree on the same data it was trained on (no hold-out set is used here).
# Despite its name, ``tree_msle`` stores the square root of the mean squared log error (RMSLE).
count_predictions = tree_reg.predict(X_train_prepared)
train_squared_log_error = mean_squared_log_error(y_true=y_train, y_pred=count_predictions)
tree_msle = np.sqrt(train_squared_log_error)
tree_msle

# Submissions

In [None]:
# Parse the test timestamps and derive the same string-valued calendar features
# ('%Y', '%m', '%d', '%H') that were built for the training frame.
# The vectorised ``.dt.strftime`` accessor yields the same strings as a per-row
# ``apply(lambda x: x.strftime(...))`` without one Python-level call per row.
df_test['datetime'] = pd.to_datetime(df_test['datetime'])
test_dt = df_test['datetime'].dt
df_test['year'] = test_dt.strftime('%Y')
df_test['month'] = test_dt.strftime('%m')
df_test['day'] = test_dt.strftime('%d')
df_test['hour'] = test_dt.strftime('%H')

In [None]:
# Drop 'atemp' from the test frame, as was done for the training frame.
df_test = df_test.drop(columns='atemp')

In [None]:
# Apply the preprocessing that was fitted on the training data. Calling ``fit_transform`` here
# would refit the scaler and the one-hot encoder on the test set, so the test features would be
# scaled with different statistics (and possibly encoded into different columns) than the
# features the tree was trained on.
X_test_prepared = full_pipeline.transform(df_test)

In [None]:
# Predict the rental count for every prepared test row with the fitted decision tree.
final_preds = tree_reg.predict(X_test_prepared)

In [None]:
# Timestamps of the test rows; they become the 'datetime' column of the submission file.
submission_dt = df_test['datetime']

In [None]:
# Pair each test timestamp with its predicted count and write the result
# to the working directory without the index column.
submission = submission_dt.to_frame(name="datetime").assign(**{"count": final_preds})
submission.to_csv("tree_reg_v1.csv", index=False)