# 1) Libraries & Toolkits 

In [None]:
import pandas as pd
import seaborn as sns
import warnings

from sklearn.preprocessing import StandardScaler
import numpy as np
from sklearn.ensemble import RandomForestRegressor


# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Suppress every Python warning for the rest of the session (this also hides
# library deprecation and data-conversion warnings).
warnings.filterwarnings('ignore')
# Default figure size for all seaborn plots below.
sns.set(rc={'figure.figsize':(12,10)})

# 2) Importing Dataset and exploration

In [None]:
# Read train.csv from the bike-sharing-demand input directory and display it.
train_df = pd.read_csv("../input/bike-sharing-demand/train.csv")
train_df

In [None]:
# Read test.csv from the bike-sharing-demand input directory and display it.
test_df = pd.read_csv("../input/bike-sharing-demand/test.csv")
test_df

In [None]:
# Column names, dtypes and non-null counts of the training frame.
train_df.info()

In [None]:
# Column names, dtypes and non-null counts of the test frame.
test_df.info()

In [None]:
# Summary statistics of the training frame's numeric columns.
train_df.describe()

In [None]:
# Parse the 'datetime' column of both frames (train first, then test).
# errors='coerce' turns values that do not match the format into NaT
# instead of raising.
for frame in (train_df, test_df):
    frame['datetime'] = pd.to_datetime(
        frame['datetime'],
        format='%Y-%m-%d %H:%M:%S',
        errors='coerce',
    )

# Show the resulting dtypes: train is printed, test is the cell's output value.
print(train_df.dtypes, '\n-----------', sep='\n')
test_df.dtypes

In [None]:
# Derive calendar features from the parsed training timestamps.
train_stamp = train_df['datetime'].dt
train_df['month'] = train_stamp.month
train_df['hour'] = train_stamp.hour
train_df['week_day'] = train_stamp.day_name()

# Display five random rows to check the new columns.
train_df.sample(5)

In [None]:
# Derive the same calendar features from the parsed test timestamps.
test_stamp = test_df['datetime'].dt
test_df['month'] = test_stamp.month
test_df['hour'] = test_stamp.hour
test_df['week_day'] = test_stamp.day_name()

# Display five random rows to check the new columns.
test_df.sample(5)

In [None]:
def map_hours(x):
    """Return the day-period label for hour ``x``.

    Bands (both ends inclusive):
      * 0-12  -> 'morning'
      * 13-18 -> 'afternoon'
      * 19-22 -> 'evening'

    Every other value, including hour 23 and anything that is not a whole
    number inside those bands, is labelled 'night'.
    """
    hour_bands = (
        (range(0, 13), 'morning'),
        (range(13, 19), 'afternoon'),
        (range(19, 23), 'evening'),
    )
    for band, label in hour_bands:
        if x in band:
            return label
    return 'night'
    

In [None]:
# Label each training row's hour with its day period via map_hours.
train_df['period'] = train_df['hour'].map(map_hours)

# Display five random rows to check the new column.
train_df.sample(5)

In [None]:
# Label each test row's hour with its day period via map_hours.
test_df['period'] = test_df['hour'].map(map_hours)

# Display five random rows to check the new column.
test_df.sample(5)

# 3) Exploratory data analysis

In [None]:
# Mean rental count for each value of the holiday flag.
sns.barplot(data=train_df, x="holiday", y="count", palette='Set2')

In [None]:
# Mean rental count for each season code.
sns.barplot(data=train_df, x="season", y="count", palette='Set2')

In [None]:
# Mean rental count for each weather code.
sns.barplot(data=train_df, x="weather", y="count", palette='Set2')

In [None]:
# Mean rental count for each day period produced by map_hours.
sns.barplot(data=train_df, x="period", y="count", palette='Set2')

In [None]:
# Mean rental count for each day of the week.
sns.barplot(data=train_df, x="week_day", y="count", palette='Set2')

In [None]:
# Annotated correlation heatmap of the training features.
# train_df holds string columns ('week_day', 'period') and a datetime column
# at this point; DataFrame.corr() no longer drops those silently on
# pandas >= 2.0 and raises instead. Selecting numeric/bool columns explicitly
# gives the same matrix on old and new pandas versions.
sns.heatmap(
    train_df.select_dtypes(include=['number', 'bool']).corr(),
    annot=True,
    fmt='.2f',
)

In [None]:
# Distribution of rental counts within each weather code.
sns.boxplot(x='weather', y='count', data=train_df, palette='Set2')

# 4) Pre-processing

In [None]:
# Re-check column dtypes of both frames before encoding: train is printed,
# test is the cell's output value.
print(train_df.dtypes)
print('\n-----------')
test_df.dtypes

In [None]:
# String-valued features that are one-hot encoded later.
cat_feat = ['week_day', 'period']

# Print the distinct levels of each categorical feature in the training frame.
separator = '-' * 50
for feature in cat_feat:
    print(
        f'Categorical feature name: {feature}',
        train_df[feature].unique(),
        separator,
        sep='\n',
    )

In [None]:
# One-hot encode 'week_day' and 'period' in the training frame.
# drop_first=True drops the first level of each feature.
train_df = pd.get_dummies(train_df, columns=[ 'week_day', 'period'], drop_first=True)
train_df.sample(5)

In [None]:
# One-hot encode 'week_day' and 'period' in the test frame.
# NOTE(review): this is encoded independently of train_df, so the resulting
# dummy columns only match the training ones if both frames contain the same
# levels — confirm before modelling.
test_df = pd.get_dummies(test_df, columns=[ 'week_day', 'period'], drop_first=True)
test_df.sample(5)

# 5) Modeling

In [None]:
# Training features: drop the raw timestamp, the target ('count') and its two
# components ('casual', 'registered'), which are also dropped here so the
# model cannot see them.
x_train = train_df.drop(columns=['datetime', 'casual', 'registered', 'count'])
y_train = train_df[['count']]

# train_df and test_df were one-hot encoded independently, so nothing
# guarantees that the test frame has the same dummy columns in the same order.
# Reindexing on the training columns enforces an identical layout: levels
# missing from the test frame become all-zero columns, and columns unseen
# during training are dropped. reindex returns a new frame, so later in-place
# scaling does not touch test_df.
x_test = test_df.drop(columns=['datetime']).reindex(
    columns=x_train.columns, fill_value=0
)

x_train.shape, y_train.shape, x_test.shape

In [None]:
# Columns to standardise.
numerical_cols = ['temp', 'atemp', 'humidity', 'windspeed', 'month', 'hour']

# Fit the scaler on the training features only, so that the test features are
# later transformed with the training statistics.
std = StandardScaler()
std.fit(x_train.loc[:, numerical_cols])

In [None]:
# Standardise the numeric columns of both feature frames in place with the
# already-fitted scaler (train first, then test).
for features in (x_train, x_test):
    features[numerical_cols] = std.transform(features[numerical_cols])

# Display five random rows of the scaled training features.
x_train.sample(5)

In [None]:
# Display five random rows of the scaled test features.
x_test.sample(5)

In [None]:
# Random forest regressor for the rental count.
# random_state makes the fit reproducible; oob_score=True also scores each
# training row using only the trees that did not see it.
rf = RandomForestRegressor(oob_score=True, random_state=42)

# y_train is a single-column DataFrame; flatten it to the 1-D target that
# fit() expects.
rf.fit(x_train, np.ravel(y_train))
rf_pred = rf.predict(x_test)

# score() on a regressor returns R^2, not accuracy. The figure on the training
# rows is optimistic because the forest has seen them, so the out-of-bag R^2
# is printed next to it as a held-out-style estimate.
print("Train R^2 : {:,.2f}".format(rf.score(x_train, np.ravel(y_train)) * 100), '%')
print("OOB R^2   : {:,.2f}".format(rf.oob_score_ * 100), '%')

In [None]:
# Build the submission table: one predicted count per test timestamp.
# Predictions are rounded to the nearest integer before the cast; a bare
# astype(int) truncates toward zero and biases every prediction downward.
submission = pd.DataFrame({
    "datetime": test_df["datetime"],
    "count": np.rint(rf_pred).astype(np.int64),
})
submission

In [None]:
# submission.to_csv('submission.csv', index=False)