# Bike Sharing Demand

datetime - hourly date + timestamp<br>
season -  1 = spring, 2 = summer, 3 = fall, 4 = winter<br>
holiday - whether the day is considered a holiday<br>
workingday - whether the day is neither a weekend nor holiday<br>
weather - 1: Clear, Few clouds, Partly cloudy<br>
2: Mist + Cloudy, Mist + Broken clouds, Mist + Few clouds, Mist<br>
3: Light Snow, Light Rain + Thunderstorm + Scattered clouds, Light Rain + Scattered clouds<br>
4: Heavy Rain + Ice Pellets + Thunderstorm + Mist, Snow + Fog<br>
temp - temperature in Celsius<br>
atemp - "feels like" temperature in Celsius<br>
humidity - relative humidity<br>
windspeed - wind speed<br>
casual - number of non-registered user rentals initiated<br>
registered - number of registered user rentals initiated<br>
count - number of total rentals<br>

In [14]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

In [15]:
%matplotlib inline

In [16]:
# Read the train and test CSVs, parsing the "datetime" column as timestamps,
# then report the (rows, columns) shape of each frame.
read_options = {"parse_dates": ["datetime"]}
train = pd.read_csv("data/train.csv", **read_options)
test = pd.read_csv("data/test.csv", **read_options)
for loaded_frame in (train, test):
    print(loaded_frame.shape)

(10886, 12)
(6493, 9)


In [17]:
# Show the dtype pandas assigned to each column of the training frame.
train.dtypes

datetime      datetime64[ns]
season                 int64
holiday                int64
workingday             int64
weather                int64
temp                 float64
atemp                float64
humidity               int64
windspeed            float64
casual                 int64
registered             int64
count                  int64
dtype: object

In [18]:
# Print a summary of the training frame: index range, per-column non-null
# counts and dtypes, and memory usage.
train.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10886 entries, 0 to 10885
Data columns (total 12 columns):
datetime      10886 non-null datetime64[ns]
season        10886 non-null int64
holiday       10886 non-null int64
workingday    10886 non-null int64
weather       10886 non-null int64
temp          10886 non-null float64
atemp         10886 non-null float64
humidity      10886 non-null int64
windspeed     10886 non-null float64
casual        10886 non-null int64
registered    10886 non-null int64
count         10886 non-null int64
dtypes: datetime64[ns](1), float64(3), int64(8)
memory usage: 1020.6 KB


In [19]:
# Preview the first rows of the training frame.
train.head()

Unnamed: 0,datetime,season,holiday,workingday,weather,temp,atemp,humidity,windspeed,casual,registered,count
0,2011-01-01 00:00:00,1,0,0,1,9.84,14.395,81,0.0,3,13,16
1,2011-01-01 01:00:00,1,0,0,1,9.02,13.635,80,0.0,8,32,40
2,2011-01-01 02:00:00,1,0,0,1,9.02,13.635,80,0.0,5,27,32
3,2011-01-01 03:00:00,1,0,0,1,9.84,14.395,75,0.0,3,10,13
4,2011-01-01 04:00:00,1,0,0,1,9.84,14.395,75,0.0,0,1,1


In [20]:
# Preview the first rows of the test frame.
test.head()

Unnamed: 0,datetime,season,holiday,workingday,weather,temp,atemp,humidity,windspeed
0,2011-01-20 00:00:00,1,0,1,1,10.66,11.365,56,26.0027
1,2011-01-20 01:00:00,1,0,1,1,10.66,13.635,56,0.0
2,2011-01-20 02:00:00,1,0,1,1,10.66,13.635,56,0.0
3,2011-01-20 03:00:00,1,0,1,1,10.66,12.88,56,11.0014
4,2011-01-20 04:00:00,1,0,1,1,10.66,12.88,56,11.0014


# Data Processing

In [21]:
# Drop outliers: keep only the rows whose "count" lies within
# OUTLIER_STD_LIMIT standard deviations of the mean "count".
#
# NOTE: this cell rebinds `train` from itself, so running it a second time
# recomputes the mean/std on the already-trimmed data and can trim further.
# Re-run from the data-loading cell to get a reproducible result.
OUTLIER_STD_LIMIT = 3

print(train.shape)
count_deviation = (train["count"] - train["count"].mean()).abs()
count_limit = OUTLIER_STD_LIMIT * train["count"].std()
# .copy() makes the filtered result an independent frame, so the feature
# columns assigned onto `train` in later cells do not raise
# SettingWithCopyWarning (or write to a temporary slice).
train = train[count_deviation <= count_limit].copy()
print(train.shape)

(10886, 12)
(10739, 12)


### -Datetime

In [33]:
# Expand "datetime" into one column per calendar component, each named
# "dt_<component>", and preview the new columns.
print(train.shape)
datetime_parts = ["year", "month", "day", "hour", "minute", "second", "dayofweek"]
for part_name in datetime_parts:
    # Equivalent to train["datetime"].dt.<part_name>
    train["dt_" + part_name] = getattr(train["datetime"].dt, part_name)

print(train.shape)

train[["dt_" + part_name for part_name in datetime_parts]].head()

(10739, 12)
(10739, 19)


Unnamed: 0,dt_year,dt_month,dt_day,dt_hour,dt_minute,dt_second,dt_dayofweek
0,2011,1,1,0,0,0,5
1,2011,1,1,1,0,0,5
2,2011,1,1,2,0,0,5
3,2011,1,1,3,0,0,5
4,2011,1,1,4,0,0,5


In [34]:
# Add "dt_dayofweek_str": the English weekday name for each numeric
# "dt_dayofweek" code (0 = Monday ... 6 = Sunday).
# A single dict lookup via Series.map replaces seven separate masked
# assignments; any code missing from the mapping yields NaN, exactly as an
# unmatched row did before.
DAYOFWEEK_NAMES = {
    0: "Monday",
    1: "Tuesday",
    2: "Wednesday",
    3: "Thursday",
    4: "Friday",
    5: "Saturday",
    6: "Sunday",
}

print(train.shape)
train["dt_dayofweek_str"] = train["dt_dayofweek"].map(DAYOFWEEK_NAMES)

print(train.shape)

train[["dt_dayofweek_str"]].head()

(10739, 19)
(10739, 20)


Unnamed: 0,dt_dayofweek_str
0,Saturday
1,Saturday
2,Saturday
3,Saturday
4,Saturday


### -Season

In [35]:
print(train.shape)
train["season_1"] = train["season"] == 1
train["season_2"] = train["season"] == 2
train["season_3"] = train["season"] == 3
train["season_4"] = train["season"] == 4

print(train.shape)

(10739, 20)
(10739, 24)


### -Weather

10739