In [1]:
import pandas as pd
import datetime as dt

In [2]:
df = pd.read_csv('day.csv')

In [3]:
# creates new columns for each dummy variable in the dataset
def columnize(df, column, new_vals, new_col):
    list1 = df[column].unique()
    dict1 = dict(zip(list1,new_vals))
    df[new_col] = df[column].map(dict1)
    df = pd.concat([df, pd.get_dummies(df[new_col])], 1)
    return df

In [4]:
# removing this outlier because HURRICANE SANDY happened
df[df['cnt']==22]

Unnamed: 0,instant,dteday,season,yr,mnth,holiday,weekday,workingday,weathersit,temp,atemp,hum,windspeed,casual,registered,cnt
667,668,2012-10-29,4,1,10,0,1,1,3,0.44,0.4394,0.88,0.3582,2,20,22


In [5]:
df = df[df['cnt']>100]

In [6]:
#changing dates (dteday) to datetime objects
df['dteday'] = pd.to_datetime(df['dteday'])

#changing dates to ordinal, to use in regression
df['dteday'] = df['dteday'].map(dt.datetime.toordinal)

In [7]:
weathertypes = ['clear', 'misty', 'light_storm', 'heavy_storm']

In [8]:
#create and add new columns for each weather situation
df = columnize(df, 'weathersit', weathertypes, 'weather')

In [9]:
# changing the season column to be more accurate (matching season to month)
summer = [6,7,8]
fall = [9,10,11]
winter = [12,1,2]
spring = [3,4,5]

sum_dict = dict.fromkeys(summer,'summer')
wint_dict = dict.fromkeys(winter,'winter')
spr_dict = dict.fromkeys(spring,'spring')
fall_dict = dict.fromkeys(fall,'fall')

In [10]:
# combine all the season dicts
seasons = {**sum_dict,**wint_dict,**spr_dict,**fall_dict}

In [11]:
# apply the new season column to the dataframe
df['season']=df['mnth'].map(seasons)

In [12]:
# create dummy variable columns for each season and add it to dataframe
df = pd.concat([df, pd.get_dummies(df['season'])], 1)

In [17]:
# export dataframe to JSON file to use in other notebooks
df.to_json('cleaned_bike_share_data.json', orient='records')

In [19]:
# reviewing json version of df
pd.read_json('cleaned_bike_share_data.json')

Unnamed: 0,atemp,casual,clear,cnt,dteday,fall,holiday,hum,instant,light_storm,...,spring,summer,temp,weather,weathersit,weekday,windspeed,winter,workingday,yr
0,0.363625,331,1,985,734138,0,0,0.805833,1,0,...,0,0,0.344167,clear,2,6,0.160446,1,0,0
1,0.353739,131,1,801,734139,0,0,0.696087,2,0,...,0,0,0.363478,clear,2,0,0.248539,1,0,0
2,0.189405,120,0,1349,734140,0,0,0.437273,3,0,...,0,0,0.196364,misty,1,1,0.248309,1,1,0
3,0.212122,108,0,1562,734141,0,0,0.590435,4,0,...,0,0,0.200000,misty,1,2,0.160296,1,1,0
4,0.229270,82,0,1600,734142,0,0,0.436957,5,0,...,0,0,0.226957,misty,1,3,0.186900,1,1,0
5,0.233209,88,0,1606,734143,0,0,0.518261,6,0,...,0,0,0.204348,misty,1,4,0.089565,1,1,0
6,0.208839,148,1,1510,734144,0,0,0.498696,7,0,...,0,0,0.196522,clear,2,5,0.168726,1,1,0
7,0.162254,68,1,959,734145,0,0,0.535833,8,0,...,0,0,0.165000,clear,2,6,0.266804,1,0,0
8,0.116175,54,0,822,734146,0,0,0.434167,9,0,...,0,0,0.138333,misty,1,0,0.361950,1,0,0
9,0.150888,41,0,1321,734147,0,0,0.482917,10,0,...,0,0,0.150833,misty,1,1,0.223267,1,1,0
