# Module 5 - Time Series

In [None]:
import pandas as pd
import numpy as np
import seaborn; seaborn.set()
import matplotlib.pyplot as plt

from datetime import datetime

%matplotlib inline

### Numpy datetime

In [None]:
#wrap a single calendar day in a 0-d datetime64 array
#(the 'datetime64' string alias names the same dtype as np.datetime64)
date = np.array('2015-07-04', dtype='datetime64')
date

In [None]:
#creates 12 consecutive dates starting from July 4, 2015
#(adds the integers 0..11 to the datetime64 value held in `date`)
date + np.arange(12)

In [None]:
#day-based datetime: a date-only string gives a value with day resolution
np.datetime64('2015-07-04')

In [None]:
#minute-based datetime: the unit is inferred from the precision of the string
#NOTE: datetime64 is timezone-naive in NumPy >= 1.11, so the value is kept as
#written and is not shifted to the local computer's time zone
np.datetime64('2015-07-04 12:00')

### Pandas datetime

In [None]:
#parse a free-form date string with pandas
date = pd.to_datetime("4th of July, 2015")
date

In [None]:
#output day of the week as its full name (the '%A' format directive)
date.strftime('%A')

In [None]:
#create 12 consecutive dates starting from July 4, 2015
#by adding day offsets 0..11 to `date`
date + pd.to_timedelta(np.arange(12), 'D')

In [None]:
#build a date-labelled Series: four timestamps as the index, 0..3 as the values
index = pd.DatetimeIndex(
    ['2014-07-04', '2014-08-04', '2015-07-04', '2015-08-04']
)

#a Series is a single labelled column, like one column taken from a DataFrame
data = pd.Series(data=list(range(4)), index=index)
data

In [None]:
#date labels can be sliced like any other index labels
#label slices are inclusive: the end date is part of the result
data.loc['2014-07-04':'2015-07-04']

In [None]:
#a year-only string selects every row dated within 2015
data.loc['2015']

In [None]:
#to_datetime accepts a mix of datetime objects and differently formatted strings
dates = pd.to_datetime([
    datetime(2015, 7, 3),  #datetime object
    '4th of July 2015',    #free-form text
    '2015-Jul-6',          #abbreviated month name
    '07-07-2015',          #dash-separated numeric date
    '20150708',            #compact digits only
])
dates

In [None]:
#convert the timestamps to periods with a 'day' frequency
dates.to_period('D')

In [None]:
#calculate # of days between 07-03-2015 (the first entry) and each date
#subtracting one date from the whole index gives one time difference per entry
dates - dates[0]

In [None]:
#every date from the start date through the end date
#frequency defaults to one entry per day
pd.date_range(start='2015-07-03', end='2015-07-10')

In [None]:
#same kind of range, but given a start date and a number of periods (how many entries)
pd.date_range(start='2015-07-03', periods=8)

In [None]:
#make range using hourly frequency
#default start at 00:00 (midnight)
#use the lowercase 'h' alias: the uppercase 'H' spelling is deprecated since pandas 2.2
pd.date_range('2015-07-03', periods=8, freq='h')

In [None]:
#eight consecutive monthly periods, beginning with July 2015
pd.period_range(start='2015-07', periods=8, freq='M')

In [None]:
#create only hour frequencies (no date attached)
#seconds also included
#use the lowercase 'h' alias: the uppercase 'H' spelling is deprecated since pandas 2.2
pd.timedelta_range(0, periods=10, freq='h')

In [None]:
#create 2 hour 30 minute intervals
#'h' and 'min' replace the 'H' and 'T' aliases, which are deprecated since pandas 2.2
pd.timedelta_range(0, periods=9, freq='2h30min')

In [None]:
#create business day offsets
#BDay is the business-day offset, so the range advances one business day per step
from pandas.tseries.offsets import BDay

pd.date_range('2015-07-01', periods=5, freq=BDay())

### Visualize time series data

In [None]:
#load the bicycle counts, using the "Date" column as the index
#parse_dates=True asks pandas to parse that index as dates while reading
data = pd.read_csv(
    'datasets/FremontBridgeBicycle.csv',
    index_col="Date",
    parse_dates=True,
)
data.head()

In [None]:
#summary statistics for the loaded columns
data.describe()

In [None]:
#rename columns with shorter name
#create a new column 'Total' with combined values of west and east columns
#drop any 'Total' left over from an earlier run first: without this, re-executing
#the cell fails because two names are assigned to three columns
data = data.drop(columns='Total', errors='ignore')
data.columns = ['West', 'East']
data['Total'] = data.eval('West + East')

In [None]:
#line plot of the data, one line per column
#(DataFrame.plot() draws lines by default, not bars)
#label the returned Axes directly; the trailing ';' hides the text repr
data.plot().set_ylabel('Hourly Bicycle Count');

In [None]:
#the hourly series is too dense to read, so total the counts per week instead
weekly = data.resample('W').sum()
#one line style per column, applied in column order
weekly_ax = weekly.plot(style=[':', '--', '-'])
weekly_ax.set_ylabel('Weekly Bicycle Count')

People tend to bike more in the summer than in the winter.

In [None]:
#profile of an average day: mean count for each time of day
by_time = data.groupby(data.index.time).mean()
#6 tick positions spaced 4 hours apart, expressed in seconds
hourly_ticks = np.arange(6) * (4 * 60 * 60)
by_time.plot(style=[':', '--', '-'], xticks=hourly_ticks)

Bike usage peaks around 8am and 5pm

In [None]:
#average usage for each day of the week
#the grouped means are relabelled with day names in Monday-to-Sunday order
by_weekday = (
    data.groupby(data.index.dayofweek)
    .mean()
    .set_axis(['Mon', 'Tue', 'Wed', 'Thur', 'Fri', 'Sat', 'Sun'], axis=0)
)
by_weekday.plot(style=[':', '--', '-'])

Bike usage is highest on weekdays and drops off on weekends.

In [None]:
#label every row 'Weekday' (weekday number below 5) or 'Weekend' (otherwise),
#then average by that label and by time of day
is_workday = data.index.weekday < 5
weekend = np.where(is_workday, 'Weekday', 'Weekend')
by_time = data.groupby([weekend, data.index.time]).mean()

In [None]:
#side-by-side panels: average daily profile on weekdays (left) and weekends (right)
fig, ax = plt.subplots(1, 2, figsize=(14, 5))
for panel, day_type, panel_title in zip(ax, ['Weekday', 'Weekend'], ['Weekdays', 'Weekends']):
    by_time.loc[day_type].plot(ax=panel, title=panel_title,
                               xticks=hourly_ticks, style=[':', '--', '-'])