In [81]:
from __future__ import print_function, division

import os
from datetime import datetime

import numpy as np
import pandas as pd
import pandas_datareader.data as pdr
import quandl
from dateutil import parser
from pandas.tseries.offsets import BDay
from pandas_datareader import data, wb

In [82]:
# Vectorized string methods on a Series of names
monte = pd.Series(
    [
        'Graham Chapman',
        'John Cleese',
        'Terry Gilliam',
        'Eric Idle',
        'Terry Jones',
        'Michael Palin',
    ]
)
# Regular expression: whole strings whose first character is not an
# upper-case vowel and whose last character is not a lower-case vowel
monte.str.findall(r'^[^AEIOU].*[^aeiou]$')
# Element-wise slicing: first three characters of every name
monte.str.slice(0, 3)
# Last whitespace-separated token of each entry (displayed as the cell result)
monte.str.split().str[-1]

0    Chapman
1     Cleese
2    Gilliam
3       Idle
4      Jones
5      Palin
dtype: object

In [83]:
# Indicator Variables
# Start from the `monte` names and attach a column of '|'-delimited codes.
full_monte = pd.DataFrame({'name': monte}).assign(
    info=['B|C|D', 'B|D', 'A|C', 'B|D', 'B|C', 'B|C|D'])
full_monte

Unnamed: 0,name,info
0,Graham Chapman,B|C|D
1,John Cleese,B|D
2,Terry Gilliam,A|C
3,Eric Idle,B|D
4,Terry Jones,B|C
5,Michael Palin,B|C|D


In [84]:
# Rebuild both objects from scratch so this cell runs on its own.
monte = pd.Series(
    [
        'Graham Chapman',
        'John Cleese',
        'Terry Gilliam',
        'Eric Idle',
        'Terry Jones',
        'Michael Palin',
    ]
)
# One row per name; 'info' holds '|'-delimited codes for that row.
full_monte = pd.DataFrame(
    dict(
        name=monte,
        info=['B|C|D', 'B|D', 'A|C', 'B|D', 'B|C', 'B|C|D'],
    )
)
full_monte

Unnamed: 0,name,info
0,Graham Chapman,B|C|D
1,John Cleese,B|D
2,Terry Gilliam,A|C
3,Eric Idle,B|D
4,Terry Jones,B|C
5,Michael Palin,B|C|D


In [85]:
# Same frame as before: names plus their '|'-delimited codes.
full_monte = pd.DataFrame(
    data={
        'name': monte,
        'info': ['B|C|D', 'B|D', 'A|C', 'B|D', 'B|C', 'B|C|D'],
    }
)
# Split every 'info' string on '|' and expand the codes into 0/1 columns.
full_monte.loc[:, 'info'].str.get_dummies(sep='|')

Unnamed: 0,A,B,C,D
0,0,1,1,1
1,0,1,0,1
2,1,0,1,0
3,0,1,0,1
4,0,1,1,0
5,0,1,1,1


In [86]:
# Build a datetime explicitly from its components
datetime(2015, 7, 4)
# Let dateutil parse a free-form date string
date = parser.parse("4th of July, 2015")
# Full weekday name of the parsed datetime
format(date, '%A')

'Saturday'

In [87]:
## Typed Arrays of Times: NumPy's datetime64
# A date-only string is stored with day resolution
date = np.array('2015-07-04', dtype='datetime64')
# Vectorized arithmetic: the integers 0..11 are added in the array's
# own unit (days), giving twelve consecutive dates
np.arange(12) + date
# A single timestamp with an explicit nanosecond unit
np.datetime64('2015-07-04 12:59:59.50', 'ns')

numpy.datetime64('2015-07-04T12:59:59.500000000')

In [88]:
# Pandas Time Series: Indexing by Time
# pandas (and the __future__ flags) are already imported in the notebook's
# first cell, so they are not re-imported here.
index = pd.DatetimeIndex(
    ['2014-07-04', '2014-08-04', '2015-07-04', '2015-08-04'])
# NOTE(review): `data` rebinds the name of the `pandas_datareader.data`
# module imported in the first cell. The name is kept because cells outside
# this view may rely on it - confirm before renaming.
data = pd.Series([0, 1, 2, 3], index=index)
# Label-based slice on the DatetimeIndex; both endpoints are included
data.loc['2014-07-04':'2015-07-04']
# Partial-string indexing: passing only a year selects every entry in it
data.loc['2015']

2015-07-04    2
2015-08-04    3
dtype: int64

In [89]:
# Pandas Time Series Data Structures
# `datetime` and `pd` come from the notebook's first cell; the redundant
# per-cell imports were removed.
# pd.to_datetime is given one datetime object and four differently formatted
# date strings and returns them as a single datetime index.
dates = pd.to_datetime([
    datetime(2015, 7, 3),
    '4th of July, 2015',
    '2015-Jul-6',
    '07-07-2015',
    '20150708',
])
# The same dates expressed as daily periods
dates.to_period('D')
# Subtracting the first entry yields the offset of every date from it
dates - dates[0]

TimedeltaIndex(['0 days', '1 days', '3 days', '4 days', '5 days'], dtype='timedelta64[ns]', freq=None)

In [90]:
# Frequencies and Offsets
#   Frequency Table:
'''Alias   Description
B       business day frequency
C       custom business day frequency (experimental)
D       calendar day frequency
W       weekly frequency
M       month end frequency
BM      business month end frequency
CBM     custom business month end frequency
MS      month start frequency
BMS     business month start frequency
CBMS    custom business month start frequency
Q       quarter end frequency
BQ      business quarter end frequency
QS      quarter start frequency
BQS     business quarter start frequency
A       year end frequency
BA      business year end frequency
AS      year start frequency
BAS     business year start frequency
BH      business hour frequency
H       hourly frequency
T, min  minutely frequency
S       secondly frequency
L, ms   milliseconds
U, us   microseconds
N       nanoseconds'''
# NOTE: pandas >= 2.2 deprecates the upper-case sub-daily aliases
# (H, T, S, L, U, N) in favour of h, min, s, ms, us, ns. The call below uses
# the spellings that do not trigger the deprecation warning.
# This generates a frequency of 2 hours 30 minutes
pd.timedelta_range(0, periods=9, freq="2h30min")
#'B' == BDay() function
pd.date_range('2015-07-01', periods=5, freq='B')

DatetimeIndex(['2015-07-01', '2015-07-02', '2015-07-03', '2015-07-06',
               '2015-07-07'],
              dtype='datetime64[ns]', freq='B')

In [91]:
# Resampling, Shifting, and Windowing
# The Quandl API key is read from the QUANDL_API_KEY environment variable so
# that no credential is stored in the notebook. A missing variable raises a
# KeyError naming it. The key that was previously committed here should be
# treated as leaked and revoked.
quandl.ApiConfig.api_key = os.environ["QUANDL_API_KEY"]
symbol = 'WIKI/AAPL'
# Network call: downloads the table for `symbol`
df = quandl.get(symbol)
# Label lookup of the row for a single date
df.loc['2015-01-02']

Open           1.113900e+02
High           1.114400e+02
Low            1.073500e+02
Close          1.093300e+02
Volume         5.320463e+07
Ex-Dividend    0.000000e+00
Split Ratio    1.000000e+00
Adj. Open      1.058210e+02
Adj. High      1.058685e+02
Adj. Low       1.019829e+02
Adj. Close     1.038640e+02
Adj. Volume    5.320463e+07
Name: 2015-01-02 00:00:00, dtype: float64