In [24]:
# New functions to grab monthly open price data from yahoo
# Works as of April 2018
# Cleaning function for use with pyfolio

import pandas as pd
import datetime
import requests
def get_yahoo_finance_data(symbol, start=None, end=None, interval='1d'):
    end = pd.to_datetime(end) or datetime.datetime.utcnow()
    start = pd.to_datetime(start) or end - datetime.timedelta(days=24)
    url = 'https://query1.finance.yahoo.com/v8/finance/chart/{symbol}?symbol={symbol}&period1={start}&period2={end}&interval={interval}&includePrePost=true&events=div%7Csplit%7Cearn&corsDomain=finance.yahoo.com'.format(
        symbol=symbol,
        start=start.strftime('%s'),
        end=end.strftime('%s'),
        interval=interval
    )
    resp = requests.get(url)
    resp.raise_for_status()
    
    rt = resp.json()
    
    return resp.json()

def get_correct_data_format(resp_json):
    symbol = rtjson['chart']['result'][0]['meta']['symbol']
    timestamps = rtjson['chart']['result'][0]['timestamp']
    open_price = rtjson['chart']['result'][0]['indicators']['quote'][0]['open']
    close_price = rtjson['chart']['result'][0]['indicators']['quote'][0]['close']
    
    ts = pd.DataFrame({'open' : pd.Series(open_price, index=pd.to_datetime(timestamps, unit='s')),
                       'close' : pd.Series(close_price, index=pd.to_datetime(timestamps, unit='s'))})
    ts.name = symbol
    ts.index = pd.to_datetime(ts.index, unit='s')
    
    return ts
    
def get_symbol_returns_custom(new_df, symbol):
    rets = new_df[['close']]
    rets = rets.shift(-1)
    rets.iloc[-1]['close'] = new_df.tail(1)['open']
    rets = rets.shift(1) / rets - 1
    rets = rets.dropna()
    rets['tick_datetime'] = pd.to_datetime(rets.index)   
    rets.set_index('tick_datetime',inplace=True)
    rets.index = rets.index.tz_localize("UTC")
    rets.columns = [new_df.name]
    rets = pd.Series(rets[symbol])
    return rets
    

In [19]:
rtjson = get_yahoo_finance_data('FB', start='2012-05-22', end='2014-05-16')

In [21]:
its = get_correct_data_format(rtjson)

In [25]:
ts = get_symbol_returns_custom(its, 'FB')

In [26]:
ts.head()

tick_datetime
2012-05-23 13:30:00+00:00   -0.031184
2012-05-24 13:30:00+00:00    0.035099
2012-05-25 13:30:00+00:00    0.106449
2012-05-29 13:30:00+00:00    0.023058
2012-05-30 13:30:00+00:00   -0.047635
Name: FB, dtype: float64

In [27]:
[ts.size, ts.name, ts.index, ts.dtype]

[497,
 'FB',
 DatetimeIndex(['2012-05-23 13:30:00+00:00', '2012-05-24 13:30:00+00:00',
                '2012-05-25 13:30:00+00:00', '2012-05-29 13:30:00+00:00',
                '2012-05-30 13:30:00+00:00', '2012-05-31 13:30:00+00:00',
                '2012-06-01 13:30:00+00:00', '2012-06-04 13:30:00+00:00',
                '2012-06-05 13:30:00+00:00', '2012-06-06 13:30:00+00:00',
                ...
                '2014-05-02 13:30:00+00:00', '2014-05-05 13:30:00+00:00',
                '2014-05-06 13:30:00+00:00', '2014-05-07 13:30:00+00:00',
                '2014-05-08 13:30:00+00:00', '2014-05-09 13:30:00+00:00',
                '2014-05-12 13:30:00+00:00', '2014-05-13 13:30:00+00:00',
                '2014-05-14 13:30:00+00:00', '2014-05-15 13:30:00+00:00'],
               dtype='datetime64[ns, UTC]', name='tick_datetime', length=497, freq=None),
 dtype('float64')]