In [1]:
import json
import os
import time
from datetime import datetime
from time import mktime

import pandas as pd
import requests

# Raise pandas' display limits so the wide per-symbol frames built later in
# this notebook are shown without row/column truncation.
for _option, _value in (
    ('display.max_rows', 5000),
    ('display.max_columns', 500),
    ('display.width', 1000),
):
    pd.set_option(_option, _value)

In [2]:
class PolgonData(object):
    """Small client for the Polygon.io REST endpoints used in this notebook.

    Every method issues one HTTP GET and returns the decoded JSON body.

    The API key is no longer embedded in the source. Pass it as ``api_key`` or
    export it in the ``POLYGON_API_KEY`` environment variable before creating
    an instance.
    """

    BASE_URL = 'https://api.polygon.io'
    API_KEY_ENV_VAR = 'POLYGON_API_KEY'
    # Seconds to wait for the server; without a timeout requests can block forever.
    REQUEST_TIMEOUT = 30

    def __init__(self, api_key=None):
        """Store the query parameters that authenticate every request.

        Args:
            api_key: Polygon API key. When omitted, the value of the
                ``POLYGON_API_KEY`` environment variable is used.

        Raises:
            ValueError: if no key is supplied and the environment variable is
                unset or empty.
        """
        if api_key is None:
            api_key = os.environ.get(self.API_KEY_ENV_VAR)
        if not api_key:
            raise ValueError(
                'Polygon API key missing: pass api_key or set the '
                + self.API_KEY_ENV_VAR + ' environment variable'
            )
        self.params = (('apiKey', api_key),)

    def _get_json(self, requesturl):
        """GET ``requesturl`` with the API key attached and decode the JSON body."""
        response = requests.get(requesturl, params=self.params, timeout=self.REQUEST_TIMEOUT)
        return json.loads(response.text)

    def PolygonLastTrades(self, symbol):
        """Return the decoded response of ``/v1/last/stocks/<symbol>``."""
        return self._get_json(self.BASE_URL + '/v1/last/stocks/' + symbol)

    def PolygonHistoricTrades(self, date=None, symbol=None, startTS=None, endTS=None, limitresult=10):
        """Return one page of ``/v2/ticks/stocks/nbbo/<symbol>/<date>``.

        Args:
            date: Day to query, as the string placed in the URL path.
            symbol: Ticker placed in the URL path.
            startTS: Optional ``timestamp`` query value. When truthy the call
                is logged as a paginated request.
            endTS: Optional ``timestampLimit`` query value; omitted from the
                URL when None.
            limitresult: ``limit`` query value. May be an int or a string.

        Returns:
            The decoded JSON body.
        """
        query = []
        if startTS:
            # For Getting Paginated Request
            query.append(('timestamp', startTS))
            print("Paginated Request For = " + symbol)
        else:
            print("First Request For = " + symbol)
        if endTS is not None:
            query.append(('timestampLimit', endTS))
        query.append(('limit', limitresult))

        # str.format accepts ints and strings alike, so the int default for
        # limitresult no longer raises TypeError while the URL is built.
        querystring = '&'.join('{}={}'.format(key, value) for key, value in query)
        requesturl = self.BASE_URL + '/v2/ticks/stocks/nbbo/' + symbol + '/' + date + '?' + querystring
        print(requesturl)
        return self._get_json(requesturl)

    def PolygonDailyOpenClose(self, date=None, symbol=None):
        """Return the decoded response of ``/v1/open-close/<symbol>/<date>``."""
        return self._get_json(self.BASE_URL + '/v1/open-close/' + symbol + '/' + date)

    def PolygonAggregdateData(self, symbol='AAPL', startdate='2020-02-14', enddate='2020-02-15',
                              multiplier=1, timespan='minute'):
        """Return the decoded response of the ``/v2/aggs/ticker/.../range/...`` endpoint.

        The defaults reproduce the URL that used to be hard-coded (AAPL,
        1-minute bars, 2020-02-14 to 2020-02-15).

        Args:
            symbol: Ticker placed in the URL path.
            startdate: First date of the range, as placed in the URL path.
            enddate: Last date of the range, as placed in the URL path.
            multiplier: Size of the aggregation window, in ``timespan`` units.
            timespan: Unit of the aggregation window.
        """
        requesturl = '{}/v2/aggs/ticker/{}/range/{}/{}/{}/{}'.format(
            self.BASE_URL, symbol, multiplier, timespan, startdate, enddate
        )
        return self._get_json(requesturl)

In [3]:
from datetime import datetime

class DateTimeManipulation(object):
    """Conversions between date/time strings, naive datetimes and the
    nanosecond epoch timestamps used in the Polygon tick requests.

    All conversions treat naive datetimes as UTC: the epoch used is
    1970-01-01 00:00:00 and `getHumanTime` decodes with `time.gmtime`.
    """

    def __init__(self, date=None):
        """Remember `date` ('YYYY-MM-DD'), used by `convertStringDateToTS`."""
        self.date = date

    def unix_time_millis(self, dt):
        """Return `dt` as a nanosecond epoch timestamp string.

        The value is computed at millisecond resolution (sub-millisecond
        microseconds are dropped) and padded with six zeros, so despite the
        method name the returned string is in nanoseconds.

        Integer arithmetic is used throughout: multiplying the float
        `total_seconds()` by 1000 and truncating could lose a millisecond to
        rounding.
        """
        epoch = datetime(1970, 1, 1)
        delta = dt - epoch
        millis = (delta.days * 86400 + delta.seconds) * 1000 + delta.microseconds // 1000
        return str(millis) + '000000'

    def stringTimeToDatetime(self, date=None, time=None):
        """Parse 'YYYY-MM-DD' plus 'H:M:S' strings into a naive datetime."""
        return datetime.strptime(date + ' ' + time, '%Y-%m-%d %H:%M:%S')

    def convertStringDateToTS(self, starttime='9:30:00', endtime='4:00:00'):
        """Return nanosecond timestamp strings for `self.date` at both times.

        Returns:
            dict with keys 'marketOpenTS' (from `starttime`) and
            'marketCloseTS' (from `endtime`).
        """
        # NOTE(review): the default endtime '4:00:00' parses as 04:00, which is
        # earlier than the default starttime - presumably '16:00:00' was meant.
        # The caller in this notebook passes both values explicitly.
        marketOpen = self.stringTimeToDatetime(date=self.date, time=starttime)
        marketClose = self.stringTimeToDatetime(date=self.date, time=endtime)
        return {
            'marketOpenTS': self.unix_time_millis(marketOpen),
            'marketCloseTS': self.unix_time_millis(marketClose),
        }

    def getHumanTime(self, ts, getMilliSecondsAlso=False):
        """Convert a nanosecond epoch timestamp to a naive UTC datetime.

        Args:
            ts: Nanoseconds since the epoch.
            getMilliSecondsAlso: When True, also return the sub-second
                remainder. That remainder is in nanoseconds, not milliseconds.

        Returns:
            The datetime truncated to whole seconds, or a
            `(datetime, remainder)` tuple when `getMilliSecondsAlso` is True.
            None if an AttributeError occurs during conversion (the inputs are
            printed first).
        """
        # Split before entering the try block so the handler below can always
        # print `s` and `ms` without hitting an unbound local.
        s, ms = divmod(ts, 1000000000)
        try:
            humanTime = datetime(*time.gmtime(s)[:6])
        except AttributeError:
            print("Attribute Error Occured")
            print(ts)
            print(s)
            print(ms)
            return None
        if getMilliSecondsAlso:
            return humanTime, ms
        return humanTime
            

In [4]:
# Load the ETF constituent list and turn the 'Weighting' column from percent
# strings such as '12.3%' into fractions.
holdings = pd.read_csv("XLK-holdings20202002.csv")
holdings['Weighting'] = (
    holdings['Weighting']
    .map(lambda pct: pct.replace('%', ''))
    .astype(float)
    / 100
)

# Symbol -> fractional weight, including the CASH row.
weights = dict(zip(holdings['Symbol'], holdings['Weighting']))

# Cash position = CASH weight times 28583351000.
cashvalue = holdings[holdings['Symbol'] == 'CASH'].get('Weighting').item() * 28583351000

# Tickers to download: every holding plus the ETF itself, without the CASH row.
symbols = list(holdings['Symbol'].values) + ['XLK']
symbols.remove('CASH')

# Process the ticker data

XLK: Technology Select Sector SPDR Fund <br>
Inception Date: 1998-12-16 <br>
Fund Holdings as of: 2020-02-21 <br>
Total Assets Under Management (in thousands): 28583351 <br>
Shares Outstanding: 287356000 <br>
Expense Ratio: 0.13% <br>
Tracks This Index: Technology Select Sector Index <br>
ETFdb.com Category: Technology Equities <br>
Issuer: State Street SPDR <br>
Structure: ETF <br>

In [5]:
# Run configuration: the day whose quotes are downloaded, the day before it
# (used for the previous-close lookup), and the time window requested.
date, previousdate = '2020-02-20', '2020-02-19'
starttime, endtime = '9:30:00', '17:00:00'

class GetETFFrame(object):
    """Downloads one day of quote ticks per symbol and keeps them in memory.

    The day and request window come from the notebook-level `date`,
    `starttime` and `endtime` variables, read when the object is created.
    """

    def __init__(self):
        self.tickHistData = {}
        self.date = date
        self.starttime = starttime
        self.endtime = endtime
        # Pagination continues until the last tick received reaches this time.
        endtimeLoop = '16:00:00'
        self.extractDataTillTime = DateTimeManipulation().stringTimeToDatetime(date=self.date, time=endtimeLoop)
        self.datetimeObj = DateTimeManipulation(self.date)
        self.marketTimeStamps = self.datetimeObj.convertStringDateToTS(starttime=self.starttime, endtime=self.endtime)

    def getDataFromPolygon(self, symbol):
        """Fetch all quote pages for `symbol` and store them in `self.tickHistData`.

        The first request has no start timestamp. Further pages are requested
        from the last received timestamp until that timestamp reaches
        `self.extractDataTillTime`.

        Pagination also stops when a page is empty or does not move the last
        timestamp forward. Previously an empty page raised IndexError and a
        non-advancing page repeated the same request forever.

        The stored response always has a 'results' list; it is empty when the
        first response carried no results.
        """
        client = PolgonData()
        closeTS = self.marketTimeStamps['marketCloseTS']
        pageLimit = str(50000)

        # First request
        data = client.PolygonHistoricTrades(date=self.date, symbol=symbol, endTS=closeTS, limitresult=pageLimit)
        results = data.get('results') or []
        data['results'] = results
        if not results:
            print("No results returned for = " + symbol)
            self.tickHistData[symbol] = data
            return

        # Last timestamp received, raw (nanoseconds) and as a datetime
        lastUnixTimeStamp = results[-1]['t']
        lastHumanTimeStamp = self.datetimeObj.getHumanTime(lastUnixTimeStamp)

        # Paginated requests while the data has not reached extractDataTillTime
        while lastHumanTimeStamp < self.extractDataTillTime:
            print(lastHumanTimeStamp)
            page = client.PolygonHistoricTrades(date=self.date, symbol=symbol, startTS=str(lastUnixTimeStamp),
                                                endTS=closeTS, limitresult=pageLimit)
            pageResults = page.get('results') or []
            if not pageResults or pageResults[-1]['t'] <= lastUnixTimeStamp:
                # Nothing newer is available; requesting again would return the same page.
                break
            results.extend(pageResults)
            lastUnixTimeStamp = pageResults[-1]['t']
            lastHumanTimeStamp = self.datetimeObj.getHumanTime(lastUnixTimeStamp)

        self.tickHistData[symbol] = data

    def returnDf(self):
        """Return the dict mapping each fetched symbol to its accumulated response."""
        return self.tickHistData
    
import concurrent.futures

# Download the quotes for every symbol, one symbol after another.
# The ProcessPoolExecutor that used to be created here was never given any
# work and never shut down, so it has been removed. Submitting
# ob.getDataFromPolygon to worker processes would not work as written anyway:
# the method stores its result on ob.tickHistData, and each worker process
# would update its own copy of `ob`.
ob = GetETFFrame()
for symbol in symbols:
    ob.getDataFromPolygon(symbol)
tickHistData = ob.returnDf()

First Request For = MSFT
https://api.polygon.io/v2/ticks/stocks/nbbo/MSFT/2020-02-20?timestampLimit=1582218000000000000&limit=50000
2020-02-20 15:11:55
Paginated Request For = MSFT
https://api.polygon.io/v2/ticks/stocks/nbbo/MSFT/2020-02-20?timestamp=1582211515555489693&timestampLimit=1582218000000000000&limit=50000
First Request For = AAPL
https://api.polygon.io/v2/ticks/stocks/nbbo/AAPL/2020-02-20?timestampLimit=1582218000000000000&limit=50000
2020-02-20 15:05:25
Paginated Request For = AAPL
https://api.polygon.io/v2/ticks/stocks/nbbo/AAPL/2020-02-20?timestamp=1582211125781559869&timestampLimit=1582218000000000000&limit=50000
First Request For = V
https://api.polygon.io/v2/ticks/stocks/nbbo/V/2020-02-20?timestampLimit=1582218000000000000&limit=50000
First Request For = MA
https://api.polygon.io/v2/ticks/stocks/nbbo/MA/2020-02-20?timestampLimit=1582218000000000000&limit=50000
First Request For = INTC
https://api.polygon.io/v2/ticks/stocks/nbbo/INTC/2020-02-20?timestampLimit=1582218000

First Request For = WDC
https://api.polygon.io/v2/ticks/stocks/nbbo/WDC/2020-02-20?timestampLimit=1582218000000000000&limit=50000
First Request For = SWKS
https://api.polygon.io/v2/ticks/stocks/nbbo/SWKS/2020-02-20?timestampLimit=1582218000000000000&limit=50000
First Request For = CDW
https://api.polygon.io/v2/ticks/stocks/nbbo/CDW/2020-02-20?timestampLimit=1582218000000000000&limit=50000
First Request For = HPE
https://api.polygon.io/v2/ticks/stocks/nbbo/HPE/2020-02-20?timestampLimit=1582218000000000000&limit=50000
First Request For = KEYS
https://api.polygon.io/v2/ticks/stocks/nbbo/KEYS/2020-02-20?timestampLimit=1582218000000000000&limit=50000
First Request For = MXIM
https://api.polygon.io/v2/ticks/stocks/nbbo/MXIM/2020-02-20?timestampLimit=1582218000000000000&limit=50000
First Request For = FTNT
https://api.polygon.io/v2/ticks/stocks/nbbo/FTNT/2020-02-20?timestampLimit=1582218000000000000&limit=50000
First Request For = AKAM
https://api.polygon.io/v2/ticks/stocks/nbbo/AKAM/2020-02-

NameError: name 'lastprieforNaFilling' is not defined

In [6]:
# Previous day's 'close' value per symbol, taken from the daily open/close
# endpoint; used further down to fill prices that are still missing.
lastprieforNaFilling = dict()
ob = PolgonData()
for symbol in symbols:
    lastprieforNaFilling[symbol] = ob.PolygonDailyOpenClose(date=previousdate, symbol=symbol)['close']

# Meaning of Each Column
## Data Mapping 


 'i': {'name': 'indicators', 'type': '[]int'}, <br>
 's': {'name': 'bid_size', 'type': 'int'}, <br>
 'x': {'name': 'bid_exchange', 'type': 'int'}, <br>
 'P': {'name': 'ask_price', 'type': 'float64'}, <br>
 'S': {'name': 'ask_size', 'type': 'int'}, <br>
 't': {'name': 'sip_timestamp', 'type': 'int64'}, <br>
 'f': {'name': 'trf_timestamp', 'type': 'int64'}, <br>
 'c': {'name': 'conditions', 'type': '[]int'}, <br>
 'z': {'name': 'tape', 'type': 'int'}, <br>
 'X': {'name': 'ask_exchange', 'type': 'int'}, <br>
 'y': {'name': 'participant_timestamp', 'type': 'int64'}, <br>
 'q': {'name': 'sequence_number', 'type': 'int'}, <br>
 'p': {'name': 'bid_price', 'type': 'float64'} <br>
 
 ### Clean the Holdings data

In [7]:
# One DataFrame per symbol from its downloaded 'results' records, tagged with
# the symbol, then stacked into a single frame.
appendData = []
for key, value in tickHistData.items():
    df = pd.DataFrame.from_dict(value['results']).assign(Symbol=key)
    appendData.append(df)

finalDF = pd.concat(appendData)

In [8]:
# Clean the DataFrame
# 't' and 'y' hold nanosecond epoch timestamps. Convert them to datetimes
# truncated to whole seconds (UTC), as DateTimeManipulation.getHumanTime does,
# but vectorized instead of building a helper object for every row.
finalDF['t'] = pd.to_datetime(finalDF['t'] // 1000000000, unit='s')
finalDF['y'] = pd.to_datetime(finalDF['y'] // 1000000000, unit='s')

# 'P' is the ask price and 'p' the bid price (see the data mapping above).
finalDF['Spread'] = finalDF['P'] - finalDF['p']
finalDF['MidPrice'] = (finalDF['P'] + finalDF['p']) / 2

In [9]:
# Mean mid price for every (hour, minute, symbol) combination.
dfGroupedByObject = (
    finalDF
    .groupby([finalDF['t'].dt.hour, finalDF['t'].dt.minute, finalDF['Symbol']])['MidPrice']
    .mean()
)

In [65]:
# Pivot the innermost index level (the symbol, level 2 of 3) into columns.
df = dfGroupedByObject.unstack()

In [66]:
# Carry each symbol's last known price forward into minutes without a quote.
# DataFrame.ffill() replaces the deprecated fillna(method='ffill') spelling.
df = df.ffill()

In [67]:
# Cells still empty after the forward fill get the per-symbol value from
# lastprieforNaFilling (keys are matched against the column names).
df = df.fillna(value=lastprieforNaFilling)

In [97]:
# Select the rows whose hour (index level 0) is 12. Hours are UTC, because the
# timestamps were decoded with gmtime.
# A direct lookup raises KeyError when that hour is missing. The previous loop
# silently left `group` pointing at the last hour in the frame in that case.
name = 12
group = df.loc[[name]]

In [98]:
# Take the ETF's own price series out of the constituents frame;
# pop() returns the column and removes it from `group`.
etfprice = group.pop('XLK')
group

Unnamed: 0_level_0,Symbol,AAPL,ACN,ADBE,ADI,ADP,ADS,ADSK,AKAM,AMAT,AMD,ANET,ANSS,APH,AVGO,BR,CDNS,CDW,CRM,CSCO,CTSH,CTXS,DXC,FFIV,FIS,FISV,FLIR,FLT,FTNT,GLW,GPN,HPE,HPQ,IBM,INTC,INTU,IPGP,IT,JKHY,JNPR,KEYS,KLAC,LDOS,LRCX,MA,MCHP,MSFT,MSI,MU,MXIM,NLOK,NOW,NTAP,NVDA,ORCL,PAYC,PAYX,PYPL,QCOM,QRVO,SNPS,STX,SWKS,TEL,TXN,V,VRSN,WDC,WU,XLNX,XRX,ZBRA
t,t,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1
12,0,322.756852,215.93625,384.233167,125.05,45.98,107.25,209.8275,66.75,67.388333,58.06825,222.51,297.435,97.375,315.62,117.36,74.1,132.0,192.95,46.151667,68.55,57.89,29.336667,126.765,157.19,123.455,50.0,125.0,119.529,28.0,206.2985,14.553333,22.43,150.735,67.13,304.8475,61.505,153.935,170.85,24.185,99.086667,170.535,122.33,333.128333,344.593889,40.0,186.9275,187.2425,59.98,63.2,13.0,361.11,53.2025,314.35125,55.37,330.1925,83.5,123.583889,90.595,103.62,160.815,53.755,119.98125,93.29,133.4,213.3175,203.99,69.1875,25.7,45.5,37.05,116.5
12,1,322.882857,215.93625,383.99675,125.05,45.98,107.25,209.8275,66.75,67.388333,58.163182,222.51,297.435,97.375,315.62,117.36,74.1,132.0,192.95,46.118,68.55,57.89,29.336667,126.765,157.19,123.455,50.0,125.0,119.529,28.0,206.2985,14.553333,22.43,150.735,67.13,304.8475,61.505,153.935,170.85,24.185,99.125,170.535,122.33,333.128333,344.593889,40.0,186.9275,187.2425,59.956667,63.2,13.0,361.11,53.2025,314.333,55.37,330.1925,83.5,123.583889,90.595,103.62,161.35,53.755,119.98125,93.29,133.4,213.3175,203.99,69.495,25.7,45.5,37.05,116.5
12,2,322.9825,215.93625,383.995208,125.25,45.98,107.25,209.8275,66.75,67.388333,58.226957,222.51,297.435,97.375,315.62,117.36,74.1,132.0,192.95,46.118,68.55,57.89,29.336667,126.765,157.19,123.455,50.0,125.0,119.529,28.0,206.2985,14.553333,22.43,150.735,67.13,304.8475,61.505,153.935,170.85,24.185,99.125,170.535,122.33,333.375,344.593889,40.0,186.9275,187.2425,59.972222,63.2,13.0,361.11,53.2025,314.334091,55.37,330.1925,83.5,123.583889,90.595,103.62,161.466667,53.755,119.98125,93.29,133.4,213.3175,203.99,69.495,25.7,45.5,37.05,116.5
12,3,322.995714,215.93625,383.994286,125.25,45.98,107.25,209.8275,66.75,67.388333,58.283529,222.51,297.435,97.375,315.62,117.36,74.1,132.0,192.95,46.15,68.55,57.89,29.336667,126.765,157.19,123.455,50.0,125.0,119.529,28.0,206.2985,14.553333,22.43,150.735,67.129,304.8475,61.505,153.935,170.85,24.185,99.125,170.535,122.33,333.375,344.593889,40.0,186.9275,187.2425,60.075,63.2,13.0,361.11,53.2025,314.334091,55.37,330.1925,83.5,123.583889,90.595,103.62,160.983333,53.755,119.98125,93.29,133.4,213.3175,203.99,69.495,25.7,45.5,37.05,116.5
12,4,322.97375,215.93625,383.995625,125.25,45.98,107.25,209.8275,66.75,67.3,58.3365,222.51,297.435,97.375,315.62,117.36,74.1,132.0,192.95,46.15,68.55,57.89,29.336667,126.765,157.19,123.455,50.0,125.0,119.529,27.85,206.2985,14.553333,22.43,150.96,67.130833,304.8475,61.505,153.935,170.85,24.185,99.125,170.535,122.33,333.375,345.0,40.0,186.9275,187.2425,59.975,63.2,13.0,361.11,53.2025,314.7955,55.368571,330.1925,83.5,123.583889,90.58,103.62,162.075,53.755,119.98125,93.29,133.4,213.286,203.99,69.495,25.7,45.5,37.05,116.5
12,5,322.923,215.93625,383.996111,125.25,45.98,107.25,209.8275,66.75,67.3,58.325556,222.51,297.435,97.375,315.62,117.36,74.1,132.0,193.05,46.15,68.55,57.89,29.336667,126.765,157.19,123.455,50.0,125.0,119.529,27.85,206.2985,14.553333,22.43,150.96,67.128333,304.8475,61.505,153.935,170.85,24.185,99.125,170.535,122.33,333.375,345.0,40.0,186.86,187.2425,59.920714,63.2,13.0,361.11,53.2025,314.8485,55.450714,327.59,83.5,123.583889,90.58,103.62,162.075,53.755,119.98125,93.29,133.4,213.286,203.99,69.5,25.7,45.5,37.05,116.5
12,6,323.096818,215.93625,384.426429,125.25,45.98,107.25,209.8275,66.75,67.23,58.26125,222.51,297.435,97.375,315.62,117.36,74.1,132.0,193.05,46.15,68.55,57.89,29.336667,126.765,157.19,123.455,50.0,125.0,119.529,27.85,206.2985,14.553333,22.43,150.96,67.128333,304.8475,61.505,153.935,170.85,24.185,99.125,170.535,122.33,333.405,345.0,40.0,186.86,187.2425,59.920714,63.2,13.0,361.11,53.2025,314.746071,55.37,327.59,83.5,123.583889,90.58,103.62,162.075,53.755,119.98125,93.29,133.4,213.286,203.99,69.5,25.7,45.5,37.05,116.5
12,7,323.185556,215.93625,384.75,125.25,45.98,107.25,209.8275,66.75,67.31,58.271765,222.51,297.435,97.375,315.62,117.36,74.1,132.0,193.05,46.15,68.55,57.89,29.336667,126.765,157.19,123.455,50.0,125.0,119.529,27.85,206.2985,14.553333,22.43,150.976667,67.128333,304.8475,61.505,153.935,170.85,24.185,99.125,170.535,122.33,334.0,345.0,40.0,186.89625,187.2425,59.920714,63.2,13.0,361.11,53.2025,314.596875,55.37,327.59,83.5,123.583889,90.58,103.62,162.075,53.755,119.98125,93.29,133.4,213.286,203.99,69.5,25.7,45.5,37.05,116.5
12,8,323.132,215.93625,384.75,125.25,45.98,107.25,209.8275,66.75,67.31,58.274737,222.51,297.435,97.375,315.62,117.36,74.1,132.0,193.05,46.15,68.55,57.89,29.336667,126.765,157.19,123.455,50.0,125.0,119.529,27.85,206.2985,14.553333,22.43,150.976667,67.128333,304.8475,61.505,153.935,170.85,24.185,99.125,170.535,122.33,334.0,345.0,40.0,186.925,187.2425,60.003333,63.2,13.0,361.11,53.2025,314.5635,55.37,327.59,83.5,124.1405,90.58,103.62,162.075,53.755,119.98125,93.29,133.4,213.365,203.99,69.5,25.7,45.5,37.05,116.5
12,9,323.153333,215.93625,384.75,125.25,45.98,107.25,209.8275,66.75,67.31,58.267778,222.51,297.435,97.375,315.62,117.36,74.1,132.0,193.05,46.155,68.55,57.89,29.336667,126.765,157.19,123.455,50.0,125.0,119.529,27.85,206.2985,14.553333,22.43,150.976667,67.128333,304.8475,61.505,153.935,170.85,24.185,99.125,170.535,122.33,334.0,345.0,40.0,186.925,187.2425,60.03,63.2,13.0,361.11,53.2025,314.289286,55.37,327.59,83.5,124.1405,90.58,103.62,162.075,53.755,119.1,93.29,133.4,213.365,203.99,69.625,25.7,45.5,37.05,116.5


In [99]:
# Minute-over-minute percentage change per constituent; rows containing a NaN
# (such as the first row, which has no predecessor) are dropped.
group = group.pct_change().mul(100).dropna()

In [None]:
# Minute-over-minute percentage change of the ETF price, pivoted so the
# minute (index level 1) becomes the columns.
etfpricechange = etfprice.pct_change().dropna().mul(100).unstack(level=1)

# Weighted sum of the constituent returns. Multiplying by a Series aligns the
# weights with the column names: symbols without a weight, and weights without
# a column (e.g. CASH), become NaN and are skipped by sum().
# The previous assign(**weights).mul(group) left any column lacking a weight
# untouched and then multiplied it by itself, adding its square to the total.
netassetvaluereturn = (
    group
    .mul(pd.Series(weights), axis='columns')
    .sum(axis=1)
    .unstack(level=1)
)


In [93]:
# Stack the ETF price, ETF return and NAV return rows, then transpose so each
# minute is a row and each measure a column.
ds = pd.concat(
    [etfprice.unstack(level=1), etfpricechange, netassetvaluereturn],
    axis=0,
).transpose()
ds.columns = ['ETF Price', 'ETF Change Price %', 'Net Asset Value Change%']

In [94]:
# Difference between the ETF return and the NAV return (both in percent),
# converted to dollars at the ETF price.
ds['Arbitrage in $'] = ds['ETF Price'] * (ds['ETF Change Price %'] - ds['Net Asset Value Change%']) / 100

# NOTE(review): `etfspread` is not defined anywhere in the visible notebook,
# and this selects hour 15 while `group` above was taken for hour 12 - confirm
# both before relying on this column.
ds['ETF Trading Spread in $'] = etfspread.unstack(level=1).loc[15]

In [96]:
# Display the per-minute summary table built in the cells above.
ds

Unnamed: 0_level_0,ETF Price,ETF Change Price %,Net Asset Value Change%,Arbitrage in $,ETF Trading Spread in $
t,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0,102.589615,,,,0.009435
1,102.62,0.029618,0.007342,0.022859,0.009819
2,102.623571,0.00348,0.009072,-0.005738,0.010152
3,102.645978,0.021834,0.004279,0.018019,0.009465
4,102.665147,0.018675,0.008517,0.010428,0.009684
5,102.665147,0.0,-0.008898,0.009136,0.009592
6,102.6675,0.002292,0.008152,-0.006016,0.009718
7,102.66,-0.007305,0.012901,-0.020744,0.00995
8,102.65,-0.009741,0.012335,-0.022661,0.009695
9,102.65,0.0,-0.002012,0.002065,0.009765
