In [1]:
import pandas as pd
import numpy as np
from etfdata import etf_marketCap, etf_cash, etf_index_mapping

In [2]:
def getProjectedUniverseDF(etf_name):
    """Load the projected index universe for an ETF as an ISIN / weight table.

    The workbook 'Data/<index>-Projected.xlsx' is read with its first row
    skipped, where <index> is looked up in ``etf_index_mapping`` by
    ``etf_name``. Literal 'NaN' strings and the two ICE disclaimer texts are
    turned into real missing values, rows without an 'ISIN number' are
    dropped, and only the ISIN and weight columns are kept.

    Parameters
    ----------
    etf_name : key into ``etf_index_mapping``.

    Returns
    -------
    pandas.DataFrame
        Columns 'ISIN number' and 'Projected % Mkt Cap' (renamed from
        '% Mkt Value').
    """
    # Disclaimer paragraphs that show up as cell values in the export; they are blanked out below.
    msg1 = 'Any unauthorized use or disclosure is prohibited. Nothing herein should in any way be deemed to alter the legal rights and obligations contained in agreements between any ICE Data Services entity ("ICE") and their clients relating to any of the Indices or products or services described herein. The information provided by ICE and contained herein is subject to change without notice and does not constitute any form of representation, or undertaking.  ICE and its affiliates make no warranties whatsoever, either express or implied, as to merchantability, fitness for a particular purpose, or any other matter in connection with the information provided. Without limiting the foregoing, ICE and its affiliates makes no representation or warranty that any information provided hereunder are complete or free from errors, omissions, or defects. All information provided by ICE is owned by or licensed to ICE. ICE retains exclusive ownership of the ICE Indices, including the ICE BofAML Indexes, and the analytics used to create this analysis ICE may in its absolute discretion and without prior notice revise or terminate the ICE information and analytics at any time. The information in this analysis is for internal use only and redistribution of this information to third parties is expressly prohibited.'
    msg2 = 'Neither the analysis nor the information contained therein constitutes investment advice or an offer  or an invitation to make an offer  to buy or sell any securities or any options  futures or other derivatives related to such securities. The information and calculations contained in this analysis have been obtained from a variety of sources  including those other than ICE and ICE does not guarantee their accuracy.  Prior to relying on any ICE information and/or the execution of a security trade based upon such ICE information, you are advised to consult with your broker or other financial representative to verify pricing information. There is no assurance that hypothetical results will be equal to actual performance under any market conditions. THE ICE INFORMATION IS PROVIDED TO THE USERS "AS IS." NEITHER ICE, NOR ITS AFFILIATES, NOR ANY THIRD PARTY DATA PROVIDER WILL BE LIABLE TO ANY USER OR ANYONE ELSE FOR ANY INTERRUPTION, INACCURACY, ERROR OR OMISSION, REGARDLESS OF CAUSE, IN THE ICE INFORMATION OR FOR ANY DAMAGES RESULTING THEREFROM. In no event shall ICE or any of its affiliates, employees  officers  directors or agents of any such persons have any liability to any person or entity relating to or arising out of this information, analysis  or the indices  contained herein.'

    # Read the projected-universe workbook, skipping its first row.
    workbook_path = 'Data/{}-Projected.xlsx'.format(etf_index_mapping[etf_name])
    universe = pd.read_excel(workbook_path, skiprows=[0])

    # Blank out the textual placeholders one after another.
    for placeholder in ('NaN', msg1, msg2):
        universe = universe.replace(placeholder, np.nan)

    # Rows without an ISIN are discarded.
    universe = universe.dropna(subset=['ISIN number'])

    # Keep only the identifier and the weight, giving the weight its output name.
    weights = universe[['ISIN number', '% Mkt Value']]
    return weights.rename(columns={'% Mkt Value': 'Projected % Mkt Cap'})

In [3]:
def getETFWebsiteData(tickerDatabase_df, etf_name):
    """Load the ETF's current holdings from its website export, keyed by ISIN.

    Reads 'Data/<etf_name>_Website_Data.csv', skipping the first two lines
    and discarding the last row. Each holding's SEDOL is mapped to an ISIN by
    a left merge against ``tickerDatabase_df``, so holdings whose SEDOL is
    not in the ticker database come back with a missing ISIN.

    Parameters
    ----------
    tickerDatabase_df : pandas.DataFrame
        Must contain 'ISIN' and 'SEDOL' columns; rows with a missing SEDOL
        are ignored for the lookup.
    etf_name : str
        Used to build the CSV file name and the output column name.

    Returns
    -------
    pandas.DataFrame
        Columns 'ISIN' and 'Current <etf_name> Shares' (float).

    Raises
    ------
    ValueError
        If a 'Shares Held' value is not numeric after removing commas.
    """
    shares_col = 'Current {} Shares'.format(etf_name)

    # Skipping the first 2 rows and last row
    holdings = pd.read_csv('Data/{}_Website_Data.csv'.format(etf_name), skiprows=2)
    holdings = holdings[:-1]

    # Keeping only the required Columns
    holdings = holdings[['SEDOL', 'Shares Held']]

    # To get the ISIN by merging on 'SEDOL' (database rows without a SEDOL cannot match)
    sedol_lookup = tickerDatabase_df[tickerDatabase_df['SEDOL'].notna()]
    holdings = holdings.merge(sedol_lookup, on='SEDOL', how='left')
    holdings = holdings.rename(columns={'Shares Held': shares_col})

    # When no value carries a thousands separator pandas parses the column as
    # numeric, and the .str accessor then raises AttributeError. Casting to
    # str first makes the comma stripping work for both text and numeric input.
    holdings[shares_col] = (
        holdings[shares_col]
        .astype(str)
        .str.replace(',', '', regex=False)
        .astype(float)
    )
    return holdings[['ISIN', shares_col]]

In [4]:
def getTickerDatabase():
    """Read 'Static Data/TickerDatabase.xlsx' and return its ISIN, SEDOL, Ticker and Last Price columns."""
    wanted_columns = ['ISIN', 'SEDOL', 'Ticker', 'Last Price']
    full_database = pd.read_excel('Static Data/TickerDatabase.xlsx')
    return full_database[wanted_columns]

In [5]:
def getOutputDF(etf_currentHoldings, projected_universe_df, tickerDatabase_df, etf_name):
    """Build the current-vs-projected share comparison table for one ETF.

    One row is produced per distinct non-missing ISIN found in either the
    current holdings or the projected universe. Current shares, projected
    weight, ticker and last price are attached through left merges, and every
    remaining missing value is replaced by 0 before the projected share count
    and the difference are computed row by row.

    Parameters
    ----------
    etf_currentHoldings : DataFrame with 'ISIN' and 'Current <etf_name> Shares'.
    projected_universe_df : DataFrame with 'ISIN number' plus the projected columns.
    tickerDatabase_df : DataFrame with 'ISIN', 'Ticker' and 'Last Price'.
    etf_name : ETF identifier used in the share column names.

    Returns
    -------
    pandas.DataFrame
        Sorted by the absolute value of 'Difference', largest first.
    """
    current_col = 'Current {} Shares'.format(etf_name)
    projected_col = 'Projected {} Shares'.format(etf_name)

    # Union of ISINs seen on either side, de-duplicated.
    isin_pool = pd.concat([etf_currentHoldings['ISIN'], projected_universe_df['ISIN number']])
    isin_pool = isin_pool.drop_duplicates().reset_index(drop=True)

    comparison = (
        pd.DataFrame({'ISIN': isin_pool})
        .dropna(subset=['ISIN'])
        .merge(etf_currentHoldings[['ISIN', current_col]], on='ISIN', how='left')
        .merge(projected_universe_df, left_on='ISIN', right_on='ISIN number', how='left')
        .drop(columns=['ISIN number'])
        .merge(tickerDatabase_df[['ISIN', 'Ticker', 'Last Price']], on='ISIN', how='left')
        # Anything that failed to match (shares, weight, ticker, price) becomes 0.
        .fillna(0)
    )

    comparison[projected_col] = comparison.apply(lambda row: getProjectedShares(row, etf_name), axis=1)
    comparison['Difference'] = comparison.apply(lambda row: getDifference(row, etf_name), axis=1)

    return comparison.sort_values(by='Difference', key=abs, ascending=False)

In [6]:
def getProjectedShares(x, etf_name):
    """Return the projected share count for one row, rounded to the nearest integer.

    ``x`` is a row-like mapping providing 'Last Price' and
    'Projected % Mkt Cap'. When the price is a string or equals 0 the result
    is 0; otherwise the ETF's market cap minus its cash is multiplied by the
    projected weight and divided by (price * 100).
    """
    price = x['Last Price']

    # No usable price: nothing can be projected for this row.
    if isinstance(price, str) or price == 0:
        return 0

    investable = etf_marketCap[etf_name] - etf_cash[etf_name]
    return np.rint((investable * x['Projected % Mkt Cap']) / (price * 100))

def getDifference(x, etf_name):
    """Return projected minus current shares for one row, or NaN if either value is a string.

    ``x`` is a row-like mapping with 'Projected <etf_name> Shares' and
    'Current <etf_name> Shares' entries.
    """
    projected = x['Projected {} Shares'.format(etf_name)]
    if isinstance(projected, str):
        return np.nan

    current = x['Current {} Shares'.format(etf_name)]
    if isinstance(current, str):
        return np.nan

    return projected - current

In [7]:
def writeToFile(etf_df, etf_name):
    """Write the comparison table to 'Output/ETF/<etf_name> ETF vs ICE.xlsx'.

    The workbook is (re)created with a single sheet named after the ETF that
    holds a fixed selection of columns from ``etf_df``. Sheet columns F to H
    are then set to width 10 with a thousands-separated integer number format.
    """
    current_col = 'Current {} Shares'.format(etf_name)
    projected_col = 'Projected {} Shares'.format(etf_name)
    export_columns = ['ISIN', 'Ticker', 'Last Price', 'Projected % Mkt Cap', current_col, projected_col, 'Difference']

    # Output File Name
    destination = 'Output/ETF/{} ETF vs ICE.xlsx'.format(etf_name)

    with pd.ExcelWriter(destination, mode="w", engine='xlsxwriter') as writer:
        etf_df.to_excel(writer, sheet_name=etf_name, columns=export_columns)

        # Formatting: to_excel also writes the index as the first sheet column,
        # so F:H line up with the two share columns and 'Difference'.
        thousands_format = writer.book.add_format({'num_format': '#,##0'})
        writer.sheets[etf_name].set_column('F:H', 10, thousands_format)

In [8]:
def main(etf_name):
    """Run the whole comparison for one ETF and write the resulting workbook.

    Loads the ticker database, the projected universe and the current
    website holdings (in that order), combines them into the comparison
    table and hands it to ``writeToFile``.
    """
    # The looked-up value itself is not used here; the lookup is kept so that
    # an ETF missing from etf_index_mapping fails before any file is read.
    _ = etf_index_mapping[etf_name]

    ticker_db = getTickerDatabase()
    universe = getProjectedUniverseDF(etf_name)
    holdings = getETFWebsiteData(ticker_db, etf_name)

    comparison = getOutputDF(holdings, universe, ticker_db, etf_name)
    writeToFile(comparison, etf_name)

In [9]:
# Run the full comparison for one ETF; the commented-out call is the
# alternative ETF name (presumably also a key of etf_index_mapping - confirm).
# main('PFFD')
main('PFFV')

In [10]:
# Ad-hoc inspection: display the current PFFV holdings after the SEDOL -> ISIN lookup.
getETFWebsiteData(getTickerDatabase(), 'PFFV')

Unnamed: 0,ISIN,Current PFFV Shares
0,US38144G8042,512281.0
1,US0605051798,381354.0
2,US9029738666,11872.0
3,US9497464654,322181.0
4,US61761J4067,307225.0
...,...,...
66,US64828T3005,57516.0
67,US00123Q8565,50591.0
68,US6496048819,56691.0
69,US4884013081,55552.0
