#### Package Import

In [1]:
import sys
import os
import numpy as np
import pandas as pd
# Make the directory two levels above the notebook's working directory importable
# (presumably the project root that contains the `apis` package imported below).
# The path is derived from the current working directory, which is what the
# original abspath(<notebook name>) call resolved against, and it is appended only
# once so re-running this cell does not pile up duplicate sys.path entries.
PROJECT_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(os.getcwd())))
if PROJECT_ROOT not in sys.path:
    sys.path.append(PROJECT_ROOT)
import apis.pa.mypaengine as pa
import apis.qe.myqengine as qe
import warnings
# Silence every warning for the rest of the session; note that this also hides
# deprecation notices from pandas/numpy and the project packages.
warnings.filterwarnings("ignore")

Python-dotenv could not parse statement starting at line 5
Python-dotenv could not parse statement starting at line 5


### Screening for IDs & Characteristics using Quant Engine

#### 1. Get All Mutual Funds that Hold Nvidia

##### a. Query data using FQL

In [2]:
# Universe: the single security whose holders are being screened (NVDA-US)
univ = qe.IdUniverse(ids=['NVDA-US'], universe_type='Equity')

# Date range given as relative dates, from '-2M' through '0M'
# (the outputs further down show month-ends 20230630 to 20230831)
time_series = qe.TimeSeries(start_date='-2M', end_date='0M')

# FQL formulas; the next cell renames them to hldr_id, hldr_name and position_mv
formulas = [
    'OS_TOP_HLDR_ID(ALL,#DATE,,M,,M)',
    'OS_TOP_HLDR_NAME(ALL,#DATE,,M,,M,,"EN")',
    'OS_TOP_HLDR_MV(ALL,#DATE,,M,,M,SEC,USD)',
]

# Run the query with array-valued results (exploded into rows in the data-prep cell)
q_req = qe.calculate(
    universe=univ,
    dates=time_series,
    formulas=formulas,
    is_array_return_type=True,
    source='FqlExpression',
)


##### b. Data prep

In [3]:
# Give the FQL result columns short names and index the rows by date.
# rename() returns a new frame, so q_req.data itself is left unmodified.
df_temp = q_req.data.rename(
    columns={
        'OS_TOP_HLDR_ID(ALL,#DATE,,M,,M)': 'hldr_id',
        'OS_TOP_HLDR_NAME(ALL,#DATE,,M,,M,,"EN")': 'hldr_name',
        'OS_TOP_HLDR_MV(ALL,#DATE,,M,,M,SEC,USD)': 'position_mv',
    }
).set_index(['DATE'])

# Every cell holds an array of holders: explode each column into one row per
# holder, then put the three exploded columns back side by side.
df = pd.concat(
    [df_temp[column].explode() for column in ('hldr_id', 'hldr_name', 'position_mv')],
    axis=1,
)

# Key each row by (DATE, hldr_id) so it can be joined to fund-level data later
df = df.reset_index().set_index(['DATE', 'hldr_id'])
df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,hldr_name,position_mv
DATE,hldr_id,Unnamed: 2_level_1,Unnamed: 3_level_1
20230630,M4004630,Vanguard Total Stock Market ETF,30989909233.66
20230630,M4004543,Vanguard 500 Index Fund,24909466708.88
20230630,M4017554,Invesco QQQ Trust,13895778480.74
20230630,M4006564,SPDR S&P 500 ETF Trust,11899983657.38
20230630,M4001597,Fidelity 500 Index Fund,11813666426.38


#### 2. Get exchange ticker and other additional data items for holder IDs returned

##### a. Query data using FQL

In [4]:
# Universe: every distinct holder id returned by the previous step
# (the 'hldr_id' level of df's index)
fund_univ = qe.IdUniverse(
    ids=df.index.get_level_values('hldr_id').unique().to_list(),
    universe_type='Equity',
)

# Holder-level formulas; a later cell renames them to style, fund_family, aum and ticker
formulas = [
    'OS_HLDR_MSTYLE',
    'OS_HLDR_MF_FAMILY',
    'FFD_AUM(#DATE,,M,USD)',
    'STRING(FIRST_ITEM_AV(OS_FUND_TICKER))',
]

# Run the query over the same date range as the holder query
q_req = qe.calculate(
    universe=fund_univ,
    dates=time_series,
    formulas=formulas,
    source='FqlExpression',
)

In [5]:
#Keep a copy of the raw query result; the next cell builds dff from this copy,
#so it can be re-run without repeating the query
dff_arc = q_req.data.copy()

In [6]:
# Build dff from the archived query result: short column names, then index by
# (DATE, hldr_id). rename() returns a new frame, so dff_arc stays unmodified.
dff = (
    dff_arc
    .rename(
        columns={
            "UNIVERSE": "hldr_id",
            'OS_HLDR_MSTYLE': 'style',
            'OS_HLDR_MF_FAMILY': 'fund_family',
            'STRING(FIRST_ITEM_AV(OS_FUND_TICKER))': 'ticker',
            'FFD_AUM(#DATE,,M,USD)': 'aum',
        }
    )
    .set_index(['DATE', 'hldr_id'])
)
dff

Unnamed: 0_level_0,Unnamed: 1_level_0,style,fund_family,aum,ticker
DATE,hldr_id,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
20230630,M4004630,Generalist,Vanguard Funds,1.348484e+12,VTSMX
20230630,M4004543,Generalist,Vanguard Funds,8.842364e+11,VFINX
20230630,M4017554,Generalist,PowerShares Funds,2.004900e+11,QQQ
20230630,M4006564,Generalist,SPDR Funds,4.240008e+11,SPY
20230630,M4001597,Index,@NA,4.197272e+11,FXAIX
...,...,...,...,...,...
20230831,M4257447,Generalist,@NA,5.075026e+06,CBLS
20230831,M23431828,Generalist,@NA,1.967357e+07,440340
20230831,M26189410,Growth,Congress Funds,1.296647e+06,CAML
20230831,M21374403,Generalist,Smart Sentiment Funds,5.891182e+05,OAIE


##### b. Data prep

In [7]:

#join the fund-level attributes with the NVDA position data on the shared
#(DATE, hldr_id) index. Columns that come from df are dropped first (if they are
#already present) so re-running this cell does not fail on overlapping column names.
dff = dff.drop(columns=df.columns, errors='ignore').join(df, how='left')

#calculate the NVDA position as a fraction of the fund's AUM
dff['weight_in_nvda'] = dff['position_mv'] / dff['aum']

#Replace @NA Groupings with Other
dff['fund_family'] = dff['fund_family'].replace('@NA', 'Other')
dff['style'] = dff['style'].replace('@NA', 'Other')

#Drop funds without a usable ticker ('@NA' or empty string). The original cell
#evaluated the empty-ticker filter but discarded its result; it is applied here.
dff = dff[~dff['ticker'].isin(['@NA', ''])]

#Clean up table: treat any remaining empty string as missing, then drop every
#row that has a missing value in any column
dff = dff.replace('', np.nan).dropna()

#preview
dff.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,style,fund_family,aum,ticker,hldr_name,position_mv,weight_in_nvda
DATE,hldr_id,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
20230630,M4004630,Generalist,Vanguard Funds,1348484000000.0,VTSMX,Vanguard Total Stock Market ETF,30989910000.0,0.022981
20230630,M4004543,Generalist,Vanguard Funds,884236400000.0,VFINX,Vanguard 500 Index Fund,24909470000.0,0.028171
20230630,M4017554,Generalist,PowerShares Funds,200490000000.0,QQQ,Invesco QQQ Trust,13895780000.0,0.069309
20230630,M4006564,Generalist,SPDR Funds,424000800000.0,SPY,SPDR S&P 500 ETF Trust,11899980000.0,0.028066
20230630,M4001597,Index,Other,419727200000.0,FXAIX,Fidelity 500 Index Fund,11813670000.0,0.028146


In [24]:
#Set universe to the distinct fund tickers collected in dff
fund_univ = qe.IdUniverse(
    ids=list(dff['ticker'].unique()),
    universe_type='Equity',
)

#Fund classification formulas; the next cell renames them to focus, niche and category
formulas = [
    'FFD_CLASS_FOCUS(TEXT)',
    'FFD_CLASS_NICHE(TEXT)',
    'FFD_CLASS_CAT(TEXT)',
]

#Calculate for a single relative date ('0M' to '0M') via the screening-expression source
q_req = qe.calculate(
    universe=fund_univ,
    dates=qe.TimeSeries(start_date='0M', end_date='0M'),
    formulas=formulas,
    source='ScreeningExpression',
)
q_req.data

Unnamed: 0,DATE,UNIVERSE,FFD_CLASS_FOCUS(TEXT),FFD_CLASS_NICHE(TEXT),FFD_CLASS_CAT(TEXT)
0,20230831,VTSMX,Total Market,Broad-based,Size and Style
1,20230831,VFINX,Large Cap,Broad-based,Size and Style
2,20230831,QQQ,Large Cap,Broad-based,Size and Style
3,20230831,SPY,Large Cap,Broad-based,Size and Style
4,20230831,FXAIX,Large Cap,Broad-based,Size and Style
...,...,...,...,...,...
4515,20230831,CBLS,Long/Short,Long/Short,Hedge Fund Strategies
4516,20230831,440340,,,
4517,20230831,CAML,Large Cap,Growth,Size and Style
4518,20230831,OAIE,Long/Short,Event-driven,Hedge Fund Strategies


In [25]:
# Turn the classification result into a ticker-indexed lookup table with short
# column names; the DATE column and the old positional index are discarded.
df_c = (
    q_req.data
    .rename(
        columns={
            "UNIVERSE": 'ticker',
            "FFD_CLASS_FOCUS(TEXT)": "focus",
            "FFD_CLASS_NICHE(TEXT)": "niche",
            "FFD_CLASS_CAT(TEXT)": 'category',
        }
    )
    .reset_index()
    .set_index('ticker')
    .drop(columns=["DATE", "index"])
)
df_c

Unnamed: 0_level_0,focus,niche,category
ticker,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
VTSMX,Total Market,Broad-based,Size and Style
VFINX,Large Cap,Broad-based,Size and Style
QQQ,Large Cap,Broad-based,Size and Style
SPY,Large Cap,Broad-based,Size and Style
FXAIX,Large Cap,Broad-based,Size and Style
...,...,...,...
CBLS,Long/Short,Long/Short,Hedge Fund Strategies
440340,,,
CAML,Large Cap,Growth,Size and Style
OAIE,Long/Short,Event-driven,Hedge Fund Strategies


In [27]:
# Attach focus / niche / category to each fund row by matching dff's 'ticker'
# column against df_c's ticker index. Columns previously added from df_c are
# dropped first (if present) so re-running this cell does not raise a
# column-overlap error.
dff = dff.drop(columns=df_c.columns, errors='ignore').join(df_c, how='left', on='ticker')

In [28]:
# Persist the enriched holder table as a pickle; the path is relative to the
# notebook's working directory, and the target folder must already exist.
dff.to_pickle(
    '../../data/Nvidia Holder Analysis/full_dataset_t3m.pkl'
)