## This code illustrates how to get S&P500 index constituents and their identifiers from CRSP and Compustat
- Researchers used to be able to extract index membership information from Compustat's "comp.idxcst_his" data. Now that S&P pulled this piece of data off from WRDS platform, we have to turn to CRSP to get S&P500 Index membership data. 
- Unfortunately, there is no easy way to uncover constituents info for the other indices covered by "comp.idxcst_his".

In [1]:
##########################################
# S&P 500 Index Constituents             #
# Qingyi (Freda) Song Drechsler          #
# Date: October 2020                     #
##########################################

import pandas as pd
import wrds

In [11]:
import numpy as np

### Step 1: Connect to WRDS

In [4]:
###################
# Connect to WRDS #
###################
conn=wrds.Connection()

Enter your WRDS username [ec2-user]: ly229
Enter your password: ········


WRDS recommends setting up a .pgpass file.


Create .pgpass file now [y/n]?:  y


Created .pgpass file successfully.
You can create this file yourself at any time with the create_pgpass_file() function.
Loading library list...
Done


### Step 2: Get SP500 Index Membership from CRSP
- I opt for the monthly frequency of the data, but one can choose to work with crsp.dsp500list if more precise date range is needed.

In [3]:
sp500 = conn.raw_sql("""
                        select 
                        a.*, b.date, b.ret
                        
                        from crsp.msp500list as a,
                        crsp.msf as b
                        
                        where a.permno=b.permno
                        and b.date >= a.start and b.date<= a.ending
                        and b.date>='01/01/2000'
                        order by date;
                        """, date_cols=['start', 'ending', 'date'])

In [12]:
sp500.head()

Unnamed: 0,permno,start,ending,date,ret
0,75175.0,1996-12-03,2009-12-18,2000-01-31,-0.052071
1,22840.0,1981-09-10,2012-06-28,2000-01-31,-0.164306
2,79973.0,1998-04-27,2006-07-31,2000-01-31,-0.150911
3,12431.0,1957-03-01,2009-06-30,2000-01-31,-0.145289
4,75789.0,1992-01-02,2008-09-10,2000-01-31,0.066401


### Step 3: Add Other Company Identifiers from CRSP.MSENAMES
- You don't need this step if only PERMNO is required
- This step aims to add TICKER, SHRCD, EXCHCD and etc. 

In [4]:
# Add Other Descriptive Variables

mse = conn.raw_sql("""
                        select comnam, ncusip, namedt, nameendt, 
                        permno, shrcd, exchcd, hsiccd, ticker
                        from crsp.msenames
                        """, date_cols=['namedt', 'nameendt'])

# if nameendt is missing then set to today date
mse['nameendt']=mse['nameendt'].fillna(pd.to_datetime('today'))

In [6]:
mse.sample(5)

Unnamed: 0,comnam,ncusip,namedt,nameendt,permno,shrcd,exchcd,hsiccd,ticker
45668,PENN MEDICAL TECHNOLOGY INC,70754810,1984-04-03,1984-10-16,61939.0,10.0,3.0,3843.0,PEMD
100555,SIMCLAR INC,82859910,2006-06-30,2008-04-24,92340.0,11.0,3.0,3670.0,SIMC
16891,CAMBRIA E T F TRUST,13206185,2017-05-24,2017-12-25,16740.0,73.0,4.0,6726.0,CCOR
64564,BUILDERS WAREHOUSE ASSOC INC,11991630,1995-05-10,1995-06-29,79154.0,11.0,3.0,5210.0,BWAI
31856,SHATTUCK DENN MNG CORP,82024210,1968-01-02,1973-05-22,36433.0,10.0,2.0,3499.0,SDE


In [5]:
# Merge with SP500 data
sp500_full = pd.merge(sp500, mse, how = 'left', on = 'permno')

# Impose the date range restrictions
sp500_full = sp500_full.loc[(sp500_full.date>=sp500_full.namedt) \
                            & (sp500_full.date<=sp500_full.nameendt)]
sp500_full.sample(5)

Unnamed: 0,permno,start,ending,date,ret,comnam,ncusip,namedt,nameendt,shrcd,exchcd,hsiccd,ticker
263452,13856,1957-03-01,2023-12-29,2005-10-31,0.041792,PEPSICO INC,71344810,2004-06-10,2017-12-19,11,1,2086,PEP
607800,70578,1989-01-12,2023-12-29,2013-04-30,0.055375,ECOLAB INC,27886510,2004-06-10,2017-03-09,11,1,2841,ECL
123533,83332,1996-10-01,2006-11-30,2002-09-30,-0.560694,LUCENT TECHNOLOGIES INC,54946310,2002-01-02,2004-02-19,11,1,3661,LU
811368,77605,1995-02-24,2023-12-29,2017-10-31,-0.03531,BOSTON SCIENTIFIC CORP,10113710,2004-06-10,2023-12-29,11,1,3841,BSX
414416,76149,1998-11-13,2015-01-26,2009-01-30,-0.098443,SAFEWAY INC,78651420,2004-06-10,2015-01-29,11,1,5411,SWY


### Step 4: Add Compustat Identifiers
- Link with Compustat's GVKEY and IID if need to work with fundamental data
- Linkage is done through crsp.ccmxpf_linktable

In [6]:
# Linking with Compustat through CCM

ccm=conn.raw_sql("""
                  select gvkey, liid as iid, lpermno as permno, linktype, linkprim, 
                  linkdt, linkenddt
                  from crsp.ccmxpf_linktable
                  where substr(linktype,1,1)='L'
                  and (linkprim ='C' or linkprim='P')
                  """, date_cols=['linkdt', 'linkenddt'])

# if linkenddt is missing then set to today date
ccm['linkenddt']=ccm['linkenddt'].fillna(pd.to_datetime('today'))

In [7]:
# Merge the CCM data with S&P500 data
# First just link by matching PERMNO
sp500ccm = pd.merge(sp500_full, ccm, how='left', on=['permno'])

# Then set link date bounds
sp500ccm = sp500ccm.loc[(sp500ccm['date']>=sp500ccm['linkdt'])\
                        &(sp500ccm['date']<=sp500ccm['linkenddt'])]
sp500ccm.sample(5)

Unnamed: 0,permno,start,ending,date,ret,comnam,ncusip,namedt,nameendt,shrcd,exchcd,hsiccd,ticker,gvkey,iid,linktype,linkprim,linkdt,linkenddt
153045,92121,2007-07-02,2023-12-29,2019-12-31,-0.000589,DISCOVER FINANCIAL SERVICES,25470910,2017-04-26,2020-02-17,11,1,6022,DFS,177376,2,LC,P,2007-07-02,2024-10-27 03:21:48.429932
103593,86725,2008-11-24,2014-01-23,2013-04-30,0.140183,LIFE TECHNOLOGIES CORP,53217V10,2008-11-24,2014-02-03,11,3,2836,LIFE,118577,1,LC,P,1999-02-26,2014-02-03 00:00:00.000000
32294,37584,1998-04-29,2023-12-29,2003-12-31,0.090215,FRANKLIN RESOURCES INC,35461310,2002-01-02,2004-02-16,11,1,6282,BEN,4885,1,LU,P,1983-09-23,2024-10-27 03:21:48.429932
116852,19502,1979-10-11,2023-12-29,2015-02-27,0.131085,WALGREENS BOOTS ALLIANCE INC,93142710,2014-12-31,2023-12-29,11,3,5912,WBA,11264,1,LU,P,1962-01-31,2024-10-27 03:21:48.429932
80948,38703,1976-07-01,2023-12-29,2010-04-30,0.063946,WELLS FARGO & CO NEW,94974610,2004-06-10,2012-10-11,11,1,6021,WFC,8007,1,LC,P,1962-12-10,2024-10-27 03:21:48.429932


In [8]:
# Rearrange columns for final output

sp500ccm = sp500ccm.drop(columns=['namedt', 'nameendt', \
                                  'linktype', 'linkprim', 'linkdt', 'linkenddt'])
sp500ccm = sp500ccm[['date', 'permno', 'comnam', 'ncusip', 'shrcd', 'exchcd', 'hsiccd', 'ticker', \
                     'gvkey', 'iid', 'start', 'ending', 'ret']]
sp500ccm.sample(5)

Unnamed: 0,date,permno,comnam,ncusip,shrcd,exchcd,hsiccd,ticker,gvkey,iid,start,ending,ret
80323,2010-03-31,85032,QWEST COMMUNICATIONS INTL INC,74912110,11,1,4813,Q,61489,1,2000-07-06,2011-03-31,0.144737
55833,2007-01-31,23660,CINTAS CORP,17290810,11,3,2320,CTAS,3062,1,2001-03-01,2023-12-29,0.036263
82562,2010-07-30,23501,PEPCO HOLDINGS INC,71329110,11,1,4931,POM,8694,1,2007-11-09,2016-03-29,0.078444
4015,2000-06-30,48485,JEFFERSON PILOT CORP,47507010,11,1,6311,JP,6241,1,1976-07-01,2006-03-31,-0.177596
39995,2004-12-31,76557,M B N A CORP,55262L10,11,1,6021,KRB,23592,1,1992-04-23,2005-12-30,0.065889


In [9]:
cnt = sp500ccm.groupby(['date'])['permno'].count().reset_index().rename(columns={'permno':'npermno'})
cnt.sample(4)

Unnamed: 0,date,npermno
251,2020-12-31,501
100,2008-05-30,500
197,2016-06-30,501
258,2021-07-30,500


In [22]:
sp500ccm.sample(10)

Unnamed: 0,date,permno,comnam,ncusip,shrcd,exchcd,hsiccd,ticker,gvkey,iid,start,ending,ret
113348,2014-11-28,26825.0,KELLOGG CO,48783610,11.0,1.0,2043.0,K,6375,1,1961-04-26,2019-12-31,0.043465
507,2000-01-31,80599.0,LEHMAN BROTHERS HOLDINGS INC,52490810,11.0,1.0,6282.0,LEH,30128,1,1998-01-12,2008-09-16,-0.157934
60387,2007-10-31,40272.0,INTERNATIONAL FLAVORS & FRAG INC,45950610,11.0,1.0,2869.0,IFF,6078,1,1976-03-04,2019-12-31,-0.012297
50310,2006-06-30,78034.0,PATTERSON COMPANIES INC,70339510,11.0,3.0,5990.0,PDCO,25880,1,2005-10-11,2018-03-16,0.019854
97932,2012-10-31,16600.0,HERSHEY CO,42786610,11.0,1.0,2066.0,HSY,5597,1,1957-03-01,2019-12-31,-0.028777
70840,2009-02-27,24766.0,NORTHROP GRUMMAN CORP,66680710,11.0,1.0,3812.0,NOC,7985,1,1985-06-13,2019-12-31,-0.215295
23639,2002-12-31,13119.0,MAYTAG CORP,57859210,11.0,1.0,3633.0,MYG,7139,1,1960-12-14,2006-03-31,-0.079457
112012,2014-09-30,28345.0,MURPHY OIL CORP,62671710,11.0,1.0,1382.0,MUR,7620,1,2005-08-15,2017-07-25,-0.089003
93624,2012-03-30,52329.0,JACOBS ENGINEERING GROUP INC,46981410,11.0,1.0,8711.0,JEC,6216,1,2007-10-26,2019-12-31,-0.040026
114295,2015-01-30,91461.0,WESTERN UNION CO,95980210,11.0,1.0,6099.0,WU,175263,1,2006-10-02,2019-12-31,-0.05081


In [15]:
### create a link table of PERMNO, CUSIP, GVKEY, IID
sp500_dates = sp500ccm[['start', 'ending', 'permno', 'ncusip', 'ticker', 'gvkey']].copy()

In [16]:
# Convert start and end columns to datetime
sp500_dates['start'] = pd.to_datetime(sp500_dates['start'])
sp500_dates['ending'] = pd.to_datetime(sp500_dates['ending'])

# Function to generate quarterly dates between start and end
def generate_quarterly_dates(start, end):
    # Create a date range from start to end with quarterly frequency
    return pd.date_range(start=start, end=end, freq='QE').to_list()

# Apply the function to each row
sp500_dates['quarters'] = sp500_dates.apply(lambda row: generate_quarterly_dates(row['start'], row['ending']), axis=1)

# Expand the quarters column into individual rows
sp500_dates_q = sp500_dates.explode('quarters').reset_index(drop=True)
print(sp500_dates_q)

  return pd.date_range(start=start, end=end, freq='Q').to_list()


              start     ending  permno    ncusip ticker   gvkey   quarters
0        1999-07-22 2023-12-29   77178  74752510   QCOM  024800 1999-09-30
1        1999-07-22 2023-12-29   77178  74752510   QCOM  024800 1999-12-31
2        1999-07-22 2023-12-29   77178  74752510   QCOM  024800 2000-03-31
3        1999-07-22 2023-12-29   77178  74752510   QCOM  024800 2000-06-30
4        1999-07-22 2023-12-29   77178  74752510   QCOM  024800 2000-09-30
...             ...        ...     ...       ...    ...     ...        ...
16683673 1999-10-13 2023-12-29   10138  74144T10   TROW  012138 2022-09-30
16683674 1999-10-13 2023-12-29   10138  74144T10   TROW  012138 2022-12-31
16683675 1999-10-13 2023-12-29   10138  74144T10   TROW  012138 2023-03-31
16683676 1999-10-13 2023-12-29   10138  74144T10   TROW  012138 2023-06-30
16683677 1999-10-13 2023-12-29   10138  74144T10   TROW  012138 2023-09-30

[16683678 rows x 7 columns]


In [17]:
# Create a new DataFrame with selected columns
sp500_link_table = sp500_dates_q[['quarters', 'permno', 'ncusip', 'ticker', 'gvkey']].copy()

# Optional: Reset index if you want a clean index for the new table
sp500_link_table.reset_index(drop=True, inplace=True)

# Display the new table
print(sp500_link_table)

           quarters  permno    ncusip ticker   gvkey
0        1999-09-30   77178  74752510   QCOM  024800
1        1999-12-31   77178  74752510   QCOM  024800
2        2000-03-31   77178  74752510   QCOM  024800
3        2000-06-30   77178  74752510   QCOM  024800
4        2000-09-30   77178  74752510   QCOM  024800
...             ...     ...       ...    ...     ...
16683673 2022-09-30   10138  74144T10   TROW  012138
16683674 2022-12-31   10138  74144T10   TROW  012138
16683675 2023-03-31   10138  74144T10   TROW  012138
16683676 2023-06-30   10138  74144T10   TROW  012138
16683677 2023-09-30   10138  74144T10   TROW  012138

[16683678 rows x 5 columns]


In [18]:
print(sp500_link_table.loc[sp500_link_table['permno'] == 77178])

           quarters  permno    ncusip ticker   gvkey
0        1999-09-30   77178  74752510   QCOM  024800
1        1999-12-31   77178  74752510   QCOM  024800
2        2000-03-31   77178  74752510   QCOM  024800
3        2000-06-30   77178  74752510   QCOM  024800
4        2000-09-30   77178  74752510   QCOM  024800
...             ...     ...       ...    ...     ...
16676165 2022-09-30   77178  74752510   QCOM  024800
16676166 2022-12-31   77178  74752510   QCOM  024800
16676167 2023-03-31   77178  74752510   QCOM  024800
16676168 2023-06-30   77178  74752510   QCOM  024800
16676169 2023-09-30   77178  74752510   QCOM  024800

[27936 rows x 5 columns]


In [19]:
file_path_1 = 'DS_data/sp500_link_table.csv' 
sp500_link_table.to_csv(file_path_1, index=False)

['acti', 'asia', 'asib', 'asic', 'asio', 'asix', 'bmdebt', 'bmheader', 'bmpaymts', 'bmquotes', 'bmyield', 'bndprt06', 'bndprt12', 'bxcalind', 'bxdlyind', 'bxmthind', 'bxquotes', 'bxyield', 'cap', 'ccm_lookup', 'ccm_qvards', 'ccmxpf_linktable', 'ccmxpf_lnkhist', 'ccmxpf_lnkrng', 'ccmxpf_lnkused', 'comphead', 'comphist', 'compmaster', 'contact_info', 'core', 'crsp_cik_map', 'crsp_daily_data', 'crsp_header', 'crsp_monthly_data', 'crsp_names', 'crsp_portno_map', 'crsp_ziman_daily_index', 'crsp_ziman_monthly_index', 'cs20yr', 'cs5yr', 'cs90d', 'cst_hist', 'daily_nav', 'daily_nav_ret', 'daily_returns', 'dividends', 'dport1', 'dport2', 'dport3', 'dport4', 'dport5', 'dport6', 'dport7', 'dport8', 'dport9', 'dsbc', 'dsbo', 'dse', 'dse62', 'dse62delist', 'dse62dist', 'dse62exchdates', 'dse62names', 'dse62nasdin', 'dse62shares', 'dseall', 'dseall62', 'dsedelist', 'dsedist', 'dseexchdates', 'dsenames', 'dsenasdin', 'dseshares', 'dsf', 'dsf62', 'dsf62_v2', 'dsf_v2', 'dsfhdr', 'dsfhdr62', 'dsi', 'dsi

In [5]:
df_1 = pd.read_csv('tnf_us_22q1.csv')

  df_1 = pd.read_csv('tnf_us_22q1.csv')


In [27]:
df_1

Unnamed: 0,fundno,fundname,mgrcoab,country,shares,stkname,ticker,prc,shrout1
0,73,BMO NORTH AMERICAN DIV F,_EJYTQ,UNITED STATES,31324.0,AGCO CORP,AGCO,146.03,75.0
1,73,BMO NORTH AMERICAN DIV F,_EJYTQ,UNITED STATES,157048.0,AES CORP,AES,25.73,667.0
2,73,BMO NORTH AMERICAN DIV F,_EJYTQ,UNITED STATES,11500.0,ABBOTT LABORATORIES,ABT,118.36,1751.0
3,73,BMO NORTH AMERICAN DIV F,_EJYTQ,UNITED STATES,7162.0,ACUITY BRANDS INC,AYI,189.30,35.0
4,73,BMO NORTH AMERICAN DIV F,_EJYTQ,UNITED STATES,3900.0,AIR PRODUCTS AND CHEMICALS I,APD,249.91,222.0
...,...,...,...,...,...,...,...,...,...
2071576,191191,PROSHARES METAVERSE ETF,,UNITED STATES,202.0,WILLIAMS-SONOMA INC,WSM,145.00,69.0
2071577,191191,PROSHARES METAVERSE ETF,,UNITED STATES,89768.0,WIMI HOLOGRAM CLOUD INC,WIMI,2.67,77.0
2071578,191191,PROSHARES METAVERSE ETF,,UNITED STATES,10061.0,ZYNGA INC,ZNGA,9.24,1137.0
2071579,191191,PROSHARES METAVERSE ETF,,UNITED STATES,911.0,AMBARELLA INC,,104.92,38.0


In [47]:
Russell_1000 = df_1[df_1['fundname'].str.contains('1000', case=False, na=False)].drop_duplicates(subset='fundname')

In [48]:
Russell_1000

Unnamed: 0,fundno,fundname,mgrcoab,country,shares,stkname,ticker,prc,shrout1
589714,52100,SCHWAB 1000 INDEX (R) FU,SCHWAB,UNITED STATES,48281.0,ADT INC,ADT,7.59,851.0
630301,56763,ISHARES RUSSELL1000VAL I,_EJZOE,UNITED STATES,439417.0,ADT INC,ADT,7.59,851.0
637524,56804,ISHARES RUSSELL 1000 IND,_EJZOE,UNITED STATES,102181.0,ADT INC,ADT,7.59,851.0
638547,56805,ISHARES RUSSELL1000 GR I,_EJZOE,UNITED STATES,20034.0,AGCO CORP,AGCO,146.03,75.0
1089658,95587,SPDR RUSSELL1000 YLD FOC,,UNITED STATES,18438.0,ADT INC,ADT,7.59,851.0
1224334,106050,POWERSHARES RUSSELL1000L,,UNITED STATES,1172.0,AGNC INVESTMENT CORP,AGNC,13.1,523.0
1267019,112988,SPDR RUSSELL1000MOMENTUM,,UNITED STATES,3207.0,ADT INC,ADT,7.59,851.0
1321790,117073,VANGUARD RUSSELL1000GR I,,UNITED STATES,2929.0,AGCO CORP,AGCO,146.03,75.0
1428279,128232,POWERSHARES RUSSELL1000E,,UNITED STATES,49942.0,ADT INC,ADT,7.59,851.0
1456177,132142,SPDR RUSSELL1000LOW VOLA,,UNITED STATES,27824.0,AFLAC INC,AFL,64.39,649.0


In [44]:
Russell_3000 = df_1[df_1['fundname'].str.contains('3000', case=False, na=False)].drop_duplicates(subset='fundname')
Russell_3000

Unnamed: 0,fundno,fundname,mgrcoab,country,shares,stkname,ticker,prc,shrout1
631152,56769,ISHARES RUSSELL 3000 IND,_EJZOE,UNITED STATES,10642.0,AAON INC,AAON,55.73,53.0
640859,56871,ISHARES RUSSELL3000 GR I,_EJZOE,UNITED STATES,32588.0,ASGN INC,ASGN,116.71,51.0
641330,56883,ISHARES RUSSELL3000VAL I,_EJZOE,UNITED STATES,360472.0,AFLAC INC,AFL,64.39,649.0
1843519,167165,VANGUARD RUSSELL 3000 IN,,UNITED STATES,2128.0,AAON INC,AAON,55.73,53.0


In [18]:
sp_500 = df_1[df_1['fundname'].str.contains('500', case=False, na=False)].drop_duplicates(subset='fundname')
sp_500

Unnamed: 0,fundno,fundname,mgrcoab,country,shares,stkname,ticker,prc,shrout1
150940,177427,CBOE VEST SP 500 DIV ARS,,UNITED STATES,20320.0,AFLAC INC,AFL,64.39,649.0
164421,7995,DWS EQUITY 500 INDEX POR,_FRFSP,UNITED STATES,29442.0,AFLAC INC,AFL,64.39,649.0
179491,81045,DIREXION MTHLY S&P500BEA,_EKGDK,UNITED STATES,2392275.0,GOLDMAN SACHS FOCUSED INTERN,FTIX,1.00,
179492,81046,DIREXION MTHLY S&P500BUL,_EKGDK,UNITED STATES,11780000.0,FIDELITY COLCHESTER STREET T,FRGX,1.00,
209564,177426,STATE STREET SP 500 INDE,,UNITED STATES,25474.0,AFLAC INC,AFL,64.39,649.0
...,...,...,...,...,...,...,...,...,...
2062299,190540,DIREXION DAILY SP 500 EQ,,UNITED STATES,1268157.0,DREYFUS GOVERNMENT CASH MANA,DGCX,1.00,
2062702,190562,MLLMN 6MO BFFRD SP500 W,,UNITED STATES,1049.0,ISHARES 03 MONTH TREASURY BO,SGOV,100.05,20.0
2062752,190574,MLLMN 1YR BFRD SP500 NAS,,UNITED STATES,1000.0,ISHARES 03 MONTH TREASURY BO,SGOV,100.05,20.0
2064662,190721,MLLMN 6YR PRDOWN SP500 W,,UNITED STATES,11570.0,INVESCO BULLETSHARES 2029 CO,BSCT,19.45,7.0


In [32]:
# str.contains('index', case=False, na=False) 
# when no (case=False), the letter case is sensitive
IND = df_1[df_1['fundname'].str.endswith(' IND')].drop_duplicates(subset='fundname')
IND

Unnamed: 0,fundno,fundname,mgrcoab,country,shares,stkname,ticker,prc,shrout1
204844,164735,PRAXIS INTERNATIONAL IND,,UNITED STATES,33452.0,ABB LTD,ABB,32.34,1929.0
328258,5677,ISHARES RUSSELL 2000 IND,_EJZOE,UNITED STATES,944645.0,AAON INC,AAON,55.73,53.0
330270,5705,ISHARES DOW JONES US IND,_EJZOE,UNITED STATES,40560.0,ADT INC,ADT,7.59,851.0
360270,9753,FIDELITY GBL EX U.S. IND,_FTNOW,UNITED STATES,560908.0,AGNICO EAGLE MINES LTD,AEM/,61.2,456.0
363339,9821,VANGUARD GBL SML-CAP IND,VGUARD,UNITED STATES,18975.0,AAON INC,AAON,55.73,53.0
373139,12485,ISHARES MSCI AUSTRLA IND,_EJZOE,UNITED STATES,880000.0,BLACKROCK CASH FUNDS TREASUR,,1.0,
373201,12490,VANGUARD PACIFIC STK IND,VGUARD,UNITED STATES,17210.0,PROCREA HOLDINGS INC,7384,,29.0
377178,12495,ISHARES MSCI GERMANY IND,_EJZOE,UNITED STATES,890000.0,BLACKROCK CASH FUNDS TREASUR,,1.0,
388447,15469,FIDELITY MSCI ENERGY IND,,UNITED STATES,72133.0,ALTO INGREDIENTS INC,ALTO,6.82,74.0
429167,28948,METLIFE RUSSELL 2000 IND,,UNITED STATES,14309.0,AAON INC,AAON,55.73,53.0


In [33]:
ind_pattern = 'INDEX|1000|2000|3000|500' 
IND = df_1[
    df_1['fundname'].str.endswith('IND') | 
    df_1['fundname'].str.contains(ind_pattern)
].drop_duplicates(subset='fundname')

print(IND)

         fundno                  fundname mgrcoab        country      shares  \
150940   177427  CBOE VEST SP 500 DIV ARS     NaN  UNITED STATES     20320.0   
164421     7995  DWS EQUITY 500 INDEX POR  _FRFSP  UNITED STATES     29442.0   
174129    62943  MMA PRAXIS VALUE INDEX F  MMACAP  UNITED STATES     38140.0   
179491    81045  DIREXION MTHLY S&P500BEA  _EKGDK  UNITED STATES   2392275.0   
179492    81046  DIREXION MTHLY S&P500BUL  _EKGDK  UNITED STATES  11780000.0   
...         ...                       ...     ...            ...         ...   
2062299  190540  DIREXION DAILY SP 500 EQ     NaN  UNITED STATES   1268157.0   
2062702  190562   MLLMN 6MO BFFRD SP500 W     NaN  UNITED STATES      1049.0   
2062752  190574  MLLMN 1YR BFRD SP500 NAS     NaN  UNITED STATES      1000.0   
2064662  190721  MLLMN 6YR PRDOWN SP500 W     NaN  UNITED STATES     11570.0   
2066272  190996  MLLMN 6YR BFRD SP500 W P     NaN  UNITED STATES     11570.0   

                              stkname t