# Setup

### Import Dependencies

In [1]:
import eikon as ek
import pandas as pd
from datetime import datetime
import os

# ek.set_app_key('HIDDEN')

### Get S&P 500 RICS

In [21]:
# Request the RIC and name of every constituent of the .SPX index.
# The result is kept as returned; its first element is the table used below.
constituent_fields = ['TR.IndexConstituentRIC', 'TR.IndexConstituentName']
sp500 = ek.get_data('.SPX', constituent_fields)

In [27]:
# The first element of the get_data result is the constituent table;
# its 'Constituent RIC' column is the list of RICs used everywhere else.
constituents = sp500[0]
rics_sp500 = constituents['Constituent RIC']

In [41]:
# Save the constituent table to disk so it can be reloaded with pd.read_csv.
constituents_table = sp500[0]
constituents_table.to_csv('sp500_rics.csv')

# Retrieve Data

### Load S&P500 RICS

In [2]:
# Reload the saved constituent table and keep its RIC column as a Series
sp500 = pd.read_csv('sp500_rics.csv')
rics_sp500 = sp500.loc[:, 'Constituent RIC']
rics_sp500

0      POOL.OQ
1      CHRW.OQ
2        AJG.N
3        CNP.N
4       AMCR.N
        ...   
498     SCHW.N
499      DXC.N
500    SNPS.OQ
501        J.N
502      AVY.N
Name: Constituent RIC, Length: 503, dtype: object

In [4]:
# One symbology request per RIC; each result is appended in RIC order.
tickers_sp500 = []
for ric in rics_sp500:
    tickers_sp500.append(ek.get_symbology(ric, to_symbol_type='ticker'))
tickers_sp500

[        ticker
 POOL.OQ   POOL,
         ticker
 CHRW.OQ   CHRW,
       ticker
 AJG.N    AJG,
       ticker
 CNP.N    CNP,
        ticker
 AMCR.N   AMCR,
      ticker
 WM.N     WM,
      ticker
 BA.N     BA,
        ticker
 FOX.OQ    FOX,
       ticker
 LIN.N    LIN,
      ticker
 WY.N     WY,
       ticker
 MCD.N    MCD,
      ticker
 HD.N     HD,
       ticker
 SPG.N    SPG,
         ticker
 HBAN.OQ   HBAN,
         ticker
 QRVO.OQ   QRVO,
       ticker
 LVS.N    LVS,
        ticker
 CTRA.N   CTRA,
       ticker
 ON.OQ     ON,
       ticker
 AIG.N    AIG,
       ticker
 AIZ.N    AIZ,
        ticker
 CARR.N   CARR,
         ticker
 EVRG.OQ   EVRG,
         ticker
 COST.OQ   COST,
       ticker
 MCO.N    MCO,
       ticker
 DIS.N    DIS,
         ticker
 PAYX.OQ   PAYX,
        ticker
 AMD.OQ    AMD,
        ticker
 REG.OQ    REG,
       ticker
 DHI.N    DHI,
       ticker
 TRV.N    TRV,
       ticker
 BIO.N    BIO,
       ticker
 MOS.N    MOS,
       ticker
 BWA.N    BWA,
        tic

In [26]:
# Each lookup result is a one-row table indexed by RIC with a 'ticker' column.
# Take the single value by position with .iloc: indexing the RIC-labelled
# column with [0] relies on pandas' deprecated integer-key positional fallback.
tickers_sp500_v2 = [lookup['ticker'].iloc[0] for lookup in tickers_sp500]

In [28]:
# Wrap the ticker list in a one-column table and write it out as CSV.
tickers_table = pd.DataFrame(tickers_sp500_v2)
tickers_table.to_csv('sp500_tickers.csv')

In [29]:
# One symbology request per RIC, this time converting to CUSIP.
CUSIP_sp500 = []
for ric in rics_sp500:
    CUSIP_sp500.append(ek.get_symbology(ric, to_symbol_type='CUSIP'))

In [46]:
# Take the first cell (row 0, column 0) of every lookup result.
# .iloc[0, 0] is purely positional; the previous .iloc[0][0] indexed a
# column-labelled row with an integer, which relies on pandas' deprecated
# positional fallback.
CUSIP_sp500_v2 = [lookup.iloc[0, 0] for lookup in CUSIP_sp500]

In [48]:
# Wrap the CUSIP list in a one-column table and write it out as CSV.
cusip_table = pd.DataFrame(CUSIP_sp500_v2)
cusip_table.to_csv('sp500_CUSIP.csv')

In [49]:
# Display the extracted ticker list as the cell output.
tickers_sp500_v2

['POOL',
 'CHRW',
 'AJG',
 'CNP',
 'AMCR',
 'WM',
 'BA',
 'FOX',
 'LIN',
 'WY',
 'MCD',
 'HD',
 'SPG',
 'HBAN',
 'QRVO',
 'LVS',
 'CTRA',
 'ON',
 'AIG',
 'AIZ',
 'CARR',
 'EVRG',
 'COST',
 'MCO',
 'DIS',
 'PAYX',
 'AMD',
 'REG',
 'DHI',
 'TRV',
 'BIO',
 'MOS',
 'BWA',
 'WDC',
 'VTR',
 'IVZ',
 'ZBRA',
 'AMAT',
 'STX',
 'FDS',
 'SYK',
 'VRSN',
 'TECH',
 'NVR',
 'BBWI',
 'LOW',
 'BSX',
 'SYY',
 'FCX',
 'MAS',
 'BEN',
 'META',
 'EXPD',
 'PAYC',
 'EXPE',
 'RJF',
 'CL',
 'VICI',
 'CE',
 'UNH',
 'ARE',
 'GEN',
 'LLY',
 'CDAY',
 'GRMN',
 'PODD',
 'NWL',
 'ISRG',
 'AVB',
 'TPR',
 'CBRE',
 'CINF',
 'SEE',
 'WMB',
 'EA',
 'TMUS',
 'CB',
 'TXN',
 'MDLZ',
 'BRO',
 'BALL',
 'PFG',
 'KEYS',
 'SEDG',
 'PCG',
 'RL',
 'JPM',
 'TTWO',
 'CDW',
 'RMD',
 'LNT',
 'CMG',
 'PCAR',
 'GNRC',
 'CHTR',
 'PWR',
 'CCL',
 'PM',
 'COO',
 'SNA',
 'UAL',
 'ESS',
 'KHC',
 'JBHT',
 'CMI',
 'PNR',
 'CRL',
 'DPZ',
 'HPQ',
 'LW',
 'TGT',
 'NTAP',
 'OXY',
 'IR',
 'TER',
 'WAB',
 'NOW',
 'EXC',
 'BR',
 'EMN',
 'AZO',
 'TEL',
 

In [50]:
def save_strings_to_file(string_list, filename):
    """Write each string of ``string_list`` to ``filename``, one per line.

    The file is opened in write mode, so any existing content is replaced.
    A newline is appended after every string, including the last one.
    """
    with open(filename, 'w') as out:
        out.writelines(entry + '\n' for entry in string_list)

# Write the tickers to a plain text file, one per line.
save_strings_to_file(string_list=tickers_sp500_v2, filename='tickers.txt')

In [51]:
# Write the CUSIPs to a plain text file, one per line.
save_strings_to_file(string_list=CUSIP_sp500_v2, filename='CUSIP.txt')

### Target: MarketCap

In [45]:
fields = ['TR.CompanyMarketCap.date', 'TR.CompanyMarketCap']
# The request parameters are the same for every RIC, so build them once
# instead of on every loop iteration.
params = {'SDate': '2000-01-01', 'EDate': '2023-05-01', 'FRQ': 'M'}

# Prepare a list to hold one DataFrame per RIC
dfs = []

# Retrieve the data, one request per RIC
for ric in rics_sp500:
    data, err = ek.get_data([ric], fields, params)
    data['RIC'] = ric  # tag the rows with the RIC they were requested for
    dfs.append(data)

# Concatenate all the dataframes into a single table with a fresh index
marketcaps = pd.concat(dfs, ignore_index=True)

# marketcaps now holds the rows returned for every requested RIC;
# write it to disk and show it as the cell output
marketcaps.to_csv('marketcaps_Jan2000_May2023.csv')
marketcaps

Unnamed: 0,Instrument,Date,Company Market Cap,RIC
0,POOL.OQ,2000-01-31T00:00:00Z,286640145.927565,POOL.OQ
1,POOL.OQ,2000-02-29T00:00:00Z,279740295.325611,POOL.OQ
2,POOL.OQ,2000-03-31T00:00:00Z,345271397.253272,POOL.OQ
3,POOL.OQ,2000-04-28T00:00:00Z,415030312.208526,POOL.OQ
4,POOL.OQ,2000-05-31T00:00:00Z,381912443.492395,POOL.OQ
...,...,...,...,...
140835,AVY.N,2022-12-30T00:00:00Z,14655391534.0,AVY.N
140836,AVY.N,2023-01-31T00:00:00Z,15311477012.48,AVY.N
140837,AVY.N,2023-02-28T00:00:00Z,14777244155.25,AVY.N
140838,AVY.N,2023-03-31T00:00:00Z,14512828896.75,AVY.N


### Cashflow Statement

In [35]:
# operating CF
# Requests the operating cash-flow field for each RIC, one request per RIC,
# and appends every result to a CSV so an interrupted run can be resumed.

fields = ['TR.F.NETCASHFLOWOP.date', 'TR.F.NETCASHFLOWOP']
params = {'SDate': '2000-01-01', 'EDate': '2023-05-01', 'FRQ': 'M'}
filename = './cashflow/operatingCF_Jan2000_May2023.csv'

# Make sure the output folder exists; otherwise the very first write fails.
os.makedirs(os.path.dirname(filename), exist_ok=True)

# Check if the file already exists
if os.path.exists(filename):
    # The RIC on the last row is the last one written; resume right after it
    dataitems = pd.read_csv(filename)
    last_processed_ric = dataitems['RIC'].iloc[-1]
    start_index = rics_sp500.tolist().index(last_processed_ric) + 1
else:
    # If the file doesn't exist, start from the beginning
    start_index = 0

# Fetch the data, starting after the last processed RIC.
# enumerate() carries the position along, so no list search is needed per RIC.
for ric_index, ric in enumerate(rics_sp500[start_index:], start=start_index):
    try:
        data, err = ek.get_data([ric], fields, params)
        data['RIC'] = ric

        # If file does not exist, write with header
        if not os.path.isfile(filename):
            data.to_csv(filename, index=False)
        else:  # else it exists so append without writing the header
            data.to_csv(filename, mode='a', header=False, index=False)

    except Exception as e:
        # Stop at the first failure; re-running the cell resumes from the file
        print(f"Error for RIC {ric} (index: {ric_index}): {e}")
        break
else:
    # Runs only when the loop was not interrupted by `break`, including the
    # case where every RIC was already present in the file.
    print("Success")

Success


In [36]:
# investing CF
# Requests the investing cash-flow field for each RIC, one request per RIC,
# and appends every result to a CSV so an interrupted run can be resumed.

fields = ['TR.F.NETCASHFLOWINVST.date', 'TR.F.NETCASHFLOWINVST']
params = {'SDate': '2000-01-01', 'EDate': '2023-05-01', 'FRQ': 'M'}
filename = './cashflow/investingCF_Jan2000_May2023.csv'

# Make sure the output folder exists; otherwise the very first write fails.
os.makedirs(os.path.dirname(filename), exist_ok=True)

# Check if the file already exists
if os.path.exists(filename):
    # The RIC on the last row is the last one written; resume right after it
    dataitems = pd.read_csv(filename)
    last_processed_ric = dataitems['RIC'].iloc[-1]
    start_index = rics_sp500.tolist().index(last_processed_ric) + 1
else:
    # If the file doesn't exist, start from the beginning
    start_index = 0

# Fetch the data, starting after the last processed RIC.
# enumerate() carries the position along, so no list search is needed per RIC.
for ric_index, ric in enumerate(rics_sp500[start_index:], start=start_index):
    try:
        data, err = ek.get_data([ric], fields, params)
        data['RIC'] = ric

        # If file does not exist, write with header
        if not os.path.isfile(filename):
            data.to_csv(filename, index=False)
        else:  # else it exists so append without writing the header
            data.to_csv(filename, mode='a', header=False, index=False)

    except Exception as e:
        # Stop at the first failure; re-running the cell resumes from the file
        print(f"Error for RIC {ric} (index: {ric_index}): {e}")
        break
else:
    # Runs only when the loop was not interrupted by `break`, including the
    # case where every RIC was already present in the file.
    print("Success")

Success


In [37]:
# financing CF
# Requests the financing cash-flow field for each RIC, one request per RIC,
# and appends every result to a CSV so an interrupted run can be resumed.

fields = ['TR.F.NETCASHFLOWFIN.date', 'TR.F.NETCASHFLOWFIN']
params = {'SDate': '2000-01-01', 'EDate': '2023-05-01', 'FRQ': 'M'}
filename = './cashflow/financingCF_Jan2000_May2023.csv'

# Make sure the output folder exists; otherwise the very first write fails.
os.makedirs(os.path.dirname(filename), exist_ok=True)

# Check if the file already exists
if os.path.exists(filename):
    # The RIC on the last row is the last one written; resume right after it
    dataitems = pd.read_csv(filename)
    last_processed_ric = dataitems['RIC'].iloc[-1]
    start_index = rics_sp500.tolist().index(last_processed_ric) + 1
else:
    # If the file doesn't exist, start from the beginning
    start_index = 0

# Fetch the data, starting after the last processed RIC.
# enumerate() carries the position along, so no list search is needed per RIC.
for ric_index, ric in enumerate(rics_sp500[start_index:], start=start_index):
    try:
        data, err = ek.get_data([ric], fields, params)
        data['RIC'] = ric

        # If file does not exist, write with header
        if not os.path.isfile(filename):
            data.to_csv(filename, index=False)
        else:  # else it exists so append without writing the header
            data.to_csv(filename, mode='a', header=False, index=False)

    except Exception as e:
        # Stop at the first failure; re-running the cell resumes from the file
        print(f"Error for RIC {ric} (index: {ric_index}): {e}")
        break
else:
    # Runs only when the loop was not interrupted by `break`, including the
    # case where every RIC was already present in the file.
    print("Success")

Success


## Company Information

### TRBC industry classification

In [33]:
# Requests the four TRBC classification fields for each RIC, one request per
# RIC, and appends every result to a file so an interrupted run can be resumed.
fields = ['TR.TRBCEconomicSector', 'TR.TRBCBusinessSector',
         'TR.TRBCIndustryGroup', 'TR.TRBCIndustry']
# NOTE(review): unlike the other output paths in this notebook, this one has
# no .csv extension - confirm whether that is intended before renaming.
filename ='./comp_info/TRBC_industry_classification'

# Make sure the output folder exists; otherwise the very first write fails.
os.makedirs(os.path.dirname(filename), exist_ok=True)

# Check if the file already exists
if os.path.exists(filename):
    # The RIC on the last row is the last one written; resume right after it
    dataitems = pd.read_csv(filename)
    last_processed_ric = dataitems['RIC'].iloc[-1]
    start_index = rics_sp500.tolist().index(last_processed_ric) + 1
else:
    # If the file doesn't exist, start from the beginning
    start_index = 0

# Fetch the data, starting after the last processed RIC.
# enumerate() carries the position along, so no list search is needed per RIC.
for ric_index, ric in enumerate(rics_sp500[start_index:], start=start_index):
    try:
        data, err = ek.get_data([ric], fields)
        data['RIC'] = ric

        # If file does not exist, write with header
        if not os.path.isfile(filename):
            data.to_csv(filename, index=False)
        else:  # else it exists so append without writing the header
            data.to_csv(filename, mode='a', header=False, index=False)

    except Exception as e:
        # Stop at the first failure; re-running the cell resumes from the file
        print(f"Error for RIC {ric} (index: {ric_index}): {e}")
        break
else:
    # Runs only when the loop was not interrupted by `break`, including the
    # case where every RIC was already present in the file.
    print("Success")


Success


### Employees

In [34]:
# Number of employees over time
# Requests the employee-count fields for each RIC, one request per RIC,
# and appends every result to a CSV so an interrupted run can be resumed.

fields = ['TR.CompanyNumEmployDate', 'TR.CompanyNumEmploy']
params = {'SDate': '2000-01-01', 'EDate': '2023-05-01', 'FRQ': 'M'}
filename = './comp_info/CompanyNumEmploy_Jan2000_May2023.csv'

# Make sure the output folder exists; otherwise the very first write fails.
os.makedirs(os.path.dirname(filename), exist_ok=True)

# Check if the file already exists
if os.path.exists(filename):
    # The RIC on the last row is the last one written; resume right after it
    dataitems = pd.read_csv(filename)
    last_processed_ric = dataitems['RIC'].iloc[-1]
    start_index = rics_sp500.tolist().index(last_processed_ric) + 1
else:
    # If the file doesn't exist, start from the beginning
    start_index = 0

# Fetch the data, starting after the last processed RIC.
# enumerate() carries the position along, so no list search is needed per RIC.
for ric_index, ric in enumerate(rics_sp500[start_index:], start=start_index):
    try:
        data, err = ek.get_data([ric], fields, params)
        data['RIC'] = ric

        # If file does not exist, write with header
        if not os.path.isfile(filename):
            data.to_csv(filename, index=False)
        else:  # else it exists so append without writing the header
            data.to_csv(filename, mode='a', header=False, index=False)

    except Exception as e:
        # Stop at the first failure; re-running the cell resumes from the file
        print(f"Error for RIC {ric} (index: {ric_index}): {e}")
        break
else:
    # Runs only when the loop was not interrupted by `break`, including the
    # case where every RIC was already present in the file.
    print("Success")

Success


### Free Float Percentage

In [40]:
# Free float percentage over time
# Requests the free-float fields for each RIC, one request per RIC,
# and appends every result to a CSV so an interrupted run can be resumed.

fields = ['TR.FreeFloatPct.date', 'TR.FreeFloatPct']
params = {'SDate': '2000-01-01', 'EDate': '2023-05-01', 'FRQ': 'M'}
filename = './comp_info/FreeFloatPct_Jan2000_May2023.csv'

# Make sure the output folder exists; otherwise the very first write fails.
os.makedirs(os.path.dirname(filename), exist_ok=True)

# Check if the file already exists
if os.path.exists(filename):
    # The RIC on the last row is the last one written; resume right after it
    dataitems = pd.read_csv(filename)
    last_processed_ric = dataitems['RIC'].iloc[-1]
    start_index = rics_sp500.tolist().index(last_processed_ric) + 1
else:
    # If the file doesn't exist, start from the beginning
    start_index = 0

# Fetch the data, starting after the last processed RIC.
# enumerate() carries the position along, so no list search is needed per RIC.
for ric_index, ric in enumerate(rics_sp500[start_index:], start=start_index):
    try:
        data, err = ek.get_data([ric], fields, params)
        data['RIC'] = ric

        # If file does not exist, write with header
        if not os.path.isfile(filename):
            data.to_csv(filename, index=False)
        else:  # else it exists so append without writing the header
            data.to_csv(filename, mode='a', header=False, index=False)

    except Exception as e:
        # Stop at the first failure; re-running the cell resumes from the file
        print(f"Error for RIC {ric} (index: {ric_index}): {e}")
        break
else:
    # Runs only when the loop was not interrupted by `break`, including the
    # case where every RIC was already present in the file.
    print("Success")

Success


In [39]:
# ESG score
# Requests the ESG score fields for each RIC, one request per RIC,
# and appends every result to a CSV so an interrupted run can be resumed.

fields = ['TR.TRESGScore.date', 'TR.TRESGScore']
params = {'SDate': '2000-01-01', 'EDate': '2023-05-01', 'FRQ': 'M'}
filename = './comp_info/ESGScore_Jan2000_May2023.csv'

# Make sure the output folder exists; otherwise the very first write fails.
os.makedirs(os.path.dirname(filename), exist_ok=True)

# Check if the file already exists
if os.path.exists(filename):
    # The RIC on the last row is the last one written; resume right after it
    dataitems = pd.read_csv(filename)
    last_processed_ric = dataitems['RIC'].iloc[-1]
    start_index = rics_sp500.tolist().index(last_processed_ric) + 1
else:
    # If the file doesn't exist, start from the beginning
    start_index = 0

# Fetch the data, starting after the last processed RIC.
# enumerate() carries the position along, so no list search is needed per RIC.
for ric_index, ric in enumerate(rics_sp500[start_index:], start=start_index):
    try:
        data, err = ek.get_data([ric], fields, params)
        data['RIC'] = ric

        # If file does not exist, write with header
        if not os.path.isfile(filename):
            data.to_csv(filename, index=False)
        else:  # else it exists so append without writing the header
            data.to_csv(filename, mode='a', header=False, index=False)

    except Exception as e:
        # Stop at the first failure; re-running the cell resumes from the file
        print(f"Error for RIC {ric} (index: {ric_index}): {e}")
        break
else:
    # Runs only when the loop was not interrupted by `break`, including the
    # case where every RIC was already present in the file.
    print("Success")

Success


In [41]:
# Monthly time series for the USCPI=ECI instrument over the same date window
# used for the company-level requests.
series_rics = ['USCPI=ECI']
df = ek.get_timeseries(series_rics,
                       start_date='2000-01-01',
                       end_date='2023-05-01',
                       interval='monthly')
df

USCPI=ECI,VALUE
Date,Unnamed: 1_level_1
2000-01-31,0.3
2000-02-29,0.4
2000-03-31,0.6
2000-04-30,-0.1
2000-05-31,0.2
...,...
2022-12-31,0.1
2023-01-31,0.5
2023-02-28,0.4
2023-03-31,0.1


In [42]:
# Show the row at position 5 (the sixth row) of the time series.
df.iloc[5]

USCPI=ECI
VALUE    0.6
Name: 2000-06-30 00:00:00, dtype: Float64