# Overview
This file reads in a list of tickers, resolves the correct Eikon name for each ticker, reads in quarterly fundamental data for those tickers, and does some initial cleanup of the data. All data is then stored in CSV files.

In [3]:
import eikon as ek
import pandas as pd
import numpy as np
import datetime as dt
import bs4 as bs
import requests
import urllib2

ek.set_app_id('DeNovoQuantFund')

## Stock List
Different functions for pulling stock tickers from different sources.

In [None]:
# List of S&P 500 tickers

def save_sp500_tickers():
    """Scrape the S&P 500 ticker symbols from Wikipedia.

    Downloads the "List of S&P 500 companies" page, finds the first table
    whose class attribute is 'wikitable sortable', and reads the first cell
    of every row after the first (header) row.

    Returns:
        list: Ticker symbols with surrounding whitespace removed.

    Raises:
        requests.HTTPError: If the page request returns an error status.
        ValueError: If the expected table is not present in the page.
    """
    resp = requests.get('https://en.wikipedia.org/wiki/List_of_S%26P_500_companies')
    # Fail loudly on an error response instead of parsing an error page.
    resp.raise_for_status()

    soup = bs.BeautifulSoup(resp.text, 'lxml')
    table = soup.find('table', {'class':'wikitable sortable'})
    if table is None:
        raise ValueError('S&P 500 constituents table not found on page')

    tickers = []
    for row in table.findAll('tr')[1:]:
        cells = row.findAll('td')
        # A row without any <td> cell has no ticker to read.
        if not cells:
            continue
        # Cell text may carry whitespace from the markup (e.g. a trailing
        # newline), which would otherwise end up inside the ticker symbol.
        tickers.append(cells[0].text.strip())

    return tickers

ticker_list = save_sp500_tickers()

In [None]:
# Alternative ticker source: load the NYSE ticker-symbol CSV into a DataFrame.
# NOTE: this leaves ticker_list as a DataFrame, not a list; the
# "Only run for certain files" cell below extracts its 'Symbol' column.
ticker_list = pd.read_csv('/Users/paindox/Documents/Udemy Apps/DeNovo Quant Fund Stuff/Eikon_Datasets/NYSE_Ticker_Symbols.csv')

In [9]:
# Get tickers from NASDAQ text file.
# Each line is pipe-delimited and the ticker symbol is its first field.
# The `with` block guarantees the file is closed even if reading fails.
with open("/Users/paindox/Documents/Udemy Apps/DeNovo Quant Fund Stuff/Eikon_Datasets/nasdaqlisted.txt","r") as NASDAQ_tickers:
    lines = NASDAQ_tickers.readlines()

# Skip the first line and the last line (presumably a header row and a
# trailer row -- verify against the file) and keep the first field of the rest.
ticker_list = [line.split("|")[0] for line in lines[1:-1]]

In [None]:
# Only run for certain files.
# Applies when ticker_list is a table with a 'Symbol' column (e.g. the
# DataFrame loaded from the NYSE CSV): reduce it to a plain list of strings.
# Built directly with str() rather than through a NumPy byte-string array,
# which relied on the legacy np.string0 alias.
ticker_list = [str(tic) for tic in ticker_list['Symbol']]

3483

## Getting relevant fields

List of fields to add:
Fundamental Data
Numerical Data
[
ek.TR_Field('TR.DilutedEpsInclExtra'),
ek.TR_Field('TR.RevenuePerShare'),
ek.TR_Field('TR.BookValuePerShare'),
ek.TR_Field('TR.DPSMean'),
ek.TR_Field('TR.EBITMarginPercent'),
ek.TR_Field('TR.TotalLongTermDebt'),
ek.TR_Field('TR.EBIT'),
ek.TR_Field('TR.PriceClose'),
ek.TR_Field('TR.PriceClose.date'),
ek.TR_Field('TR.PE.date'),
ek.TR_Field('TR.DPSActValue'),
ek.TR_Field('TR.DilutedEpsExclExtra'),
ek.TR_Field('TR.TtlDebtToTtlEquityPct'), 
ek.TR_Field('TR.EBITMarginPercent'),
ek.TR_Field('TR.ROATotalAssetsPercent'),
ek.TR_Field('TR.TotalDebtToEV'),
ek.TR_Field('TR.TotalDebtToEBITDA'),
ek.TR_Field('TR.OperatingIncome'),
ek.TR_Field('TR.Revenue'),
ek.TR_Field('TR.NormIncAvailToCommon'),
ek.TR_Field('TR.GrossDividendsCmnStock'),
ek.TR_Field('TR.TotalCurrentAssets'),
ek.TR_Field('TR.TotalCurrLiabilities'),
ek.TR_Field('TR.TotalAssetsReported'),
ek.TR_Field('TR.TotalLiabilities'),
ek.TR_Field('TR.TotalLongTermDebt'),
ek.TR_Field('TR.TotalEquity'),
ek.TR_Field('TR.EBIT'),
ek.TR_Field('TR.NetIncomeBeforeTaxes'),
ek.TR_Field('TR.OperatingIncome'),
ek.TR_Field('TR.OperatingExpense'),
ek.TR_Field('TR.CostOfRevenueTotal'),
ek.TR_Field('TR.GrossMargin'),
ek.TR_Field('TR.OperatingMarginPercent'),
ek.TR_Field('TR.TotalDebtOutstanding'),
ek.TR_Field('TR.TotalInventory'),
ek.TR_Field('TR.CashAndSTInvestments'),
ek.TR_Field('TR.TotalReceivablesNet'),
ek.TR_Field('TR.TRBCEconomicSector'),
ek.TR_Field('TR.PropertyPlantEquipmentTotalNet'),
ek.TR_Field('TR.GoodwillNet'),
ek.TR_Field('TR.TangibleBVPS'),
ek.TR_Field('TR.PriceTargetMean'),
ek.TR_Field('TR.NumberOfAnalysts'),
ek.TR_Field('TR.QuickRatio'),
ek.TR_Field('TR.CurrentRatio')
]

Categorical Data
[ek.TR_Field('TR.TRBCEconomicSector'),
ek.TR_Field('TR.CommonName'),
ek.TR_Field('TR.HeadquartersCountry'),
ek.TR_Field('TR.ExchangeName')
]

Technical Daily Data
[
ek.TR_Field('TR.EVToSales'),
ek.TR_Field('TR.PE'),
ek.TR_Field('TR.PriceToSalesPerShare'),
ek.TR_Field('TR.PriceToBVPerShare'),
ek.TR_Field('TR.PriceToCFPerShare'),
ek.TR_Field('TR.TotalDebtToEBITDA'),
ek.TR_Field('TR.TotalDebtToEV'),
ek.TR_Field('TR.PricePctChg1D'),
ek.TR_Field('TR.PricePctChg2D'),
ek.TR_Field('TR.PricePctChg5D'),
ek.TR_Field('TR.PricePctChg4W'),
ek.TR_Field('TR.PricePctChg4M'),
ek.TR_Field('TR.PricePctChg8M'),
ek.TR_Field('TR.PricePctChg11M'),
ek.TR_Field('TR.Volume'),
ek.TR_Field('TR.AvgDailyVolume120D'),
ek.TR_Field('TR.AvgDailyVolume250D'),
ek.TR_Field('TR.PriceAvg100D'),
ek.TR_Field('TR.PriceAvg250D'),

]


# **OLD** List of pertinent numerical fields
numerical_fields = [
ek.TR_Field('TR.DilutedEpsInclExtra'),
ek.TR_Field('TR.RevenuePerShare'),
ek.TR_Field('TR.BookValuePerShare'),
#ek.TR_Field('TR.BookValuePerShare.date'),
ek.TR_Field('TR.DPSMean'),
ek.TR_Field('TR.EBITMarginPercent'),
ek.TR_Field('TR.TotalLongTermDebt'),
ek.TR_Field('TR.EBIT'),
ek.TR_Field('TR.PriceClose'),
#ek.TR_Field('TR.PriceClose.date'),
ek.TR_Field('TR.DPSActValue'),
ek.TR_Field('TR.DilutedEpsExclExtra'),
ek.TR_Field('TR.TtlDebtToTtlEquityPct'), 
ek.TR_Field('TR.EBITMarginPercent'),
ek.TR_Field('TR.ROATotalAssetsPercent'),
ek.TR_Field('TR.OperatingIncome'),
ek.TR_Field('TR.TotalRevenue'),
ek.TR_Field('TR.NormIncAvailToCommon'),
ek.TR_Field('TR.GrossDividendsCmnStock'),
ek.TR_Field('TR.TotalCurrentAssets'),
ek.TR_Field('TR.TotalCurrLiabilities'),
ek.TR_Field('TR.TotalAssetsReported'),
ek.TR_Field('TR.TotalLiabilities'),
ek.TR_Field('TR.TotalLongTermDebt'),
ek.TR_Field('TR.TotalEquity'),
ek.TR_Field('TR.EBIT'),
ek.TR_Field('TR.NetIncomeBeforeTaxes'),
ek.TR_Field('TR.OperatingIncome'),
ek.TR_Field('TR.OperatingExpenses'),
ek.TR_Field('TR.CostOfRevenueTotal'),
ek.TR_Field('TR.GrossMargin'),
ek.TR_Field('TR.OperatingMarginPercent'),
ek.TR_Field('TR.TotalDebtOutstanding'),
ek.TR_Field('TR.TotalInventory'),
ek.TR_Field('TR.CashAndSTInvestments'),
ek.TR_Field('TR.TotalReceivablesNet'),
ek.TR_Field('TR.TRBCEconomicSector'),
ek.TR_Field('TR.PropertyPlantEquipmentTotalNet'),
ek.TR_Field('TR.GoodwillNet'),
ek.TR_Field('TR.TangibleBVPS'),
ek.TR_Field('TR.PriceTargetMean'),
ek.TR_Field('TR.NumberOfAnalysts'),
ek.TR_Field('TR.QuickRatio'),
ek.TR_Field('TR.CurrentRatio')  
]

numerical_fields.sort()

In [11]:
# List of pertinent numeric fields.
# These are the fields passed to get_fundamental_data() by the collection
# loop further down, where they are requested with 'Period': 'FQ0' and
# 'Frq': 'FQ'.
numerical_fields = [
ek.TR_Field('TR.DilutedEpsInclExtra'),
ek.TR_Field('TR.RevenuePerShare'),
ek.TR_Field('TR.RevenuePerShare.date'),
ek.TR_Field('TR.BookValuePerShare'),
ek.TR_Field('TR.EBITMarginPercent'),
ek.TR_Field('TR.EBIT'),
ek.TR_Field('TR.DilutedEpsExclExtra'),
ek.TR_Field('TR.TtlDebtToTtlEquityPct'), 
ek.TR_Field('TR.ROATotalAssetsPercent'),
ek.TR_Field('TR.GrossMargin'),
ek.TR_Field('TR.TotalRevenue'),
ek.TR_Field('TR.NormIncAvailToCommon'),
ek.TR_Field('TR.GrossDividendsCmnStock'),
ek.TR_Field('TR.TotalCurrentAssets'),
ek.TR_Field('TR.TotalCurrLiabilities'),
ek.TR_Field('TR.TotalAssetsReported'),
ek.TR_Field('TR.TotalLiabilities'),
ek.TR_Field('TR.TotalLongTermDebt'),
ek.TR_Field('TR.TotalEquity'),
ek.TR_Field('TR.NetIncomeBeforeTaxes'),
ek.TR_Field('TR.OperatingIncome'),
ek.TR_Field('TR.OperatingExpenses'),
ek.TR_Field('TR.CostOfRevenueTotal'),
ek.TR_Field('TR.OperatingMarginPercent'),
ek.TR_Field('TR.TotalDebtOutstanding'),
ek.TR_Field('TR.TotalInventory'),
ek.TR_Field('TR.CashAndSTInvestments'),
ek.TR_Field('TR.TotalReceivablesNet'),
ek.TR_Field('TR.PropertyPlantEquipmentTotalNet'),
ek.TR_Field('TR.GoodwillNet'),
ek.TR_Field('TR.TangibleBVPS'),
ek.TR_Field('TR.QuickRatio'),
ek.TR_Field('TR.CurrentRatio'),
ek.TR_Field('TR.TtlCmnSharesOut')
]

# NOTE(review): sorts the TR_Field objects in place; the resulting order
# depends on how the objects returned by ek.TR_Field compare -- confirm the
# sort is needed and that these objects are orderable on the Python in use.
numerical_fields.sort()

In [12]:
# Other numeric fields that have a different date key, and so are pulled separately.
# Leave this out for now: the main collection loop only requests
# numerical_fields, so this list is used by the scratch cells only.
numerical_fields2 = [
ek.TR_Field('TR.DPSActValue'),
ek.TR_Field('TR.PriceTargetMean'),
ek.TR_Field('TR.PriceTargetMean.date'),
ek.TR_Field('TR.NumberOfAnalysts'),
]

# NOTE(review): in-place sort of TR_Field objects -- confirm they are orderable.
numerical_fields2.sort()

In [13]:
# List of pertinent categorical fields.
# get_fundamental_data() requests these in a second call without date
# parameters and left-joins the result onto the numeric data by 'Instrument'.
categorical_fields = [ek.TR_Field('TR.TRBCEconomicSector'),
ek.TR_Field('TR.CommonName'),
ek.TR_Field('TR.HeadquartersCountry'),
ek.TR_Field('TR.ExchangeName')
]

# NOTE(review): in-place sort of TR_Field objects -- confirm they are orderable.
categorical_fields.sort()

## Getting Eikon Tickers
Tickers read from files don't have the proper Eikon extensions. This function tries to find the appropriate Eikon extension for each ticker.

In [14]:
# Getting correct ticker name
def get_eikon_ticker_name(ticker,f = None, end_date = '2019-01-01'):
    """Find the Eikon instrument name for a bare ticker symbol.

    Probes Eikon with the ticker plus each known extension ('.O', '.OQ',
    '.TO', '.N', in that order) and finally with the bare ticker, and
    returns the first name for which ek.get_data reports no error.

    Args:
        ticker: Ticker symbol; converted with str() before use.
        f: Fields to request in the probe call. Defaults to a single
            'TR.RevenuePerShare' field when omitted.
        end_date: 'EDate' value used for the extension probes.

    Returns:
        The first instrument name accepted by Eikon, or None if every probe
        returned an error (a "not found" message is printed in that case).
    """
    # Build the default per call rather than sharing one list object created
    # at definition time.
    if f is None:
        f = [ek.TR_Field('TR.RevenuePerShare')]

    base = str(ticker)

    for suffix in ('.O', '.OQ', '.TO', '.N'):
        candidate = base + suffix
        _, err = ek.get_data([candidate], fields=f,
                             parameters={'SDate': '2017-01-01',
                                         'EDate': end_date,
                                         'Frq': 'D'})
        if err is None:
            return candidate

    # Last resort: the ticker exactly as given.
    # NOTE(review): this probe uses a fixed 'EDate' of '2018-01-01' and no
    # 'Frq', unlike the probes above -- kept as-is; confirm it is intentional.
    _, err = ek.get_data([base], fields=f,
                         parameters={'SDate': '2017-01-01',
                                     'EDate': '2018-01-01'})
    if err is None:
        return base

    print('Ticker {} not found'.format(base))
    return None

## Grab Fundamental Data

In [15]:
# Getting Data for Analysis

def get_fundamental_data(ticker, f,f1, start_date1, end_date1):
    """Fetch numeric and categorical Eikon data for one instrument.

    Args:
        ticker: Eikon instrument name to query.
        f: Fields for the dated query, requested between start_date1 and
            end_date1 with 'Period': 'FQ0' and 'Frq': 'FQ'.
        f1: Fields for a second query issued without any parameters.
        start_date1: 'SDate' value for the dated query.
        end_date1: 'EDate' value for the dated query.

    Returns:
        The dated query's DataFrame left-joined with the second query's
        result on 'Instrument', or the integer 0 if anything fails. Callers
        tell the two apart with isinstance(..., pd.DataFrame).
    """
    try:
        # Grab numerical data for each quarter.
        df_fundamentals,err = ek.get_data([ticker], fields= f,
                                          parameters={'SDate': start_date1,
                                                      'EDate': end_date1,
                                                      'Period':'FQ0',
                                                      'Frq': 'FQ' })
        # Grab categorical data; requested once with no date parameters and
        # attached to every row of the numeric data by the merge below.
        # NOTE: the err value returned by either call is not inspected.
        sector, err = ek.get_data([ticker], f1)

        df_fundamentals = df_fundamentals.merge(sector,how='left', on='Instrument')

        return(df_fundamentals)

    # Catch Exception rather than everything, so that KeyboardInterrupt and
    # SystemExit still stop a long-running collection loop.
    except Exception:

        print('Error for {}'.format(ticker))
        return(0)

In [16]:
# Query window for the Eikon requests: from 1990-01-01 up to the moment this
# cell runs. The *1 variants are the same instants rendered as 'YYYY-MM-DD'
# strings, which is the form passed as 'SDate' / 'EDate'.

start_date, end_date = dt.datetime(1990, 1, 1), dt.datetime.today()
start_date1, end_date1 = (
    '{:%Y-%m-%d}'.format(moment) for moment in (start_date, end_date)
)

## Creating fundamentals dataset

In [17]:
def write_fund_data(final_file, file_name):
    """Sort the fundamentals, flag and fill dividends, and write a CSV.

    The rows are ordered by 'Instrument' then 'Date'. A 'Has Div' column is
    added (1 where 'Gross Dividends - Common Stock' had a value, 0 where it
    was missing) and the missing dividend values are then replaced by 0.

    Args:
        final_file: DataFrame containing at least the 'Instrument', 'Date'
            and 'Gross Dividends - Common Stock' columns. It is not modified.
        file_name: Path of the CSV file to write (written without the index).
    """
    dividend_col = 'Gross Dividends - Common Stock'

    # sort_values returns a new frame, so the columns added/changed below do
    # not leak back into the caller's DataFrame.
    df_fundamentals = final_file.sort_values(by=['Instrument', 'Date'])

    # Handling missing dividend data: record which rows had a value *before*
    # the gaps are filled, otherwise that information is lost.
    df_fundamentals['Has Div'] = df_fundamentals[dividend_col].notnull().astype(int)

    # Fill NAs with zero for dividends. fillna only touches missing values
    # (it leaves infinities alone and also works on object-dtype columns).
    df_fundamentals[dividend_col] = df_fundamentals[dividend_col].fillna(0)

    # Write data to csv.
    df_fundamentals.to_csv(file_name, index=False)
    print("File Printed")

In [18]:
# ticker_list = ['AAPL', 'TD', 'FB', 'GOOGL']

In [None]:
# Pull fundamentals for every ticker in ticker_list and write the collected
# rows to a numbered CSV roughly every 500 tickers, plus one last CSV for
# whatever remains when the loop finishes.
final_file = None
cur_file = None
write_file = False
file_full_path_name = '/Users/paindox/Documents/Udemy Apps/DeNovo Quant Fund Stuff/Eikon_Datasets/NASDAQ_2019_Fund_Data/NASDAQ_fund_data_{}.csv'
# Frames fetched since the last CSV was written. They are concatenated once
# per write instead of growing a DataFrame one append at a time.
pending_frames = []
for i,tick in enumerate(ticker_list):

    # Write data to csv every 500 iterations.
    # The flag stays set until a write actually happens, so if the API call
    # fails on the 500th iteration the batch is written on the next
    # successful iteration instead.
    if i > 0 and i % 500 == 0:
        write_file = True

    # Reset per ticker: without this, a ticker that is not found would leave
    # the previous ticker's frame in cur_file and its rows would be added to
    # the batch a second time.
    cur_file = None
    try:
        tick = get_eikon_ticker_name(tick, end_date = end_date1)
        if tick:
            cur_file = get_fundamental_data(tick, numerical_fields, categorical_fields,
                                            start_date1, end_date1)

        # Write to csv every 500 tickers, if anything has been collected.
        # The current ticker's frame then starts the next batch.
        if write_file and pending_frames:
            write_file = False
            final_file = pd.concat(pending_frames, ignore_index=True)
            file_name = file_full_path_name.format(i)
            write_fund_data(final_file, file_name)
            pending_frames = []

        # get_fundamental_data returns 0 on failure, so keep real frames only.
        if isinstance(cur_file, pd.DataFrame):
            pending_frames.append(cur_file)

    except urllib2.HTTPError as err:
        print('HTTPError for tick {}'.format(tick))
        print('Error message: {}'.format(err))
    # Exception (not a bare except) so that Ctrl-C can still stop the loop.
    except Exception:
        print('Some error occurred for tick {}.'.format(tick))


    print(i)

# Write whatever was collected after the last periodic write.
if pending_frames:
    final_file = pd.concat(pending_frames, ignore_index=True)
    file_name = file_full_path_name.format(len(ticker_list))
    write_fund_data(final_file, file_name)
print('DONE!')

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
Ticker ACT not found
40
41
42
43
44
Ticker ACWI not found
45
Ticker ACWX not found
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
Ticker AGND not found
97
98
99
100
Ticker AGZD not found
101
102
Ticker AIA not found
103
104
105
106
107
Ticker AIQ not found
108
109
Ticker AIRR not found
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
Ticker ALTY not found
154
155
156
157
158
159
160
161
162
Ticker AMCA not found
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
2

In [None]:
# Scratch check: pull the separately-keyed fields (numerical_fields2) plus the
# categorical fields for a single instrument over the full date window.
df = get_fundamental_data('FB.O',numerical_fields2,categorical_fields,start_date1,end_date1)

In [None]:
df

In [None]:
# Scratch check: issue the same dated query that get_fundamental_data() makes
# (same fields and parameters), directly for AAPL.O and without the
# categorical merge, keeping the returned err for inspection.
df_fundamentals,err = ek.get_data(['AAPL.O'], fields= numerical_fields,parameters={'SDate': start_date1, 'EDate': end_date1,
                                                               'Period':'FQ0',
                                                               'Frq': 'FQ' })

In [None]:
sorted(df_fundamentals["Date"])

In [None]:
df_fundamentals.columns.sort_values()

In [None]:
# Scratch field list: closing price together with its date field.
numerical_fields1= [
ek.TR_Field('TR.PriceClose'),
ek.TR_Field('TR.PriceClose.date')    
]

In [None]:
# Scratch check: request the price fields (numerical_fields1) for FB.O with
# the same quarterly parameters used for the fundamentals.
df_fundamentals1,err = ek.get_data(['FB.O'], fields= numerical_fields1,parameters={'SDate': start_date1, 'EDate': end_date1,
                                                                                 'Period':'FQ0', 'Frq': 'FQ' })

In [None]:
df_fundamentals

In [None]:
# Scratch cell: rebinds numerical_fields2 (also defined earlier in the file)
# to a hand-picked subset of fields -- entries are toggled by commenting them
# in or out -- and requests that subset for FB.O with the quarterly parameters.
numerical_fields2 = [
# Group 1
ek.TR_Field('TR.DilutedEpsInclExtra'),
ek.TR_Field('TR.RevenuePerShare'),
ek.TR_Field('TR.RevenuePerShare.date'),
ek.TR_Field('TR.BookValuePerShare'),
ek.TR_Field('TR.EBITMarginPercent'),
# ek.TR_Field('TR.TotalLongTermDebt'),
# ek.TR_Field('TR.EBIT'),
# ek.TR_Field('TR.DilutedEpsExclExtra'),
# ek.TR_Field('TR.TtlDebtToTtlEquityPct'), 
# ek.TR_Field('TR.EBITMarginPercent'),
# ek.TR_Field('TR.ROATotalAssetsPercent'),
# ek.TR_Field('TR.OperatingIncome'),
# ek.TR_Field('TR.GrossMargin'),
# ek.TR_Field('TR.TotalRevenue'),
# ek.TR_Field('TR.NormIncAvailToCommon'),
# ek.TR_Field('TR.GrossDividendsCmnStock'),
# ek.TR_Field('TR.TotalCurrentAssets'),
# ek.TR_Field('TR.TotalCurrLiabilities'),
# ek.TR_Field('TR.TotalAssetsReported'),
# ek.TR_Field('TR.TotalLiabilities'),
# ek.TR_Field('TR.TotalLongTermDebt'),
# ek.TR_Field('TR.TotalEquity'),
# ek.TR_Field('TR.EBIT'),
# ek.TR_Field('TR.NetIncomeBeforeTaxes'),
# ek.TR_Field('TR.OperatingIncome'),
# ek.TR_Field('TR.OperatingExpenses'),
# ek.TR_Field('TR.CostOfRevenueTotal'),
# ek.TR_Field('TR.OperatingMarginPercent'),
# ek.TR_Field('TR.TotalDebtOutstanding'),
ek.TR_Field('TR.TotalInventory'),
ek.TR_Field('TR.CashAndSTInvestments'),
# ek.TR_Field('TR.TotalReceivablesNet'),
# ek.TR_Field('TR.PropertyPlantEquipmentTotalNet'),
# ek.TR_Field('TR.GoodwillNet'),
# ek.TR_Field('TR.TangibleBVPS'),
# ek.TR_Field('TR.QuickRatio'),
# ek.TR_Field('TR.CurrentRatio'),
# ek.TR_Field('TR.TtlCmnSharesOut')
# End Group 1

# GROUP 2
#     ek.TR_Field('TR.DPSActValue'),
# ek.TR_Field('TR.DPSActValue.date'),
# ek.TR_Field('TR.PriceTargetMean'),
#     ek.TR_Field('TR.PriceTargetMean.date'),
# ek.TR_Field('TR.NumberOfAnalysts'),
#     ek.TR_Field('TR.NumberOfAnalysts.date')
]
df_fundamentals2,err = ek.get_data(['FB.O'], fields= numerical_fields2,parameters={'SDate': start_date1, 'EDate': end_date1,
                                                                                 'Period':'FQ0', 'Frq': 'FQ' })

In [None]:
df_fundamentals2

In [None]:
# Show a few columns side by side.
# NOTE(review): "EBIT" and "Total Revenue" presumably come from the TR.EBIT /
# TR.TotalRevenue fields, which are commented out in the numerical_fields2
# cell above -- confirm those columns exist before running this selection.
df_fundamentals2.loc[:,["Date", "Book Value Per Share", "EBIT", "Total Revenue"]]

## Cleaning up the dataset

In [None]:
# Already done above now.

### Handling all remaining NA data ###
# Leaving this for later cleanup.
# df_fundamentals.dropna(inplace= True)