In [3]:
#!pip install quandl
%reset -f
import os
import sys


# Absolute path of the project root, taken to be the parent of the current working directory.
PROJ_PATH = os.path.abspath(os.path.join(os.getcwd(), ".."))

# Put the project root on the import path so project-local modules can be imported
# (presumably `helper_functions`, imported further down — confirm its location).
# sys.path is interpreter state and is not cleared by `%reset -f`, so guard the append;
# otherwise every re-run of this cell adds another duplicate entry.
if PROJ_PATH not in sys.path:
    sys.path.append(PROJ_PATH)

import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn import metrics
from sklearn.metrics import mean_squared_error
import itertools
import random

import datetime as DT
from datetime import date
from datetime import datetime, timedelta
#import yfinance as yf

import time
from multiprocessing import Pool as ProcessPool 
from multiprocessing.dummy import Pool as ThreadPool

import requests
from io import StringIO
import helper_functions as hfs


from requests.adapters import HTTPAdapter
from requests.packages.urllib3.util.retry import Retry 

import warnings
warnings.simplefilter("ignore", UserWarning)
warnings.filterwarnings("ignore")

pd.set_option("display.max_columns", None)  # to see all columns of dataframe
pd.set_option("display.max_rows", None)  # to see all rows of dataframe
pd.set_option("display.max_colwidth", None)  # to see the full (un-truncated) dataframe field/cell value 

In [4]:
import configparser

# Load environment-specific settings (split date, API key, local folders) from the project's INI file.
env_config_path = f'{PROJ_PATH}/kye/env_config.ini'
env_config = configparser.ConfigParser()
# ConfigParser.read() silently skips files it cannot open and returns the list of files it
# actually parsed, so fail loudly here instead of with an opaque KeyError on the lookups below.
if not env_config.read(env_config_path):
    raise FileNotFoundError(f"Could not read config file: {env_config_path}")

SPLIT_DATE = env_config['ENV_VARS']['SPLIT_DATE']
config = {}
config['api_key'] = env_config['KEYS']['DATA_KEY']
ML_DATA_FOLDER_PATH = env_config['LOCAL_PATHS']['ML_DATA_FOLDER_PATH']
PROCESSING_TEMP_FOLDER_PATH = env_config['LOCAL_PATHS']['PROCESSING_TEMP_FOLDER_PATH']

print("SPLIT_DATE: ", SPLIT_DATE)

# Read the clock once so `today`, `to_date` and FROM_DATE always describe the same day,
# even if the cell happens to run across midnight.
_today = date.today()
today = str(_today)
to_date = str(_today)
# Start of the look-back window: four 365-day years plus 100 extra days before today.
# (No `format=` needed: the input is already a date object, not a string.)
FROM_DATE = pd.to_datetime(_today) - DT.timedelta(days=((365*4) + 100))
print(FROM_DATE)

SPLIT_DATE:  2023-08-09
2019-05-02 00:00:00


# Fundamentals

In [5]:
def get_stock_fundamentals(symbol="AAPL", api_token = config['api_key']):
    """Fetch the current fundamentals of one ticker from the eodhistoricaldata.com API.

    The request goes to ``/api/fundamentals/<symbol>.US``. Selected top-level
    sections of the JSON response are flattened with ``pd.json_normalize`` and
    placed side by side, each column prefixed with ``"<Section>__"``.

    Parameters
    ----------
    symbol : str
        Ticker symbol; ``".US"`` is appended when the URL is built.
    api_token : str
        API token sent as the ``api_token`` query parameter. Defaults to the
        value of ``config['api_key']`` at the time this function is defined.

    Returns
    -------
    pandas.DataFrame or None
        The flattened sections plus a ``Ticker`` column. An empty DataFrame is
        returned when none of the expected sections is present or they cannot
        be combined. ``None`` is returned when the request ultimately fails:
        either HTTP 429 persisted through 5 retries, or any other non-OK
        status was received.
    """
    # Honour the api_token argument (the global config was previously used regardless).
    url = "https://eodhistoricaldata.com/api/fundamentals/" + symbol + ".US?api_token=" + api_token

    max_rate_limit_retries = 5

    r = hfs.requests_retry_session().get(url)

    rate_limit_retries = 0
    while r.status_code != requests.codes.ok:

        if r.status_code != 429:
            # Any other error status will not change by itself; give up right away.
            # (A `continue` here used to spin forever without issuing a new request.)
            print(symbol, " breaking : " + str(r.status_code) + " <> status code")
            return None

        if rate_limit_retries == max_rate_limit_retries:
            print("c == 5 breaking", symbol)
            return None

        # HTTP 429 (too many requests): back off for a random 4-8 seconds, then retry.
        time.sleep(random.randint(4, 8))
        r = hfs.requests_retry_session().get(url)
        rate_limit_retries += 1

    fundamentals_dict = r.json()

    key_fundamentals = ['General', 'Highlights', 'Valuation', 'SharesStats',
                        'Technicals', 'AnalystRatings', 'ESGScores']

    # One flattened frame per section that is actually present in the response.
    section_frames = [
        pd.json_normalize(fundamentals_dict[section]).add_prefix(section + "__")
        for section in key_fundamentals
        if section in fundamentals_dict
    ]

    # pd.concat raises on an empty list; an empty frame is the "nothing usable" result.
    if not section_frames:
        return pd.DataFrame()

    try:
        stock_key_current_fundamentals = pd.concat(section_frames, axis=1)
    except Exception as exc:
        # Stay best-effort for a single ticker, but report the problem instead of hiding it.
        print(symbol, " could not combine fundamentals sections: " + repr(exc))
        return pd.DataFrame()

    stock_key_current_fundamentals['Ticker'] = symbol
    return stock_key_current_fundamentals

### Getting Fundamental Data

In [6]:
# Read the pickled all-stocks OHLCV table from the ML data folder, then show its
# dimensions and a two-row preview.
ohlcv_pickle_path = f'{PROJ_PATH}/{ML_DATA_FOLDER_PATH}all_stocks_ohlcv.pkl'
sample_stocks_filtered = pd.read_pickle(ohlcv_pickle_path)
print(sample_stocks_filtered.shape)
sample_stocks_filtered.head(2)

(755570, 10)


Unnamed: 0,company_Name,Ticker,Date,Adjusted_Open,Adjusted_High,Adjusted_Low,Adjusted_close,Adjusted_Volume,Exchange,close_to_adj_close_ratio
0,Alcoa Corp,AA,2021-05-03,36.548,37.408,35.437,36.971,4738228.467,NYSE,1.017
1,Alcoa Corp,AA,2021-05-04,37.374,38.603,37.01,38.406,6820820.783,NYSE,1.017


In [7]:
# Distinct tickers present in the loaded OHLCV table.
SP_tickers = list(sample_stocks_filtered['Ticker'].unique())
print("eligible_tickers: ", len(SP_tickers))

# The same tickers passed through a set before being listed again.
tickers = list(set(sample_stocks_filtered['Ticker'].unique()))
print(tickers[0:5])

# Lazily yields one (symbol, api_key) tuple per ticker for the fetch helper.
args = ((ticker_symbol, config['api_key']) for ticker_symbol in tickers)

# Run get_stock_fundamentals for every ticker through the project helper.
stocks_current_fundamentals = hfs.get_data_from_API(
    tickers=tickers,
    func_to_run=get_stock_fundamentals,
    func_args=args,
    return_dfs=1,
)

print("YAY got result stocks_current_fundamentals: ", stocks_current_fundamentals.shape)

# Fields to keep from each section of the fundamentals payload.
fundamental_fields_by_section = {
    "General": [
        'Exchange', 'FiscalYearEnd', 'InternationalDomestic', 'Sector', 'Industry',
        'GicSector', 'GicGroup', 'GicIndustry', 'GicSubIndustry', 'HomeCategory',
        'AddressData.Country', 'FullTimeEmployees',
    ],
    "Highlights": [
        'MarketCapitalization', 'MarketCapitalizationMln', 'EBITDA', 'PERatio',
        'PEGRatio', 'WallStreetTargetPrice', 'BookValue', 'DividendShare',
        'DividendYield', 'EarningsShare', 'EPSEstimateCurrentYear',
        'EPSEstimateNextYear', 'EPSEstimateNextQuarter',
        'EPSEstimateCurrentQuarter', 'MostRecentQuarter', 'ProfitMargin',
        'OperatingMarginTTM', 'ReturnOnAssetsTTM', 'ReturnOnEquityTTM',
        'RevenueTTM', 'RevenuePerShareTTM', 'QuarterlyRevenueGrowthYOY',
        'GrossProfitTTM', 'DilutedEpsTTM', 'QuarterlyEarningsGrowthYOY',
    ],
    "Valuation": [
        'TrailingPE', 'ForwardPE', 'PriceSalesTTM', 'PriceBookMRQ',
        'EnterpriseValueRevenue', 'EnterpriseValueEbitda',
    ],
    "SharesStats": [
        'SharesOutstanding', 'SharesFloat', 'PercentInsiders',
        'PercentInstitutions', 'SharesShort', 'SharesShortPriorMonth',
        'ShortRatio', 'ShortPercentOutstanding', 'ShortPercentFloat',
    ],
    "Technicals": [
        'Beta', '52WeekHigh', '52WeekLow', '50DayMA', '200DayMA', 'SharesShort',
        'SharesShortPriorMonth', 'ShortRatio', 'ShortPercent',
    ],
    "AnalystRatings": [
        'Rating', 'TargetPrice', 'StrongBuy', 'Buy', 'Hold', 'Sell', 'StrongSell',
    ],
    "ESGScores": ['RatingDate', 'TotalEsg', 'TotalEsgPercentile'],
}

# Build "<Section>__<Field>" column names (the prefix scheme used by get_stock_fundamentals),
# then keep only the ones that actually came back from the API.
key_fundamental_cols = ['Ticker'] + [
    section_name + "__" + field_name
    for section_name, field_names in fundamental_fields_by_section.items()
    for field_name in field_names
]
key_fundamental_cols = [
    column_name
    for column_name in key_fundamental_cols
    if column_name in stocks_current_fundamentals.columns
]

stocks_current_fundamentals = stocks_current_fundamentals[key_fundamental_cols]

print("unique #tickers stocks_current_fundamentals: ", stocks_current_fundamentals['Ticker'].nunique())

stocks_current_fundamentals.head(2)

eligible_tickers:  1340
['WMB', 'FUTU', 'DRUG', 'KZR', 'GFS']
chunk  1 :  995
chunk  2 :  345
total time:  61.413398027420044
returning df
YAY got result stocks_current_fundamentals:  (1340, 214)
unique #tickers stocks_current_fundamentals:  1340


Unnamed: 0,Ticker,General__Exchange,General__FiscalYearEnd,General__InternationalDomestic,General__Sector,General__Industry,General__GicSector,General__GicGroup,General__GicIndustry,General__GicSubIndustry,General__HomeCategory,General__AddressData.Country,General__FullTimeEmployees,Highlights__MarketCapitalization,Highlights__MarketCapitalizationMln,Highlights__EBITDA,Highlights__PERatio,Highlights__PEGRatio,Highlights__WallStreetTargetPrice,Highlights__BookValue,Highlights__DividendShare,Highlights__DividendYield,Highlights__EarningsShare,Highlights__EPSEstimateCurrentYear,Highlights__EPSEstimateNextYear,Highlights__EPSEstimateNextQuarter,Highlights__EPSEstimateCurrentQuarter,Highlights__MostRecentQuarter,Highlights__ProfitMargin,Highlights__OperatingMarginTTM,Highlights__ReturnOnAssetsTTM,Highlights__ReturnOnEquityTTM,Highlights__RevenueTTM,Highlights__RevenuePerShareTTM,Highlights__QuarterlyRevenueGrowthYOY,Highlights__GrossProfitTTM,Highlights__DilutedEpsTTM,Highlights__QuarterlyEarningsGrowthYOY,Valuation__TrailingPE,Valuation__ForwardPE,Valuation__PriceSalesTTM,Valuation__PriceBookMRQ,Valuation__EnterpriseValueRevenue,Valuation__EnterpriseValueEbitda,SharesStats__SharesOutstanding,SharesStats__SharesFloat,SharesStats__PercentInsiders,SharesStats__PercentInstitutions,SharesStats__SharesShort,SharesStats__SharesShortPriorMonth,SharesStats__ShortRatio,SharesStats__ShortPercentOutstanding,SharesStats__ShortPercentFloat,Technicals__Beta,Technicals__52WeekHigh,Technicals__52WeekLow,Technicals__50DayMA,Technicals__200DayMA,Technicals__SharesShort,Technicals__SharesShortPriorMonth,Technicals__ShortRatio,Technicals__ShortPercent,AnalystRatings__Rating,AnalystRatings__TargetPrice,AnalystRatings__StrongBuy,AnalystRatings__Buy,AnalystRatings__Hold,AnalystRatings__Sell,AnalystRatings__StrongSell,ESGScores__RatingDate,ESGScores__TotalEsg,ESGScores__TotalEsgPercentile
0,WMB,NYSE,December,Domestic,Energy,Oil & Gas Midstream,Energy,Energy,"Oil, Gas & Consumable Fuels",Oil & Gas Storage & Transportation,Domestic,United States,5043,42429558784,42429.5588,6157000192.0,16.4292,1.5454,36.96,9.568,1.745,0.0521,2.12,1.89,1.81,0.4,0.38,2023-06-30,0.2465,0.3834,0.0535,0.2037,10773999616,8.84,-0.155,6078000000,2.12,0.151,16.4292,18.8324,3.5522,3.6408,5.6381,11.5127,1218189952,1211263836,0.356,87.527,,,,,,1.203,35.445,27.1324,32.1986,31.5651,30700569,21727372,3.53,0.0282,3.8182,36.84,6.0,6.0,10.0,0.0,0.0,2019-01-01,28.0,39.58
1,FUTU,NASDAQ,December,,Financial Services,Capital Markets,Financials,Diversified Financials,Capital Markets,Investment Banking & Brokerage,ADR,Hong Kong,2784,7579170816,7579.1708,,15.3768,0.0,57.6,159.107,0.0,0.0,3.53,3.65,4.0,0.82,0.82,2023-03-31,0.4385,0.5327,0.0335,0.1667,8088757760,57.577,0.479,6617895000,3.53,1.208,15.3768,21.0084,1.4567,3.9834,0.0,0.0,139631008,56984457,16.407,39.594,,,,,,0.8429,72.2,28.0,44.9241,47.7045,8481867,8594350,5.11,0.0866,4.0,56.55,10.0,1.0,3.0,2.0,1.0,,,


### Saving Fundamental Data

In [9]:
# NOTE(review): this cell rebinds `stocks_current_fundamentals` to a narrower frame, so it
# cannot be re-run on its own; re-run the fetch cell first.
print(stocks_current_fundamentals.shape)

# Wider candidate column list; nothing else in this cell references it.
key_current_funda_for_super_per = ['Ticker','General__AddressData.Country', 'General__FiscalYearEnd', 'Highlights__MostRecentQuarter', 'General__Sector', 'General__GicIndustry', 'General__GicSubIndustry',
'General__FullTimeEmployees', 'Highlights__MarketCapitalization', 'Highlights__PERatio', 'Highlights__EarningsShare',  'Highlights__PEGRatio',
'Highlights__EPSEstimateCurrentQuarter',   'Highlights__ReturnOnEquityTTM',  'Highlights__QuarterlyEarningsGrowthYOY',
'Valuation__TrailingPE', 'Valuation__ForwardPE', 'Valuation__PriceBookMRQ', 'SharesStats__PercentInsiders', 'SharesStats__PercentInstitutions',
'SharesStats__ShortRatio', 'SharesStats__SharesFloat','SharesStats__ShortPercentFloat', 'Technicals__Beta','AnalystRatings__Rating',
'Highlights__WallStreetTargetPrice', 'AnalystRatings__TargetPrice', 'AnalystRatings__StrongBuy', 'AnalystRatings__StrongSell']

# Ticker count, used in the output file name.
num_tickers = str(stocks_current_fundamentals['Ticker'].nunique())

# Take an explicit copy of the column subset: the assignments below must land on an
# independent frame, not on a slice of the fetched data (chained assignment).
stocks_current_fundamentals = stocks_current_fundamentals[['Ticker', 'General__AddressData.Country', 'General__Sector', 'General__GicIndustry',
        'Highlights__EPSEstimateCurrentQuarter', 'Highlights__ReturnOnEquityTTM',
        'Valuation__PriceBookMRQ', 'SharesStats__PercentInsiders',
        'SharesStats__PercentInstitutions', 'SharesStats__SharesFloat']].copy()

# Remainder of 100 after subtracting the insider and institution percentages.
stocks_current_fundamentals['SharesStats__PercentRetailers'] = (
    100
    - stocks_current_fundamentals['SharesStats__PercentInsiders']
    - stocks_current_fundamentals['SharesStats__PercentInstitutions']
)
# Rescale the share float by a factor of one million.
stocks_current_fundamentals['SharesStats__SharesFloat'] = stocks_current_fundamentals['SharesStats__SharesFloat'] / 1000000

# Missing sector becomes the literal label "Missing".
stocks_current_fundamentals["General__Sector"] = stocks_current_fundamentals["General__Sector"].fillna("Missing")

# Missing GIC industry falls back to the (already filled) sector.
# NOTE(review): the column is dropped on the very next line, so this fill currently has no
# effect on the saved file — confirm whether the drop or the fill is the intended behaviour.
stocks_current_fundamentals["General__GicIndustry"] = stocks_current_fundamentals["General__GicIndustry"].fillna(
    stocks_current_fundamentals["General__Sector"]
)
del stocks_current_fundamentals['General__GicIndustry']

# Keep a short list of countries as-is and bucket everything else (including missing) as 'Other'.
countries_kept = ['United States', 'China',  'Hong Kong', 'Taiwan', 'Israel', 'Cayman Islands', 'Monaco']
country = stocks_current_fundamentals['General__AddressData.Country']
stocks_current_fundamentals['General__AddressData.Country'] = country.where(country.isin(countries_kept), 'Other')

# os.path.join inserts a separator only when needed, so the path is correct whether or not
# ML_DATA_FOLDER_PATH ends with a slash.
output_folder = f'{PROJ_PATH}/{ML_DATA_FOLDER_PATH}'
output_path = os.path.join(output_folder, f'stocks_current_fundamentals_{num_tickers}_{today}.pkl')
stocks_current_fundamentals.to_pickle(output_path)


(1340, 72)


In [11]:
%reset -f 
