# All API calls to Assemble Dataframe

In [1]:
import pandas as pd 
import numpy as np
import datetime
from datetime import date
import time
import requests
from selenium import webdriver
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.support.select import Select
from selenium.webdriver.common.by import By
import os
import io
import pickle

In [2]:
pd.set_option('display.max_columns', None)

## Functions

In [3]:
def dates_to_str(last_date = date.today(), historical_days = 1450):
    first_date = last_date-datetime.timedelta(days=historical_days)
    return first_date.strftime('%Y-%m-%d'), last_date.strftime('%Y-%m-%d')

In [4]:
def fetch_stock(symbol, last_date = date.today(), historical_days = 1450):
    '''
    Get the trading information about a stock for a range of days in "historical_days" before the "last_date"
    The output is a DataFrame with columns "close","high","low","open","volume","splitFactor"
    The output are adjusted prices
    '''
    first_date, curr_date = dates_to_str(last_date, historical_days)
    

    #The request itself
    url = f'https://api.tiingo.com/tiingo/daily/{symbol}/prices?startDate={first_date}&endDate={curr_date} '
    headers = {
            'Content-Type': 'application/json',
            'Authorization' : f'Token {Tiingo_API}'
            }
    r = requests.get(url, headers=headers)
    response = r.json()
    response = pd.DataFrame(response, index=np.arange(0,len(response)))
    response.set_index(["date"], inplace = True)
    response.index = pd.to_datetime(response.index)
    response.drop(columns = ["close","high","low","open","volume","splitFactor"],axis = 1, inplace = True)
    response.rename(columns = {"adjClose":"close","adjHigh":"high","adjLow":"low","adjOpen":"open","adjVolume":"volume"}, inplace = True)
    return response

In [5]:
def fetch_fundamentals(symbol, last_date = date.today(), historical_days = 1450):
    '''
    The output is a dataframe with daily fundamentals:
    Market_Cap, Enterprise_Value, PE_Ratio, PB_Ratio, Trailing PEG
    '''
    #Get latest and historical day, month, year for API request
    first_date, curr_date = dates_to_str(last_date, historical_days)
    
    url = f'https://api.tiingo.com/tiingo/fundamentals/{symbol}/daily?token={Tiingo_API}?startDate={first_date}&endDate={curr_date}'
    headers = {
            'Content-Type': 'application/json',
            'Authorization' : f'Token {Tiingo_API}'
            }
    r = requests.get(url, headers=headers)
    response = r.json()
    response = pd.DataFrame(response, index=np.arange(0,len(response)))
    response.set_index(["date"], inplace = True)
    response.index = pd.to_datetime(response.index)
    return response

In [6]:
def fetch_statements(symbol, last_date = date.today(), historical_days = 1450):
    '''
    Gets historical financial data about the stock like Total Assets, Accounts Payable, Short & Long term Debts etc.
    '''
    #Get latest and historical day, month, year for API request
    hist_date_str, latest_date_str = dates_to_str(last_date, historical_days)
    
    url = f'https://api.tiingo.com/tiingo/fundamentals/{symbol}/statements?token={Tiingo_API}?startDate={hist_date_str}&endDate={latest_date_str}'
    headers = {
            'Content-Type': 'application/json',
            'Authorization' : f'Token {Tiingo_API}'
            }
    r = requests.get(url, headers=headers)
    response = r.json()
    final_df = pd.DataFrame()
    
    
    for i in range(len(response)):
        balanceSheet_df = pd.DataFrame(response[i]["statementData"]["balanceSheet"]).T
        overview_df = pd.DataFrame(response[i]["statementData"]["overview"]).T
        cashFlow_df = pd.DataFrame(response[i]["statementData"]["cashFlow"]).T
        incomeStatement_df = pd.DataFrame(response[i]["statementData"]["incomeStatement"]).T
        dfs = [balanceSheet_df, overview_df, cashFlow_df, incomeStatement_df]
        general_df = pd.concat(dfs, axis =1)
        
        #Make 1st row a header
        new_header = list(general_df.loc["dataCode"]) #grab the first row for the header
        general_df.drop("dataCode",axis = 0, inplace = True) #take the data less the header row
        general_df.columns = new_header
        
        general_df["date"] = response[i]["date"]
        general_df["quarter"] = response[i]["quarter"]
        general_df["year"] = response[i]["year"]
        print(f'Combining Statements for {response[i]["year"]} and {response[i]["quarter"]}')
        final_df = pd.concat([final_df, general_df], axis = 0)
    final_df.set_index(["date"], inplace = True)
    #converting date from str to datetime object
    final_df.index = pd.to_datetime(final_df.index, yearfirst = True, utc = True, origin = "unix")
    final_df_copy = final_df.copy()
    #Dealing with duplicated indexes of datetime
    if final_df.index.duplicated().sum()>0:
        duplicates_in_df = final_df.index.duplicated()
        final_df.dropna(axis = 0, inplace = True)
    final_df = final_df.apply(pd.to_numeric, errors='ignore')
    return final_df

In [7]:
def fetch_metadata(): 
    '''
    Get's metadata about all available stocks, like:
        Industry, Location, Full Name, Ticker, etc.
    '''
    url = f'https://api.tiingo.com/tiingo/fundamentals/meta?token={Tiingo_API}'
    headers = {
            'Content-Type': 'application/json',
            'Authorization' : f'Token {Tiingo_API}'
            }
    r = requests.get(url, headers=headers)
    response = r.json()
    response = pd.DataFrame(response)
    return response

In [8]:
def combine_tables(stock_df, statemets_data, fundamentals_data, dates_range, todays_date = date.today()):
    
    '''
    Combining into one Dataframe information about daily trading values, statements, fundamentals
    '''
    #We create an index of dates range
    dates = pd.date_range(todays_date-datetime.timedelta(days=dates_range),todays_date,freq='d')    
    #We change the name to make join with other dataframes available by the same index
    dates.rename('date', inplace = True)
    #Convert index to dataframe
    dates_df = pd.DataFrame(index = dates)
    #Delete timezone from the statements to make join available
    statemets_data.index = statemets_data.index.tz_convert(None)
    stock_df.index = stock_df.index.tz_convert(None)
    fundamentals_data.index = fundamentals_data.index.tz_convert(None)
    
    dates_and_statemts = dates_df.merge(statemets_data, on='date', how='left')
    #Statements table contains data about quarters only while dates DF is much bigger. We populate quarter results to days
    dates_and_statemts.ffill(axis = 0, inplace = True)
    stock_df = stock_df.merge(dates_and_statemts, on='date', how = 'left')
    stock_df = stock_df.merge(fundamentals_data, on='date', how = 'left')
    return stock_df

In [9]:
def search_symbol(symbol, driver):
    elem = driver.find_element(By.ID,"ticker")
    elem.send_keys(symbol)
    elem.send_keys(Keys.RETURN)
    driver.implicitly_wait(3)

In [10]:
def find_estim_tables(driver):
    '''
    get a table with earnings on a page  ZACK
    '''
    #Find required table
    elem = driver.find_element(By.XPATH,'//*[@id="earnings_announcements_earnings_table"]') 
    #get all rows from the table
    rows = [row.text.encode("utf8") for row in elem.find_elements(By.TAG_NAME,'tr')]
    #convert list of bytes to list of strings
    rows = [row.decode("utf-8").replace("\n"," ") for row in rows]
    #convert list of strings to a dataframe 
    earnings_history_df = pd.read_csv(io.StringIO('\n'.join(rows)), delim_whitespace=True, header = 0, names = ['date', 'Period_Ending', 'Estimate', 'Reported', 'Surprise', 'Surprise_%', "str1", "str2"])
    
    #Find the latest estimate 
    elem = driver.find_element(By.XPATH,'//*[@id="right_content"]/section[2]/div') 
    #get all rows from the table
    rows1 = [row.text.encode("utf8") for row in elem.find_elements(By.TAG_NAME,'td')]
    rows2 = [row.text.encode("utf8") for row in elem.find_elements(By.TAG_NAME,'th')]
    #convert list of bytes to list of strings
    rows1 = [row.decode("utf-8").replace("\n"," ") for row in rows1]
    rows2 = [row.decode("utf-8").replace("\n"," ") for row in rows2]    
    #convert list of strings to a dataframe 
    earnings_latest_df = pd.DataFrame(rows1).transpose()
    earnings_latest_df.columns =  ['Period_Ending', 'Estimate', 'Surprise_%']
    earnings_latest_df["date"] = rows2[-1].split(" ")[0]
    
    earnings = pd.concat([earnings_latest_df, earnings_history_df], ignore_index = True, sort = False)
    earnings['date'] = pd.to_datetime(earnings['date'])
    earnings.set_index(["date"], inplace = True)
    
    return earnings

In [11]:
def find_divid_tables(driver):
    '''
    get a table with dividends on a page  ZACK
    '''
    element = driver.find_element(By.XPATH,'//*[@id="earnings_announcements_tabs"]/ul')
    driver.execute_script('arguments[0].scrollIntoView({block: "center", inline: "center"})', element)
    
    elem = driver.find_element(By.XPATH,'//*[@id="ui-id-7"]')
    elem.click()
    time.sleep(3)
    #Expand 100 records
    dropdown = driver.find_element(By.NAME,"earnings_announcements_dividends_table_length")
    Select(dropdown).select_by_visible_text("100")
    
    #Find required table
    elem = driver.find_element(By.XPATH,'//*[@id="earnings_announcements_dividends_table"]') 
    #get all rows from the table
    rows = [row.text.encode("utf8") for row in elem.find_elements(By.TAG_NAME,'tr')]
    #convert list of bytes to list of strings
    rows = [row.decode("utf-8").replace("\n"," ") for row in rows]
    #convert list of strings to a dataframe 
    
    dividends_history_df = pd.read_csv(io.StringIO('\n'.join(rows)), delim_whitespace=True, header = 0, names = ['Date_Paid', 'Amount', 'Date_Announced', 'Ex-Dividend_Date'])
    dividends_history_df.dropna(axis = 0, inplace = True)
    dividends_history_df['Date_Paid'] = pd.to_datetime(dividends_history_df['Date_Paid'])
    dividends_history_df['Date_Announced'] = pd.to_datetime(dividends_history_df['Date_Announced'])
    dividends_history_df['Ex-Dividend_Date'] = pd.to_datetime(dividends_history_df['Ex-Dividend_Date'])
    dividends_history_df.rename(columns = {"Date_Announced":"date"}, inplace = True)
    dividends_history_df.set_index(["date"], inplace = True)
    
    return dividends_history_df

In [12]:
def get_earn_and_dividends(symbol):
    '''
    This function launches browser for data load and fetches earnings and dividends data
    '''
    #Start Chrome 
    driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))
    #Go to the website
    driver.get('https://www.zacks.com/stock/research/CSCO/earnings-calendar')
    
    #Search stock
    search_symbol(symbol, driver)
    
    #expand_100_earnings values:
    dropdown = driver.find_element(By.NAME,"earnings_announcements_earnings_table_length")
    Select(dropdown).select_by_visible_text("100")
    time.sleep(3)
    
    #Get earnings
    earnings = find_estim_tables(driver)
    time.sleep(3)
    
    #Ge dividends
    dividends = find_divid_tables(driver)
    
    #Close browser
    driver.close()
    
    #Transforming Earnings dataframe to a final version
    #Transform string values to numeric
    earnings.replace({"--":np.nan},inplace = True)
    earnings.Reported, earnings.Estimate, earnings["Surprise_%"] = pd.to_numeric(earnings.Reported.str.replace("$","")), pd.to_numeric(earnings.Estimate.str.replace("$","")), pd.to_numeric(earnings["Surprise_%"].str.replace("%","").str.replace(",",""))
    earnings["surprise_%"] = earnings["Surprise_%"]/100
    earnings["date_of_report"] = earnings.index
    #getting expected future earnings change
    earnings["future_estimate"] = earnings.Estimate.shift(1)
    earnings["previous_surprise"] = earnings["surprise_%"].shift(-1)
    earnings["expected_growth"]= (earnings.future_estimate - earnings.Reported)/earnings.Reported
    earnings = earnings[["surprise_%", "expected_growth", "previous_surprise", "date_of_report"]]
    
    #Transforming Dividends dataframe to a final version
    #STR to value
    dividends.replace({"--":np.nan},inplace = True)
    dividends.Amount = pd.to_numeric(dividends.Amount.str.replace("$",""))
    #get date that we later can use to count days after the announcement
    dividends["date_announced"] = dividends.index
    #Getting dividends trend
    dividends["previous_divid"] = dividends.Amount.shift(-1)
    dividends["dividends_change"] = (dividends.Amount - dividends.previous_divid)/dividends.previous_divid
    dividends = dividends[dividends.dividends_change != 0]
    dividends["prev_div_change"] = dividends.dividends_change.shift(-1)
    dividends = dividends[["dividends_change","prev_div_change","date_announced"]]
    
    #earnings = a.copy()
    #dividends = b.copy()
    
    #Match earnings with dates
    #Creating Dates dataframe with all possible dates values
    dates_df=pd.DataFrame()
    dates_df["date"] = pd.date_range(start=earnings.index.min(), end=earnings.index.max())
    #Set dates column as index
    dates_df.set_index(["date"], inplace = True)
    #Creating a dates_earnings dataset where we extrapolate existing quarterly data to daily
    dates_earnings = dates_df.copy()
    dates_earnings = dates_earnings.join(earnings, how = 'left')
    dates_earnings.sort_values(by = 'date', axis = 0, ascending = True, inplace = True)
    dates_earnings.ffill(axis = 0, inplace = True)
    dates_earnings.sort_values(by = 'date', axis = 0, ascending = False, inplace = True)
    dates_earnings["days_after_earn_report"] = dates_earnings.index - dates_earnings["date_of_report"] 
    dates_earnings['days_after_earn_report'] = pd.to_numeric(dates_earnings['days_after_earn_report'].dt.days, downcast='integer')
    dates_earnings.drop(["date_of_report"], axis = 1, inplace = True)
    
    #Match dividends with dates
    #Creating Dates dataframe with all possible dates values
    if dividends.empty:
        dates_dividends = pd.DataFrame(columns = ["days_after_divid_report", "dividends_change","prev_div_change"])
        dates_dividends.index.names = ['date']
    else:
        dates_df=pd.DataFrame()
        dates_df["date"] = pd.date_range(start=dividends.index.min(), end=date.today())
        #Set dates column as index
        dates_df.set_index(["date"], inplace = True)
        #Creating a dates_ividends dataset where we extrapolate existing quarterly data to daily
        dates_dividends = dates_df.copy()
        dates_dividends = dates_dividends.join(dividends, how = 'left')
        dates_dividends.sort_values(by = 'date', axis = 0, ascending = True, inplace = True)
        dates_dividends.ffill(axis = 0, inplace = True)
        dates_dividends.sort_values(by = 'date', axis = 0, ascending = False, inplace = True)
        dates_dividends["days_after_divid_report"] = dates_dividends.index - dates_dividends["date_announced"] 
        dates_dividends['days_after_divid_report'] = pd.to_numeric(dates_dividends['days_after_divid_report'].dt.days, downcast='integer')
        dates_dividends.drop(["date_announced"], axis = 1, inplace = True)
    
    return dates_earnings, dates_dividends

In [13]:
def jump_returns(dataset):
    dataset['15d_Return'] = -(dataset['close'] - dataset['close'].shift(-15)) / dataset['close']
    dataset['30d_Return'] = -(dataset['close'] - dataset['close'].shift(-30)) / dataset['close']
    dataset['60d_Return'] = -(dataset['close'] - dataset['close'].shift(-60)) / dataset['close']
    return dataset

# Assemble Complete Dataset

### Setting a few overall variables

In [100]:
#Enter TIINGO
print("Enter 40 signs tiingo API: ")  
Tiingo_API = input()
todays_date = date.today()
first = todays_date 
historical_dates_range = 1825
# ab72115a4a65c514653d132f6cddd6e1c3e0205d

Enter 40 signs tiingo API: 
ab72115a4a65c514653d132f6cddd6e1c3e0205d


In [101]:
# for add on
first = date.today()
historical_dates_range = (first - date(2023, 2, 27)).days

Of the ~25 input predictors, 3 are market condition features, which are scraped from the web only once, and not for each stock. The code blocks to pull these and write them to csv's are directly below, whereas the rest of the features are stock-specific, and are pulled after the list of functions.

## Scrape 10Y Bond Prices (Currently bugged, download manually)

In [None]:
last_date = date.today() 
historical_days = 1825
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))
driver.get('https://finance.yahoo.com/quote/%5ETNX/history?period1=1492732800&period2=1650326400&interval=1d&filter=history&frequency=1d&includeAdjustedClose=true')

#remove old file with data if it's available
filePath = 'C:\\Users\\HP\\OneDrive\\Documents\\Self\\Projects\\Fundamental_Analysis\\TNX.csv'

# Check whether the  specified file is available and remove it  
if os.path.exists(filePath):
    os.remove(filePath)
    print("We remove old csv file with metadata and replace with the newer one")

#Calculating current and historical periods
todays_year = last_date.year
todays_month = last_date.month
todays_day = last_date.day
historical_date = last_date-datetime.timedelta(days=historical_days)
historical_year = historical_date.year
historical_month = historical_date.month
historical_day = historical_date.day

#Searching an input field for dates
elem = driver.find_element(By.XPATH,'//*[@id="Col1-1-HistoricalDataTable-Proxy"]/section/div[1]/div[1]/div[1]/div/div/div/span')
elem.click()
time.sleep(10)

#Enter historical date
elem = driver.find_element(By.XPATH,'//*[@id="dropdown-menu"]/div/div[1]/input')
elem.send_keys(historical_year)
elem.send_keys(Keys.TAB)
elem.send_keys(historical_month)
elem.send_keys(historical_day)
time.sleep(8)

#Enter last date
elem = driver.find_element(By.XPATH,'//*[@id="dropdown-menu"]/div/div[2]/input')
elem.send_keys(todays_year)
elem.send_keys(Keys.TAB)
elem.send_keys(todays_month)
elem.send_keys(todays_day)
time.sleep(8)

#Press done after entering dates
elem = driver.find_element(By.XPATH,'//*[@id="dropdown-menu"]/div/div[3]/button[1]')
elem.click()
time.sleep(5)

#Press apply dates range
elem = driver.find_element(By.XPATH,'//*[@id="Col1-1-HistoricalDataTable-Proxy"]/section/div[1]/div[1]/button')
elem.click()
time.sleep(5)

#press download data
elem = driver.find_element(By.XPATH,'//*[@id="Col1-1-HistoricalDataTable-Proxy"]/section/div[1]/div[2]/span[2]/a/span')
elem.click()
time.sleep(3)

driver.close

TNX = pd.read_csv("C:\\Users\\HP\\OneDrive\\Documents\\Self\\Projects\\Fundamental_Analysis\\TNX.csv")

TNX.Date = pd.to_datetime(data.Date)
TNX.rename(columns = {"Date":"date", "Adj Close":"10Y_bonds"}, inplace = True)
TNX.set_index(["date"], inplace = True)
TNX = data["10Y_bonds"]
TNX.dropna(axis = 0, inplace = True)



In [81]:
TNX = pd.read_csv("C:\\Users\\HP\\OneDrive\\Documents\\Self\\Projects\\Fundamental_Analysis\\TNX.csv")

In [113]:
# Due to 30d bond return feature, put addon as YTD instead of simply days since last recorded
TNX_addon = pd.read_csv("C:\\Users\\HP\\OneDrive\\Documents\\Self\\Projects\\Fundamental_Analysis\\TNX_addon.csv")

## Download volatilty index

In [114]:
def get_vix(last_date = date.today(), historical_days = 1450):
    historical_date = last_date-datetime.timedelta(days=historical_days)
    
    url="https://cdn.cboe.com/api/global/us_indices/daily_prices/VIX_History.csv"
    response=pd.read_csv(url)
    
    response["DATE"]=pd.to_datetime(response["DATE"])
    response.rename(columns = {"DATE":"Date"}, inplace = True)
    response.set_index(["Date"], inplace = True)
    response = response[historical_date:last_date]["HIGH"]
    response = response.rename("VIX_high")
    
    return response

## Import S&P500 List and construct loop to pull data

In [115]:
TNX = TNX.drop(columns=['Open', 'High', 'Low', 'Adj Close', 'Volume'])
TNX = TNX.loc[~TNX['Close'].isna()]

KeyError: "['Open', 'High', 'Low', 'Adj Close', 'Volume'] not found in axis"

In [116]:
# for add on
TNX_addon = TNX_addon.drop(columns=['Open', 'High', 'Low', 'Adj Close', 'Volume'])
TNX_addon = TNX_addon.loc[~TNX_addon['Close'].isna()]

In [134]:
df_seed = TNX_addon
#df_seed = TNX
df_seed['Date'] = pd.to_datetime(df_seed['Date'])
df_seed = df_seed.set_index('Date')
df_seed['30dBondReturn'] = (df_seed['Close'] - df_seed['Close'].shift(30)) / df_seed['Close'].shift(30)
df_seed.columns = ['10YBond', '30dBondRet']
df_seed = df_seed.loc[~df_seed['30dBondRet'].isna()]

In [140]:
df_seed

Unnamed: 0_level_0,10YBond,30dBondRet,VIX_high
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2023-02-28,3.916,0.115352,21.37


In [136]:
# for addon
df_seed = df_seed.loc[df_seed.index > pd.to_datetime('2023-02-27')]

In [137]:
vix = pd.DataFrame(get_vix(date.today(), historical_dates_range))

In [138]:
df_seed = df_seed.merge(vix, on='Date', how='left')
#df_seed = df_seed.set_index('Date')

In [139]:
# tickers = pd.read_csv('S&P500_List.txt')
tickers = pd.read_csv('DOW30_List.csv')

#### Get SPY S&P price

In [141]:
spy = fetch_stock('SPY', date.today())
spy = spy.drop(columns=['high', 'low', 'open', 'volume', 'divCash'])
spy.rename(columns = {"close":"spy_close"}, inplace = True)
spy.index = spy.index.tz_convert(None)
spy = spy.reset_index()

In [142]:
spy_mapping = spy['spy_close'].to_dict()

In [143]:
spy['spy_5dret'] = spy.index - 5
spy['spy_5dret'] = (spy.index.map(spy_mapping) - spy['spy_5dret'].map(spy_mapping)) / spy['spy_5dret'].map(spy_mapping)
spy['spy_10dret'] = spy.index - 10 
spy['spy_10dret'] = (spy.index.map(spy_mapping) - spy['spy_10dret'].map(spy_mapping)) / spy['spy_10dret'].map(spy_mapping)
spy['spy_15dret'] = spy.index  - 15 
spy['spy_15dret'] = (spy.index.map(spy_mapping) - spy['spy_15dret'].map(spy_mapping)) / spy['spy_15dret'].map(spy_mapping)


In [144]:
df_seed = df_seed.merge(spy, left_on='Date', right_on='date', how='left')

In [92]:
df_seed = df_seed[df_seed['date']>pd.to_datetime('2019-01-01')]

In [145]:
df_seed

Unnamed: 0,10YBond,30dBondRet,VIX_high,date,spy_close,spy_5dret,spy_10dret,spy_15dret
0,3.916,0.115352,21.37,2023-02-28,396.26,-0.007091,-0.040138,-0.033111


#### Activate cell immediately below for creating a new df

In [96]:
df_total = pd.DataFrame()

#### Activate cell immediately below for tacking on to existing df saved to folder

In [147]:
df_total = pd.read_csv('All_Data.csv')
df_total = df_total.set_index('date')

### Loop over stocks in list

In [98]:
# tickers.iloc[i,0] is ticker, tickers.iloc[i,2] is sector
# first is 2/17/23
# historical_dates_range = 120

for i in range(16,len(tickers)):
    # base_df is VIX and TNX info that goes along with each stock
    base_df = df_seed.copy()
    # scrape earnings and dividends from the web
    earnings, dividends = get_earn_and_dividends(tickers.iloc[i,0])
    # Fetch stock data, fundamentals, and fundamentals from Tiingo api
    
    stock_dataset = fetch_stock(tickers.iloc[i,0], first, historical_dates_range)
    fundamentals = fetch_fundamentals(tickers.iloc[i,0], first, historical_dates_range)
    
    statements = fetch_statements(tickers.iloc[i,0], first, historical_dates_range)
    
    
    # Combining all stock's data
    big_dataset = combine_tables(stock_dataset, statements, fundamentals, historical_dates_range)

    big_dataset.sort_values(by = 'date', axis = 0, ascending = False, inplace = True)
    big_dataset["Debt-to-Equity_Ratio"] = big_dataset["totalAssets"]/big_dataset["totalLiabilities"]
    big_dataset["DividendsYield"] = big_dataset["payDiv"]/big_dataset["marketCap"]
    big_dataset["PayoutRatio"] = big_dataset["payDiv"]/big_dataset["grossProfit"]
    big_dataset["Acc_Rec_Pay_Ration"] = big_dataset["acctRec"]/big_dataset["acctPay"]
    big_dataset["Earnings_per_stock"] = big_dataset["epsDil"]/big_dataset["close"]
    
    
    # forming complete df for one individual stock
    
    base_df = base_df.reset_index().merge(earnings, left_on='date', right_on='date', how='left').set_index('date')
    base_df = base_df.reset_index().merge(dividends, left_on='date', right_on='date', how='left').set_index('date')
    base_df = base_df.reset_index().merge(big_dataset, left_on='date', right_on='date', how='left').set_index('date')
    base_df = jump_returns(base_df)
    base_df['Stock'] = tickers.iloc[i,0]
    # sample only one data point per week
    
    # base_df = base_df.iloc[len(base_df)-725:len(base_df),:]
    #base_df = base_df.reset_index()
    #base_df = base_df[base_df.index % 7 == 1]
    #base_df = base_df.set_index('Date')
    
    
    # concatenate entire stock's results to overall df_total
    
    df_total = pd.concat([df_total,base_df])
    print(f'Loop Interrupt: If this is the last Loop Interrupt message, restart loop at i={i+1}')

  earnings.Reported, earnings.Estimate, earnings["Surprise_%"] = pd.to_numeric(earnings.Reported.str.replace("$","")), pd.to_numeric(earnings.Estimate.str.replace("$","")), pd.to_numeric(earnings["Surprise_%"].str.replace("%","").str.replace(",",""))
  dividends.Amount = pd.to_numeric(dividends.Amount.str.replace("$",""))


Combining Statements for 2023 and 3
Combining Statements for 2023 and 2
Combining Statements for 2023 and 1
Combining Statements for 2022 and 4
Combining Statements for 2022 and 0
Combining Statements for 2022 and 3
Combining Statements for 2022 and 2
Combining Statements for 2022 and 1
Combining Statements for 2021 and 4
Combining Statements for 2021 and 0
Combining Statements for 2021 and 3
Combining Statements for 2021 and 2
Combining Statements for 2021 and 1
Loop Interrupt: If this is the last Loop Interrupt message, restart loop at i=17


  earnings.Reported, earnings.Estimate, earnings["Surprise_%"] = pd.to_numeric(earnings.Reported.str.replace("$","")), pd.to_numeric(earnings.Estimate.str.replace("$","")), pd.to_numeric(earnings["Surprise_%"].str.replace("%","").str.replace(",",""))
  dividends.Amount = pd.to_numeric(dividends.Amount.str.replace("$",""))


Combining Statements for 2022 and 4
Combining Statements for 2022 and 0
Combining Statements for 2022 and 3
Combining Statements for 2022 and 2
Combining Statements for 2022 and 1
Combining Statements for 2021 and 4
Combining Statements for 2021 and 0
Combining Statements for 2021 and 3
Combining Statements for 2021 and 2
Combining Statements for 2021 and 1
Combining Statements for 2020 and 4
Combining Statements for 2020 and 0
Combining Statements for 2020 and 3
Combining Statements for 2020 and 2
Combining Statements for 2020 and 1
Loop Interrupt: If this is the last Loop Interrupt message, restart loop at i=18


  earnings.Reported, earnings.Estimate, earnings["Surprise_%"] = pd.to_numeric(earnings.Reported.str.replace("$","")), pd.to_numeric(earnings.Estimate.str.replace("$","")), pd.to_numeric(earnings["Surprise_%"].str.replace("%","").str.replace(",",""))
  dividends.Amount = pd.to_numeric(dividends.Amount.str.replace("$",""))


Combining Statements for 2023 and 2
Combining Statements for 2023 and 1
Combining Statements for 2022 and 4
Combining Statements for 2022 and 0
Combining Statements for 2022 and 3
Combining Statements for 2022 and 2
Combining Statements for 2022 and 1
Combining Statements for 2021 and 4
Combining Statements for 2021 and 0
Combining Statements for 2021 and 3
Combining Statements for 2021 and 2
Combining Statements for 2021 and 1
Combining Statements for 2020 and 4
Combining Statements for 2020 and 0
Combining Statements for 2020 and 3
Loop Interrupt: If this is the last Loop Interrupt message, restart loop at i=19


  earnings.Reported, earnings.Estimate, earnings["Surprise_%"] = pd.to_numeric(earnings.Reported.str.replace("$","")), pd.to_numeric(earnings.Estimate.str.replace("$","")), pd.to_numeric(earnings["Surprise_%"].str.replace("%","").str.replace(",",""))
  dividends.Amount = pd.to_numeric(dividends.Amount.str.replace("$",""))


Combining Statements for 2022 and 3
Combining Statements for 2022 and 2
Combining Statements for 2022 and 1
Combining Statements for 2021 and 4
Combining Statements for 2021 and 0
Combining Statements for 2021 and 3
Combining Statements for 2021 and 2
Combining Statements for 2021 and 1
Combining Statements for 2020 and 4
Combining Statements for 2020 and 0
Combining Statements for 2020 and 3
Combining Statements for 2020 and 2
Combining Statements for 2020 and 1
Loop Interrupt: If this is the last Loop Interrupt message, restart loop at i=20


  earnings.Reported, earnings.Estimate, earnings["Surprise_%"] = pd.to_numeric(earnings.Reported.str.replace("$","")), pd.to_numeric(earnings.Estimate.str.replace("$","")), pd.to_numeric(earnings["Surprise_%"].str.replace("%","").str.replace(",",""))
  dividends.Amount = pd.to_numeric(dividends.Amount.str.replace("$",""))


Combining Statements for 2023 and 2
Combining Statements for 2023 and 1
Combining Statements for 2022 and 4
Combining Statements for 2022 and 0
Combining Statements for 2022 and 3
Combining Statements for 2022 and 2
Combining Statements for 2022 and 1
Combining Statements for 2021 and 4
Combining Statements for 2021 and 0
Combining Statements for 2021 and 3
Combining Statements for 2021 and 2
Combining Statements for 2021 and 1
Combining Statements for 2020 and 4
Combining Statements for 2020 and 0
Combining Statements for 2020 and 3
Loop Interrupt: If this is the last Loop Interrupt message, restart loop at i=21


  earnings.Reported, earnings.Estimate, earnings["Surprise_%"] = pd.to_numeric(earnings.Reported.str.replace("$","")), pd.to_numeric(earnings.Estimate.str.replace("$","")), pd.to_numeric(earnings["Surprise_%"].str.replace("%","").str.replace(",",""))
  dividends.Amount = pd.to_numeric(dividends.Amount.str.replace("$",""))


Combining Statements for 2022 and 4
Combining Statements for 2022 and 0
Combining Statements for 2022 and 3
Combining Statements for 2022 and 2
Combining Statements for 2022 and 1
Combining Statements for 2021 and 4
Combining Statements for 2021 and 0
Combining Statements for 2021 and 3
Combining Statements for 2021 and 2
Combining Statements for 2021 and 1
Combining Statements for 2020 and 4
Combining Statements for 2020 and 0
Combining Statements for 2020 and 3
Combining Statements for 2020 and 2
Combining Statements for 2020 and 1
Loop Interrupt: If this is the last Loop Interrupt message, restart loop at i=22


  earnings.Reported, earnings.Estimate, earnings["Surprise_%"] = pd.to_numeric(earnings.Reported.str.replace("$","")), pd.to_numeric(earnings.Estimate.str.replace("$","")), pd.to_numeric(earnings["Surprise_%"].str.replace("%","").str.replace(",",""))
  dividends.Amount = pd.to_numeric(dividends.Amount.str.replace("$",""))


Combining Statements for 2023 and 1
Combining Statements for 2022 and 4
Combining Statements for 2022 and 0
Combining Statements for 2022 and 3
Combining Statements for 2022 and 2
Combining Statements for 2022 and 1
Combining Statements for 2021 and 4
Combining Statements for 2021 and 0
Combining Statements for 2021 and 3
Combining Statements for 2021 and 2
Combining Statements for 2021 and 1
Combining Statements for 2020 and 4
Combining Statements for 2020 and 0
Combining Statements for 2020 and 3
Combining Statements for 2020 and 2
Loop Interrupt: If this is the last Loop Interrupt message, restart loop at i=23


  earnings.Reported, earnings.Estimate, earnings["Surprise_%"] = pd.to_numeric(earnings.Reported.str.replace("$","")), pd.to_numeric(earnings.Estimate.str.replace("$","")), pd.to_numeric(earnings["Surprise_%"].str.replace("%","").str.replace(",",""))
  dividends.Amount = pd.to_numeric(dividends.Amount.str.replace("$",""))


Combining Statements for 2022 and 4
Combining Statements for 2022 and 0
Combining Statements for 2022 and 3
Combining Statements for 2022 and 2
Combining Statements for 2022 and 1
Combining Statements for 2021 and 4
Combining Statements for 2021 and 0
Combining Statements for 2021 and 3
Combining Statements for 2021 and 2
Combining Statements for 2021 and 1
Combining Statements for 2020 and 4
Combining Statements for 2020 and 0
Combining Statements for 2020 and 3
Combining Statements for 2020 and 2
Combining Statements for 2020 and 1
Loop Interrupt: If this is the last Loop Interrupt message, restart loop at i=24


  earnings.Reported, earnings.Estimate, earnings["Surprise_%"] = pd.to_numeric(earnings.Reported.str.replace("$","")), pd.to_numeric(earnings.Estimate.str.replace("$","")), pd.to_numeric(earnings["Surprise_%"].str.replace("%","").str.replace(",",""))
  dividends.Amount = pd.to_numeric(dividends.Amount.str.replace("$",""))


Combining Statements for 2022 and 4
Combining Statements for 2022 and 0
Combining Statements for 2022 and 3
Combining Statements for 2022 and 2
Combining Statements for 2022 and 1
Combining Statements for 2021 and 4
Combining Statements for 2021 and 0
Combining Statements for 2021 and 3
Combining Statements for 2021 and 2
Combining Statements for 2021 and 1
Combining Statements for 2020 and 4
Combining Statements for 2020 and 0
Combining Statements for 2020 and 3
Combining Statements for 2020 and 2
Combining Statements for 2020 and 1
Loop Interrupt: If this is the last Loop Interrupt message, restart loop at i=25


  earnings.Reported, earnings.Estimate, earnings["Surprise_%"] = pd.to_numeric(earnings.Reported.str.replace("$","")), pd.to_numeric(earnings.Estimate.str.replace("$","")), pd.to_numeric(earnings["Surprise_%"].str.replace("%","").str.replace(",",""))
  dividends.Amount = pd.to_numeric(dividends.Amount.str.replace("$",""))


Combining Statements for 2022 and 4
Combining Statements for 2022 and 0
Combining Statements for 2022 and 3
Combining Statements for 2022 and 2
Combining Statements for 2022 and 1
Combining Statements for 2021 and 4
Combining Statements for 2021 and 0
Combining Statements for 2021 and 3
Combining Statements for 2021 and 2
Combining Statements for 2021 and 1
Combining Statements for 2020 and 4
Combining Statements for 2020 and 0
Combining Statements for 2020 and 3
Combining Statements for 2020 and 2
Combining Statements for 2020 and 1
Loop Interrupt: If this is the last Loop Interrupt message, restart loop at i=26


  earnings.Reported, earnings.Estimate, earnings["Surprise_%"] = pd.to_numeric(earnings.Reported.str.replace("$","")), pd.to_numeric(earnings.Estimate.str.replace("$","")), pd.to_numeric(earnings["Surprise_%"].str.replace("%","").str.replace(",",""))
  dividends.Amount = pd.to_numeric(dividends.Amount.str.replace("$",""))


Combining Statements for 2023 and 2
Combining Statements for 2023 and 1
Combining Statements for 2022 and 4
Combining Statements for 2022 and 0
Combining Statements for 2022 and 3
Combining Statements for 2022 and 2
Combining Statements for 2022 and 1
Combining Statements for 2021 and 4
Combining Statements for 2021 and 0
Combining Statements for 2021 and 3
Combining Statements for 2021 and 2
Combining Statements for 2021 and 1
Combining Statements for 2020 and 4
Combining Statements for 2020 and 0
Combining Statements for 2020 and 3
Loop Interrupt: If this is the last Loop Interrupt message, restart loop at i=27


  earnings.Reported, earnings.Estimate, earnings["Surprise_%"] = pd.to_numeric(earnings.Reported.str.replace("$","")), pd.to_numeric(earnings.Estimate.str.replace("$","")), pd.to_numeric(earnings["Surprise_%"].str.replace("%","").str.replace(",",""))
  dividends.Amount = pd.to_numeric(dividends.Amount.str.replace("$",""))


Combining Statements for 2022 and 4
Combining Statements for 2022 and 0
Combining Statements for 2022 and 3
Combining Statements for 2022 and 2
Combining Statements for 2022 and 1
Combining Statements for 2021 and 4
Combining Statements for 2021 and 0
Combining Statements for 2021 and 3
Combining Statements for 2021 and 2
Combining Statements for 2021 and 1
Combining Statements for 2020 and 4
Combining Statements for 2020 and 0
Combining Statements for 2020 and 3
Combining Statements for 2020 and 2
Combining Statements for 2020 and 1
Loop Interrupt: If this is the last Loop Interrupt message, restart loop at i=28


  earnings.Reported, earnings.Estimate, earnings["Surprise_%"] = pd.to_numeric(earnings.Reported.str.replace("$","")), pd.to_numeric(earnings.Estimate.str.replace("$","")), pd.to_numeric(earnings["Surprise_%"].str.replace("%","").str.replace(",",""))
  dividends.Amount = pd.to_numeric(dividends.Amount.str.replace("$",""))


Combining Statements for 2023 and 1
Combining Statements for 2022 and 4
Combining Statements for 2022 and 0
Combining Statements for 2022 and 3
Combining Statements for 2022 and 2
Combining Statements for 2022 and 1
Combining Statements for 2021 and 4
Combining Statements for 2021 and 0
Combining Statements for 2021 and 3
Combining Statements for 2021 and 2
Combining Statements for 2021 and 1
Combining Statements for 2020 and 4
Combining Statements for 2020 and 0
Combining Statements for 2020 and 3
Combining Statements for 2020 and 2
Loop Interrupt: If this is the last Loop Interrupt message, restart loop at i=29


  earnings.Reported, earnings.Estimate, earnings["Surprise_%"] = pd.to_numeric(earnings.Reported.str.replace("$","")), pd.to_numeric(earnings.Estimate.str.replace("$","")), pd.to_numeric(earnings["Surprise_%"].str.replace("%","").str.replace(",",""))
  dividends.Amount = pd.to_numeric(dividends.Amount.str.replace("$",""))


Combining Statements for 2022 and 4
Combining Statements for 2022 and 0
Combining Statements for 2022 and 3
Combining Statements for 2022 and 2
Combining Statements for 2022 and 1
Combining Statements for 2021 and 4
Combining Statements for 2021 and 0
Combining Statements for 2021 and 3
Combining Statements for 2021 and 2
Combining Statements for 2021 and 1
Combining Statements for 2020 and 4
Combining Statements for 2020 and 0
Combining Statements for 2020 and 3
Combining Statements for 2020 and 2
Combining Statements for 2020 and 1
Loop Interrupt: If this is the last Loop Interrupt message, restart loop at i=30


## Loop to add additional data on top of df_total ending at 2/17

In [31]:
df_total = pd.read_csv('All_Data.csv')
df_total = df_total.set_index('date') 

In [148]:
first = date.today()
historical_dates_range = (first - date(2023, 2, 27)).days

In [151]:
for i in range(13,len(tickers)):
    # base_df is VIX and TNX info that goes along with each stock
    base_df = df_seed.copy()
    # scrape earnings and dividends from the web
    earnings, dividends = get_earn_and_dividends(tickers.iloc[i,0])
    # Fetch stock data, fundamentals, and fundamentals from Tiingo api
    
    stock_dataset = fetch_stock(tickers.iloc[i,0], first, historical_dates_range)
    fundamentals = fetch_fundamentals(tickers.iloc[i,0], first, historical_dates_range)
    
    statements = fetch_statements(tickers.iloc[i,0], first, historical_dates_range)
    
    
    # Combining all stock's data
    big_dataset = combine_tables(stock_dataset, statements, fundamentals, historical_dates_range)

    big_dataset.sort_values(by = 'date', axis = 0, ascending = False, inplace = True)
    big_dataset["Debt-to-Equity_Ratio"] = big_dataset["totalAssets"]/big_dataset["totalLiabilities"]
    big_dataset["DividendsYield"] = big_dataset["payDiv"]/big_dataset["marketCap"]
    big_dataset["PayoutRatio"] = big_dataset["payDiv"]/big_dataset["grossProfit"]
    big_dataset["Acc_Rec_Pay_Ration"] = big_dataset["acctRec"]/big_dataset["acctPay"]
    big_dataset["Earnings_per_stock"] = big_dataset["epsDil"]/big_dataset["close"]
    
    
    # forming complete df for one individual stock
    
    base_df = base_df.reset_index().merge(earnings, left_on='date', right_on='date', how='left').set_index('date')
    base_df = base_df.reset_index().merge(dividends, left_on='date', right_on='date', how='left').set_index('date')
    base_df = base_df.reset_index().merge(big_dataset, left_on='date', right_on='date', how='left').set_index('date')
    base_df = jump_returns(base_df)
    base_df['Stock'] = tickers.iloc[i,0]
    # sample only one data point per week
    
    # base_df = base_df.iloc[len(base_df)-725:len(base_df),:]
    #base_df = base_df.reset_index()
    #base_df = base_df[base_df.index % 7 == 1]
    #base_df = base_df.set_index('Date')
    
    
    # concatenate entire stock's results to overall df_total
    
    df_total = pd.concat([df_total,base_df])
    print(f'Loop Interrupt: If this is the last Loop Interrupt message, restart loop at i={i+1}')

  earnings.Reported, earnings.Estimate, earnings["Surprise_%"] = pd.to_numeric(earnings.Reported.str.replace("$","")), pd.to_numeric(earnings.Estimate.str.replace("$","")), pd.to_numeric(earnings["Surprise_%"].str.replace("%","").str.replace(",",""))
  dividends.Amount = pd.to_numeric(dividends.Amount.str.replace("$",""))


Combining Statements for 2022 and 4
Combining Statements for 2022 and 0
Combining Statements for 2022 and 3
Combining Statements for 2022 and 2
Combining Statements for 2022 and 1
Combining Statements for 2021 and 4
Combining Statements for 2021 and 0
Combining Statements for 2021 and 3
Combining Statements for 2021 and 2
Combining Statements for 2021 and 1
Combining Statements for 2020 and 4
Combining Statements for 2020 and 0
Combining Statements for 2020 and 3
Combining Statements for 2020 and 2
Combining Statements for 2020 and 1
Loop Interrupt: If this is the last Loop Interrupt message, restart loop at i=14


  earnings.Reported, earnings.Estimate, earnings["Surprise_%"] = pd.to_numeric(earnings.Reported.str.replace("$","")), pd.to_numeric(earnings.Estimate.str.replace("$","")), pd.to_numeric(earnings["Surprise_%"].str.replace("%","").str.replace(",",""))
  dividends.Amount = pd.to_numeric(dividends.Amount.str.replace("$",""))


Combining Statements for 2023 and 4
Combining Statements for 2023 and 0
Combining Statements for 2023 and 3
Combining Statements for 2023 and 2
Combining Statements for 2023 and 1
Combining Statements for 2022 and 4
Combining Statements for 2022 and 0
Combining Statements for 2022 and 3
Combining Statements for 2022 and 2
Combining Statements for 2022 and 1
Combining Statements for 2021 and 0
Combining Statements for 2020 and 4
Combining Statements for 2020 and 3
Combining Statements for 2020 and 2
Combining Statements for 2020 and 1
Loop Interrupt: If this is the last Loop Interrupt message, restart loop at i=15


  earnings.Reported, earnings.Estimate, earnings["Surprise_%"] = pd.to_numeric(earnings.Reported.str.replace("$","")), pd.to_numeric(earnings.Estimate.str.replace("$","")), pd.to_numeric(earnings["Surprise_%"].str.replace("%","").str.replace(",",""))
  dividends.Amount = pd.to_numeric(dividends.Amount.str.replace("$",""))


Combining Statements for 2023 and 1
Combining Statements for 2022 and 4
Combining Statements for 2022 and 0
Combining Statements for 2022 and 3
Combining Statements for 2022 and 2
Combining Statements for 2022 and 1
Combining Statements for 2021 and 4
Combining Statements for 2021 and 0
Combining Statements for 2021 and 3
Combining Statements for 2021 and 2
Combining Statements for 2021 and 1
Combining Statements for 2020 and 4
Combining Statements for 2020 and 0
Combining Statements for 2020 and 3
Combining Statements for 2020 and 2
Loop Interrupt: If this is the last Loop Interrupt message, restart loop at i=16


  earnings.Reported, earnings.Estimate, earnings["Surprise_%"] = pd.to_numeric(earnings.Reported.str.replace("$","")), pd.to_numeric(earnings.Estimate.str.replace("$","")), pd.to_numeric(earnings["Surprise_%"].str.replace("%","").str.replace(",",""))
  dividends.Amount = pd.to_numeric(dividends.Amount.str.replace("$",""))


Combining Statements for 2023 and 3
Combining Statements for 2023 and 2
Combining Statements for 2023 and 1
Combining Statements for 2022 and 4
Combining Statements for 2022 and 0
Combining Statements for 2022 and 3
Combining Statements for 2022 and 2
Combining Statements for 2022 and 1
Combining Statements for 2021 and 4
Combining Statements for 2021 and 0
Combining Statements for 2021 and 3
Combining Statements for 2021 and 2
Combining Statements for 2021 and 1
Loop Interrupt: If this is the last Loop Interrupt message, restart loop at i=17


  earnings.Reported, earnings.Estimate, earnings["Surprise_%"] = pd.to_numeric(earnings.Reported.str.replace("$","")), pd.to_numeric(earnings.Estimate.str.replace("$","")), pd.to_numeric(earnings["Surprise_%"].str.replace("%","").str.replace(",",""))
  dividends.Amount = pd.to_numeric(dividends.Amount.str.replace("$",""))


Combining Statements for 2022 and 4
Combining Statements for 2022 and 0
Combining Statements for 2022 and 3
Combining Statements for 2022 and 2
Combining Statements for 2022 and 1
Combining Statements for 2021 and 4
Combining Statements for 2021 and 0
Combining Statements for 2021 and 3
Combining Statements for 2021 and 2
Combining Statements for 2021 and 1
Combining Statements for 2020 and 4
Combining Statements for 2020 and 0
Combining Statements for 2020 and 3
Combining Statements for 2020 and 2
Combining Statements for 2020 and 1
Loop Interrupt: If this is the last Loop Interrupt message, restart loop at i=18


  earnings.Reported, earnings.Estimate, earnings["Surprise_%"] = pd.to_numeric(earnings.Reported.str.replace("$","")), pd.to_numeric(earnings.Estimate.str.replace("$","")), pd.to_numeric(earnings["Surprise_%"].str.replace("%","").str.replace(",",""))
  dividends.Amount = pd.to_numeric(dividends.Amount.str.replace("$",""))


Combining Statements for 2023 and 2
Combining Statements for 2023 and 1
Combining Statements for 2022 and 4
Combining Statements for 2022 and 0
Combining Statements for 2022 and 3
Combining Statements for 2022 and 2
Combining Statements for 2022 and 1
Combining Statements for 2021 and 4
Combining Statements for 2021 and 0
Combining Statements for 2021 and 3
Combining Statements for 2021 and 2
Combining Statements for 2021 and 1
Combining Statements for 2020 and 4
Combining Statements for 2020 and 0
Combining Statements for 2020 and 3
Loop Interrupt: If this is the last Loop Interrupt message, restart loop at i=19


  earnings.Reported, earnings.Estimate, earnings["Surprise_%"] = pd.to_numeric(earnings.Reported.str.replace("$","")), pd.to_numeric(earnings.Estimate.str.replace("$","")), pd.to_numeric(earnings["Surprise_%"].str.replace("%","").str.replace(",",""))
  dividends.Amount = pd.to_numeric(dividends.Amount.str.replace("$",""))


Combining Statements for 2022 and 3
Combining Statements for 2022 and 2
Combining Statements for 2022 and 1
Combining Statements for 2021 and 4
Combining Statements for 2021 and 0
Combining Statements for 2021 and 3
Combining Statements for 2021 and 2
Combining Statements for 2021 and 1
Combining Statements for 2020 and 4
Combining Statements for 2020 and 0
Combining Statements for 2020 and 3
Combining Statements for 2020 and 2
Combining Statements for 2020 and 1
Loop Interrupt: If this is the last Loop Interrupt message, restart loop at i=20


  earnings.Reported, earnings.Estimate, earnings["Surprise_%"] = pd.to_numeric(earnings.Reported.str.replace("$","")), pd.to_numeric(earnings.Estimate.str.replace("$","")), pd.to_numeric(earnings["Surprise_%"].str.replace("%","").str.replace(",",""))
  dividends.Amount = pd.to_numeric(dividends.Amount.str.replace("$",""))


Combining Statements for 2023 and 2
Combining Statements for 2023 and 1
Combining Statements for 2022 and 4
Combining Statements for 2022 and 0
Combining Statements for 2022 and 3
Combining Statements for 2022 and 2
Combining Statements for 2022 and 1
Combining Statements for 2021 and 4
Combining Statements for 2021 and 0
Combining Statements for 2021 and 3
Combining Statements for 2021 and 2
Combining Statements for 2021 and 1
Combining Statements for 2020 and 4
Combining Statements for 2020 and 0
Loop Interrupt: If this is the last Loop Interrupt message, restart loop at i=21


  earnings.Reported, earnings.Estimate, earnings["Surprise_%"] = pd.to_numeric(earnings.Reported.str.replace("$","")), pd.to_numeric(earnings.Estimate.str.replace("$","")), pd.to_numeric(earnings["Surprise_%"].str.replace("%","").str.replace(",",""))
  dividends.Amount = pd.to_numeric(dividends.Amount.str.replace("$",""))


Combining Statements for 2022 and 4
Combining Statements for 2022 and 0
Combining Statements for 2022 and 3
Combining Statements for 2022 and 2
Combining Statements for 2022 and 1
Combining Statements for 2021 and 4
Combining Statements for 2021 and 0
Combining Statements for 2021 and 3
Combining Statements for 2021 and 2
Combining Statements for 2021 and 1
Combining Statements for 2020 and 4
Combining Statements for 2020 and 0
Combining Statements for 2020 and 3
Combining Statements for 2020 and 2
Combining Statements for 2020 and 1
Loop Interrupt: If this is the last Loop Interrupt message, restart loop at i=22


  earnings.Reported, earnings.Estimate, earnings["Surprise_%"] = pd.to_numeric(earnings.Reported.str.replace("$","")), pd.to_numeric(earnings.Estimate.str.replace("$","")), pd.to_numeric(earnings["Surprise_%"].str.replace("%","").str.replace(",",""))
  dividends.Amount = pd.to_numeric(dividends.Amount.str.replace("$",""))


Combining Statements for 2023 and 1
Combining Statements for 2022 and 4
Combining Statements for 2022 and 0
Combining Statements for 2022 and 3
Combining Statements for 2022 and 2
Combining Statements for 2022 and 1
Combining Statements for 2021 and 4
Combining Statements for 2021 and 0
Combining Statements for 2021 and 3
Combining Statements for 2021 and 2
Combining Statements for 2021 and 1
Combining Statements for 2020 and 4
Combining Statements for 2020 and 0
Combining Statements for 2020 and 3
Combining Statements for 2020 and 2
Loop Interrupt: If this is the last Loop Interrupt message, restart loop at i=23


  earnings.Reported, earnings.Estimate, earnings["Surprise_%"] = pd.to_numeric(earnings.Reported.str.replace("$","")), pd.to_numeric(earnings.Estimate.str.replace("$","")), pd.to_numeric(earnings["Surprise_%"].str.replace("%","").str.replace(",",""))
  dividends.Amount = pd.to_numeric(dividends.Amount.str.replace("$",""))


Combining Statements for 2022 and 4
Combining Statements for 2022 and 0
Combining Statements for 2022 and 3
Combining Statements for 2022 and 2
Combining Statements for 2022 and 1
Combining Statements for 2021 and 4
Combining Statements for 2021 and 0
Combining Statements for 2021 and 3
Combining Statements for 2021 and 2
Combining Statements for 2021 and 1
Combining Statements for 2020 and 4
Combining Statements for 2020 and 0
Combining Statements for 2020 and 3
Combining Statements for 2020 and 2
Combining Statements for 2020 and 1
Loop Interrupt: If this is the last Loop Interrupt message, restart loop at i=24


  earnings.Reported, earnings.Estimate, earnings["Surprise_%"] = pd.to_numeric(earnings.Reported.str.replace("$","")), pd.to_numeric(earnings.Estimate.str.replace("$","")), pd.to_numeric(earnings["Surprise_%"].str.replace("%","").str.replace(",",""))
  dividends.Amount = pd.to_numeric(dividends.Amount.str.replace("$",""))


Combining Statements for 2022 and 4
Combining Statements for 2022 and 0
Combining Statements for 2022 and 3
Combining Statements for 2022 and 2
Combining Statements for 2022 and 1
Combining Statements for 2021 and 4
Combining Statements for 2021 and 0
Combining Statements for 2021 and 3
Combining Statements for 2021 and 2
Combining Statements for 2021 and 1
Combining Statements for 2020 and 4
Combining Statements for 2020 and 0
Combining Statements for 2020 and 3
Combining Statements for 2020 and 2
Combining Statements for 2020 and 1
Loop Interrupt: If this is the last Loop Interrupt message, restart loop at i=25


  earnings.Reported, earnings.Estimate, earnings["Surprise_%"] = pd.to_numeric(earnings.Reported.str.replace("$","")), pd.to_numeric(earnings.Estimate.str.replace("$","")), pd.to_numeric(earnings["Surprise_%"].str.replace("%","").str.replace(",",""))
  dividends.Amount = pd.to_numeric(dividends.Amount.str.replace("$",""))


Combining Statements for 2022 and 4
Combining Statements for 2022 and 0
Combining Statements for 2022 and 3
Combining Statements for 2022 and 2
Combining Statements for 2022 and 1
Combining Statements for 2021 and 4
Combining Statements for 2021 and 0
Combining Statements for 2021 and 3
Combining Statements for 2021 and 2
Combining Statements for 2021 and 1
Combining Statements for 2020 and 4
Combining Statements for 2020 and 0
Combining Statements for 2020 and 3
Combining Statements for 2020 and 2
Combining Statements for 2020 and 1
Loop Interrupt: If this is the last Loop Interrupt message, restart loop at i=26


  earnings.Reported, earnings.Estimate, earnings["Surprise_%"] = pd.to_numeric(earnings.Reported.str.replace("$","")), pd.to_numeric(earnings.Estimate.str.replace("$","")), pd.to_numeric(earnings["Surprise_%"].str.replace("%","").str.replace(",",""))
  dividends.Amount = pd.to_numeric(dividends.Amount.str.replace("$",""))


Combining Statements for 2023 and 2
Combining Statements for 2023 and 1
Combining Statements for 2022 and 4
Combining Statements for 2022 and 0
Combining Statements for 2022 and 3
Combining Statements for 2022 and 2
Combining Statements for 2022 and 1
Combining Statements for 2021 and 4
Combining Statements for 2021 and 0
Combining Statements for 2021 and 3
Combining Statements for 2021 and 2
Combining Statements for 2021 and 1
Combining Statements for 2020 and 4
Combining Statements for 2020 and 0
Combining Statements for 2020 and 3
Loop Interrupt: If this is the last Loop Interrupt message, restart loop at i=27


  earnings.Reported, earnings.Estimate, earnings["Surprise_%"] = pd.to_numeric(earnings.Reported.str.replace("$","")), pd.to_numeric(earnings.Estimate.str.replace("$","")), pd.to_numeric(earnings["Surprise_%"].str.replace("%","").str.replace(",",""))
  dividends.Amount = pd.to_numeric(dividends.Amount.str.replace("$",""))


Combining Statements for 2022 and 4
Combining Statements for 2022 and 0
Combining Statements for 2022 and 3
Combining Statements for 2022 and 2
Combining Statements for 2022 and 1
Combining Statements for 2021 and 4
Combining Statements for 2021 and 0
Combining Statements for 2021 and 3
Combining Statements for 2021 and 2
Combining Statements for 2021 and 1
Combining Statements for 2020 and 4
Combining Statements for 2020 and 0
Combining Statements for 2020 and 3
Combining Statements for 2020 and 2
Combining Statements for 2020 and 1
Loop Interrupt: If this is the last Loop Interrupt message, restart loop at i=28


  earnings.Reported, earnings.Estimate, earnings["Surprise_%"] = pd.to_numeric(earnings.Reported.str.replace("$","")), pd.to_numeric(earnings.Estimate.str.replace("$","")), pd.to_numeric(earnings["Surprise_%"].str.replace("%","").str.replace(",",""))
  dividends.Amount = pd.to_numeric(dividends.Amount.str.replace("$",""))


Combining Statements for 2023 and 1
Combining Statements for 2022 and 4
Combining Statements for 2022 and 0
Combining Statements for 2022 and 3
Combining Statements for 2022 and 2
Combining Statements for 2022 and 1
Combining Statements for 2021 and 4
Combining Statements for 2021 and 0
Combining Statements for 2021 and 3
Combining Statements for 2021 and 2
Combining Statements for 2021 and 1
Combining Statements for 2020 and 4
Combining Statements for 2020 and 0
Combining Statements for 2020 and 3
Loop Interrupt: If this is the last Loop Interrupt message, restart loop at i=29


  earnings.Reported, earnings.Estimate, earnings["Surprise_%"] = pd.to_numeric(earnings.Reported.str.replace("$","")), pd.to_numeric(earnings.Estimate.str.replace("$","")), pd.to_numeric(earnings["Surprise_%"].str.replace("%","").str.replace(",",""))
  dividends.Amount = pd.to_numeric(dividends.Amount.str.replace("$",""))


Combining Statements for 2022 and 4
Combining Statements for 2022 and 0
Combining Statements for 2022 and 3
Combining Statements for 2022 and 2
Combining Statements for 2022 and 1
Combining Statements for 2021 and 4
Combining Statements for 2021 and 0
Combining Statements for 2021 and 3
Combining Statements for 2021 and 2
Combining Statements for 2021 and 1
Combining Statements for 2020 and 4
Combining Statements for 2020 and 0
Combining Statements for 2020 and 3
Combining Statements for 2020 and 2
Combining Statements for 2020 and 1
Loop Interrupt: If this is the last Loop Interrupt message, restart loop at i=30


In [79]:
statements

Unnamed: 0_level_0,debtCurrent,taxAssets,investmentsCurrent,totalAssets,acctPay,accoci,inventory,totalLiabilities,acctRec,intangibles,ppeq,deferredRev,cashAndEq,assetsNonCurrent,taxLiabilities,investments,equity,retainedEarnings,deposits,assetsCurrent,investmentsNonCurrent,debt,debtNonCurrent,liabilitiesNonCurrent,liabilitiesCurrent,sharesBasic,longTermDebtEquity,shareFactor,bookVal,roa,currentRatio,roe,grossMargin,piotroskiFScore,epsQoQ,revenueQoQ,profitMargin,rps,bvps,ncfi,capex,ncfx,ncff,sbcomp,ncf,payDiv,businessAcqDisposals,issrepayDebt,issrepayEquity,investmentsAcqDisposals,freeCashFlow,ncfo,depamor,ebitda,netIncComStock,epsDil,consolidatedIncome,nonControllingInterests,shareswaDil,intexp,rnd,eps,netIncDiscOps,grossProfit,shareswa,opex,ebt,netinc,revenue,ebit,taxExp,opinc,sga,costRev,prefDVDs,quarter,year
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1
2022-12-31,4367000000.0,0.0,17194000000.0,182103000000.0,9595000000.0,-562000000.0,13224000000.0,78817000000.0,4133000000.0,33609000000.0,80860000000.0,0.0,11144000000.0,131696000000.0,6249000000.0,23106000000.0,101423000000.0,70405000000.0,0.0,50407000000.0,5912000000.0,42051000000.0,37684000000.0,46662000000.0,32155000000.0,4127000000.0,0.371553,1.0,103286000000.0,0.045552,1.567626,0.079021,0.391682,3.0,-1.141593,-0.315959,0.391682,3.402472,25.026896,-3431000000.0,-5699000000.0,0.0,2343000000.0,736000000.0,6615000000.0,-1509000000.0,0.0,2490000000.0,5000000.0,1194000000.0,2004000000.0,7703000000.0,3287000000.0,2338000000.0,-664000000.0,-0.16,-661000000.0,3000000.0,4123000000.0,-150000000.0,4464000000.0,-0.16,0.0,5500000000.0,4120000000.0,6632000000.0,-799000000.0,-664000000.0,14042000000.0,-949000000.0,-135000000.0,-1132000000.0,1706000000.0,8542000000.0,0.0,4,2022
2022-10-01,2283000000.0,0.0,18030000000.0,174841000000.0,7133000000.0,-2051000000.0,12831000000.0,74956000000.0,7469000000.0,33859000000.0,75763000000.0,0.0,4529000000.0,125578000000.0,4143000000.0,23852000000.0,99885000000.0,71024000000.0,0.0,49263000000.0,5822000000.0,39523000000.0,37240000000.0,47143000000.0,27813000000.0,4106000000.0,0.372829,1.0,99885000000.0,0.077105,1.771222,0.133133,0.426066,3.0,-0.85119,-0.200813,0.426066,3.735509,24.326595,-4574000000.0,-7299000000.0,0.0,3683000000.0,793000000.0,139000000.0,-1502000000.0,0.0,4661000000.0,383000000.0,3672000000.0,-6269000000.0,1030000000.0,3252000000.0,2926000000.0,1019000000.0,0.25,1019000000.0,0.0,4125000000.0,-138000000.0,4302000000.0,0.25,0.0,6535000000.0,4118000000.0,6710000000.0,-188000000.0,1019000000.0,15338000000.0,-326000000.0,-1207000000.0,-175000000.0,1744000000.0,8803000000.0,0.0,3,2022
2022-07-02,2882000000.0,0.0,22654000000.0,170418000000.0,7945000000.0,-1625000000.0,12174000000.0,69200000000.0,6063000000.0,34014000000.0,71660000000.0,0.0,4390000000.0,119830000000.0,4256000000.0,28583000000.0,101218000000.0,72985000000.0,0.0,50588000000.0,5929000000.0,35430000000.0,32548000000.0,41982000000.0,27218000000.0,4089000000.0,0.321563,1.0,101218000000.0,0.111865,1.858623,0.196033,0.364663,4.0,-1.088,-0.219551,0.364663,3.746882,24.75373,168000000.0,-7255000000.0,0.0,-2802000000.0,892000000.0,-1825000000.0,-1499000000.0,35000000.0,-1688000000.0,0.0,8587000000.0,-6446000000.0,809000000.0,3148000000.0,2358000000.0,-454000000.0,-0.11,-454000000.0,0.0,4100000000.0,119000000.0,4400000000.0,-0.11,0.0,5587000000.0,4100000000.0,6287000000.0,-909000000.0,-454000000.0,15321000000.0,-790000000.0,-455000000.0,-700000000.0,1800000000.0,9734000000.0,0.0,2,2022
2022-04-02,4459000000.0,0.0,32481000000.0,176356000000.0,7210000000.0,-1002000000.0,11935000000.0,73220000000.0,7074000000.0,33824000000.0,66718000000.0,0.0,6215000000.0,113788000000.0,5919000000.0,38517000000.0,103136000000.0,74894000000.0,0.0,62568000000.0,6036000000.0,37247000000.0,32788000000.0,43898000000.0,29322000000.0,4072000000.0,0.31791,1.0,103136000000.0,0.147575,2.133824,0.263442,0.503678,5.0,1.39759,-0.067097,0.503678,4.507122,25.328094,-2640000000.0,-4797000000.0,0.0,-1863000000.0,707000000.0,1388000000.0,-1487000000.0,6544000000.0,-299000000.0,589000000.0,-3919000000.0,1094000000.0,5891000000.0,3348000000.0,12012000000.0,8113000000.0,1.98,8113000000.0,0.0,4107000000.0,-997000000.0,4362000000.0,1.99,0.0,9244000000.0,4079000000.0,4903000000.0,9661000000.0,8113000000.0,18353000000.0,8664000000.0,1548000000.0,4341000000.0,1752000000.0,9109000000.0,0.0,1,2022
2021-12-25,4591000000.0,0.0,24426000000.0,168406000000.0,5747000000.0,-880000000.0,10776000000.0,73015000000.0,9457000000.0,34233000000.0,63245000000.0,0.0,4827000000.0,109848000000.0,8048000000.0,30724000000.0,95391000000.0,68265000000.0,0.0,58558000000.0,6298000000.0,38101000000.0,33510000000.0,45553000000.0,27462000000.0,4067000000.0,0.351291,1.0,95391000000.0,0.123868,2.132328,0.226744,0.536292,6.0,-0.20979,0.02753,0.536292,5.047455,23.454881,-4906000000.0,-7632000000.0,0.0,-3540000000.0,449000000.0,-3043000000.0,-1413000000.0,0.0,-2000000000.0,4000000.0,2325000000.0,-2229000000.0,5403000000.0,3074000000.0,8422000000.0,4623000000.0,1.13,4623000000.0,0.0,4093000000.0,154000000.0,4049000000.0,1.13,0.0,11009000000.0,4071000000.0,6020000000.0,5194000000.0,4623000000.0,20528000000.0,5348000000.0,571000000.0,4989000000.0,1942000000.0,9519000000.0,0.0,4,2021
2021-09-25,4694000000.0,0.0,26765000000.0,167962000000.0,6792000000.0,-1147000000.0,9798000000.0,77875000000.0,8400000000.0,34470000000.0,59733000000.0,62000000.0,7870000000.0,106658000000.0,7242000000.0,33768000000.0,90087000000.0,63642000000.0,0.0,61304000000.0,7003000000.0,40304000000.0,35610000000.0,48303000000.0,29572000000.0,4057000000.0,0.395285,1.0,90087000000.0,0.134778,2.073042,0.25111,0.559921,6.0,0.647059,0.046855,0.559921,4.730589,22.205324,-10629000000.0,-4441000000.0,0.0,3849000000.0,543000000.0,3124000000.0,-1410000000.0,0.0,4974000000.0,427000000.0,-6186000000.0,5463000000.0,9904000000.0,2959000000.0,9893000000.0,6823000000.0,1.67,6823000000.0,0.0,4086000000.0,76000000.0,3803000000.0,1.68,0.0,10746000000.0,4061000000.0,5519000000.0,6858000000.0,6823000000.0,19192000000.0,6934000000.0,35000000.0,5227000000.0,1674000000.0,8446000000.0,0.0,3,2021
2021-06-26,3695000000.0,0.0,20111000000.0,154597000000.0,5917000000.0,-1095000000.0,8817000000.0,69390000000.0,7460000000.0,34786000000.0,58166000000.0,68000000.0,4746000000.0,105225000000.0,7443000000.0,27028000000.0,85207000000.0,59647000000.0,0.0,49372000000.0,6917000000.0,35409000000.0,31714000000.0,44554000000.0,24836000000.0,4038000000.0,0.372199,1.0,85207000000.0,0.122968,1.987921,0.231499,0.570832,7.0,0.041667,-0.004917,0.570832,4.861565,21.101288,-6913000000.0,-3868000000.0,0.0,-2334000000.0,619000000.0,-446000000.0,-1410000000.0,0.0,-500000000.0,-90000000.0,-2947000000.0,4933000000.0,8801000000.0,2857000000.0,8698000000.0,5061000000.0,1.24,5061000000.0,0.0,4084000000.0,96000000.0,3715000000.0,1.25,0.0,11206000000.0,4049000000.0,5660000000.0,5745000000.0,5061000000.0,19631000000.0,5841000000.0,684000000.0,5546000000.0,1599000000.0,8425000000.0,0.0,2,2021
2021-03-27,2647000000.0,0.0,17205000000.0,150622000000.0,5434000000.0,-1103000000.0,8487000000.0,70815000000.0,7208000000.0,35379000000.0,57330000000.0,90000000.0,5192000000.0,104849000000.0,8015000000.0,24018000000.0,79807000000.0,54638000000.0,0.0,45773000000.0,6813000000.0,35884000000.0,33237000000.0,46664000000.0,24151000000.0,4063000000.0,0.416467,1.0,79807000000.0,0.123681,1.895284,0.234385,0.551721,5.0,-0.37594,-0.007817,0.551721,4.841989,19.642382,-2001000000.0,-4388000000.0,0.0,-4020000000.0,425000000.0,-673000000.0,-1411000000.0,0.0,0.0,-1736000000.0,1521000000.0,960000000.0,5348000000.0,2902000000.0,6964000000.0,3361000000.0,0.82,3361000000.0,0.0,4096000000.0,156000000.0,3623000000.0,0.83,0.0,10854000000.0,4056000000.0,7160000000.0,3906000000.0,3361000000.0,19673000000.0,4062000000.0,545000000.0,3694000000.0,1328000000.0,8819000000.0,0.0,1,2021
2020-12-26,2504000000.0,0.0,18030000000.0,153091000000.0,5581000000.0,-751000000.0,8427000000.0,72053000000.0,6782000000.0,35997000000.0,56584000000.0,1367000000.0,5865000000.0,105842000000.0,8421000000.0,25374000000.0,81038000000.0,56233000000.0,0.0,47249000000.0,7344000000.0,36401000000.0,33897000000.0,47299000000.0,24754000000.0,4098000000.0,0.418285,1.0,81038000000.0,0.139652,1.908742,0.266267,0.568025,5.0,-0.100629,-0.011431,0.568025,4.875061,19.775012,-6412000000.0,-4061000000.0,0.0,-1449000000.0,461000000.0,2509000000.0,-1353000000.0,123000000.0,0.0,0.0,-2088000000.0,6309000000.0,10370000000.0,3003000000.0,10579000000.0,5857000000.0,1.42,5857000000.0,0.0,4121000000.0,88000000.0,3655000000.0,1.43,0.0,11348000000.0,4097000000.0,5464000000.0,7488000000.0,5857000000.0,19978000000.0,7576000000.0,1631000000.0,5884000000.0,1757000000.0,8630000000.0,0.0,4,2020
2020-09-26,504000000.0,0.0,14897000000.0,145261000000.0,5159000000.0,-940000000.0,9273000000.0,70707000000.0,7140000000.0,36836000000.0,59205000000.0,1381000000.0,3356000000.0,108476000000.0,7806000000.0,21296000000.0,74554000000.0,52159000000.0,0.0,36785000000.0,6399000000.0,36563000000.0,36059000000.0,48595000000.0,22112000000.0,4253000000.0,0.483663,1.0,74554000000.0,0.15083,1.663576,0.282802,0.531337,5.0,-0.25,-0.044659,0.531337,4.310604,17.529744,-766000000.0,-3716000000.0,0.0,-12793000000.0,452000000.0,-5380000000.0,-1404000000.0,0.0,-1750000000.0,-9615000000.0,2451000000.0,4463000000.0,8179000000.0,3123000000.0,8238000000.0,4276000000.0,1.02,4276000000.0,0.0,4211000000.0,74000000.0,3272000000.0,1.02,0.0,9741000000.0,4188000000.0,4682000000.0,5041000000.0,4276000000.0,18333000000.0,5115000000.0,765000000.0,5059000000.0,1435000000.0,8592000000.0,0.0,3,2020


### Log of completed iterations

In [None]:
# DOW30 Complete in All_Data.csv, saved to df_DOW_30.csv
# DOW30 Recent data (2023) in Recent_DOW30.csv
# DOW30 entirely raw data saved to All_Data.csv (no weekly sampling, lag and future return calcs fixed, NaN's managed)

In [152]:
df_total.to_csv('All_Data.csv', header = df_total.columns, index = True, encoding = 'utf-8')