In [5]:
import yfinance as yf
import pandas as pd
from datetime import datetime, timedelta
import csv


# Get the list of NASDAQ companies
nasdaq_tickers = pd.read_csv('nasdaq_screener.csv')
#nasdaq_tickers= pd.read_csv('https://datahub.io/core/nasdaq-listings/r/nasdaq-listed-symbols.csv')
tickers_total = nasdaq_tickers['Symbol'].tolist()
tickers = tickers_total[10:12]

# Prepare the CSV file
header_written = False
csv_file = 'nasdaq_financial_data.csv'


# Helper function to get the stock price on a specific date
def get_stock_price(ticker, date):
    try:
        stock_data = yf.download(ticker, start=date - timedelta(days=2), end=date + timedelta(days=5))
        if not stock_data.empty:
            return stock_data['Close'].iloc[2]
        else:
            print(f'error retrieving the data for this {date}')
            return None
    except:
        return None



# Function that flattens the data of a dataframe concatenate the columns and put everything in a row
def flatten_data(df, ticker):    
    
    df_u = df.unstack().reset_index()
    df_u['Ticker'] = df_u.apply(lambda row: str(row['level_0'].year) + ' ' + row['level_1'], axis=1)
    df_u.set_index(keys = 'Ticker')
    df_ordered = df_u.drop(columns = ['level_0','level_1'])
    df_transposed = df_ordered.set_index(keys='Ticker').T
    df_transposed['Ticker'] = ticker
    df_transposed.set_index(keys='Ticker',inplace = True)  
    
    return df_transposed




In [6]:
#function to get the stock value corresponding to the date date
def get_stock_value(df, ticker) :
    lst = []
    for date in df.columns :
        date_str = date.strftime('%Y-%m-%d')
        lst.append(get_stock_price(ticker, date))
    df.loc['Stock value'] = lst 
    return df

In [7]:
def create_df(tickers):
    df_complete = pd.DataFrame()
    for ticker in tickers:  
        df_row = pd.DataFrame()
        try: 
            # Get financial statements for the ticker ticker
            stock = yf.Ticker(ticker)
            balance_sheet = stock.balance_sheet
            income_statement = stock.financials
            cashflow_statement = stock.cashflow
            # add the stock value and flatten the dataframe
            df_bs = get_stock_value(balance_sheet,ticker)
            df_bs_f = flatten_data(df_bs,ticker)
            df_is_f = flatten_data(income_statement,ticker)
            df_cs_f = flatten_data(cashflow_statement,ticker)
            #concatenate
            df_row = pd.concat([df_bs_f,df_is_f,df_cs_f],axis=1)
            df_complete = pd.concat([df_complete, df_row])
    
        except Exception as e: 
            print(f"Error processing ticker {ticker}: {e}")
    return df_complete
        
     

In [11]:
df_complete = create_df(tickers)


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed

1 Failed download:
['AAMC']: YFChartError("%ticker%: Data doesn't exist for startDate = 1640754000, endDate = 1641358800")
[*********************100%%**********************]  1 of 1 completed

1 Failed download:
['AAMC']: YFChartError("%ticker%: Data doesn't exist for startDate = 1609218000, endDate = 1609822800")
[*********************100%%**********************]  1 of 1 completed

1 Failed download:
['AAMC']: YFChartError("%ticker%: Data doesn't exist for startDate = 1577595600, endDate = 1578200400")
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


error retrieving the data for this 2021-12-31 00:00:00
error retrieving the data for this 2020-12-31 00:00:00
error retrieving the data for this 2019-12-31 00:00:00


In [13]:
df_complete

Ticker,2023 Treasury Shares Number,2023 Ordinary Shares Number,2023 Share Issued,2023 Net Debt,2023 Total Debt,2023 Tangible Book Value,2023 Invested Capital,2023 Working Capital,2023 Net Tangible Assets,2023 Capital Lease Obligations,...,2019 Cash Dividends Paid,2019 Preferred Stock Dividend Paid,2019 Common Stock Dividend Paid,2019 Net Short Term Debt Issuance,2019 Short Term Debt Issuance,2019 Dividend Received Cfo,2019 Amortization Of Securities,2019 Deferred Tax,2019 Deferred Income Tax,2019 Earnings Losses From Equity Investments
Ticker,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
AAMC,2129973.0,2554512.0,4684485.0,,900000.0,-125843000.0,,,-125843000.0,900000.0,...,,,,,,,,,,
AAME,,,,,,,,,,,...,-802000.0,-399000.0,-403000.0,,,379000.0,17288000.0,-913000.0,-913000.0,


In [14]:
df_row_2 = create_df(['AAMC'])
df_row_1 = create_df(['AAME'])

[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed

1 Failed download:
['AAMC']: YFChartError("%ticker%: Data doesn't exist for startDate = 1640754000, endDate = 1641358800")
[*********************100%%**********************]  1 of 1 completed

1 Failed download:
['AAMC']: YFChartError("%ticker%: Data doesn't exist for startDate = 1609218000, endDate = 1609822800")
[*********************100%%**********************]  1 of 1 completed

1 Failed download:
['AAMC']: YFChartError("%ticker%: Data doesn't exist for startDate = 1577595600, endDate = 1578200400")
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed

error retrieving the data for this 2021-12-31 00:00:00
error retrieving the data for this 2020-12-31 00:00:00
error retrieving the data for this 2019-12-31 00:00:00





In [16]:
df_row_1

Ticker,2022 Treasury Shares Number,2022 Preferred Shares Number,2022 Ordinary Shares Number,2022 Share Issued,2022 Net Debt,2022 Total Debt,2022 Tangible Book Value,2022 Invested Capital,2022 Net Tangible Assets,2022 Common Stock Equity,...,2019 Stock Based Compensation,2019 Unrealized Gain Loss On Investment Securities,2019 Amortization Of Securities,2019 Deferred Tax,2019 Deferred Income Tax,2019 Depreciation And Amortization,2019 Operating Gains Losses,2019 Earnings Losses From Equity Investments,2019 Gain Loss On Investment Securities,2019 Net Income From Continuing Operations
Ticker,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
AAME,1993665.0,55000.0,20407229.0,22400894.0,6884000.0,35747000.0,99594000.0,137885000.0,99649000.0,102138000.0,...,353000.0,-5511000.0,17288000.0,-913000.0,-913000.0,996000.0,-1574000.0,,-1574000.0,-386000.0


In [39]:
list_r2 = df_row_2.columns
list_r1 = df_row_1.columns

In [42]:
df_complete['2022 Treasury Shares Number']

Ticker
AAMC    2802334.0
AAME    1993665.0
Name: 2022 Treasury Shares Number, dtype: object

In [35]:
df_row_1

Ticker,2022 Treasury Shares Number,2022 Preferred Shares Number,2022 Ordinary Shares Number,2022 Share Issued,2022 Net Debt,2022 Total Debt,2022 Tangible Book Value,2022 Invested Capital,2022 Net Tangible Assets,2022 Common Stock Equity,...,2019 Stock Based Compensation,2019 Unrealized Gain Loss On Investment Securities,2019 Amortization Of Securities,2019 Deferred Tax,2019 Deferred Income Tax,2019 Depreciation And Amortization,2019 Operating Gains Losses,2019 Earnings Losses From Equity Investments,2019 Gain Loss On Investment Securities,2019 Net Income From Continuing Operations
Ticker,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
AAME,1993665.0,55000.0,20407229.0,22400894.0,6884000.0,35747000.0,99594000.0,137885000.0,99649000.0,102138000.0,...,353000.0,-5511000.0,17288000.0,-913000.0,-913000.0,996000.0,-1574000.0,,-1574000.0,-386000.0


In [17]:
df_complete = pd.DataFrame()
for ticker in tickers:    
    try: 
        # Get financial statements for the ticker ticker
        stock = yf.Ticker(ticker)
        balance_sheet = stock.balance_sheet
        income_statement = stock.financials
        cashflow_statement = stock.cashflow
        # add the stock value and flatten the dataframe
        df_bs = get_stock_value(balance_sheet,ticker)
        df_bs_f = flatten_data(df_bs)
        df_is_f = flatten_data(income_statement)
        df_cs_f = flatten_data(cashflow_statement)
        #concatenate
        df_row = pd.concat([df_bs_f,df_is_f,df_cs_f],axis=1)
        df
        df_complete = pd.concat([df_complete, df_row])

    except Exception as e: 
        print(f"Error processing ticker {ticker}: {e}")
        
        
        

[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed

1 Failed download:
['AAMC']: YFChartError("%ticker%: Data doesn't exist for startDate = 1640754000, endDate = 1641358800")
[*********************100%%**********************]  1 of 1 completed

1 Failed download:
['AAMC']: YFChartError("%ticker%: Data doesn't exist for startDate = 1609218000, endDate = 1609822800")
[*********************100%%**********************]  1 of 1 completed

1 Failed download:
['AAMC']: YFChartError("%ticker%: Data doesn't exist for startDate = 1577595600, endDate = 1578200400")
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed

error retrieving the data for this 2021-12-31 00:00:00
error retrieving the data for this 2020-12-31 00:00:00
error retrieving the data for this 2019-12-31 00:00:00
Error processing ticker AAMC: name 'df' is not defined
Error processing ticker AAME: name 'df' is not defined





In [None]:
df_row

In [None]:
df_preprocessed.columns

In [None]:
tickers 

In [None]:
df_data = pd.DataFrame

In [None]:
df_data.mean()

In [None]:
# Loop through tickers

for ticker in tickers:
    try:
        stock = yf.Ticker(ticker)
        
        # Get financial statements
        balance_sheet = stock.balance_sheet
        income_statement = stock.financials
        cashflow_statement = stock.cashflow
        
        # Ensure we have data for the past 4 years
        if len(balance_sheet.columns) >= 4 and len(income_statement.columns) >= 4 and len(cashflow_statement.columns) >= 4:
            row = {'Ticker' : ticker}
            for i in range(4):
                date = balance_sheet.columns[i]
                date_str = date.strftime('%Y-%m-%d')
                stock_price = get_stock_price(ticker, date)
                
                # Add data to the row with year-specific keys
                row[f'Date_{i+1}'] = date_str
                row[f'Stock_Price_{i+1}'] = stock_price
                
                for col in balance_sheet.index:
                    row[f'BS_{col}_{i+1}'] = balance_sheet.loc[col, date]
                for col in income_statement.index:
                    row[f'IS_{col}_{i+1}'] = income_statement.loc[col, date]
                for col in cashflow_statement.index:
                    row[f'CF_{col}_{i+1}'] = cashflow_statement.loc[col, date]
            print(row)
            # Write the row to the CSV file
            with open(csv_file, mode='a', newline='') as file:
                writer = csv.DictWriter(file, fieldnames=row.keys())
                
                if not header_written:
                    writer.writeheader()
                    header_written = True
                
                writer.writerow(row)
            
                    
    except Exception as e:
        print(f"Error processing ticker {ticker}: {e}")

print(f"Data extraction complete. Data saved to '{csv_file}'.")

In [None]:
data = pd.read_csv(csv_file)

In [None]:
data

In [None]:
# Define the ticker symbol
ticker = 'AAPL'

# Get data for this ticker
stock_data = yf.download(ticker, start='2019-01-01', end='2023-12-31')

stock_data = yf.Ticker(tickers[0])

df_balance = stock_data.balance_sheet
df_income_s = stock.financials
df_cash = stock.cashflow

# flatten the data and set the ticker as index
df_balance_u = df_balance.unstack().reset_index()
df_balance_u['Ticker'] = df_balance_u.apply(lambda row: str(row['level_0'].year) + ' ' + row['level_1'], axis=1)
df_balance_u.set_index(keys = 'Ticker')
df_ordered = df_balance_u.drop(columns = ['level_0','level_1'])
df_transposed = df_ordered.set_index(keys='Ticker').T
df_transposed['Ticker'] = ticker
df_transposed.set_index(keys='Ticker')


In [None]:
stock_data.balance_sheet

In [None]:
df_bs = stock_data.balance_sheet
df_bs

In [None]:
#function to get the stock value corresponding to the date date

lst = []
for date in df_bs.columns :
    date_str = date.strftime('%Y-%m-%d')
    new_row[date] = get_stock_price(ticker, date)
    lst.append(get_stock_price(ticker, date))
df_bs.loc['Stock value'] = lst  

In [None]:
df_bs