In [1]:
import yfinance as yf
import pandas as pd
from datetime import datetime, timedelta
import csv


# Get the list of NASDAQ companies
# NOTE(review): with pandas' default NA parsing, a symbol spelled like "NA"
# would be read as NaN — confirm the raw file contains no such symbol.
nasdaq_tickers = pd.read_csv('../raw_data/nasdaq_tickers_raw.csv')
# Alternative source (not used here):
# pd.read_csv('https://datahub.io/core/nasdaq-listings/r/nasdaq-listed-symbols.csv')
tickers_total = nasdaq_tickers['Symbol'].tolist()

# Positions in tickers_total processed by this notebook (end is exclusive).
# Presumably one share of a split workload — the output file written further
# down is named "part_three"; confirm before changing the bounds.
TICKER_SLICE_START = 3566
TICKER_SLICE_END = 5349
tickers = tickers_total[TICKER_SLICE_START:TICKER_SLICE_END]

# Prepare the CSV file
# NOTE(review): neither name below is read by the visible cells.
header_written = False
csv_file = 'nasdaq_financial_data.csv'


# Helper function to get the stock price on a specific date
def get_stock_price(ticker, date, stock):
    """Return the first available closing price on or after ``date``.

    The lookup covers the seven calendar days starting at ``date`` so that a
    date without quotes (for instance a weekend) falls through to the next
    day that has data.

    Args:
        ticker: Symbol; only used in the error message.
        date: First day of the lookup window. Must support ``+ timedelta``.
        stock: Object exposing ``history(start=..., end=...)`` that returns a
            DataFrame with a ``'Close'`` column (e.g. a ``yfinance.Ticker``).

    Returns:
        The ``'Close'`` value of the first row in the window, or ``None`` when
        the window holds no rows or the lookup raised.
    """
    try:
        # A single request for the whole window replaces the previous
        # "one unused window request + up to seven one-day requests".
        # The first row is taken as the earliest day: history() rows are
        # expected to be in chronological order.
        window = stock.history(start=date, end=date + timedelta(days=7))
        if not window.empty:
            return window['Close'].iloc[0]
    except Exception as e:
        # Best effort: report the problem and let the caller see None.
        print(f"get_stock_price : Error processing ticker {ticker}: {e}")
    return None


!
# Flatten a DataFrame into a single row: every (column year, row label) pair becomes one column
def flatten_data(df, ticker):
    """Flatten a statement DataFrame into a single row indexed by ``ticker``.

    Every cell of ``df`` becomes one column of the result. The new column
    label is ``"<year> <row label>"``, where the year comes from the cell's
    original column (which must expose ``.year``) and the row label from its
    original index entry. Cells are laid out column by column.

    Only the year is kept in the label, so two source columns dated in the
    same year produce identical labels.

    Args:
        df: DataFrame whose columns are date-like and whose index holds the
            item names.
        ticker: Label used as the single index value of the result.

    Returns:
        A one-row DataFrame. Both the row index and the column axis carry the
        name ``'Ticker'``.

    Raises:
        ValueError: If ``df`` has no rows or no columns.
    """
    if df.empty:
        # Fail with a clear message instead of an obscure pandas error.
        raise ValueError(f"flatten_data : no data to flatten for ticker {ticker}")

    # unstack() on a flat-indexed frame returns a Series keyed by
    # (column, row label), ordered column by column.
    stacked = df.unstack()

    # Build the labels straight from the MultiIndex so the result does not
    # depend on the auto-generated 'level_0'/'level_1' column names, which
    # only exist when the axes of ``df`` are unnamed.
    labels = [f"{date.year} {item}" for date, item in stacked.index]

    flat = stacked.to_frame().T
    flat.columns = pd.Index(labels, name='Ticker')
    flat.index = pd.Index([ticker], name='Ticker')
    return flat


# Compute the ratio between the stock's 52-week price change and the S&P 500's 52-week price change
def get_stock_value_ratio(date, ticker, stock, stock_sp):
    """Return the stock's 52-week price change relative to the S&P 500's.

    The result is ``(price_now / price_52w_ago)`` of the stock divided by the
    same quotient for the S&P 500. Prices come from ``get_stock_price``.

    Up to seven start dates are tried (``date``, ``date + 1 day``, ...). An
    attempt is skipped when the stock has no current price, and abandoned
    (with a printed message) when any step raises — for example when a price
    is ``None`` and the division fails.

    Args:
        date: Reference date; must support ``+``/``-`` with ``timedelta``.
        ticker: Symbol of the stock, passed to ``get_stock_price``.
        stock: Price source for the stock.
        stock_sp: Price source for the S&P 500 (``^GSPC``).

    Returns:
        The ratio from the first attempt that succeeds, or ``None`` when all
        seven attempts fail.
    """
    for i in range(7):
        try:
            # Adjust date by i days
            adjusted_date = date + timedelta(days=i)
            price_now = get_stock_price(ticker, adjusted_date, stock)
            if price_now is None:
                continue

            # 52 weeks (364 days) before the adjusted date
            date_diff = adjusted_date - timedelta(weeks=52)

            # Growth factor of the S&P 500 over the period
            sp500_change = (
                get_stock_price("^GSPC", adjusted_date, stock_sp)
                / get_stock_price("^GSPC", date_diff, stock_sp)
            )
            # Growth factor of the stock; the current price fetched above is
            # reused instead of being requested a second time.
            stock_change = price_now / get_stock_price(ticker, date_diff, stock)

            return stock_change / sp500_change
        except Exception as e:
            print(f"get_stock_value_ratio : Error processing ticker {ticker}: {e}")
    return None

# Add the stock price and the stock / S&P 500 ratio for every date column of a DataFrame

def get_stock_value(df, ticker, stock, stock_sp):
    """Add a price row and a relative-performance row to ``df``.

    For every column label of ``df`` (used as a date) the closing price from
    ``get_stock_price`` and the ratio from ``get_stock_value_ratio`` are
    collected, then stored as the rows ``'Stock value'`` and
    ``'Ratio ticker sp500'``.

    Args:
        df: DataFrame whose column labels are the dates to look up.
        ticker: Symbol of the stock.
        stock: Price source for the stock.
        stock_sp: Price source for the S&P 500.

    Returns:
        The same ``df`` object; it is modified in place, not copied.
    """
    lst_stock_value = []
    lst_stock_ratio = []
    for date in df.columns:
        lst_stock_value.append(get_stock_price(ticker, date, stock))
        lst_stock_ratio.append(get_stock_value_ratio(date, ticker, stock, stock_sp))

    # Row assignment via .loc creates the rows if they do not exist yet.
    df.loc['Stock value'] = lst_stock_value
    df.loc['Ratio ticker sp500'] = lst_stock_ratio
    return df

def create_df(tickers):
    """Assemble one wide row of financial data per ticker.

    For each ticker the balance sheet (extended with the price rows added by
    ``get_stock_value``), the income statement and the cash-flow statement are
    flattened with ``flatten_data`` and joined side by side. The resulting
    rows are stacked into a single DataFrame.

    A ticker whose processing raises is reported on stdout and left out of
    the result.

    Args:
        tickers: Iterable of ticker symbols.

    Returns:
        DataFrame with one row per successfully processed ticker; empty when
        none succeeded.
    """
    sp500 = yf.Ticker('^GSPC')
    all_rows = pd.DataFrame()

    for ticker in tickers:
        try:
            stock = yf.Ticker(ticker)

            # Pull the three statements before any processing starts.
            balance = stock.balance_sheet
            income = stock.financials
            cash = stock.cashflow

            # Only the balance sheet receives the price rows.
            flat_parts = [
                flatten_data(get_stock_value(balance, ticker, stock, sp500), ticker),
                flatten_data(income, ticker),
                flatten_data(cash, ticker),
            ]
            ticker_row = pd.concat(flat_parts, axis=1)

            # Appending inside the try block means a failing concat only
            # drops this ticker; rows gathered so far are kept.
            all_rows = pd.concat([all_rows, ticker_row])
        except Exception as e:
            print(f"create_df : Error processing ticker {ticker}: {e}")

    return all_rows

In [None]:
# Build the dataset for the selected tickers; create_df queries yfinance for
# every ticker, so this cell can take a long time.
df = create_df(tickers)
# Write the result as CSV. Note that the target file name has no extension.
df.to_csv("../raw_data/Raw_data_new_version_part_three_Jorge")

In [2]:
# Total number of symbols read from the ticker file (before slicing).
len(tickers_total)

7133