# Download Stock Data

Imports

In [2]:
from pathlib import Path

import pandas as pd
import yfinance as yf
from dotenv import dotenv_values
from tiingo import TiingoClient

import constants

# Read key/value settings from the local .env file; the Tiingo API token is looked up in this mapping below.
config = dotenv_values(".env")

## Tiingo

In [9]:
# Client configuration: the API token comes from the .env mapping loaded above.
# NOTE(review): 'session': True presumably makes the client reuse one HTTP session — confirm in the tiingo docs.
tiingo_config = {'session': True, 'api_key': config['TIINGO_API_TOKEN']}
client = TiingoClient(tiingo_config)

In [None]:
def download_tiingo(ticker: str, start_date: str, end_date: str, frequency: str) -> pd.DataFrame:
    """Fetch a price DataFrame for ``ticker`` from Tiingo, sorted by its index.

    Args:
        ticker: Symbol to request.
        start_date: Forwarded to the client as ``startDate``.
        end_date: Forwarded to the client as ``endDate``.
        frequency: Forwarded to the client as ``frequency``.

    Returns:
        The frame returned by the module-level ``client``, sorted by index.
    """
    request = {"startDate": start_date, "endDate": end_date, "frequency": frequency}
    prices = client.get_dataframe(ticker, **request)
    return prices.sort_index()

def rename_and_drop_columns(df: pd.DataFrame, 
                            drop_columns: list | None = None, 
                            new_column_names: dict | None= None) -> pd.DataFrame:
    
    if drop_columns is None:
        drop_columns = ["open", "high", "low", "close", "volume", "splitFactor"]

    if new_column_names is None:
        new_column_names = {"adjClose": "close", 
                            "adjHigh": "high", 
                            "adjLow": "low", 
                            "adjOpen": "open", 
                            "adjVolume": "volume"}
        
    df = df.drop(columns=drop_columns, errors='ignore')
    df = df.rename(columns=new_column_names)
    return df

def save_to_parquet(df, filename: str) -> None:
    """Write ``df`` to ``filename`` via its ``to_parquet`` method, including the index."""
    df.to_parquet(filename, index=True)

def read_from_parquet(filename: str) -> pd.DataFrame:
    """Load the Parquet file at ``filename`` into a DataFrame with ``pd.read_parquet``."""
    return pd.read_parquet(filename)

def show_info(df: pd.DataFrame):
    """Print a quick overview of ``df``: column dtypes, the index, and null counts per column."""
    # Each entry pairs a header with a lazily evaluated summary so that every
    # value is computed only right before it is printed.
    sections = (
        ("\n\n\n=====types:=====", lambda: df.dtypes),
        ("\n\n\n=====index:=====", lambda: df.index),
        ("\n\n\n=====null values:=====", lambda: df.isnull().sum()),
    )
    for header, summarize in sections:
        print(header)
        print(summarize())


### Download the 3 most valuable stocks in the S&P 500 index

Source (market-cap ranking): https://companiesmarketcap.com/usa/largest-companies-in-the-usa-by-market-cap/

### 1. NVIDIA daily data

In [None]:
# Load NVIDIA daily prices from the local Parquet cache. The Tiingo download runs
# only when the cache file is missing, so the cell works on a fresh checkout
# without un-commenting code and does not hit the API on every run.
nvidia_daily_path = Path("data/nvidia_daily.parquet")

if nvidia_daily_path.exists():
    nvidia_daily = read_from_parquet(str(nvidia_daily_path))
else:
    nvidia_daily = download_tiingo(constants.TICKER_NVIDIA, constants.START_DATE, constants.END_DATE, constants.DAILY_TIINGO)
    # NOTE(review): the adj* -> plain column rename is applied here for consistency
    # with the Apple dataset; confirm downstream code expects this schema.
    nvidia_daily = rename_and_drop_columns(nvidia_daily)
    # Make sure the target directory exists before writing the cache.
    nvidia_daily_path.parent.mkdir(parents=True, exist_ok=True)
    save_to_parquet(nvidia_daily, str(nvidia_daily_path))

show_info(nvidia_daily)

nvidia_daily.head()

### 2. MICROSOFT daily data

In [None]:
# Load Microsoft daily prices from the local Parquet cache. The Tiingo download runs
# only when the cache file is missing, so the cell works on a fresh checkout
# without un-commenting code and does not hit the API on every run.
microsoft_daily_path = Path("data/microsoft_daily.parquet")

if microsoft_daily_path.exists():
    microsoft_daily = read_from_parquet(str(microsoft_daily_path))
else:
    microsoft_daily = download_tiingo(constants.TICKER_MICROSOFT, constants.START_DATE, constants.END_DATE, constants.DAILY_TIINGO)
    # NOTE(review): the adj* -> plain column rename is applied here for consistency
    # with the Apple dataset; confirm downstream code expects this schema.
    microsoft_daily = rename_and_drop_columns(microsoft_daily)
    # Make sure the target directory exists before writing the cache.
    microsoft_daily_path.parent.mkdir(parents=True, exist_ok=True)
    save_to_parquet(microsoft_daily, str(microsoft_daily_path))

show_info(microsoft_daily)

microsoft_daily.head()

### 3. APPLE daily data

In [5]:
# Load Apple daily prices from the local Parquet cache. The Tiingo download runs
# only when the cache file is missing, so the cell works on a fresh checkout
# without un-commenting code and does not hit the API on every run.
apple_daily_path = Path("data/apple_daily.parquet")

if apple_daily_path.exists():
    apple_daily = read_from_parquet(str(apple_daily_path))
else:
    apple_daily = download_tiingo(constants.TICKER_APPLE, constants.START_DATE, constants.END_DATE, constants.DAILY_TIINGO)
    # Keep the adj* columns under the plain close/high/low/open/volume names.
    apple_daily = rename_and_drop_columns(apple_daily)
    # Make sure the target directory exists before writing the cache.
    apple_daily_path.parent.mkdir(parents=True, exist_ok=True)
    save_to_parquet(apple_daily, str(apple_daily_path))

show_info(apple_daily)

apple_daily.head()




=====types:=====
close      float64
high       float64
low        float64
open       float64
volume       int64
divCash    float64
dtype: object



=====index:=====
DatetimeIndex(['2010-01-04 00:00:00+00:00', '2010-01-05 00:00:00+00:00',
               '2010-01-06 00:00:00+00:00', '2010-01-07 00:00:00+00:00',
               '2010-01-08 00:00:00+00:00', '2010-01-11 00:00:00+00:00',
               '2010-01-12 00:00:00+00:00', '2010-01-13 00:00:00+00:00',
               '2010-01-14 00:00:00+00:00', '2010-01-15 00:00:00+00:00',
               ...
               '2025-06-16 00:00:00+00:00', '2025-06-17 00:00:00+00:00',
               '2025-06-18 00:00:00+00:00', '2025-06-20 00:00:00+00:00',
               '2025-06-23 00:00:00+00:00', '2025-06-24 00:00:00+00:00',
               '2025-06-25 00:00:00+00:00', '2025-06-26 00:00:00+00:00',
               '2025-06-27 00:00:00+00:00', '2025-06-30 00:00:00+00:00'],
              dtype='datetime64[ns, UTC]', name='date', length=3896, freq=None)




Unnamed: 0_level_0,close,high,low,open,volume,divCash
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2010-01-04 00:00:00+00:00,6.431808,6.446534,6.38282,6.414376,493730093,0.0
2010-01-05 00:00:00+00:00,6.442927,6.479292,6.408967,6.449539,601905401,0.0
2010-01-06 00:00:00+00:00,6.340444,6.468473,6.333832,6.442927,552160552,0.0
2010-01-07 00:00:00+00:00,6.328723,6.371399,6.282741,6.363886,477131677,0.0
2010-01-08 00:00:00+00:00,6.370798,6.371399,6.283041,6.320308,447611247,0.0


## Yahoo Finance API

In [18]:
# Request daily AAPL bars from Yahoo Finance.
# NOTE: when the request fails (e.g. the rate-limit error in the recorded output),
# the returned frame has no rows, so head() shows only the column headers.
yahoo_request = dict(start='2024-01-01', end='2025-04-01', interval='1d')
apple_stock = yf.download('AAPL', **yahoo_request)
apple_stock.head()

[*********************100%***********************]  1 of 1 completed

1 Failed download:
['AAPL']: YFRateLimitError('Too Many Requests. Rate limited. Try after a while.')


Price,Adj Close,Close,High,Low,Open,Volume
Ticker,AAPL,AAPL,AAPL,AAPL,AAPL,AAPL
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
