# Downloading stock data

In [1]:
import pandas as pd
from openbb import obb
import os

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


In [2]:
# Read the FMP API key from the environment so the secret never lives in the
# notebook (export FMP_API_KEY before starting the kernel). When the variable
# is unset the OpenBB credentials are left untouched.
# NOTE(review): the key that used to be committed on this line should be
# treated as leaked and rotated with the provider.
fmp_api_key = os.environ.get("FMP_API_KEY")
if fmp_api_key:
    obb.user.credentials.fmp_api_key = fmp_api_key

# Root folder for every input/output file used by the cells below.
data_folder = 'data'

In [3]:
# for saving daily data for each company
def indicators(df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of ``df`` augmented with technical-indicator output.

    A ``pct_change`` column is computed from ``close``; the frame is then
    passed through a fixed sequence of ``obb.technical`` functions, each one
    receiving the frame produced by the previous step. Finally every row that
    contains a missing value is dropped.

    Parameters
    ----------
    df : pd.DataFrame
        Price data. Must contain a ``close`` column; the OpenBB calls are
        given the whole frame, so it presumably also needs the columns those
        indicators expect (date/open/high/low/volume) -- TODO confirm.

    Returns
    -------
    pd.DataFrame
        A new frame; the caller's ``df`` is not modified.
    """
    # Work on a copy: assigning a column directly would silently add
    # 'pct_change' to the caller's frame as a side effect.
    df = df.copy()

    # TODO: is it worth using indicators that produce multiple columns? It is
    # unclear whether they stay meaningful after z-score normalization.
    df['pct_change'] = df['close'].pct_change()

    technical = obb.technical
    # (indicator function, extra keyword arguments), applied in this order.
    steps = (
        (technical.rsi, {'target': 'close'}),
        (technical.adosc, {}),
        (technical.aroon, {}),
        (technical.cci, {}),
        (technical.cg, {}),
        (technical.hma, {}),
        (technical.ichimoku, {}),
        (technical.vwap, {}),
    )
    for compute, extra_kwargs in steps:
        feature = compute(data=df, **extra_kwargs)
        # to_df() comes back indexed by 'date'; move it back to a column so
        # the next indicator receives the same layout as the first one did.
        df = feature.to_df().reset_index(level=['date'])

    return df.dropna()

interval = '1d' # 1m, 1h, 1d options
start_date = '2014-01-01'
end_date = '2024-01-31'
constituents = pd.read_csv(os.path.join(data_folder, 'constituents.csv'))

# Download one CSV of daily prices + indicators per symbol, skipping symbols
# whose folder already holds a file for this interval.
# can change to use list of companies from SEC sorted by market cap
# see https://docs.openbb.co/platform/usage/find_symbols
for co in constituents['Symbol']:
    p = os.path.join(data_folder, co)
    # Creates the per-symbol folder if needed; no error when it already exists.
    os.makedirs(p, exist_ok=True)
    files = os.listdir(p)

    # A file counts as "this interval" when the interval tag occurs in its name.
    already_downloaded = any(interval in fn for fn in files)
    if already_downloaded:
        # Reported once per symbol, however many matching files exist.
        print(f"{co} data already downloaded!")
        continue

    try:
        # Keyword arguments keep the call independent of the positional
        # parameter order of `historical`.
        results = obb.equity.price.historical(
            symbol=co,
            interval=interval,
            start_date=start_date,
            end_date=end_date,
            provider="yfinance",
        )
        df = results.to_df().reset_index(level='date')
        # errors='ignore': tolerate payloads that lack these columns instead
        # of discarding the whole symbol with a KeyError.
        df = df.drop(columns=['dividends', 'stock_splits'], errors='ignore')
        df = indicators(df) # add indicators to data

        fn = f"{co}_{start_date}_{interval}.csv"
        out_file = os.path.join(data_folder, co, fn)
        df.to_csv(out_file)
        print(f"{co} data saved!")

    except Exception as exc:
        # Best-effort: one failing symbol must not stop the loop, but the
        # reason is reported, and KeyboardInterrupt/SystemExit are no longer
        # swallowed as they were by the previous bare `except:`.
        print(f"{co} could not be downloaded: {exc!r}")

MMM data already downloaded!
AOS data already downloaded!
ABT data already downloaded!
ABBV data already downloaded!
ACN data already downloaded!
ADBE data already downloaded!
AMD data already downloaded!
AES data already downloaded!
AFL data already downloaded!
A data already downloaded!
APD data already downloaded!
ABNB data already downloaded!
AKAM data already downloaded!
ALB data already downloaded!
ARE data already downloaded!
ALGN data already downloaded!
ALLE data already downloaded!
LNT data already downloaded!
ALL data already downloaded!
GOOGL data already downloaded!
GOOG data already downloaded!
MO data already downloaded!
AMZN data already downloaded!
AMCR data already downloaded!
AEE data already downloaded!
AAL data already downloaded!
AEP data already downloaded!
AXP data already downloaded!
AIG data already downloaded!
AMT data already downloaded!
AWK data already downloaded!
AMP data already downloaded!
AME data already downloaded!
AMGN data already downloaded!
APH d

In [129]:
# for saving hourly data for each company and updating automatically
# Only sub-directories of data_folder are company folders; this skips
# '.DS_Store' as well as any loose file stored directly in data_folder
# (calling os.listdir on such a file would raise NotADirectoryError).
folders = [
    name for name in os.listdir(data_folder)
    if os.path.isdir(os.path.join(data_folder, name))
]
interval = '1h'
start_date = '2004-01-01'

for company in folders:
    company_dir = os.path.join(data_folder, company)
    files = os.listdir(company_dir)
    # Existing files for this interval (interval tag occurs in the file name).
    matches = [fn for fn in files if interval in fn]
    already_downloaded = bool(matches)

    if already_downloaded:
        print(f"Previous {company} data found, updating!")
    else:
        print(f"Looking for {company}...")

    # One download per company, shared by every file that has to be updated.
    # Keyword arguments keep the call independent of positional order.
    df = obb.equity.price.historical(
        symbol=company,
        interval=interval,
        start_date=start_date,
        provider='yfinance',
    ).to_df()

    if df.empty:
        # Nothing to merge and no earliest date to name a new file after.
        print(f"No {interval} data returned for {company}, skipping.")
        continue

    if already_downloaded:
        # Rows read back from CSV carry string index labels, so normalise the
        # fresh index to strings as well before comparing the two.
        fresh = df.copy()
        fresh.index = fresh.index.astype(str)

        for fn in matches:
            out_file = os.path.join(data_folder, company, fn)
            prev = pd.read_csv(out_file).set_index('date')
            combined = pd.concat([prev, fresh])
            # De-duplicate on the timestamp label, not on column values:
            # DataFrame.drop_duplicates() ignores the index, so it would keep
            # two rows for a bar whose values were revised and would collapse
            # distinct bars that happen to share identical values.
            # keep='last' prefers the freshly downloaded row.
            combined = combined[~combined.index.duplicated(keep='last')]
            combined.to_csv(out_file)
    else:
        # Earliest timestamp in the download. The previous `df.idxmin()[0]`
        # returned the timestamp at which the first column reached its
        # minimum value, which is unrelated to the start of the data.
        min_date = pd.to_datetime(df.index.min())
        date_str = f'{min_date.year}-{min_date.month}-{min_date.day}'
        fn = f"{company}_{date_str}_{interval}.csv"
        out_file = os.path.join(data_folder, company, fn)
        df.to_csv(out_file)
        print(f"Data saved to {out_file}!\n")

Previous AMZN data found, updating!
Previous SPY data found, updating!
Previous AAPL data found, updating!
Previous GOOG data found, updating!
Previous MSFT data found, updating!
Previous META data found, updating!
Previous NFLX data found, updating!
Previous NDAQ data found, updating!
Previous NVDA data found, updating!
Previous AVGO data found, updating!
