In [None]:
import yfinance as yf
import pandas as pd
import numpy as np
import sklearn
import matplotlib 
import seaborn as sns
import statsmodels.api as sm
import scipy.optimize
import os

In [2]:
spdr_sector_etfs = {
    "Communication Services": "XLC",
    "Consumer Discretionary": "XLY",
    "Consumer Staples": "XLP",
    "Energy": "XLE",
    "Financials": "XLF",
    "Health Care": "XLV",
    "Industrials": "XLI",
    "Materials": "XLB",
    "Real Estate": "XLRE",
    "Technology": "XLK",
    "Utilities": "XLU"
}

In [19]:
cryptos = {
    "Bitcoin": "BTC-USD",
    "Ethereum": "ETH-USD",
    "Ripple": "XRP-USD",
    "Litecoin": "LTC-USD",
    "Cardano": "ADA-USD",
    "Bitcoin Cash": "BCH-USD",
    "Binance Coin": "BNB-USD",
    "Dash": "DASH-USD",
    "EOS": "EOS-USD",
    "IOTA": "IOT-USD",
    "Chainlink": "LINK-USD",
    "Tron": "TRX-USD",
    "Tether": "USDT-USD",
    "Stellar": "XLM-USD",
    "Monero": "XMR-USD"
}

In [21]:
# combine the tickers
all_tickers = list(spdr_sector_etfs.values()) + list(cryptos.values())

len(all_tickers)

26

In [51]:
def download_tickers(ticker_list):
    output_folder = "historical_data"
    os.makedirs(output_folder, exist_ok=True)

    for ticker in ticker_list:
        print(f"Fetching data for: {ticker}")
        data = yf.download(ticker, progress=False)  
        if not data.empty:

            # columns look weird, need to clean it
            data_cleaned = data.droplevel(level=1, axis=1)
            data_cleaned.columns
            data_cleaned.columns.name = None
            data_cleaned.reset_index()

            # Save to CSV
            csv_file = os.path.join(output_folder, f"{ticker}.csv")
            data_cleaned.to_csv(csv_file)

            print(f"  Data saved to {csv_file}")
        else:
            print(f"  No data available for {ticker}.")

In [None]:
# DONT RUN THIS UNLESS U CHANGE TICKERS
# download_tickers(all_tickers)

Fetching data for: XLC
  Data saved to historical_data/XLC.csv
Fetching data for: XLY
  Data saved to historical_data/XLY.csv
Fetching data for: XLP
  Data saved to historical_data/XLP.csv
Fetching data for: XLE
  Data saved to historical_data/XLE.csv
Fetching data for: XLF
  Data saved to historical_data/XLF.csv
Fetching data for: XLV
  Data saved to historical_data/XLV.csv
Fetching data for: XLI
  Data saved to historical_data/XLI.csv
Fetching data for: XLB
  Data saved to historical_data/XLB.csv
Fetching data for: XLRE
  Data saved to historical_data/XLRE.csv
Fetching data for: XLK
  Data saved to historical_data/XLK.csv
Fetching data for: XLU
  Data saved to historical_data/XLU.csv
Fetching data for: BTC-USD
  Data saved to historical_data/BTC-USD.csv
Fetching data for: ETH-USD
  Data saved to historical_data/ETH-USD.csv
Fetching data for: XRP-USD
  Data saved to historical_data/XRP-USD.csv
Fetching data for: LTC-USD
  Data saved to historical_data/LTC-USD.csv
Fetching data for: AD

In [None]:
csv_directory = "historical_data" 

data_frames = {}

for file in os.listdir(csv_directory):
    if file.endswith(".csv"): 
        asset_name = file.split(".csv")[0]
        file_path = os.path.join(csv_directory, file)
        
        df = pd.read_csv(file_path, index_col="Date", parse_dates=True)
        
        # CHANGE HERE IF U DONT WANT TO USE ADJ CLOSE
        data_frames[asset_name] = df["Adj Close"].rename(asset_name)

combined_df = pd.concat(data_frames.values(), axis=1)

combined_df


Unnamed: 0_level_0,USDT-USD,TRX-USD,ETH-USD,XMR-USD,BTC-USD,EOS-USD,XLE,XLF,XLP,XLC,...,IOT-USD,XLY,XLK,LINK-USD,XLI,DASH-USD,BCH-USD,XLRE,LTC-USD,BNB-USD
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1998-12-22,,,,,,,12.034072,11.425532,14.558568,,...,,18.995108,24.064365,,14.745356,,,,,
1998-12-23,,,,,,,12.284619,11.594012,14.910511,,...,,19.076689,24.639273,,15.002665,,,,,
1998-12-24,,,,,,,12.219964,11.670580,14.884765,,...,,19.426298,24.545412,,15.200582,,,,,
1998-12-28,,,,,,,12.155301,11.517423,14.798926,,...,,19.251488,24.615818,,15.279748,,,,,
1998-12-29,,,,,,,12.276525,11.639956,15.125119,,...,,19.671015,24.686211,,15.497467,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-11-24,1.000863,0.208833,3363.659912,161.957047,98013.820312,0.849866,,,,,...,0.001024,,,17.944859,,34.138390,514.705444,,96.949509,660.317566
2024-11-25,0.999989,0.196054,3413.543945,156.306747,93102.296875,0.801506,95.349998,51.060001,82.290001,97.379997,...,0.000934,220.330002,233.589996,17.387096,143.539993,33.323322,491.657166,44.680000,92.363762,636.639709
2024-11-26,0.999654,0.194368,3326.517334,161.242722,91985.320312,0.796843,95.220001,51.160000,82.629997,97.739998,...,0.000957,221.130005,234.759995,17.338844,143.710007,33.187241,492.551544,44.900002,93.044876,613.587280
2024-11-27,1.000994,0.201255,3657.249268,156.106567,95962.531250,0.830196,95.120003,51.259998,82.800003,97.870003,...,0.001054,220.039993,231.580002,18.696129,143.179993,38.173279,519.697937,45.209999,97.276321,644.948486


In [23]:
data = yf.download("AAPL", start="2021-01-01", end="2021-12-31")


[*********************100%***********************]  1 of 1 completed


In [26]:
data.columns

MultiIndex([('Adj Close', 'AAPL'),
            (    'Close', 'AAPL'),
            (     'High', 'AAPL'),
            (      'Low', 'AAPL'),
            (     'Open', 'AAPL'),
            (   'Volume', 'AAPL')],
           names=['Price', 'Ticker'])

In [48]:
df_cleaned = data.droplevel(level=1, axis=1)
df_cleaned.columns
df_cleaned.columns.name = None
df_cleaned.reset_index()


Unnamed: 0,Date,Adj Close,Close,High,Low,Open,Volume
0,2021-01-04,126.544212,129.410004,133.610001,126.760002,133.520004,143301900
1,2021-01-05,128.108795,131.009995,131.740005,128.429993,128.889999,97664900
2,2021-01-06,123.796432,126.599998,131.050003,126.379997,127.720001,155088000
3,2021-01-07,128.020782,130.919998,131.630005,127.860001,128.360001,109578200
4,2021-01-08,129.125763,132.050003,132.630005,130.229996,132.429993,105158200
...,...,...,...,...,...,...,...
246,2021-12-23,173.438934,176.279999,176.850006,175.270004,175.850006,68356600
247,2021-12-27,177.423691,180.330002,180.419998,177.070007,177.089996,74919600
248,2021-12-28,176.400452,179.289993,181.330002,178.529999,180.160004,79144300
249,2021-12-29,176.488983,179.380005,180.630005,178.139999,179.330002,62348900
