## Import Libraries

In [1]:
import yfinance as yf
import pandas as pd
import numpy as np
import os
import time
from Utils import homepath


import warnings
warnings.filterwarnings('ignore')

## Fetch Data

### Index

In [None]:
# Define the S&P 500 and S&P 600 index symbols
indice_list = ["^GSPC", "SPSM"]

# Download historical data
indices = yf.download(indice_list, start="2020-01-01", end="2025-01-01")["Close"]


indices.columns = ["sp600","sp500"]
# indices.to_csv(homepath + 'data/indices.csv')

[*********************100%***********************]  2 of 2 completed

1 Failed download:
['SPSM']: SSLError(MaxRetryError("HTTPSConnectionPool(host='www.yahoo.com', port=443): Max retries exceeded with url: /?guccounter=1 (Caused by SSLError(SSLEOFError(8, '[SSL: UNEXPECTED_EOF_WHILE_READING] EOF occurred in violation of protocol (_ssl.c:1000)')))"))


### risk free rate

In [None]:
# Define risk-free rate tickers
risk_free_tickers = {
    "3M T-Bill Rate": "^IRX",   # 3-month risk-free rate
    "10Y Treasury Yield": "^TNX"  # 10-year bond yield
}

test = ["^IRX","^TNX"]

# Download historical data (latest 1 year)
rf_data = yf.download(list(risk_free_tickers.values()), start="2020-01-01", end="2025-01-01")["Close"]

# Rename columns for clarity
rf_data.columns = risk_free_tickers.keys()

# Convert percentages to decimal form (Yahoo Finance reports these as percentages)
rf_data = rf_data / 100 

# rf_data.to_csv(homepath + "data/interest_rates.csv")


[*********************100%***********************]  2 of 2 completed


### 30 Random Tickers timeseries

In [None]:
def get_stock_data(tickers, start_date, end_date):
    """
    Fetch adjusted close (or close) prices for a list of stock tickers from Yahoo Finance.
    
    Parameters:
    tickers (list): List of stock tickers (e.g., ['AAPL', 'MSFT']).
    start_date (str): Start date in 'YYYY-MM-DD' format.
    end_date (str): End date in 'YYYY-MM-DD' format.
    
    Returns:
    DataFrame: A pandas DataFrame with prices for the selected tickers.
    """
    raw_data = yf.download(tickers, start=start_date, end=end_date, progress=False)

    if 'Adj Close' in raw_data:
        stock_data = raw_data['Adj Close']
    elif 'Close' in raw_data:
        stock_data = raw_data['Close']
    else:
        raise ValueError("Neither 'Adj Close' nor 'Close' data is available for the tickers.")
    
    stock_data.dropna(axis=1, inplace=True)
    return stock_data

In [16]:
all_tickers = [
    'AAPL', 'MSFT', 'GOOGL', 'AMZN', 'META', 'TSLA', 'NVDA', 'JPM', 'V', 'JNJ',
    'WMT', 'PG', 'DIS', 'BAC', 'XOM', 'CVX', 'PFE', 'KO', 'PEP', 'NFLX',
    'ADBE', 'CSCO', 'INTC', 'CRM', 'ORCL', 'QCOM', 'IBM', 'AVGO', 'TXN', 'T'
]

start_date = "2020-01-01"
end_date = "2023-01-01" 

try:
    stock_prices = get_stock_data(all_tickers, start_date, end_date)
    print(f"Successfully fetched data for {len(stock_prices.columns)} tickers.")
except Exception as e:
    print(f"An error occurred: {e}")

Successfully fetched data for 30 tickers.


In [5]:
# stock_prices.to_csv(homepath + "data/stock_prices_30_tickers.csv")

### SP500 Tickers Timeseries

In [6]:
mock_tickers = pd.read_excel(homepath + "data/SP500Tickers.xlsx")["Symbol"].to_list()

In [37]:
# Batch size
batch_size = 30

def save_data_to_file(data, filename):
    data.to_csv(filename, sep='\t', index=True)

# Loop through batches of the tickers
for i in range(0, len(mock_tickers), batch_size):
    batch = mock_tickers[i:i + batch_size]  # Get the next 30 tickers
    print(f"Downloading batch {i // batch_size + 1}: {batch}")
    
    # Download data for the current batch
    try:
        data = yf.download(batch, start="2020-01-01", end="2025-01-01")  # Adjust date range
        if 'Adj Close' in data:
            stock_data = data['Adj Close']
        elif 'Close' in data:
            stock_data = data['Close']
        else:
            raise ValueError("Neither 'Adj Close' nor 'Close' data is available for the tickers.")
        # Save the data to a text file
        batch_filename = homepath + f"data/batch_{i // batch_size + 1}.txt"
        save_data_to_file(stock_data, batch_filename)
        print(f"Batch {i // batch_size + 1} data saved to {batch_filename}")
        
    except Exception as e:
        print(f"Error downloading/saving data for batch {i // batch_size + 1}: {e}")

print("All batches processed.")

Downloading batch 1: ['MMM', 'AOS', 'ABT', 'ABBV', 'ACN', 'ADBE', 'AMD', 'AES', 'AFL', 'A', 'APD', 'ABNB', 'AKAM', 'ALB', 'ARE', 'ALGN', 'ALLE', 'LNT', 'ALL', 'GOOGL', 'GOOG', 'MO', 'AMZN', 'AMCR', 'AEE', 'AEP', 'AXP', 'AIG', 'AMT', 'AWK']


[*********************100%***********************]  30 of 30 completed


Batch 1 data saved to C:/A.PROJECTS/stockprediction/data/batch_1.txt
Downloading batch 2: ['AMP', 'AME', 'AMGN', 'APH', 'ADI', 'ANSS', 'AON', 'APA', 'APO', 'AAPL', 'AMAT', 'APTV', 'ACGL', 'ADM', 'ANET', 'AJG', 'AIZ', 'T', 'ATO', 'ADSK', 'ADP', 'AZO', 'AVB', 'AVY', 'AXON', 'BKR', 'BALL', 'BAC', 'BAX', 'BDX']


[*********************100%***********************]  30 of 30 completed


Batch 2 data saved to C:/A.PROJECTS/stockprediction/data/batch_2.txt
Downloading batch 3: ['BRK.B', 'BBY', 'TECH', 'BIIB', 'BLK', 'BX', 'BK', 'BA', 'BKNG', 'BWA', 'BSX', 'BMY', 'AVGO', 'BR', 'BRO', 'BF.B', 'BLDR', 'BG', 'BXP', 'CHRW', 'CDNS', 'CZR', 'CPT', 'CPB', 'COF', 'CAH', 'KMX', 'CCL', 'CARR', 'CAT']


[*********************100%***********************]  30 of 30 completed

2 Failed downloads:
['BF.B']: YFPricesMissingError('$%ticker%: possibly delisted; no price data found  (1d 2020-01-01 -> 2025-01-01)')
['BRK.B']: YFTzMissingError('$%ticker%: possibly delisted; no timezone found')


Batch 3 data saved to C:/A.PROJECTS/stockprediction/data/batch_3.txt
Downloading batch 4: ['CBOE', 'CBRE', 'CDW', 'CE', 'COR', 'CNC', 'CNP', 'CF', 'CRL', 'SCHW', 'CHTR', 'CVX', 'CMG', 'CB', 'CHD', 'CI', 'CINF', 'CTAS', 'CSCO', 'C', 'CFG', 'CLX', 'CME', 'CMS', 'KO', 'CTSH', 'CL', 'CMCSA', 'CAG', 'COP']


[*********************100%***********************]  30 of 30 completed


Batch 4 data saved to C:/A.PROJECTS/stockprediction/data/batch_4.txt
Downloading batch 5: ['ED', 'STZ', 'CEG', 'COO', 'CPRT', 'GLW', 'CPAY', 'CTVA', 'CSGP', 'COST', 'CTRA', 'CRWD', 'CCI', 'CSX', 'CMI', 'CVS', 'DHR', 'DRI', 'DVA', 'DAY', 'DECK', 'DE', 'DELL', 'DAL', 'DVN', 'DXCM', 'FANG', 'DLR', 'DFS', 'DG']


[*********************100%***********************]  30 of 30 completed


Batch 5 data saved to C:/A.PROJECTS/stockprediction/data/batch_5.txt
Downloading batch 6: ['DLTR', 'D', 'DPZ', 'DOV', 'DOW', 'DHI', 'DTE', 'DUK', 'DD', 'EMN', 'ETN', 'EBAY', 'ECL', 'EIX', 'EW', 'EA', 'ELV', 'EMR', 'ENPH', 'ETR', 'EOG', 'EPAM', 'EQT', 'EFX', 'EQIX', 'EQR', 'ERIE', 'ESS', 'EL', 'EG']


[*********************100%***********************]  30 of 30 completed


Batch 6 data saved to C:/A.PROJECTS/stockprediction/data/batch_6.txt
Downloading batch 7: ['EVRG', 'ES', 'EXC', 'EXPE', 'EXPD', 'EXR', 'XOM', 'FFIV', 'FDS', 'FICO', 'FAST', 'FRT', 'FDX', 'FIS', 'FITB', 'FSLR', 'FE', 'FI', 'FMC', 'F', 'FTNT', 'FTV', 'FOXA', 'FOX', 'BEN', 'FCX', 'GRMN', 'IT', 'GE', 'GEHC']


[*********************100%***********************]  30 of 30 completed


Batch 7 data saved to C:/A.PROJECTS/stockprediction/data/batch_7.txt
Downloading batch 8: ['GEV', 'GEN', 'GNRC', 'GD', 'GIS', 'GM', 'GPC', 'GILD', 'GPN', 'GL', 'GDDY', 'GS', 'HAL', 'HIG', 'HAS', 'HCA', 'DOC', 'HSIC', 'HSY', 'HES', 'HPE', 'HLT', 'HOLX', 'HD', 'HON', 'HRL', 'HST', 'HWM', 'HPQ', 'HUBB']


[*********************100%***********************]  30 of 30 completed


Batch 8 data saved to C:/A.PROJECTS/stockprediction/data/batch_8.txt
Downloading batch 9: ['HUM', 'HBAN', 'HII', 'IBM', 'IEX', 'IDXX', 'ITW', 'INCY', 'IR', 'PODD', 'INTC', 'ICE', 'IFF', 'IP', 'IPG', 'INTU', 'ISRG', 'IVZ', 'INVH', 'IQV', 'IRM', 'JBHT', 'JBL', 'JKHY', 'J', 'JNJ', 'JCI', 'JPM', 'JNPR', 'K']


[*********************100%***********************]  30 of 30 completed


Batch 9 data saved to C:/A.PROJECTS/stockprediction/data/batch_9.txt
Downloading batch 10: ['KVUE', 'KDP', 'KEY', 'KEYS', 'KMB', 'KIM', 'KMI', 'KKR', 'KLAC', 'KHC', 'KR', 'LHX', 'LH', 'LRCX', 'LW', 'LVS', 'LDOS', 'LEN', 'LII', 'LLY', 'LIN', 'LYV', 'LKQ', 'LMT', 'L', 'LOW', 'LULU', 'LYB', 'MTB', 'MPC']


[*********************100%***********************]  30 of 30 completed


Batch 10 data saved to C:/A.PROJECTS/stockprediction/data/batch_10.txt
Downloading batch 11: ['MKTX', 'MAR', 'MMC', 'MLM', 'MAS', 'MA', 'MTCH', 'MKC', 'MCD', 'MCK', 'MDT', 'MRK', 'META', 'MET', 'MTD', 'MGM', 'MCHP', 'MU', 'MSFT', 'MAA', 'MRNA', 'MHK', 'MOH', 'TAP', 'MDLZ', 'MPWR', 'MNST', 'MCO', 'MS', 'MOS']


[*********************100%***********************]  30 of 30 completed


Batch 11 data saved to C:/A.PROJECTS/stockprediction/data/batch_11.txt
Downloading batch 12: ['MSI', 'MSCI', 'NDAQ', 'NTAP', 'NFLX', 'NEM', 'NWSA', 'NWS', 'NEE', 'NKE', 'NI', 'NDSN', 'NSC', 'NTRS', 'NOC', 'NCLH', 'NRG', 'NUE', 'NVDA', 'NVR', 'NXPI', 'ORLY', 'OXY', 'ODFL', 'OMC', 'ON', 'OKE', 'ORCL', 'OTIS', 'PCAR']


[*********************100%***********************]  30 of 30 completed


Batch 12 data saved to C:/A.PROJECTS/stockprediction/data/batch_12.txt
Downloading batch 13: ['PKG', 'PLTR', 'PANW', 'PARA', 'PH', 'PAYX', 'PAYC', 'PYPL', 'PNR', 'PEP', 'PFE', 'PCG', 'PM', 'PSX', 'PNW', 'PNC', 'POOL', 'PPG', 'PPL', 'PFG', 'PG', 'PGR', 'PLD', 'PRU', 'PEG', 'PTC', 'PSA', 'PHM', 'PWR', 'QCOM']


[*********************100%***********************]  30 of 30 completed


Batch 13 data saved to C:/A.PROJECTS/stockprediction/data/batch_13.txt
Downloading batch 14: ['DGX', 'RL', 'RJF', 'RTX', 'O', 'REG', 'REGN', 'RF', 'RSG', 'RMD', 'RVTY', 'ROK', 'ROL', 'ROP', 'ROST', 'RCL', 'SPGI', 'CRM', 'SBAC', 'SLB', 'STX', 'SRE', 'NOW', 'SHW', 'SPG', 'SWKS', 'SJM', 'SW', 'SNA', 'SOLV']


[*********************100%***********************]  30 of 30 completed


Batch 14 data saved to C:/A.PROJECTS/stockprediction/data/batch_14.txt
Downloading batch 15: ['SO', 'LUV', 'SWK', 'SBUX', 'STT', 'STLD', 'STE', 'SYK', 'SMCI', 'SYF', 'SNPS', 'SYY', 'TMUS', 'TROW', 'TTWO', 'TPR', 'TRGP', 'TGT', 'TEL', 'TDY', 'TFX', 'TER', 'TSLA', 'TXN', 'TPL', 'TXT', 'TMO', 'TJX', 'TSCO', 'TT']


[*********************100%***********************]  30 of 30 completed


Batch 15 data saved to C:/A.PROJECTS/stockprediction/data/batch_15.txt
Downloading batch 16: ['TDG', 'TRV', 'TRMB', 'TFC', 'TYL', 'TSN', 'USB', 'UBER', 'UDR', 'ULTA', 'UNP', 'UAL', 'UPS', 'URI', 'UNH', 'UHS', 'VLO', 'VTR', 'VLTO', 'VRSN', 'VRSK', 'VZ', 'VRTX', 'VTRS', 'VICI', 'V', 'VST', 'VMC', 'WRB', 'GWW']


[*********************100%***********************]  30 of 30 completed


Batch 16 data saved to C:/A.PROJECTS/stockprediction/data/batch_16.txt
Downloading batch 17: ['WAB', 'WBA', 'WMT', 'DIS', 'WBD', 'WM', 'WAT', 'WEC', 'WFC', 'WELL', 'WST', 'WDC', 'WY', 'WMB', 'WTW', 'WDAY', 'WYNN', 'XEL', 'XYL', 'YUM', 'ZBRA', 'ZBH', 'ZTS']


[*********************100%***********************]  23 of 23 completed


Batch 17 data saved to C:/A.PROJECTS/stockprediction/data/batch_17.txt
All batches processed.


In [9]:
def combine_batch_files(num_batches):
    # List to hold DataFrames
    all_dataframes = []
    
    # Iterate over each batch file
    for batch_number in range(1, num_batches + 1):
        batch_filename = homepath + f"data/batch_{batch_number}.txt"
        
        # Check if the file exists
        if os.path.exists(batch_filename):
            print(f"Loading {batch_filename}")
            
            # Read the batch file into a DataFrame
            data = pd.read_csv(batch_filename, sep='\t', index_col=0, parse_dates=True)
            # Append DataFrame to the list
            all_dataframes.append(data)
        else:
            print(f"{batch_filename} does not exist.")
    
    # Concatenate all DataFrames in the list into a single DataFrame
    combined_data = pd.concat(all_dataframes, axis=1)
    
    return combined_data


number_of_batches = 17  # For example, replace with actual number of batch files you created

# Combine all batch data files into one DataFrame
combined_df = combine_batch_files(number_of_batches)

# Optional: Save the combined DataFrame to a file
combined_df_filename = homepath + "data/sp500_timeseries.txt"
print(f"Combined data saved to {combined_df_filename}")
# Example output of the combined DataFrame
print(combined_df.shape)

combined_df.to_csv(combined_df_filename, sep='\t')

Loading C:/A.PROJECTS/stockprediction/data/batch_1.txt
Loading C:/A.PROJECTS/stockprediction/data/batch_2.txt
Loading C:/A.PROJECTS/stockprediction/data/batch_3.txt
Loading C:/A.PROJECTS/stockprediction/data/batch_4.txt
Loading C:/A.PROJECTS/stockprediction/data/batch_5.txt
Loading C:/A.PROJECTS/stockprediction/data/batch_6.txt
Loading C:/A.PROJECTS/stockprediction/data/batch_7.txt
Loading C:/A.PROJECTS/stockprediction/data/batch_8.txt
Loading C:/A.PROJECTS/stockprediction/data/batch_9.txt
Loading C:/A.PROJECTS/stockprediction/data/batch_10.txt
Loading C:/A.PROJECTS/stockprediction/data/batch_11.txt
Loading C:/A.PROJECTS/stockprediction/data/batch_12.txt
Loading C:/A.PROJECTS/stockprediction/data/batch_13.txt
Loading C:/A.PROJECTS/stockprediction/data/batch_14.txt
Loading C:/A.PROJECTS/stockprediction/data/batch_15.txt
Loading C:/A.PROJECTS/stockprediction/data/batch_16.txt
Loading C:/A.PROJECTS/stockprediction/data/batch_17.txt
Combined data saved to C:/A.PROJECTS/stockprediction/data

### SP600 Tickers Timeseries

In [4]:
sp600_tickers = pd.read_csv(homepath + "data/SP600Tickers.csv")["Symbol"].to_list()

In [5]:
# Batch size
batch_size = 30

def save_data_to_file(data, filename):
    data.to_csv(filename, sep='\t', index=True)


sp600_list = []
# Loop through batches of the tickers
for i in range(0, len(sp600_tickers), batch_size):
    batch = sp600_tickers[i:i + batch_size]  # Get the next 30 tickers
    print(f"Downloading batch {i // batch_size + 1}: {batch}")
    
    # Download data for the current batch
    try:
        data = yf.download(batch, start="2020-01-01", end="2025-01-01")  # Adjust date range
        if 'Adj Close' in data:
            stock_data = data['Adj Close']
        elif 'Close' in data:
            stock_data = data['Close']
        else:
            raise ValueError("Neither 'Adj Close' nor 'Close' data is available for the tickers.")
        # Save the data to a text file
        batch_filename = homepath + f"data/batch_{i // batch_size + 1}.txt"
        # save_data_to_file(stock_data, batch_filename)
        sp600_list.append(stock_data)
        print(f"Batch {i // batch_size + 1} data saved to {batch_filename}")
        
    except Exception as e:
        print(f"Error downloading/saving data for batch {i // batch_size + 1}: {e}")

print("All batches processed.")

Downloading batch 1: ['AAP', 'AAT', 'ABCB', 'ABG', 'ABM', 'ABR', 'ACA', 'ACAD', 'ACIW', 'ACLS', 'ADEA', 'ADMA', 'ADNT', 'ADUS', 'AEIS', 'AEO', 'AESI', 'AGO', 'AGYS', 'AHCO', 'AHH', 'AIN', 'AIR', 'AKR', 'AL', 'ALEX', 'ALG', 'ALGT', 'ALK', 'ALKS']


[*********************100%***********************]  28 of 30 completed

2 Failed downloads:
['ABR', 'ACIW']: SSLError(MaxRetryError("HTTPSConnectionPool(host='query2.finance.yahoo.com', port=443): Max retries exceeded with url: /v8/finance/chart/%ticker%?period1=1577854800&period2=1735707600&interval=1d&includePrePost=False&events=div%2Csplits%2CcapitalGains&crumb=AzA6uKTvfzG (Caused by SSLError(SSLEOFError(8, '[SSL: UNEXPECTED_EOF_WHILE_READING] EOF occurred in violation of protocol (_ssl.c:1000)')))"))
[***                    7%                       ]  2 of 30 completed

Batch 1 data saved to C:/A.PROJECTS/stockprediction/data/batch_1.txt
Downloading batch 2: ['ALRM', 'AMBC', 'AMN', 'AMPH', 'AMR', 'AMSF', 'AMTM', 'AMWD', 'ANDE', 'ANIP', 'AORT', 'AOSL', 'APAM', 'APLE', 'APOG', 'ARCB', 'ARI', 'ARLO', 'AROC', 'ARR', 'ARWR', 'ASIX', 'ASO', 'ASTE', 'ASTH', 'ATEN', 'ATGE', 'ATI', 'AUB', 'AVA']


[*********************100%***********************]  30 of 30 completedd


Batch 2 data saved to C:/A.PROJECTS/stockprediction/data/batch_2.txt
Downloading batch 3: ['AVAV', 'AVNS', 'AWI', 'AWR', 'AX', 'AXL', 'AZTA', 'AZZ', 'BANC', 'BANF', 'BANR', 'BBWI', 'BCC', 'BCPC', 'BDN', 'BFH', 'BFS', 'BGC', 'BGS', 'BHE', 'BHLB', 'BJRI', 'BKE', 'BKU', 'BL', 'BLFS', 'BLMN', 'BMI', 'BOH', 'BOOT']


[*********************100%***********************]  30 of 30 completed


Batch 3 data saved to C:/A.PROJECTS/stockprediction/data/batch_3.txt
Downloading batch 4: ['BOX', 'BRC', 'BRKL', 'BSIG', 'BTU', 'BXMT', 'CABO', 'CAKE', 'CAL', 'CALM', 'CALX', 'CARG', 'CARS', 'CASH', 'CATY', 'CBRL', 'CBU', 'CCOI', 'CCS', 'CENT', 'CENTA', 'CENX', 'CERT', 'CEVA', 'CFFN', 'CHCO', 'CHEF', 'CLB', 'CNK', 'CNMD']


[*********************100%***********************]  30 of 30 completed


Batch 4 data saved to C:/A.PROJECTS/stockprediction/data/batch_4.txt
Downloading batch 5: ['CNR', 'CNS', 'CNXN', 'COHU', 'COLL', 'CON', 'COOP', 'CORT', 'CPF', 'CPK', 'CPRX', 'CRC', 'CRGY', 'CRI', 'CRK', 'CRSR', 'CRVL', 'CSGS', 'CSR', 'CSWI', 'CTKB', 'CTRE', 'CTS', 'CUBI', 'CURB', 'CVBF', 'CVCO', 'CVI', 'CWEN', 'CWEN.A']


[*********************100%***********************]  30 of 30 completed

1 Failed download:
['CWEN.A']: YFTzMissingError('$%ticker%: possibly delisted; no timezone found')


Batch 5 data saved to C:/A.PROJECTS/stockprediction/data/batch_5.txt
Downloading batch 6: ['CWK', 'CWT', 'CXM', 'CXW', 'DAN', 'DCOM', 'DEA', 'DEI', 'DFH', 'DFIN', 'DGII', 'DIOD', 'DLX', 'DNOW', 'DOCN', 'DORM', 'DRH', 'DRQ', 'DV', 'DVAX', 'DXC', 'DXPE', 'DY', 'EAT', 'ECG', 'ECPG', 'EFC', 'EGBN', 'EIG', 'ELME']


[*********************100%***********************]  30 of 30 completed

1 Failed download:
['DRQ']: YFTzMissingError('$%ticker%: possibly delisted; no timezone found')


Batch 6 data saved to C:/A.PROJECTS/stockprediction/data/batch_6.txt
Downloading batch 7: ['EMBC', 'ENOV', 'ENR', 'ENVA', 'EPAC', 'EPC', 'EPRT', 'ESE', 'ETD', 'ETSY', 'EVTC', 'EXPI', 'EXTR', 'EYE', 'EZPW', 'FBK', 'FBNC', 'FBP', 'FBRT', 'FCF', 'FCPT', 'FDP', 'FELE', 'FFBC', 'FHB', 'FIZZ', 'FL', 'FLGT', 'FORM', 'FOXF']


[*********************100%***********************]  30 of 30 completed


Batch 7 data saved to C:/A.PROJECTS/stockprediction/data/batch_7.txt
Downloading batch 8: ['FSS', 'FTDR', 'FTRE', 'FUL', 'FULT', 'FUN', 'FWRD', 'GBX', 'GDEN', 'GDOT', 'GDYN', 'GEO', 'GES', 'GFF', 'GIII', 'GKOS', 'GMS', 'GNL', 'GNW', 'GO', 'GOGO', 'GOLF', 'GPI', 'GPRE', 'GRBK', 'GSHD', 'GTES', 'GTY', 'GVA', 'HAFC']


[*********************100%***********************]  30 of 30 completed


Batch 8 data saved to C:/A.PROJECTS/stockprediction/data/batch_8.txt
Downloading batch 9: ['HAIN', 'HASI', 'HAYW', 'HBI', 'HCC', 'HCI', 'HCSG', 'HELE', 'HFWA', 'HI', 'HIMS', 'HIW', 'HLIT', 'HLX', 'HMN', 'HNI', 'HOPE', 'HP', 'HRMY', 'HSII', 'HSTM', 'HTH', 'HTLD', 'HTZ', 'HUBG', 'HWKN', 'HZO', 'IAC', 'IART', 'IBP']


[*********************100%***********************]  30 of 30 completed


Batch 9 data saved to C:/A.PROJECTS/stockprediction/data/batch_9.txt
Downloading batch 10: ['ICHR', 'ICUI', 'IDCC', 'IIIN', 'IIPR', 'INDB', 'INN', 'INSP', 'INSW', 'INVA', 'IOSP', 'IPAR', 'ITGR', 'ITRI', 'JACK', 'JBGS', 'JBLU', 'JBSS', 'JBT', 'JJSF', 'JOE', 'JXN', 'KALU', 'KAR', 'KFY', 'KLG', 'KLIC', 'KMT', 'KN', 'KOP']


[*********************100%***********************]  30 of 30 completed


Batch 10 data saved to C:/A.PROJECTS/stockprediction/data/batch_10.txt
Downloading batch 11: ['KREF', 'KRYS', 'KSS', 'KTB', 'KW', 'KWR', 'LBRT', 'LCII', 'LEG', 'LESL', 'LGIH', 'LGND', 'LKFN', 'LMAT', 'LNC', 'LNN', 'LPG', 'LQDT', 'LRN', 'LTC', 'LUMN', 'LXP', 'LZB', 'MAC', 'MARA', 'MATV', 'MATW', 'MATX', 'MBC', 'MC']


[*********************100%***********************]  30 of 30 completed


Batch 11 data saved to C:/A.PROJECTS/stockprediction/data/batch_11.txt
Downloading batch 12: ['MCRI', 'MCW', 'MCY', 'MD', 'MDU', 'MGEE', 'MGPI', 'MGY', 'MHO', 'MLAB', 'MLKN', 'MMI', 'MMSI', 'MNRO', 'MODG', 'MOG.A', 'MP', 'MPW', 'MRCY', 'MRP', 'MRTN', 'MSEX', 'MSGS', 'MTH', 'MTRN', 'MTUS', 'MTX', 'MXL', 'MYGN', 'MYRG']


[*********************100%***********************]  29 of 30 completed

2 Failed downloads:
['MOG.A']: YFTzMissingError('$%ticker%: possibly delisted; no timezone found')
['MRP']: YFPricesMissingError('$%ticker%: possibly delisted; no price data found  (1d 2020-01-01 -> 2025-01-01) (Yahoo error = "Data doesn\'t exist for startDate = 1577854800, endDate = 1735707600")')


Batch 12 data saved to C:/A.PROJECTS/stockprediction/data/batch_12.txt
Downloading batch 13: ['NABL', 'NARI', 'NATL', 'NAVI', 'NBHC', 'NBR', 'NBTB', 'NEO', 'NGVT', 'NHC', 'NMIH', 'NOG', 'NPK', 'NPO', 'NSIT', 'NTCT', 'NVEE', 'NVRI', 'NWBI', 'NWL', 'NWN', 'NX', 'NXRT', 'NYMT', 'ODP', 'OFG', 'OGN', 'OI', 'OII', 'OMCL']


[*********************100%***********************]  30 of 30 completed


Batch 13 data saved to C:/A.PROJECTS/stockprediction/data/batch_13.txt
Downloading batch 14: ['OMI', 'OSIS', 'OTTR', 'OUT', 'OXM', 'PAHC', 'PARR', 'PAYO', 'PATK', 'PBH', 'PBI', 'PCRX', 'PDCO', 'PDFS', 'PEB', 'PECO', 'PENN', 'PFBC', 'PFS', 'PGNY', 'PHIN', 'PI', 'PINC', 'PIPR', 'PJT', 'PLAB', 'PLAY', 'PLMR', 'PLUS', 'PLXS']


[*********************100%***********************]  30 of 30 completed


Batch 14 data saved to C:/A.PROJECTS/stockprediction/data/batch_14.txt
Downloading batch 15: ['PMT', 'POWL', 'PPBI', 'PRA', 'PRAA', 'PRDO', 'PRG', 'PRGS', 'PRK', 'PRLB', 'PRVA', 'PSMT', 'PTEN', 'PTGX', 'PUMP', 'PZZA', 'QDEL', 'QNST', 'QRVO', 'RAMP', 'RC', 'RCUS', 'RDN', 'RDNT', 'RES', 'REX', 'REZI', 'RGR', 'RHI', 'RNST']


[*********************100%***********************]  30 of 30 completed


Batch 15 data saved to C:/A.PROJECTS/stockprediction/data/batch_15.txt
Downloading batch 16: ['ROCK', 'ROG', 'RUN', 'RUSHA', 'RWT', 'RXO', 'SAFE', 'SABR', 'SAFT', 'SAH', 'SANM', 'SATS', 'SBCF', 'SBH', 'SBSI', 'SCHL', 'SCL', 'SCSC', 'SCVL', 'SDGR', 'SEDG', 'SEE', 'SEM', 'SFBS', 'SFNC', 'SGH', 'SHAK', 'SHEN', 'SHO', 'SHOO']


[*********************100%***********************]  30 of 30 completed

1 Failed download:
['SGH']: YFTzMissingError('$%ticker%: possibly delisted; no timezone found')


Batch 16 data saved to C:/A.PROJECTS/stockprediction/data/batch_16.txt
Downloading batch 17: ['SIG', 'SITC', 'SITM', 'SJW', 'SKT', 'SKY', 'SKYW', 'SLG', 'SLP', 'SLVM', 'SM', 'SMP', 'SMPL', 'SMTC', 'SNCY', 'SNDR', 'SNEX', 'SONO', 'SPNT', 'SPSC', 'SPTN', 'SPXC', 'SSTK', 'STAA', 'STBA', 'STC', 'STEL', 'STEP', 'STRA', 'SUPN']


[*********************100%***********************]  30 of 30 completed


Batch 17 data saved to C:/A.PROJECTS/stockprediction/data/batch_17.txt
Downloading batch 18: ['SXC', 'SXI', 'SXT', 'SWI', 'TALO', 'TBBK', 'TDS', 'TDW', 'TFIN', 'TGI', 'TGNA', 'TGTX', 'THRM', 'THRY', 'THS', 'TILE', 'TMDX', 'TMP', 'TNC', 'TNDM', 'TPH', 'TR', 'TRIP', 'TRMK', 'TRN', 'TRNO', 'TRST', 'TRUP', 'TTGT', 'TTMI']


[*********************100%***********************]  30 of 30 completed


Batch 18 data saved to C:/A.PROJECTS/stockprediction/data/batch_18.txt
Downloading batch 19: ['TWI', 'TWO', 'UCBI', 'UCTT', 'UE', 'UFCS', 'UFPT', 'UHT', 'UNF', 'UNFI', 'UNIT', 'UPBD', 'URBN', 'USNA', 'USPH', 'UTL', 'UVV', 'VBTX', 'VCEL', 'VECO', 'VFC', 'VIAV', 'VICR', 'VIR', 'VIRT', 'VRE', 'VRRM', 'VRTS', 'VSAT', 'VSCO']


[*********************100%***********************]  30 of 30 completed

1 Failed download:
['UCBI']: YFTzMissingError('$%ticker%: possibly delisted; no timezone found')


Batch 19 data saved to C:/A.PROJECTS/stockprediction/data/batch_19.txt
Downloading batch 20: ['VSH', 'VSTS', 'VTOL', 'VTLE', 'VVI', 'VYX', 'WABC', 'WAFD', 'WD', 'WDFC', 'WERN', 'WGO', 'WHD', 'WKC', 'WLY', 'WNC', 'WOLF', 'WOR', 'WRLD', 'WS', 'WSFS', 'WSR', 'WT', 'WWW', 'XHR', 'XNCR', 'XPEL', 'XRX', 'YELP', 'YOU']


[*********************100%***********************]  30 of 30 completed


Batch 20 data saved to C:/A.PROJECTS/stockprediction/data/batch_20.txt
Downloading batch 21: ['ZD', 'ZWS']


[*********************100%***********************]  2 of 2 completed

Batch 21 data saved to C:/A.PROJECTS/stockprediction/data/batch_21.txt
All batches processed.





In [None]:
sp600_timeseries = pd.concat(sp600_list, axis=1)
sp600_timeseries = sp600_timeseries.dropna(axis=1, thresh=len(sp600_timeseries)-20)
# sp600_timeseries.to_csv(homepath+'data/sp600_timeseries.csv')

### NYSE All Tickers Timeseries