In [4]:
import logging
import os
import pickle
from datetime import date

import pandas as pd
import yfinance as yf
from statsmodels.tsa.arima.model import ARIMA

# Root logger emits INFO and above as "<timestamp> - <level> - <message>".
logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)
logger = logging.getLogger()

# ARIMA order, passed to the model as (p, d, q).
p = 30
d = 0
q = 10

# Symbols to download and model.
tickers = [
    "AAPL", "MSFT", "GOOGL", "AMZN", "NVDA",
    "META", "TSLA", "AVGO", "PEP", "COST",
    "CSCO", "ADBE", "TXN", "INTC", "AMD",
    "SBUX", "CHTR", "INTU", "BKNG", "MRNA",
]

# History window as YYYY-MM-DD strings: fixed start, today's date as the end.
start_date = "2015-01-01"
end_date = date.today().isoformat()

# Directory that receives one pickled ARIMA result per ticker.
model_dir = "arima_models"
# Create it up front: open(..., "wb") does not create missing directories, and
# hitting that error only after a multi-minute fit would throw the fit away.
os.makedirs(model_dir, exist_ok=True)

# Tickers for which no model was saved (no data, or an error along the way).
failed_tickers = []

for ticker in tickers:
    try:
        logger.info("Fetching data for %s from %s to %s", ticker, start_date, end_date)
        data = yf.download(ticker, start=start_date, end=end_date)

        # Check for an empty download before touching its index or columns.
        if data.empty:
            logger.error("No data found for %s. Skipping...", ticker)
            failed_tickers.append(ticker)
            continue

        close = data["Close"]
        # NOTE(review): some yfinance releases return MultiIndex columns even
        # for a single ticker, which makes data["Close"] a one-column
        # DataFrame that pd.to_numeric rejects. Reduce it to a Series.
        if isinstance(close, pd.DataFrame):
            close = close.iloc[:, 0]

        # Coerce non-numeric prices to NaN and drop them before fitting.
        close = pd.to_numeric(close, errors="coerce").dropna()
        # Express the timestamp index as daily periods for the model.
        close.index = pd.DatetimeIndex(close.index).to_period("D")

        logger.info("Data cleaned for %s. Records: %d", ticker, len(close))

        # Fit ARIMA model
        logger.info("Fitting ARIMA(%s, %s, %s) model for %s", p, d, q, ticker)
        arima_result = ARIMA(close, order=(p, d, q)).fit()

        # Save the fitted results object to pickle
        model_filename = f"{model_dir}/{ticker}.pickle"
        with open(model_filename, "wb") as f:
            pickle.dump(arima_result, f)

        logger.info("Model saved: %s", model_filename)

    except Exception as e:
        # Best-effort batch: one bad ticker must not stop the rest, but record
        # the failure and keep the traceback so it can be diagnosed.
        failed_tickers.append(ticker)
        logger.exception("Error processing %s: %s", ticker, e)

# Report the real outcome instead of unconditionally claiming success.
if failed_tickers:
    logger.warning(
        "Finished with %d of %d tickers failed: %s",
        len(failed_tickers),
        len(tickers),
        ", ".join(failed_tickers),
    )
else:
    logger.info("All models processed successfully.")

2025-02-10 16:06:10,671 - INFO - Fetching data for AAPL from 2015-01-01 to 2025-02-10
[*********************100%%**********************]  1 of 1 completed
2025-02-10 16:06:10,772 - INFO - Data cleaned for AAPL. Records: 2541
2025-02-10 16:06:10,773 - INFO - Fitting ARIMA(30, 0, 10) model for AAPL
  warn('Non-stationary starting autoregressive parameters'
2025-02-10 16:09:28,073 - INFO - Model saved: arima_models/AAPL.pickle
2025-02-10 16:09:28,074 - INFO - Fetching data for MSFT from 2015-01-01 to 2025-02-10
[*********************100%%**********************]  1 of 1 completed
2025-02-10 16:09:28,125 - INFO - Data cleaned for MSFT. Records: 2541
2025-02-10 16:09:28,126 - INFO - Fitting ARIMA(30, 0, 10) model for MSFT
  warn('Non-stationary starting autoregressive parameters'
2025-02-10 16:12:41,880 - INFO - Model saved: arima_models/MSFT.pickle
2025-02-10 16:12:41,880 - INFO - Fetching data for GOOGL from 2015-01-01 to 2025-02-10
[*********************100%%**********************]  1 of 