In [None]:
# Cell 1: Imports

# Standard Library (note: joblib in this group is a third-party package)
import os
import time
import logging
import joblib
import warnings
from datetime import datetime, timedelta

# Third-Party Libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm
import yfinance as yf
import keras_tuner as kt
import matplotlib as mpl
import ta
import tensorflow as tf
import pandas_market_calendars as mcal
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import LSTM, GRU, Bidirectional, Input, Dropout, Dense
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import RobustScaler, StandardScaler, MinMaxScaler
from sklearn.metrics import (mean_squared_error, mean_absolute_error, 
                             mean_absolute_percentage_error, r2_score)
from sklearn.ensemble import RandomForestRegressor
from scipy.stats import skew, kurtosis, shapiro
from statsmodels.stats.outliers_influence import variance_inflation_factor
from tensorflow.keras.layers import Input, Dense, LayerNormalization, Dropout, MultiHeadAttention, Embedding
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers.schedules import LearningRateSchedule
from xgboost import XGBRegressor

In [None]:
# Cell 2: Fetch the Stock Data (Time-series Only)

# Configure root logging: timestamp, level name, then the message
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)

# Tickers whose price history will be downloaded
stocks = ['AAPL', 'MSFT', 'GOOGL', 'AMZN']

# Container for the downloaded hourly DataFrames
hourly_data_dict = dict()

# Download window: ends "now" and reaches back 729 days (just under two years).
# NOTE(review): presumably kept below the data provider's look-back limit for
# hourly bars -- confirm before widening it.
end_date = datetime.now()
start_date_hourly = end_date - timedelta(days=729)

# Make sure the output folder for the CSV files exists
os.makedirs('../data/stock_data', exist_ok=True)

# Function to fetch stock data
def fetch_stock_data(ticker, start, end, interval):
    """Download price history for one ticker through ``yf.download``.

    Args:
        ticker: Symbol passed to ``yf.download``.
        start: Start of the requested window.
        end: End of the requested window.
        interval: Bar interval string forwarded to ``yf.download``.

    Returns:
        The downloaded DataFrame with the 'Adj Close' column removed when it
        is present. An empty download is returned as-is after a warning is
        logged. If anything raises along the way, the error is logged and a
        fresh empty DataFrame is returned instead of propagating.
    """
    try:
        downloaded = yf.download(ticker, start=start, end=end, interval=interval)
        # An empty frame is reported but still flows through the normal path
        if downloaded.empty:
            logging.warning(f"No data retrieved for {ticker} from {start} to {end} with interval {interval}")
        # errors='ignore' keeps this a no-op when 'Adj Close' is absent
        result = downloaded.drop(columns=['Adj Close'], errors='ignore')
    except Exception as e:
        # Best-effort boundary: callers receive an empty frame on any failure
        logging.error(f"Error fetching data for {ticker}: {e}")
        result = pd.DataFrame()
    return result

# Download hourly bars for every ticker, keep them in memory and write them to CSV
for stock in tqdm(stocks, desc="Fetching stocks data"):

    # Hourly bars for the window between start_date_hourly and end_date
    hourly_data = fetch_stock_data(
        ticker=stock,
        start=start_date_hourly,
        end=end_date,
        interval='1h',
    )

    # Only tickers that actually returned rows are stored and saved
    got_rows = not hourly_data.empty
    if got_rows:
        hourly_data_dict[stock] = hourly_data
        csv_path = f'../data/stock_data/{stock}_hourly.csv'
        hourly_data.to_csv(csv_path, index=True)

    # Pause after each ticker to avoid API rate limits
    time.sleep(2)

print("Time-series data fetching and saving complete.")