In [18]:
import pandas as pd
from yahoo_fin import stock_info as si
from datetime import datetime, timedelta
import joblib
import plotly.graph_objects as go
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

def calculate_technical_indicators(data):
    """Append moving-average, RSI, Bollinger-band and lagged-close columns.

    The frame is modified in place: ``SMA_10``, ``RSI_14``, ``20_SMA``,
    ``20_std``, ``Upper_Band``, ``Lower_Band``, ``Close_1`` and ``Close_2`` are
    added, and afterwards every row containing a NaN in any column is dropped
    (this removes the warm-up rows of the rolling windows and shifts).

    Args:
        data: DataFrame with a numeric ``Close`` column.

    Returns:
        The same DataFrame object that was passed in.
    """
    close = data['Close']

    # 10-period simple moving average of the close.
    data['SMA_10'] = close.rolling(window=10).mean()

    # 14-period RSI built from plain rolling means of gains and losses.
    price_change = close.diff(1)
    upward_moves = price_change.where(price_change > 0, 0)
    downward_moves = -price_change.where(price_change < 0, 0)
    mean_gain = upward_moves.rolling(window=14).mean()
    mean_loss = downward_moves.rolling(window=14).mean()
    relative_strength = mean_gain / mean_loss
    data['RSI_14'] = 100 - (100 / (1 + relative_strength))

    # Bollinger bands: 20-period mean plus/minus two rolling standard deviations.
    data['20_SMA'] = close.rolling(window=20).mean()
    data['20_std'] = close.rolling(window=20).std()
    band_offset = data['20_std'] * 2
    data['Upper_Band'] = data['20_SMA'] + band_offset
    data['Lower_Band'] = data['20_SMA'] - band_offset

    # The previous one and two closes, used as lag features.
    for lag in (1, 2):
        data[f'Close_{lag}'] = close.shift(lag)

    data.dropna(inplace=True)
    return data


In [20]:
from sklearn.preprocessing import StandardScaler

def fetch_historical_data(ticker, start_date, end_date=None):
    """Download daily price history for ``ticker`` and attach technical indicators.

    Args:
        ticker: Symbol passed to ``si.get_data``.
        start_date: Start of the range requested from ``si.get_data``.
        end_date: End of the range requested from ``si.get_data``. Defaults to
            9 February 2024, the cut-off this function previously hard-coded.

    Returns:
        DataFrame with ``Date``, ``Close``, ``Open``, ``High``, ``Low`` and
        ``Volume`` columns, passed through ``calculate_technical_indicators``.
        If the download or processing raises, the error is printed and an
        empty DataFrame is returned instead.
    """
    if end_date is None:
        end_date = datetime(2024, 2, 9)

    # Source column -> output column; the dict order is the output column order.
    column_names = {
        'index': 'Date',
        'close': 'Close',
        'open': 'Open',
        'high': 'High',
        'low': 'Low',
        'volume': 'Volume',
    }

    try:
        data = si.get_data(ticker, start_date=start_date, end_date=end_date)
        data = data.reset_index()

        # Validate every column selected below, not just 'close'.
        missing = [name for name in column_names if name not in data.columns]
        if missing:
            raise ValueError(f"Expected column(s) not found in the data: {missing}")

        # Select + rename in one chain so the result is a fresh frame; renaming a
        # column subset in place is what triggers pandas' SettingWithCopyWarning.
        data = data[list(column_names)].rename(columns=column_names)

        return calculate_technical_indicators(data)

    except Exception as e:
        # Deliberate best-effort: callers test `.empty` to skip failed tickers.
        print(f"Error fetching data for {ticker}: {e}")
        return pd.DataFrame()

def normalize_features(df, feature_columns):
    """Scale the given columns of ``df`` with a freshly fitted ``StandardScaler``.

    The scaled values are written back into ``df`` itself.

    Args:
        df: DataFrame holding the columns to scale.
        feature_columns: List of column names to fit and transform.

    Returns:
        Tuple ``(df, scaler)``: the same DataFrame object and the fitted scaler.
    """
    fitted_scaler = StandardScaler()
    scaled_values = fitted_scaler.fit_transform(df[feature_columns])
    df[feature_columns] = scaled_values
    return df, fitted_scaler

In [21]:
def fetch_actual_close_prices(ticker, start_date, end_date):
    """Download the closing prices of ``ticker`` for a date range.

    Args:
        ticker: Symbol passed to ``si.get_data``.
        start_date: Start of the range requested from ``si.get_data``.
        end_date: End of the range requested from ``si.get_data``.

    Returns:
        DataFrame with a ``Date`` and an ``Actual Close`` column. If the
        download raises, the error is printed and an empty DataFrame with
        those same two columns is returned, so callers can still rely on
        ``.empty`` and on the column names.
    """
    try:
        data = si.get_data(ticker, start_date=start_date, end_date=end_date)
        # Select + rename in one chain so the result is a fresh frame; renaming a
        # column subset in place is what triggers pandas' SettingWithCopyWarning.
        return (
            data.reset_index()[['index', 'close']]
            .rename(columns={'index': 'Date', 'close': 'Actual Close'})
        )
    except Exception as e:
        # Deliberate best-effort: report the problem and hand back "no data".
        print(f"Error fetching actual close prices for {ticker}: {e}")
        return pd.DataFrame({
            'Date': pd.Series(dtype='datetime64[ns]'),
            'Actual Close': pd.Series(dtype='float64'),
        })

In [45]:
import numpy as np


def calculate_volatility(data):
    """Return the annualised standard deviation of the log returns of ``data['y']``.

    A ``Log_Returns`` column holding the row-over-row log return of ``y`` is
    added to ``data`` as a side effect.

    Args:
        data: DataFrame with a numeric ``y`` column.

    Returns:
        Standard deviation of ``Log_Returns`` multiplied by ``sqrt(252)``.
    """
    prices = data['y']
    previous_prices = prices.shift(1)
    data['Log_Returns'] = np.log(prices / previous_prices)

    daily_std = data['Log_Returns'].std()
    return daily_std * np.sqrt(252)  # Annualized volatility

def determine_rolling_window(volatility):
    """Choose a rolling-window length from a volatility figure.

    Returns 10 when ``volatility`` is above 0.04, 20 when it is above 0.02,
    and 30 otherwise.
    """
    # (strict lower bound, window) pairs, checked from the highest bound down.
    for lower_bound, window in ((0.04, 10), (0.02, 20)):
        if volatility > lower_bound:
            return window
    return 30

def determine_prediction_period(volatility):
    """Choose how many periods to forecast from a volatility figure.

    Returns 45 when ``volatility`` is above 0.04, 60 when it is above 0.02,
    and 90 otherwise.
    """
    # (strict lower bound, period) pairs, checked from the highest bound down.
    for lower_bound, period in ((0.04, 45), (0.02, 60)):
        if volatility > lower_bound:
            return period
    return 90

In [46]:
from plotly.subplots import make_subplots

def display_forecasts_from_data(df, ticker):
    """Forecast ``ticker`` with its saved model and report how the error evolves.

    The history is renamed/cleaned and its feature columns scaled, the model
    stored at ``Models/{ticker}_prophet_model.pkl`` is loaded, a
    volatility-dependent number of daily dates after the last history row is
    predicted, and the forecast is compared with downloaded closing prices.
    Rolling MAE / RMSE / MAPE are printed at the row where the rolling MAE
    peaks and plotted as three stacked subplots.

    Args:
        df: History with ``Date``, ``Close`` and the six columns named in
            ``feature_columns`` below. The caller's frame is not modified.
        ticker: Symbol used for the model file name, the price download and
            all printed messages.

    Returns:
        None. Problems (empty input, model that cannot be loaded, no actual
        prices, too few actual prices for one rolling window) are printed and
        end the function early instead of raising.
    """
    if df.empty:
        print(f"Error: Empty DataFrame for {ticker}")
        return

    # Work on a renamed, NaN-free copy: renaming / scaling the caller's frame in
    # place would make a second call on the same frame behave differently.
    df = df.rename(columns={'Date': 'ds', 'Close': 'y'}).dropna()
    if df.empty:
        print(f"Error: No complete rows left for {ticker} after dropping missing values")
        return

    feature_columns = ['SMA_10', 'RSI_14', 'Upper_Band', 'Lower_Band', 'Close_1', 'Close_2']
    df, scaler = normalize_features(df, feature_columns)

    model_filename = f"Models/{ticker}_prophet_model.pkl"
    try:
        # joblib.load unpickles the file: only load model files you trust.
        model = joblib.load(model_filename)
    except Exception as e:
        print(f"Error loading model for {ticker}: {e}")
        return

    # NOTE(review): calculate_volatility returns an annualised figure, while the
    # determine_* helpers switch at 0.02 / 0.04 -- confirm those thresholds are
    # meant for annualised values, otherwise the top branch is nearly always taken.
    volatility = calculate_volatility(df)
    prediction_period = determine_prediction_period(volatility)

    last_date = df['ds'].iloc[-1]
    future_dates = pd.date_range(start=last_date + timedelta(days=1), periods=prediction_period, freq='D')
    future = pd.DataFrame({'ds': future_dates})

    # Every future row carries the last observed feature values, converted back
    # to their original scale.
    future_features = scaler.inverse_transform(df[feature_columns].iloc[-1:].values)
    for i, feature in enumerate(feature_columns):
        future[feature] = future_features[0, i]

    forecast = model.predict(future)

    # Anchor each forecast curve at the last observed close: only the change
    # relative to the first predicted row is added to it.
    last_close = df['y'].iloc[-1]
    forecasted_df = future[['ds']].copy()
    forecasted_df['Forecasted Close'] = last_close + (forecast['yhat'] - forecast['yhat'].iloc[0])
    forecasted_df['Forecasted High'] = last_close + (forecast['yhat_upper'] - forecast['yhat_upper'].iloc[0])
    forecasted_df['Forecasted Low'] = last_close + (forecast['yhat_lower'] - forecast['yhat_lower'].iloc[0])

    # Only fetch actual close prices for dates that are in the past or up to the current date
    end_date = min(future_dates.max(), datetime.now())
    actual_close_prices = fetch_actual_close_prices(ticker, future_dates.min(), end_date)
    if actual_close_prices.empty or 'Date' not in actual_close_prices.columns:
        # A failed download yields an empty frame; merging on its missing
        # 'Date' column would raise a KeyError.
        print(f"Error: No actual close prices available for {ticker}")
        return

    comparison_df = pd.merge(forecasted_df, actual_close_prices, how='left', left_on='ds', right_on='Date')
    comparison_df = comparison_df.drop(columns=['Date'])

    # Fill missing actual close prices using interpolation, but never for dates
    # after the fetch window: interpolate() would otherwise carry the last known
    # price forward onto dates that have no real data yet.
    interpolated = comparison_df['Actual Close'].interpolate()
    comparison_df['Actual Close'] = interpolated.where(comparison_df['ds'] <= pd.Timestamp(end_date))

    # Calculate error metrics
    error = comparison_df['Forecasted Close'] - comparison_df['Actual Close']
    comparison_df['Absolute Error'] = error.abs()
    comparison_df['Squared Error'] = error ** 2
    comparison_df['Percentage Error'] = (error / comparison_df['Actual Close']).abs() * 100

    # Calculate rolling average of the error metrics to identify deterioration point
    rolling_window = determine_rolling_window(volatility)

    comparison_df['MAE'] = comparison_df['Absolute Error'].rolling(window=rolling_window).mean()
    comparison_df['RMSE'] = (comparison_df['Squared Error'].rolling(window=rolling_window).mean())**0.5
    comparison_df['MAPE'] = comparison_df['Percentage Error'].rolling(window=rolling_window).mean()

    if comparison_df['MAE'].isna().all():
        # No window was ever complete, so there is no peak to report.
        print(f"Error: Not enough actual close prices for {ticker} to fill a {rolling_window}-row error window")
        return

    # Row where the rolling MAE is largest. The merge result has a default
    # 0..n-1 index, so this label is also the row's offset into the forecast.
    deterioration_point = comparison_df['MAE'].idxmax()

    print(f"Stock: {ticker}")
    print(f"Model performance starts to deteriorate after day: {deterioration_point}")
    print(f"MAE: {comparison_df.loc[deterioration_point, 'MAE']:.4f}")
    print(f"RMSE: {comparison_df.loc[deterioration_point, 'RMSE']:.4f}")
    print(f"MAPE: {comparison_df.loc[deterioration_point, 'MAPE']:.4f}")

    print("\nRecommendation:")
    print(f"The model should be retrained after approximately {deterioration_point} days due to the increase in error metrics.")

    # Create subplots for each error metric
    fig = make_subplots(rows=3, cols=1, shared_xaxes=True,
                        subplot_titles=("MAE", "RMSE", "MAPE"))

    for row, metric in enumerate(("MAE", "RMSE", "MAPE"), start=1):
        fig.add_trace(
            go.Scatter(x=comparison_df['ds'], y=comparison_df[metric], mode='lines+markers', name=metric),
            row=row, col=1,
        )

    fig.update_layout(height=1000, width=1000, title_text="Error Metrics Over Time", xaxis_title="Date", yaxis_title="Value")
    fig.show()


In [47]:
# Example usage: run the forecast evaluation for every ticker in the list.
tickers = [
    "AAPL", "ABBV", "ADBE", "AMZN", "AVGO", "BRK-B", "CRM", "COST", "CVX", "HD",
    "JNJ", "JPM", "LLY", "MA", "META", "MRK", "MSFT", "NVDA", "PG", "TSLA", "UNH", "V", "XOM",
]  # Array of tickers

# Approximately 2 years ago from 9 February 2024
start_date = datetime(2024, 2, 9) - timedelta(days=730)

for ticker in tickers:
    historical_data = fetch_historical_data(ticker, start_date)
    if historical_data.empty:
        # Download or processing failed for this ticker; move on to the next.
        continue
    display_forecasts_from_data(historical_data, ticker)

Stock: AAPL
Model performance starts to deteriorate after day: 34
MAE: 17.3527
RMSE: 17.4121
MAPE: 10.1402

Recommendation:
The model should be retrained after approximately 34 days due to the increase in error metrics.


Stock: ABBV
Model performance starts to deteriorate after day: 34
MAE: 5.2482
RMSE: 5.3054
MAPE: 2.9142

Recommendation:
The model should be retrained after approximately 34 days due to the increase in error metrics.


Stock: ADBE
Model performance starts to deteriorate after day: 44
MAE: 121.8307
RMSE: 123.5820
MAPE: 24.1277

Recommendation:
The model should be retrained after approximately 44 days due to the increase in error metrics.


Stock: AMZN
Model performance starts to deteriorate after day: 42
MAE: 7.1261
RMSE: 7.2692
MAPE: 4.0333

Recommendation:
The model should be retrained after approximately 42 days due to the increase in error metrics.


Stock: AVGO
Model performance starts to deteriorate after day: 30
MAE: 14.5352
RMSE: 16.3000
MAPE: 10.6210

Recommendation:
The model should be retrained after approximately 30 days due to the increase in error metrics.


Stock: BRK-B
Model performance starts to deteriorate after day: 20
MAE: 13.6840
RMSE: 14.0196
MAPE: 3.3212

Recommendation:
The model should be retrained after approximately 20 days due to the increase in error metrics.


Stock: CRM
Model performance starts to deteriorate after day: 28
MAE: 13.4300
RMSE: 14.6882
MAPE: 4.3313

Recommendation:
The model should be retrained after approximately 28 days due to the increase in error metrics.


Stock: COST
Model performance starts to deteriorate after day: 27
MAE: 30.8339
RMSE: 33.5184
MAPE: 4.0454

Recommendation:
The model should be retrained after approximately 27 days due to the increase in error metrics.


Stock: CVX
Model performance starts to deteriorate after day: 31
MAE: 3.5784
RMSE: 3.7111
MAPE: 2.3879

Recommendation:
The model should be retrained after approximately 31 days due to the increase in error metrics.


Stock: HD
Model performance starts to deteriorate after day: 44
MAE: 15.3853
RMSE: 18.2459
MAPE: 3.9768

Recommendation:
The model should be retrained after approximately 44 days due to the increase in error metrics.


Stock: JNJ
Model performance starts to deteriorate after day: 23
MAE: 5.0127
RMSE: 5.0279
MAPE: 3.1066

Recommendation:
The model should be retrained after approximately 23 days due to the increase in error metrics.


Stock: JPM
Model performance starts to deteriorate after day: 44
MAE: 19.1233
RMSE: 19.3528
MAPE: 9.8128

Recommendation:
The model should be retrained after approximately 44 days due to the increase in error metrics.


Stock: LLY
Model performance starts to deteriorate after day: 30
MAE: 63.5072
RMSE: 69.4261
MAPE: 8.1916

Recommendation:
The model should be retrained after approximately 30 days due to the increase in error metrics.


Stock: MA
Model performance starts to deteriorate after day: 44
MAE: 30.1307
RMSE: 31.0328
MAPE: 6.2560

Recommendation:
The model should be retrained after approximately 44 days due to the increase in error metrics.


Stock: META
Model performance starts to deteriorate after day: 44
MAE: 29.1942
RMSE: 30.5728
MAPE: 5.8071

Recommendation:
The model should be retrained after approximately 44 days due to the increase in error metrics.


Stock: MRK
Model performance starts to deteriorate after day: 17
MAE: 7.0784
RMSE: 9.4493
MAPE: 5.5097

Recommendation:
The model should be retrained after approximately 17 days due to the increase in error metrics.


Stock: MSFT
Model performance starts to deteriorate after day: 16
MAE: 9.8006
RMSE: 10.5035
MAPE: 2.4212

Recommendation:
The model should be retrained after approximately 16 days due to the increase in error metrics.


Stock: NVDA
Model performance starts to deteriorate after day: 44
MAE: 22.1284
RMSE: 22.5820
MAPE: 24.3301

Recommendation:
The model should be retrained after approximately 44 days due to the increase in error metrics.


Stock: PG
Model performance starts to deteriorate after day: 41
MAE: 3.0250
RMSE: 3.1263
MAPE: 1.8703

Recommendation:
The model should be retrained after approximately 41 days due to the increase in error metrics.


Stock: TSLA
Model performance starts to deteriorate after day: 44
MAE: 31.0394
RMSE: 34.9869
MAPE: 18.2650

Recommendation:
The model should be retrained after approximately 44 days due to the increase in error metrics.


Stock: UNH
Model performance starts to deteriorate after day: 33
MAE: 36.0856
RMSE: 36.8990
MAPE: 7.5119

Recommendation:
The model should be retrained after approximately 33 days due to the increase in error metrics.


Stock: V
Model performance starts to deteriorate after day: 44
MAE: 13.6944
RMSE: 14.3306
MAPE: 4.8000

Recommendation:
The model should be retrained after approximately 44 days due to the increase in error metrics.


Stock: XOM
Model performance starts to deteriorate after day: 44
MAE: 12.9701
RMSE: 13.8410
MAPE: 11.5044

Recommendation:
The model should be retrained after approximately 44 days due to the increase in error metrics.
