# TSLA Model Monitoring (Evidently)

Reference window: **2020-01-01 → yesterday**
Current window: **today only**

This notebook fetches features for TSLA and SPY, scales them with your saved `scaler.pkl`, runs predictions using `final_model.keras`, and builds Evidently reports for data & prediction drift.

In [115]:
!pip install  evidently yfinance pandas numpy tensorflow joblib ta


Defaulting to user installation because normal site-packages is not writeable


In [116]:
import sys
python = sys.executable
print("Notebook Python:", python)

! "{python}" -m pip install --upgrade evidently==0.7.16


Notebook Python: c:\Program Files\Python310\python.exe
Defaulting to user installation because normal site-packages is not writeable


In [117]:
import evidently
print(evidently.__version__)

0.7.16


In [118]:
import pandas as pd
import numpy as np
import yfinance as yf
import joblib
import tensorflow as tf
import ta
from datetime import datetime, timedelta

# NEW Evidently imports
from evidently import Report
from evidently.presets import DataDriftPreset, RegressionPreset

print("Libraries imported successfully")

Libraries imported successfully


In [119]:
# Load model & scaler
import os
import tensorflow as tf
import joblib

# Path to the best_model folder
MODEL_DIR = os.path.join("best_model")

# Load model & scaler from best_model/
model_path = os.path.join(MODEL_DIR, "final_model.keras")
scaler_path = os.path.join(MODEL_DIR, "scaler.pkl")

print("Loading resources...")

model = tf.keras.models.load_model(model_path)
scaler = joblib.load(scaler_path)

# Determine model input shape
input_shape = model.input_shape
NEEDS_SEQUENCE = len(input_shape) == 3
SEQ_LENGTH = input_shape[1] if NEEDS_SEQUENCE else 1

print(f"Model loaded. Sequence model: {NEEDS_SEQUENCE}, SEQ_LENGTH: {SEQ_LENGTH}")

Loading resources...
Model loaded. Sequence model: False, SEQ_LENGTH: 1


In [120]:
def engineer(df):
    # Ensure normal columns when yfinance returns MultiIndex
    if isinstance(df.columns, pd.MultiIndex):
        df.columns = [col[0] for col in df.columns]
    df = df.copy()
    df['Volatility'] = (df['High'] - df['Low']) / df['Close']
    df['RSI'] = ta.momentum.RSIIndicator(df['Close'], window=14).rsi()
    df['SMA'] = ta.trend.SMAIndicator(df['Close'], window=21).sma_indicator()
    df['Return'] = (df['Close'] - df['Open']) / df['Open']
    return df[["Volatility", "RSI", "SMA", "Return", "Volume"]]

print('feature engineering function ready')

feature engineering function ready


In [121]:
def fetch_dataset_by_date(ticker, start_date, end_date):
    # Download target and SPY for the same period
    df_target = yf.download(ticker, start=start_date, end=end_date, progress=False)
    df_spy = yf.download('SPY', start=start_date, end=end_date, progress=False)

    if df_target.empty or df_spy.empty:
        raise ValueError(f'No data for given range: {start_date} to {end_date}')

    f_target = engineer(df_target)
    f_spy = engineer(df_spy)

    merged = pd.merge(f_target, f_spy, left_index=True, right_index=True, suffixes=("_TSLA","_SPY"))
    merged = merged.dropna()

    # Scale features
    scaled = scaler.transform(merged.values)

    # Build model inputs based on whether model uses sequences
    if NEEDS_SEQUENCE:
        if len(scaled) <= SEQ_LENGTH:
            raise ValueError('Not enough rows to form one sequence')
        X = np.array([scaled[i-SEQ_LENGTH:i] for i in range(SEQ_LENGTH, len(scaled))])
        y = merged.iloc[SEQ_LENGTH:].copy()
    else:
        X = scaled
        y = merged.copy()

    preds = model.predict(X).flatten()
    y['prediction'] = preds
    return y

print('fetch function ready')

fetch function ready


In [122]:
def fetch_dataset_by_date(ticker, start_date, end_date):

    # Convert dates
    start_dt = pd.to_datetime(start_date)
    preload_start = (start_dt - timedelta(days=60)).strftime("%Y-%m-%d")

    # Fetch extended history for SMA/RSI warmup
    df_target = yf.download(ticker, start=preload_start, end=end_date, progress=False)
    df_spy    = yf.download('SPY',    start=preload_start, end=end_date, progress=False)

    if df_target.empty or df_spy.empty:
        raise ValueError(f'No data for given range: {start_date} to {end_date}')

    f_target = engineer(df_target)
    f_spy = engineer(df_spy)

    # Align only AFTER engineering
    merged = pd.merge(f_target, f_spy, left_index=True, right_index=True, suffixes=("_TSLA","_SPY"))

    # Keep only rows within the actual range requested
    merged = merged.loc[start_date:end_date]

    # Drop NaN rows AFTER trimming to date range
    merged = merged.dropna()

    # If nothing survives, it's normal — user requested too short period
    if len(merged) == 0:
        raise ValueError("Merged dataset empty — need more days for indicators (SMA, RSI).")

    # Scale features
    scaled = scaler.transform(merged.values)

    # Build model inputs
    if NEEDS_SEQUENCE:
        if len(scaled) <= SEQ_LENGTH:
            raise ValueError('Not enough rows to form one sequence')
        X = np.array([scaled[i-SEQ_LENGTH:i] for i in range(SEQ_LENGTH, len(scaled))])
        y = merged.iloc[SEQ_LENGTH:].copy()
    else:
        X = scaled
        y = merged.copy()

       # Model predictions
    
    preds = model.predict(X).flatten()
    y['prediction'] = preds
    
    # ---------------------------------------------
    #  ADD TARGET COLUMN (rule)
    # ---------------------------------------------
    y['target'] = (y['prediction'] >= 0.5).astype(int)

    return y


In [131]:
from datetime import datetime, timedelta

# Reference range: from 2020-01-01 to yesterday
REFERENCE_START = "2025-01-01"
REFERENCE_END = (datetime.now() - timedelta(days=1)).strftime("%Y-%m-%d")

print('Building reference dataset from', REFERENCE_START, 'to', REFERENCE_END)

reference = fetch_dataset_by_date('TSLA', REFERENCE_START, REFERENCE_END)
reference.to_csv('reference_dataset_tsla.csv')
reference.tail()

Building reference dataset from 2025-01-01 to 2025-11-21



YF.download() has changed argument auto_adjust default to True


YF.download() has changed argument auto_adjust default to True






X does not have valid feature names, but MinMaxScaler was fitted with feature names



Unnamed: 0_level_0,Volatility_TSLA,RSI_TSLA,SMA_TSLA,Return_TSLA,Volume_TSLA,Volatility_SPY,RSI_SPY,SMA_SPY,Return_SPY,Volume_SPY,prediction,target
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2025-11-14,0.072734,41.068227,442.574764,0.046725,105506700,0.018439,48.271921,676.885713,0.009844,96846700,0.796764,1
2025-11-17,0.061675,42.637983,441.127622,0.02553,102214300,0.017336,43.209748,676.946664,-0.006018,90456100,0.800741,1
2025-11-18,0.037857,40.67949,438.928574,-0.010188,80688600,0.014029,39.25138,676.41238,-0.003051,114467500,0.734298,1
2025-11-19,0.032872,41.709545,437.090002,-0.005392,72047700,0.012964,41.867554,676.0,0.0028,94703000,0.682935,1
2025-11-20,0.086532,39.356599,435.007145,-0.046789,113548800,0.036274,35.370223,675.272859,-0.030286,165293500,0.931952,1


In [132]:
def get_last_n_trading_days(ticker="TSLA", n=7):
    """Returns a list of the last n trading day dates for the given ticker."""
    today = datetime.now().date()
    found_days = []

    # Search backwards up to 30 days (covers long holidays)
    for i in range(30):
        day = today - timedelta(days=i)
        next_day = day + timedelta(days=1)

        df = yf.download(
            ticker,
            start=day.strftime("%Y-%m-%d"),
            end=next_day.strftime("%Y-%m-%d"),
            progress=False
        )

        if not df.empty:
            found_days.append(day)

        # Stop when we have enough valid days
        if len(found_days) == n:
            break

    if len(found_days) < n:
        raise ValueError("Not enough trading days found in last 30 days.")
    
    print('Returned days',found_days )
    # Return sorted oldest → newest
    return sorted(found_days)


In [133]:
from datetime import datetime, timedelta

# Get last n trading days for TSLA
trading_days = get_last_n_trading_days("TSLA", n=22)

CURRENT_START = trading_days[0].strftime("%Y-%m-%d")  # oldest of the n
CURRENT_END   = trading_days[-1].strftime("%Y-%m-%d") # most recent trading day

print('Building reference dataset from', CURRENT_START, 'to', CURRENT_END)

current = fetch_dataset_by_date('TSLA', CURRENT_START, CURRENT_END)
current.to_csv('current_dataset_tsla.csv')
current.tail()


YF.download() has changed argument auto_adjust default to True


YF.download() has changed argument auto_adjust default to True


YF.download() has changed argument auto_adjust default to True


YF.download() has changed argument auto_adjust default to True


YF.download() has changed argument auto_adjust default to True


YF.download() has changed argument auto_adjust default to True


YF.download() has changed argument auto_adjust default to True


1 Failed download:
['TSLA']: YFPricesMissingError('possibly delisted; no price data found  (1d 2025-11-16 -> 2025-11-17)')

YF.download() has changed argument auto_adjust default to True


1 Failed download:
['TSLA']: YFPricesMissingError('possibly delisted; no price data found  (1d 2025-11-15 -> 2025-11-16)')

YF.download() has changed argument auto_adjust default to True


YF.download() has changed argument auto_adjust default to True


YF.download() has changed argument auto_adjust default to True


YF.download() has changed argument a

Returned days [datetime.date(2025, 11, 22), datetime.date(2025, 11, 21), datetime.date(2025, 11, 20), datetime.date(2025, 11, 19), datetime.date(2025, 11, 18), datetime.date(2025, 11, 17), datetime.date(2025, 11, 14), datetime.date(2025, 11, 13), datetime.date(2025, 11, 12), datetime.date(2025, 11, 11), datetime.date(2025, 11, 10), datetime.date(2025, 11, 7), datetime.date(2025, 11, 6), datetime.date(2025, 11, 5), datetime.date(2025, 11, 4), datetime.date(2025, 11, 3), datetime.date(2025, 10, 31), datetime.date(2025, 10, 30), datetime.date(2025, 10, 29), datetime.date(2025, 10, 28), datetime.date(2025, 10, 27), datetime.date(2025, 10, 24)]
Building reference dataset from 2025-10-24 to 2025-11-22


Unnamed: 0_level_0,Volatility_TSLA,RSI_TSLA,SMA_TSLA,Return_TSLA,Volume_TSLA,Volatility_SPY,RSI_SPY,SMA_SPY,Return_SPY,Volume_SPY,prediction,target
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2025-11-17,0.061675,42.507084,441.127622,0.02553,102214300,0.017336,43.055384,676.946664,-0.006018,90456100,0.801463,1
2025-11-18,0.037857,40.541245,438.928574,-0.010188,80688600,0.014029,39.076828,676.41238,-0.003051,114467500,0.735258,1
2025-11-19,0.032872,41.580641,437.090002,-0.005392,72047700,0.012964,41.722363,676.0,0.0028,94703000,0.683865,1
2025-11-20,0.086532,39.220094,435.007145,-0.046789,113548800,0.036274,35.202082,675.272859,-0.030286,165293500,0.932222,1
2025-11-21,0.048684,38.118699,432.250478,-0.027913,100180500,0.020788,41.534569,674.66667,0.006076,123872700,0.796136,1


In [179]:
import subprocess
def trigger_retraining():
    """
    Mechanism to trigger action. 
    In a real system, this might call an Airflow DAG or AWS Lambda.
    """
    print("\n[ACTION TRIGGERED]  MODEL RETRAINING REQUESTED")
    print("Reason: Significant Data Drift detected.")
    try:
        print("[INFO] Launching local retraining job using train_model.py...")
        subprocess.run(["python", "train_model.py"], check=True)
        print("[SUCCESS] Retraining completed. New model ready.")
    except Exception as e:
        print("[ERROR] Retraining failed:", str(e))

In [None]:
import smtplib
from email.mime.text import MIMEText
import yaml

def load_config(path="config.yaml"):
    """
    Load YAML configuration file.
    """
    try:
        with open(path, "r") as f:
            return yaml.safe_load(f)
    except Exception as e:
        print(f"[CONFIG ERROR] Failed to load {path}: {e}")
        return None


def send_email_alert(subject, body, config_path="config.yaml"):
    """
    Sends an email using SMTP settings loaded from config.yaml.
    """

    config = load_config(config_path)
    if config is None:
        return False

    # Read settings from YAML
    try:
        smtp_server = config["smtp"]["server"]
        smtp_port   = config["smtp"]["port"]
        smtp_user   = config["smtp"]["user"]
        smtp_pass   = config["smtp"]["pass"]

        email_from  = config["email"]["from"]
        email_to    = config["email"]["to"]

    except KeyError as e:
        print(f"[EMAIL ERROR] Missing required YAML key: {e}")
        return False

    # Create the email message
    msg = MIMEText(body)
    msg["Subject"] = subject
    msg["From"]    = email_from
    msg["To"]      = email_to

    # Send using SMTP server
    try:
        server = smtplib.SMTP(smtp_server, smtp_port)
        server.starttls()
        server.login(smtp_user, smtp_pass)
        server.sendmail(email_from, [email_to], msg.as_string())
        server.quit()

        print("[EMAIL SENT] Alert email successfully delivered.")
        return True

    except Exception as e:
        print(f"[EMAIL ERROR] Failed to send email: {e}")
        return False


In [180]:
def send_alert(drift_score, drifted_features):
    """
    Sends email alert when drift is detected.
    """

    subject = f"[ML Drift Alert] Drift Score {drift_score:.2f}"
    body = (
        " MACHINE LEARNING DRIFT DETECTED \n\n"
        f"Drift Score: {drift_score:.4f}\n"
        f"Drifting Features:\n - " + "\n - ".join(drifted_features) + "\n\n"
        "Action Required: Model retraining triggered.\n"
    )

    print("\n[ALERT] Data Drift Detected!")
    print(f"Sending Email...")

    send_email_alert(subject, body)


In [173]:
# Training Dataset Summary
from evidently import Report
from evidently.metrics import *
from evidently.presets import *

reference = pd.read_csv("reference_dataset_tsla.csv")
current = pd.read_csv("current_dataset_tsla.csv")

report = Report([
    DataSummaryPreset()
])

my_eval = report.run(reference, None)
#my_eval
print("Saved reports/referncedatasetsummary_report.html")
my_eval.save_html("reports/referncedatasetsummary_report.html")


Saved reports/referncedatasetsummary_report.html


In [176]:
# Current  Dataset Summary
from evidently import Report
from evidently.metrics import *
from evidently.presets import *

reference = pd.read_csv("reference_dataset_tsla.csv")
current = pd.read_csv("current_dataset_tsla.csv")

report = Report([
    DataSummaryPreset()
])

my_eval = report.run(reference_data=reference,
        current_data=current)
#my_eval
print("Saved reports/refernce_current_datasetsummary_report.html")
my_eval.save_html("reports/refernce_current_datasetsummary_report.html")


Saved reports/refernce_current_datasetsummary_report.html


In [144]:
# Build Evidently report only if we have current data
import os

os.makedirs("reports", exist_ok=True)

from evidently import Report
from evidently.presets import DataDriftPreset, RegressionPreset
import pandas as pd

reference = pd.read_csv("reference_dataset_tsla.csv")
current = pd.read_csv("current_dataset_tsla.csv")


if current.empty:
    print("No current data available.")
else:
    print("Running Evidently report...")

    # Data Drift Evaluation

    feature_cols = [
    'Volatility_TSLA','RSI_TSLA','SMA_TSLA','Return_TSLA','Volume_TSLA',
    'Volatility_SPY','RSI_SPY','SMA_SPY','Return_SPY','Volume_SPY'
    ]

    prediction_cols =['prediction','target']

    reference_features = reference[feature_cols]
    current_features = current[feature_cols]
    
    report = Report(metrics=[
        DataDriftPreset()
    ])

    data_eval=report.run(
        reference_data=reference_features,
        current_data=current_features
    )

    report

    data_eval.save_html("reports/tsla_datadrift_report.html")

    print("Saved reports/tsla_datadrift_report.html")

    # Prediction Drift Evaluation

    reference_prediction_features = reference[prediction_cols]
    current_prediction_features = current[prediction_cols]
    
    
    pred_eval=report.run(
        reference_data=reference_prediction_features,
        current_data=current_prediction_features
    )

    pred_eval.save_html("reports/tsla_targetdrift_report.html")

    print("Saved reports/tsla_targetdrift_report.html")

    #  Check for Anomalies programmatically
    # Extract metric result as a Python dict
    data_eval.save_json("reports/tsla_datadrift_report.json")
    results = eval.dict()
    
    # Navigate JSON structure to find drift share
    DRIFT_THRESHOLD = 0.3   # you choose

    # --- Extract global drift share from DriftedColumnsCount ---
    drift_count_metric = results["metrics"][0]   # ALWAYS DriftedColumnsCount

    drift_share = drift_count_metric["value"]["share"]
    num_drifted = drift_count_metric["value"]["count"]

    print(f"Drift Share: {drift_share}, Drifted Features: {num_drifted}")

    # --- Extract per-feature drift based on ValueDrift metrics ---
    drifted_columns = []

    for metric in results["metrics"]:
        if metric["config"]["type"] == "evidently:metric_v2:ValueDrift":
            col = metric["config"]["column"]
            score = metric["value"]
            threshold = metric["config"]["threshold"]

            if score > threshold:
                drifted_columns.append(col)

    # --- Decision Logic ---
    if drift_share > DRIFT_THRESHOLD:
        print("[ALERT] Significant drift detected!")
        print("Drifted columns:", drifted_columns)
        send_alert(drift_share, drifted_columns)
        trigger_retraining()
    else:
        print("[STATUS] Healthy — no significant drift detected.")

Running Evidently report...
Saved reports/tsla_datadrift_report.html
Saved reports/tsla_targetdrift_report.html
Drift Share: 1.0, Drifted Features: 12.0
[ALERT] Significant drift detected!
Drifted columns: ['Volatility_TSLA', 'RSI_TSLA', 'SMA_TSLA', 'Return_TSLA', 'Volume_TSLA', 'Volatility_SPY', 'RSI_SPY', 'SMA_SPY', 'Return_SPY', 'Volume_SPY', 'prediction', 'Date']

[ALERT]  Data Drift Detected!
Send an Email Drift Score: 1.00
Drifting Features: ['Volatility_TSLA', 'RSI_TSLA', 'SMA_TSLA', 'Return_TSLA', 'Volume_TSLA', 'Volatility_SPY', 'RSI_SPY', 'SMA_SPY', 'Return_SPY', 'Volume_SPY', 'prediction', 'Date']

[ACTION TRIGGERED]  MODEL RETRAINING REQUESTED
Reason: Significant Data Drift detected.


## Notes
- If markets are closed (weekends/holidays) today's download may be empty. The notebook will detect and skip the Evidently run.
- The notebook expects `final_model.keras` and `scaler.pkl` to be present in the same working directory.
- Generated files:
  - `reference_dataset_tsla.csv`
  - `current_dataset_tsla.csv` (may be empty on closed-market days)
  - `tsla_monitoring_report.html`
  - `tsla_monitoring_dashboard.html`
