In [None]:
from evidently.report import Report
from evidently import ColumnMapping
from evidently.metrics import ColumnDriftMetric, DatasetDriftMetric, DatasetMissingValuesMetric

import pandas as pd

In [None]:
import os
from urllib.parse import quote_plus

from dotenv import load_dotenv
from sqlalchemy import create_engine

# Load environment variables from .env file
load_dotenv()

# Get database connection details from environment variables
db_user = os.getenv("POSTGRES_USER")
db_password = os.getenv("POSTGRES_PASSWORD")
db_host = os.getenv("POSTGRES_HOST")
db_port = os.getenv("POSTGRES_PORT")
db_name = os.getenv("POSTGRES_DBNAME")

# Fail fast with a readable message instead of silently building a URL that
# contains the literal text "None" for any setting that is not defined.
missing_settings = [
    env_name
    for env_name, env_value in (
        ("POSTGRES_USER", db_user),
        ("POSTGRES_PASSWORD", db_password),
        ("POSTGRES_DBNAME", db_name),
    )
    if env_value is None
]
if missing_settings:
    raise RuntimeError(
        "Missing required environment variable(s): " + ", ".join(missing_settings)
    )

# Create the connection string.
# The credentials are percent-encoded so that characters such as '@', ':' or
# '/' inside the user name or password cannot corrupt the URL structure.
# NOTE(review): db_host / db_port are read above but the URL deliberately keeps
# the original hard-coded localhost:4321 endpoint -- presumably a locally
# forwarded port; confirm whether POSTGRES_HOST / POSTGRES_PORT should be used.
connection_string = (
    f"postgresql://{quote_plus(db_user)}:{quote_plus(db_password)}"
    f"@localhost:4321/{db_name}"
)

# Create the SQLAlchemy engine
engine = create_engine(connection_string)


In [None]:

# Calendar months (1-12) that define the two windows being compared.
# The original cell loaded month 1 for BOTH frames, so the drift report was
# comparing a dataset against itself and could never detect drift.
REFERENCE_MONTH = 1
# NOTE(review): month 2 is assumed to be the intended comparison window --
# adjust to whichever month should be checked against the reference.
CURRENT_MONTH = 2

# `{month:d}` only accepts integers, so nothing but a number can be
# interpolated into the SQL text.
MONTH_QUERY = "select * from customer_features where extract(month from date) = {month:d}"

# Query the customer_features table once per window.
reference_data = pd.read_sql(MONTH_QUERY.format(month=REFERENCE_MONTH), engine)
current_data = pd.read_sql(MONTH_QUERY.format(month=CURRENT_MONTH), engine)


In [None]:
# Remove the `date` column from both frames before they are compared.
# Re-assigning the result (rather than inplace=True) combined with
# errors="ignore" keeps this cell safe to re-run: a second execution no longer
# raises KeyError because the column has already been dropped.
reference_data = reference_data.drop(columns=["date"], errors="ignore")
current_data = current_data.drop(columns=["date"], errors="ignore")

In [None]:
# Build a report that contains only the dataset-level drift metric and
# evaluate the current frame against the reference frame.
# (A preset-based variant, DataDriftPreset(drift_share=0.7), was tried earlier
# and is intentionally not used here.)
drift_metrics = [DatasetDriftMetric()]
report = Report(metrics=drift_metrics)
report.run(current_data=current_data, reference_data=reference_data)

# Render the report inline in the notebook.
report.show()

In [None]:

# Pull the dataset-level drift flag out of the report's dictionary form.
# Index 0 is the first (and, as configured in this notebook, only) metric.
report_payload = report.as_dict()
first_metric = report_payload["metrics"][0]
drift_result = first_metric["result"]["dataset_drift"]
print(drift_result)

In [None]:
# Preview the first two rows of the reference frame.
reference_data.head(n=2)

In [None]:
# Preview the first two rows of the current frame.
current_data.head(n=2)