In [0]:
%pip install databricks-feature_engineering

In [0]:
%pip install mlflow==3.6.0

In [0]:
%pip install lightgbm

In [0]:
%restart_python

In [0]:
from datetime import datetime,date
from pyspark.sql import functions as F
from sklearn.metrics import accuracy_score, precision_score, recall_score, roc_auc_score,f1_score
import mlflow
from mlflow.tracking import MlflowClient
import pandas as pd
from pyspark.sql.functions import col
from databricks.feature_engineering import FeatureEngineeringClient
from pyspark.sql.types import (
    StructType,
    StructField,
    StringType,
    DateType,
    DoubleType,
    IntegerType,
    BooleanType,
    TimestampType
)


# --- Load labelled loans and the inference feature table ---
# Labels are read from a CSV file; its key column `required_loan_id` is
# renamed to `loan_id` so it can be joined against the feature table.
df = pd.read_csv(
    '/Volumes/ispl_databricks/default/training/MW_Train/merged_data_corrected.csv'
).rename(columns={'required_loan_id': 'loan_id'})

# Features are read from a Spark table and pulled into pandas.
spark_df = spark.table('ispl_databricks.model_logs.mw_final_inference_data')
df_fs = spark_df.toPandas()

# Inner join keeps only loans present in both sources. `df_label` carries the
# key, the label and the features; `df_target` is the same frame with the key
# and label columns removed, i.e. what is passed to the model.
label_cols = ['loan_id', 'target_30_dpd']
df_label = df[label_cols].merge(df_fs, on='loan_id', how='inner')
df_target = df_label.drop(columns=label_cols)




# --- Load the registered model from the MLflow Registry ---
client = MlflowClient()
model_name = "ispl_databricks.model_logs.final_mw_model"
# The URI pins version 4 of the registered model.
model_uri = f"models:/{model_name}/4"
model = mlflow.pyfunc.load_model(model_uri=model_uri)




# --- Predict ---
# Score the feature frame with the loaded pyfunc model.
prediction = model.predict(df_target)
# Print the raw model output once for inspection (it was previously dumped twice).
print(prediction)
# Column 0 of the model output is stored as the per-loan score.
# NOTE(review): assumes predict() returns a 2-D array-like indexable as [:, 0] — confirm.
df_label['prediction'] = prediction[:,0]
def get_target(x):
    """Binarise a score: return 1 when ``x`` is strictly greater than 0.5, else 0.

    A score of exactly 0.5 (and any value for which ``x > 0.5`` is false,
    such as NaN) maps to 0.
    """
    return 1 if x > 0.5 else 0
# Convert every raw score into a 0/1 predicted class via get_target.
df_label['final_score'] = df_label['prediction'].apply(get_target)

# Preview the first rows of the scored frame.
print(df_label.head())







# --- Compute Metrics ---
y_true = df_label['target_30_dpd']
y_pred = df_label['final_score']

# Threshold-based metrics compare the 0/1 predicted class against the label.
# Results are cast to plain Python floats before being handed to Spark.
accuracy = float(accuracy_score(y_true, y_pred))
precision = float(precision_score(y_true, y_pred))
recall = float(recall_score(y_true, y_pred))
# Stored as `f1` so the imported `f1_score` function is not overwritten by its own result.
f1 = float(f1_score(y_true, y_pred))
# ROC AUC is a ranking metric, so it is computed on the continuous model score.
# Feeding it the thresholded 0/1 labels collapses the ROC curve to a single point.
auc = float(roc_auc_score(y_true, df_label['prediction']))

# Drift is flagged when the AUC falls below 0.5. The explicit bool() keeps the
# value a native Python bool for the BooleanType column below.
drift_flag = bool(auc < 0.5)
print(drift_flag)

# --- Save Metrics to Delta Table ---
# Explicit schema for the single metrics row appended on each run.
schema = StructType([
    StructField("week_start_date", DateType(), True),
    StructField("model_name", StringType(), True),
    StructField("model_version", StringType(), True),
    StructField("dataset_id", StringType(), True),
    StructField("accuracy", DoubleType(), True),
    StructField("precision", DoubleType(), True),
    StructField("recall", DoubleType(), True),
    StructField("f1_score", DoubleType(), True),
    StructField("auc", DoubleType(), True),
    StructField("sample_size", IntegerType(), True),
    StructField("drift_flag", BooleanType(), True),
    StructField("comment", StringType(), True),
    StructField("created_at", TimestampType(), True)
])

# NOTE(review): `week_start_date` is populated with today's date — confirm this is intended.
created_date = date.today()
created_datetime = datetime.now()

# NOTE(review): the model name and version are literals here — keep them in sync
# with the model that was actually loaded for scoring.
metrics_row = (
    created_date,
    'ff_mw',
    '4',
    "weekly_dataset_v1",
    accuracy,
    precision,
    recall,
    f1,
    auc,
    len(df_label),
    drift_flag,
    None,
    created_datetime,
)
metrics_df = spark.createDataFrame([metrics_row], schema)

# Append (never overwrite) so the table accumulates one row per run.
metrics_df.write.format("delta").mode("append").saveAsTable("ispl_databricks.model_logs.model_performance_logs")


In [0]:
%sql
-- Show every row currently stored in the model performance log table.
SELECT *
FROM ispl_databricks.model_logs.model_performance_logs

In [0]:
# Recompute the drift flag from the AUC: True only when the AUC is below 0.5.
# bool() keeps the result a native Python bool, as the if/else form produced.
drift_flag = bool(auc < 0.5)

In [0]:
# Publish the drift flag as a job task value under the key "drift_flag".
dbutils.jobs.taskValues.set(key="drift_flag", value=drift_flag)