In [0]:
!pip install lightgbm

In [0]:
%pip install databricks-feature_engineering

In [0]:
 %restart_python

In [0]:
import pandas as pd
import lightgbm as lgb
import pickle
import joblib
import mlflow
import mlflow.lightgbm
from databricks.feature_engineering import FeatureEngineeringClient
from databricks.feature_engineering import FeatureLookup
from sklearn.metrics import accuracy_score
import numpy as np
import requests
from sklearn.metrics import (
    roc_auc_score,
    precision_score,
    recall_score
)
from databricks.sdk.service.jobs import JobSettings
from databricks.sdk import WorkspaceClient

# Define job settings with approval gate


In [0]:
dbutils.widgets.text("training_csv", "")

In [0]:
input_path = dbutils.widgets.get("training_csv")

In [0]:
print(input_path)

In [0]:

df=pd.read_csv(input_path)

In [0]:
import mlflow

In [0]:
df.head()

In [0]:
df_label = spark.createDataFrame(df[['loan_id','target']])

In [0]:
fe = FeatureEngineeringClient()

In [0]:
spark_df_production = spark.table('ispl_databricks.model_logs.bd_final_inference_data')

In [0]:
df_production = spark_df_production.join(df_label, on='loan_id', how='inner')
df_production = df_production.toPandas()

In [0]:
test_production_data = df_production.drop(['loan_id','target'],axis=1)
test_production_target = df_production[['loan_id','target']]

In [0]:
fe = FeatureEngineeringClient()

In [0]:
training_set = fe.create_training_set(
    df=df_label,
    feature_lookups=[
        FeatureLookup(
            table_name="ispl_databricks.model_logs.bd_final_feature_stores",
            lookup_key="loan_id"
        )
    ],
    label="target",
   
)


In [0]:
train_df = training_set.load_df()

In [0]:
train_df = train_df.toPandas()

In [0]:
test_target = train_df[['loan_id','target']]

In [0]:
test_data = train_df.drop(['loan_id','target'], axis=1)

In [0]:
train_df.shape

In [0]:
from mlflow.tracking import MlflowClient

In [0]:

client = MlflowClient()

In [0]:
model_name = 'ispl_databricks.model_logs.final_bd_model'

In [0]:

model_versions = client.search_model_versions(
    filter_string=f"name = '{model_name}'",
    
    
)

In [0]:
versions = []
for mv in model_versions:
    versions.append(int(mv.version))

In [0]:
versions

In [0]:
versions.sort(reverse=True)

In [0]:
production_model = mlflow.pyfunc.load_model(
    model_uri=f"models:/{model_name}@champion"
)

In [0]:
for mv in model_versions:
    print(
        f"Model: {mv.name}",
        f"Version: {mv.version}",
        f"Stage: {mv.current_stage}",
        f"Created at: {mv.creation_timestamp}",
        sep=" | "
    )

In [0]:
latest_version = str(versions[0])

In [0]:
latest_version

In [0]:

model_uri_latest = f'models:/ispl_databricks.model_logs.final_bd_model/{latest_version}'
model_latest = mlflow.pyfunc.load_model(model_uri_latest)

In [0]:
# predictions = []
# for i in range(len(test_data)):
#     test_i = test_data.iloc[[i]]
#     pred = model_latest.predict(test_i)
#     pred = pred.tolist()
#     test_i_dict = test_i.iloc[0].to_dict()
#     test_i_dict['prediction'] = pred
#     test_i_dict['model_name'] = model_uri_latest.split('/')[1]
#     test_i_dict['model_version'] = latest_version
#     predictions.append(test_i_dict)

In [0]:

# for i in range(100):
#     test_i = test_data.iloc[[i]]
#     pred = model_x.predict(test_i)
#     pred = pred.tolist()
#     test_i_dict = test_i.iloc[0].to_dict()
#     test_i_dict['prediction'] = pred
#     test_i_dict['model_name'] = model_uri_x.split('/')[1]
#     test_i_dict['model_version'] = version_x
#     predictions.append(test_i_dict)

In [0]:
# final_table = spark.createDataFrame(predictions)
# display(final_table)

In [0]:
# final_table.write.format("delta").mode("overwrite").saveAsTable("ispl_databricks.model_logs.bd_evaluation")


In [0]:
# %sql
# select * from ispl_databricks.model_logs.bd_evaluation

In [0]:
model_info = mlflow.models.get_model_info(model_uri_latest)

In [0]:

feature_names = model_info.signature.inputs.input_names()

In [0]:
len(feature_names)

In [0]:
latest_prediction = model_latest.predict(test_data[feature_names])
production_prediction = production_model.predict(test_production_data)

In [0]:
latest_prediction 

In [0]:
def _labels_from_scores(predictions, threshold=0.5):
    """Convert raw model outputs into a list of 0/1 class labels.

    Args:
        predictions: Array-like of scores, either 1-D (one score per row) or
            2-D (one row per sample; the first column is used as the score).
        threshold: A score strictly greater than this value maps to class 1.

    Returns:
        list[int]: One 0/1 label per input row.
    """
    scores = np.asarray(predictions, dtype=float)
    if scores.ndim > 1:
        # Same column the original per-row `i[0]` lookup read.
        scores = scores[:, 0]
    return (scores > threshold).astype(int).tolist()


latest_result = _labels_from_scores(latest_prediction)
production_result = _labels_from_scores(production_prediction)

In [0]:
def _score_column(predictions):
    """Return the raw prediction scores as a 1-D float array (first column if 2-D)."""
    scores = np.asarray(predictions, dtype=float)
    return scores[:, 0] if scores.ndim > 1 else scores


# Ground truth and thresholded predictions for each model.
y_true_latest = test_target['target'].values
y_true_production = test_production_target['target'].values
y_pred_latest = np.array(latest_result)
y_pred_production = np.array(production_result)

latest_model_accuracy = accuracy_score(y_true_latest, y_pred_latest)
model_production_accuracy = accuracy_score(y_true_production, y_pred_production)

# ROC AUC is a ranking metric, so it is computed from the raw scores; feeding
# it the thresholded 0/1 labels collapses the curve to a single operating point.
latest_model_auc = roc_auc_score(y_true_latest, _score_column(latest_prediction))
model_production_auc = roc_auc_score(y_true_production, _score_column(production_prediction))

latest_model_precision = precision_score(y_true_latest, y_pred_latest)
model_production_precision = precision_score(y_true_production, y_pred_production)
latest_model_recall = recall_score(y_true_latest, y_pred_latest)
model_production_recall = recall_score(y_true_production, y_pred_production)

# Backward-compatible aliases for the original variable names.
model_x_precision = model_production_precision
model_x_recall = model_production_recall

In [0]:
# Side-by-side accuracy of the candidate and the current champion.
for metric_label, metric_value in (
    ("latest accuracy", latest_model_accuracy),
    ("model_production_accuracy", model_production_accuracy),
):
    print(metric_label, metric_value)


In [0]:
import json

In [0]:
json_dict = {
    "model_name": model_name,
    'new_accuracy': latest_model_accuracy,
    'new_auc': latest_model_auc,
    'latest_version':latest_version
    }

In [0]:
with open("/Workspace/Shared/ff_bd/model_approval.json", "w") as f:
    json.dump(json_dict, f)


In [0]:
if latest_model_accuracy > model_production_accuracy:
    notification = 'True'
else:
    notification = 'False'

In [0]:
dbutils.jobs.taskValues.set("trigger", notification)