In [1]:
import pandas as pd
import numpy as np
import os

from evidently import ColumnMapping

from evidently.report import Report
from evidently.metrics.base_metric import generate_column_metrics
from evidently.metric_preset import DataDriftPreset, TargetDriftPreset, DataQualityPreset, ClassificationPreset
from evidently.metrics import *

from evidently.test_suite import TestSuite
from evidently.tests.base_test import generate_column_tests
from evidently.test_preset import DataStabilityTestPreset, NoTargetPerformanceTestPreset, MulticlassClassificationTestPreset
from evidently.tests import *

import tensorflow as tf
import tensorflow_decision_forests as tfdf

2024-01-30 15:50:26.901525: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-01-30 15:50:26.937988: I external/local_tsl/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.
2024-01-30 15:50:27.119518: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-01-30 15:50:27.119723: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-01-30 15:50:27.150272: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to

In [2]:
# Show the kernel's current working directory before changing it.
os.getcwd()

'/home/tejas/MLProj/Thyroid-Disease-Prediction/research'

In [3]:
# The notebook is started from research/, while every artifacts/... path used
# below is relative to the parent directory. The guard keeps this cell
# idempotent: re-running it must not climb one more directory level.
if os.path.basename(os.getcwd()) == "research":
    os.chdir("../")

In [4]:
# Confirm the working directory after the move.
os.getcwd()

'/home/tejas/MLProj/Thyroid-Disease-Prediction'

In [5]:
# Read the train and test CSVs written under artifacts/data_transformation.
train_df, test_df = (
    pd.read_csv(f"artifacts/data_transformation/{split}.csv")
    for split in ("train", "test")
)

In [6]:
# Name of the label column; everything else in each frame is treated as a feature.
target = "target"
X_train, y_train = train_df.drop(columns=target), train_df[target]
X_test, y_test = test_df.drop(columns=target), test_df[target]

In [7]:
# Load the saved model, then wrap the feature frames (no label column passed)
# as TensorFlow datasets so they can be fed to model.predict.
model = tf.keras.models.load_model("artifacts/model_trainer/model")
X_train_data, X_test_data = (
    tfdf.keras.pd_dataframe_to_tf_dataset(features)
    for features in (X_train, X_test)
)

2024-01-30 15:50:33.311652: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:901] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2024-01-30 15:50:33.314499: W tensorflow/core/common_runtime/gpu/gpu_device.cc:2256] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.
Skipping registering GPU devices...
[INFO 24-01-30 15:50:34.8932 IST kernel.cc:1233] Loading model from path artifacts/model_trainer/model/assets/ with prefix cf121ea7a8bd4631
[INFO 24-01-30 15:50:34.9045 IST decision_forest.cc:660] Model loaded with 111 root(s), 4117 node(s), and 14 input feature(s).
[INF









In [8]:
# Run the model on the test dataset and keep, for every output row, the index
# of its largest value as the predicted label.
preds = model.predict(X_test_data)
y_test_preds = list(map(np.argmax, preds))



In [9]:
# Same procedure for the training dataset (note: `preds` is rebound here, so it
# no longer holds the test-set outputs after this cell).
preds = model.predict(X_train_data)
y_train_preds = list(map(np.argmax, preds))

1/6 [====>.........................] - ETA: 0s



In [10]:
# Store the predicted labels next to the features/target in both frames
# (in-place column assignment on train_df and test_df).
train_df["prediction"], test_df["prediction"] = y_train_preds, y_test_preds

In [11]:
# Partition the training columns by dtype: object columns vs. everything else.
cat_col, num_col = (
    train_df.select_dtypes(include=["object"]).columns,
    train_df.select_dtypes(exclude=["object"]).columns,
)

In [12]:
# 'target' and 'prediction' are not input features, so remove them from the
# numeric column list. errors="ignore" keeps the cell re-runnable: on a second
# execution the labels are already gone and a plain drop() would raise KeyError.
num_col = num_col.drop(['target', 'prediction'], errors="ignore")

In [13]:
# Display the numeric columns left after removing 'target' and 'prediction'.
num_col

Index(['age', 'TSH', 'T3', 'TT4', 'T4U', 'FTI'], dtype='object')

In [14]:
column_mapping = ColumnMapping()
column_mapping.target = target
column_mapping.prediction = "predictions"
column_mapping.numerical_features = cat_col
column_mapping.categorical_features = num_col

In [15]:
report = Report(metrics=[ColumnSummaryMetric(column_name='T3'),
                         ColumnSummaryMetric(column_name='TSH'),
                         ColumnSummaryMetric(column_name='T4U'),
                         ColumnSummaryMetric(column_name='TT4'),
                         generate_column_metrics(ColumnQuantileMetric, parameters={
                                                 'quantile': 0.25}, columns='num'),
                         DataDriftPreset(),
                         TargetDriftPreset(),
                         DataQualityPreset(),
                         ClassificationPreset()
])

report.run(reference_data=train_df, current_data=test_df)
# report.save_html('drift.html')

In [16]:
# Data-quality, drift and classification checks run as an Evidently test suite.
tests = TestSuite(tests=[
    TestNumberOfColumnsWithMissingValues(),
    TestNumberOfRowsWithMissingValues(),
    TestNumberOfConstantColumns(),
    TestNumberOfDuplicatedRows(),
    TestNumberOfDuplicatedColumns(),
    TestColumnsType(),
    TestNumberOfDriftedColumns(),
    DataStabilityTestPreset(),
    NoTargetPerformanceTestPreset(),
    MulticlassClassificationTestPreset()
])

# The test split is the "current" data. Passing train_df on both sides would
# compare the training data with itself, so the comparison-based tests could
# not detect anything.
tests.run(reference_data=train_df, current_data=test_df)
# tests.save_html('test.html')

In [57]:
# Pick the dataset-level drift entry by its metric name instead of a fixed list
# position: the position shifts whenever metrics are added to the report or the
# number of numerical columns changes.
dataset_drift = next(
    entry
    for entry in report.as_dict()["metrics"]
    if entry["metric"] == "DatasetDriftMetric"
)
# One row per key of the metric's "result" dict; the constant "metric" name
# column is dropped from the table.
pd.DataFrame(dataset_drift).drop("metric", axis=1).rename_axis("Drift Metric", axis=1)

Drift Metric,result
drift_share,0.5
number_of_columns,23
number_of_drifted_columns,0
share_of_drifted_columns,0.0
dataset_drift,False
