In [3]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from deepchecks import Dataset
from deepchecks.checks import TrainTestFeatureDrift, ConfusionMatrixReport
from deepchecks.suites import full_suite

# Generate a synthetic dataset
from sklearn.datasets import make_classification

In [4]:
# Generate a dataset with 2 classes
X, y = make_classification(n_samples=1000, n_features=10, random_state=42)

In [5]:
# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [6]:
# Convert the datasets to pandas DataFrames
columns = [f"feature_{i}" for i in range(X.shape[1])]
train_df = pd.DataFrame(X_train, columns=columns)
test_df = pd.DataFrame(X_test, columns=columns)

In [7]:
# Add the target column
train_df['target'] = y_train
test_df['target'] = y_test

In [8]:
# Train a Random Forest Classifier
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)

In [10]:
# Wrap datasets into Deepchecks Dataset objects
dc_train = Dataset(train_df, label='target', cat_features=[])
dc_test = Dataset(test_df, label='target', cat_features=[])

In [11]:
# Run a single check: Train-Test Feature Drift
feature_drift_check = TrainTestFeatureDrift()
drift_result = feature_drift_check.run(dc_train, dc_test)
drift_result.show()


The TrainTestFeatureDrift check is deprecated and will be removed in the 0.14 version. Please use the FeatureDrift check instead



VBox(children=(HTML(value='<h4><b>Feature Drift</b></h4>'), HTML(value='<p>The TrainTestFeatureDrift check is …

In [13]:
# Wrap datasets into Deepchecks Dataset objects
dc_train = Dataset(train_df, label='target', cat_features=[])
dc_test = Dataset(test_df, label='target', cat_features=[])

In [14]:
# Run the Confusion Matrix Report
confusion_matrix_check = ConfusionMatrixReport()
confusion_matrix_result = confusion_matrix_check.run(dc_test, model=model)
confusion_matrix_result.show()


X has feature names, but RandomForestClassifier was fitted without feature names


X has feature names, but RandomForestClassifier was fitted without feature names



VBox(children=(HTML(value='<h4><b>Confusion Matrix Report</b></h4>'), HTML(value='<p>Calculate the confusion m…

In [15]:
# https://docs.deepchecks.com/0.18/tabular/auto_checks/model_evaluation/plot_confusion_matrix_report.html?utm_source=display_output&utm_medium=referral&utm_campaign=check_link

In [16]:
# Convert X_train and X_test to DataFrames with column names
X_train_df = pd.DataFrame(X_train, columns=columns)
X_test_df = pd.DataFrame(X_test, columns=columns)

In [17]:
# Train the model with feature names
model = RandomForestClassifier(random_state=42)
model.fit(X_train_df, y_train)

In [18]:
# Wrap datasets into Deepchecks Dataset objects
dc_train = Dataset(pd.concat([X_train_df, pd.Series(y_train, name='target')], axis=1), 
                   label='target', cat_features=[])
dc_test = Dataset(pd.concat([X_test_df, pd.Series(y_test, name='target')], axis=1), 
                  label='target', cat_features=[])

In [19]:
# Run the Confusion Matrix Report
confusion_matrix_check = ConfusionMatrixReport()
confusion_matrix_result = confusion_matrix_check.run(dc_test, model=model)
confusion_matrix_result.show()

VBox(children=(HTML(value='<h4><b>Confusion Matrix Report</b></h4>'), HTML(value='<p>Calculate the confusion m…