In [2]:
# Report the Python version of the interpreter that is running this kernel.
# A `!python` shell escape launches whichever `python` is first on PATH, which
# is not guaranteed to be the kernel's interpreter, so query the running
# process instead. The printed format matches `python --version`.
import platform

kernel_python_version = platform.python_version()
print(f"Python {kernel_python_version}")

Python 3.10.5


In [5]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
from deepchecks.tabular import Dataset
from deepchecks.tabular.suites import full_suite

In [6]:
# Download the Titanic CSV from the URL below and parse it into a DataFrame.
url = "https://raw.githubusercontent.com/datasciencedojo/datasets/master/titanic.csv"
titanic = pd.read_csv(filepath_or_buffer=url)

In [7]:
# Preprocess the data
# Drop the columns this analysis does not use.
# `titanic` is rebound to the reduced frame, so a second execution of this
# cell would look for columns that are already gone and raise KeyError;
# errors='ignore' keeps the cell safe to re-run.
# NOTE(review): this also tolerates a column missing from the upstream file.
titanic = titanic.drop(
    columns=['PassengerId', 'Name', 'Ticket', 'Cabin'],
    errors='ignore',
)

In [8]:
# Impute missing values in one pass: 'Age' gets the column median and
# 'Embarked' gets the fixed code 'S'.
# NOTE(review): 'S' is presumably the most frequent port — confirm.
titanic = titanic.fillna({
    'Age': titanic['Age'].median(),
    'Embarked': 'S',
})

In [9]:
# Encode categorical variables as integer codes.
# Each column gets its own LabelEncoder, stored in `label_encoders`, so the
# code -> category mapping (`classes_`) stays available for both columns.
# Refitting a single shared encoder keeps only the mapping of the column
# that was fitted last.
label_encoders = {}
for column in ('Sex', 'Embarked'):
    label_encoder = LabelEncoder()
    titanic[column] = label_encoder.fit_transform(titanic[column])
    label_encoders[column] = label_encoder
# After the loop `label_encoder` refers to the 'Embarked' encoder (the last one fitted).

In [11]:
# Separate the 'Survived' target from the remaining feature columns, then
# hold out 30% of the rows as a test set. The fixed random_state makes the
# split repeatable.
y = titanic['Survived']
X = titanic.drop(columns=['Survived'])
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

In [12]:
# Fit a random-forest classifier on the training split; random_state is
# fixed so repeated runs build the same forest.
model = RandomForestClassifier(random_state=42)
model.fit(X=X_train, y=y_train)

In [13]:
# Build the training frame that is later wrapped as a Deepchecks Dataset:
# the feature columns (in the order recorded in `columns`) plus the
# index-aligned 'Survived' label.
columns = X.columns.tolist()
train_df = X_train.reindex(columns=columns).assign(Survived=y_train)

In [14]:
# Build the held-out frame: feature columns listed in `columns` plus the
# index-aligned 'Survived' label.
test_df = X_test.reindex(columns=columns).assign(Survived=y_test)

In [15]:
# Wrap both splits as Deepchecks Dataset objects with identical settings:
# 'Survived' is the label and 'Sex' / 'Embarked' are declared categorical.
# The training frame is wrapped first, then the test frame.
dc_train, dc_test = (
    Dataset(split_df, label='Survived', cat_features=['Sex', 'Embarked'])
    for split_df in (train_df, test_df)
)

In [16]:
# Build the full Deepchecks suite and run it against the train/test
# datasets and the fitted model.
# The deprecation messages recorded under this cell are emitted while the
# suite runs; the cells visible in this notebook do not themselves call
# `applymap` or assign the value 'Other'.
suite = full_suite()
suite_result = suite.run(
    train_dataset=dc_train,
    test_dataset=dc_test,
    model=model,
)


Setting an item of incompatible dtype is deprecated and will raise in a future error of pandas. Value 'Other' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.


Setting an item of incompatible dtype is deprecated and will raise in a future error of pandas. Value 'Other' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.


DataFrame.applymap has been deprecated. Use DataFrame.map instead.



In [17]:
# Save the suite results as an HTML report and print where it was written.
# NOTE(review): save_as_html is documented to return the name of the file it
# created, and that name can differ from the requested one when the file
# already exists (e.g. on a re-run) — confirm against the installed
# Deepchecks version. The returned name is reported so the message stays
# accurate; fall back to the requested name if nothing is returned.
report_file = 'titanic_analysis_report.html'
saved_report = suite_result.save_as_html(report_file) or report_file
print(f"Deepchecks analysis report saved to '{saved_report}'")

Deepchecks analysis report saved to 'titanic_analysis_report.html'


In [18]:
# Display the results in the notebook or terminal.
# In the recorded run below, the output appears as the text repr of an
# Accordion widget whose first child is the "Full Suite" summary.
suite_result.show()

Accordion(children=(VBox(children=(HTML(value='\n<h1 id="summary_EKL0D0IU0T3X3W4TN4V3CMXRK">Full Suite</h1>\n<…