# In [None]:
# Setup: deepchecks must be installed before this file is run, e.g. from a
# shell or a dedicated notebook cell:
#     pip install deepchecks
# (The `!pip ...` shell escape is IPython-only syntax and is a SyntaxError in
# plain Python, so the install step is kept out of the import block.)
import pandas as pd
from sklearn.model_selection import train_test_split
from deepchecks.tabular import Dataset
# The tabular suites live next to the tabular Dataset, in
# `deepchecks.tabular.suites`, not in a top-level `deepchecks.suites` module.
from deepchecks.tabular.suites import data_integrity, train_test_validation

# Load the dataset manually (replace with the actual path)
file_path = "adult.csv"  # Update this with your dataset file path
column_names = [
    'age', 'workclass', 'fnlwgt', 'education', 'education-num', 'marital-status',
    'occupation', 'relationship', 'race', 'sex', 'capital-gain', 'capital-loss',
    'hours-per-week', 'native-country', 'income'
]

# Read the CSV file. The column names are supplied explicitly, so the first
# line of the file is read as data.
# NOTE(review): assumes the file has no header row -- confirm; if it does,
# pass `header=0` as well so the header is not ingested as a record.
# `skipinitialspace=True` drops the blank that follows each delimiter.
df = pd.read_csv(file_path, names=column_names, skipinitialspace=True)

# Columns that are handed to deepchecks as categorical features.
cat_features = ['workclass', 'education', 'marital-status', 'occupation',
                'relationship', 'race', 'sex', 'native-country']

# Cast the categorical columns to str, but leave missing cells untouched.
# A plain `astype(str)` can turn NaN into the literal string 'nan', which
# would hide missing values from the data-integrity checks run later on.
# When a column has no missing values the result equals `astype(str)`.
df[cat_features] = df[cat_features].apply(
    lambda col: col.where(col.isna(), col.astype(str))
)

# Target column first, then every remaining column as the feature matrix.
y = df['income']
X = df.drop('income', axis=1)

# Hold out 20% of the rows for testing; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Wrap each partition in a Deepchecks Dataset (train first, then test),
# passing the label series and the shared list of categorical columns.
train_ds, test_ds = (
    Dataset(features, label=target, cat_features=cat_features)
    for features, target in ((X_train, y_train), (X_test, y_test))
)

# Instantiate both suites up front; they are only executed further below.
integrity_suite = data_integrity()
validation_suite = train_test_validation()

# Data Integrity Suite: executed against the training dataset only.
integrity_result = integrity_suite.run(train_dataset=train_ds)
integrity_result.show()

# Train-Test Validation Suite: executed against the train/test pair.
validation_result = validation_suite.run(
    train_dataset=train_ds,
    test_dataset=test_ds,
)
validation_result.show()
