In [4]:
pwd

'/Users/sangeethgeorge/MyProjects/oncoai-patient-outcome-navigator'

In [5]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score, classification_report
from sklearn.preprocessing import StandardScaler
import shap
import joblib
import os
import matplotlib.pyplot as plt
import mlflow
import mlflow.sklearn

# --- Configuration ---
# Define the root directory of your project.
# This assumes the script is run from a location where 'data/', 'models/', 'shap_plots/'
# can be found relative to this project_root.
project_root = '/Users/sangeethgeorge/MyProjects/oncoai-patient-outcome-navigator'

# Define paths for data, models, and SHAP plots
data_file_path = os.path.join(project_root, "data", "onco_features_cleaned.parquet")
model_save_base_path = os.path.join(project_root, "models")
shap_plots_base_path = os.path.join(project_root, "shap_plots")
mlruns_path = os.path.join(project_root, "mlruns")

# Ensure necessary directories exist
os.makedirs(os.path.dirname(data_file_path), exist_ok=True)
os.makedirs(model_save_base_path, exist_ok=True)
os.makedirs(shap_plots_base_path, exist_ok=True)
os.makedirs(mlruns_path, exist_ok=True) 


# --- Function Definitions (remain mostly the same, with minor adjustments for MLflow) ---

def load_dataset(path="/Users/sangeethgeorge/MyProjects/oncoai-patient-outcome-navigator/data/onco_features_cleaned.parquet"):
    """
    Loads the dataset from the specified path.
    If the file doesn't exist, a dummy dataset is created for demonstration.
    """
    if not os.path.exists(path):
        print(f"Creating a dummy dataset at {path} for demonstration.")
        # Create a small dummy DataFrame with some numerical and categorical data
        data = {
            'icustay_id': range(100),
            'subject_id': range(100, 200),
            'hadm_id': range(200, 300),
            'admittime': pd.to_datetime(['2020-01-01'] * 100),
            'dob': pd.to_datetime(['1980-01-01'] * 100),
            'dod': pd.to_datetime(['2020-02-01'] * 100),
            'feature_A': np.random.rand(100),
            'feature_B': np.random.randint(0, 10, 100),
            'categorical_C': np.random.choice(['X', 'Y', 'Z'], 100),
            'mortality_30d': np.random.randint(0, 2, 100)
        }
        dummy_df = pd.DataFrame(data)
        dummy_df.to_parquet(path)
        print("Dummy dataset created.")
    df = pd.read_parquet(path)
    return df

def train_test_impute_split(df, label_col="mortality_30d"):
    """
    Splits the data into training and testing sets, and imputes missing values
    using the median from the training set.
    """
    df = df.drop(columns=['icustay_id', 'subject_id', 'hadm_id', 'admittime', 'dob', 'dod'], errors='ignore')
    y = df[label_col]
    X = df.drop(columns=[label_col])

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42, stratify=y
    )

    train_median = X_train.median(numeric_only=True)
    X_train = X_train.fillna(train_median)
    X_test = X_test.fillna(train_median)

    return X_train, X_test, y_train, y_test

def check_for_leakage(X, y):
    """
    Checks for potential data leakage by identifying highly correlated features
    with the target variable and removes them.
    """
    corr = pd.concat([X, y], axis=1).corr(numeric_only=True)[y.name].drop(y.name)
    high_corr = corr[abs(corr) > 0.95]
    if not high_corr.empty:
        print("\n⚠️ Potential Leakage Detected:")
        print(high_corr)
        X = X.drop(columns=high_corr.index)
    return X

# Modified to return y_pred and y_prob for logging metrics
def train_logistic_regression(X_train, y_train, X_test, y_test):
    """
    Trains a Logistic Regression model and evaluates its performance.
    Returns the model, original (scaled) X_train/X_test, y_train/y_test,
    and predictions/probabilities for logging.
    """
    model = LogisticRegression(max_iter=1000)
    model.fit(X_train, y_train)

    y_pred = model.predict(X_test)
    y_prob = model.predict_proba(X_test)[:, 1]

    print("\n🧠 Classification Report:")
    print(classification_report(y_test, y_pred))
    print("\n📊 ROC AUC Score:", roc_auc_score(y_test, y_prob))

    return model, X_train, y_train, X_test, y_test, y_pred, y_prob

# Renamed and adjusted to fit the MLflow flow, taking X_df for feature names
def explain_predictions(model, X_scaled_for_shap, X_df_for_shap, output_dir="shap_plots", top_n=10):
    """
    Generates SHAP plots to explain the model's predictions.
    Saves plots to the specified output directory.
    """
    os.makedirs(output_dir, exist_ok=True)
    if X_scaled_for_shap.shape[0] == 0:
        print("Skipping SHAP explanation: No data in X_scaled_for_shap for explanation.")
        return

    # Use the original DataFrame (X_df_for_shap) for feature names and display data
    explainer = shap.Explainer(model, X_scaled_for_shap, feature_names=X_df_for_shap.columns.tolist())
    shap_values = explainer(X_scaled_for_shap)
    shap_values.feature_names = X_df_for_shap.columns.tolist() # Ensure feature names are set

    # Assign display_data to shap_values object (important for some SHAP plots)
    # Ensure display_data has the same number of rows as shap_values.values
    if shap_values.values.shape[0] == X_df_for_shap.shape[0]:
        shap_values.display_data = X_df_for_shap.values
    else:
        print("Warning: Mismatch in row count between shap_values and X_df_for_shap. display_data not set.")


    print("\n📈 Generating overall SHAP Summary Plot...")
    plt.figure(figsize=(12, 8))
    shap.summary_plot(shap_values, features=X_df_for_shap, show=False) # Use X_df_for_shap for features
    plt.savefig(os.path.join(output_dir, "shap_summary_overall.png"), bbox_inches='tight')
    plt.close()
    print("✅ Overall SHAP Summary Plot generated.")

    abs_shap_means = np.abs(shap_values.values).mean(axis=0)
    # Get top 3 overall features for dependence plots
    top_overall_features_indices = np.argsort(abs_shap_means)[::-1][:min(3, len(X_df_for_shap.columns))]
    for feat_idx in top_overall_features_indices:
        top_feature_name = X_df_for_shap.columns[feat_idx]
        plt.figure(figsize=(8, 6))
        # Use X_df_for_shap for features argument in dependence_plot
        shap.dependence_plot(top_feature_name, shap_values.values, features=X_df_for_shap, feature_names=X_df_for_shap.columns.tolist(), show=False)
        plt.title(f"SHAP Dependence Plot: {top_feature_name}")
        plt.savefig(os.path.join(output_dir, f"shap_dependence_{top_feature_name}.png"), bbox_inches='tight')
        plt.close()

    risk_scores = model.predict_proba(X_scaled_for_shap)[:, 1]
    actual_top_n = min(top_n, X_scaled_for_shap.shape[0])
    top_indices = np.argsort(risk_scores)[-actual_top_n:][::-1] # Indices of top N highest risk patients

    for i_idx, original_index in enumerate(top_indices):
        # Original index refers to the index in the X_scaled_for_shap array
        patient_identifier = f"patient_idx_{original_index}_rank_{i_idx+1}"
        plt.figure(figsize=(10, 6))
        shap.plots.waterfall(shap_values[original_index], show=False)
        plt.title(f"SHAP Waterfall Plot for {patient_identifier}\nPredicted Risk: {risk_scores[original_index]:.4f}")
        plt.savefig(os.path.join(output_dir, f"waterfall_{patient_identifier}.png"), bbox_inches='tight')
        plt.close()

        # For the top N patients, generate dependence plots for their top 3 contributing features
        relevant_shap_values_patient = shap_values.values[original_index]
        top_patient_features_indices = np.argsort(np.abs(relevant_shap_values_patient))[::-1][:min(3, len(X_df_for_shap.columns))]

        for feat_idx in top_patient_features_indices:
            feat_name = X_df_for_shap.columns[feat_idx]
            plt.figure(figsize=(8, 6))
            shap.dependence_plot(feat_name, shap_values.values, features=X_df_for_shap, feature_names=X_df_for_shap.columns.tolist(), show=False)
            plt.title(f"SHAP Dependence Plot for {patient_identifier} - {feat_name}")
            plt.savefig(os.path.join(output_dir, f"dependence_{patient_identifier}_{feat_name}.png"), bbox_inches='tight')
            plt.close()
    print("✅ All SHAP plots generated and saved.")

def save_model(model, scaler, output_path):
    """
    Saves the trained model and scaler to a joblib file.
    """
    joblib.dump({"model": model, "scaler": scaler}, output_path)
    print(f"\n✅ Saved model and scaler to {output_path}")

# --- Jupyter Notebook Execution Flow ---

# Set up MLflow tracking
mlflow.set_experiment("OncoAI-Mortality-Prediction")

# Start an MLflow run
# The 'with' statement ensures the run is properly ended, even if errors occur.
with mlflow.start_run() as run:
    run_id = run.info.run_id
    
    # Make SHAP plots output directory specific to the run
    run_shap_output_dir = os.path.join(shap_plots_base_path, run_id)
    os.makedirs(run_shap_output_dir, exist_ok=True)
    
    # Define the full model save path for this run
    model_save_path_for_run = os.path.join(model_save_base_path, f"logreg_model_run_{run_id}.joblib")

    print(f"Starting MLflow Run with ID: {run_id}")
    print(f"SHAP plots will be saved to: {run_shap_output_dir}")
    print(f"Model will be saved to: {model_save_path_for_run}")

    # Log parameters to MLflow (common setup parameters)
    mlflow.log_param("test_size", 0.2)
    mlflow.log_param("random_state", 42)
    mlflow.log_param("stratify", "mortality_30d")
    mlflow.log_param("logistic_regression_max_iter", 1000)

    # 1. Load dataset
    df = load_dataset()

    if df.empty: # Check if DataFrame is empty
        print("❌ Dataset is empty. Cannot proceed with training and explanation. Exiting MLflow run.")
        mlflow.end_run(status="FAILED")
    else:
        # 2. Train-test split and imputation
        X_train, X_test, y_train, y_test = train_test_impute_split(df)
        print(f"X_train shape after split: {X_train.shape}")
        print(f"X_test shape after split: {X_test.shape}")

        # 3. One-hot encode categorical columns
        X_train_ohe = pd.get_dummies(X_train, drop_first=True)
        X_test_ohe = pd.get_dummies(X_test, drop_first=True)

        # Align columns after one-hot encoding to ensure same features in train/test
        missing_cols_in_test = set(X_train_ohe.columns) - set(X_test_ohe.columns)
        for c in missing_cols_in_test:
            X_test_ohe[c] = 0
        # Ensure the order of columns is the same
        X_test_ohe = X_test_ohe[X_train_ohe.columns]
        print("\nOne-hot encoding completed and columns aligned.")
        print(f"X_train_ohe shape: {X_train_ohe.shape}")
        print(f"X_test_ohe shape: {X_test_ohe.shape}")

        # 4. Check for data leakage on the one-hot encoded training data
        X_train_leakage_checked = check_for_leakage(X_train_ohe, y_train)

        # Apply the same column selection (after leakage check) to the test set
        X_test_leakage_checked = X_test_ohe[X_train_leakage_checked.columns]
        print("\nLeakage check completed and columns adjusted.")
        print(f"X_train_leakage_checked shape: {X_train_leakage_checked.shape}")
        print(f"X_test_leakage_checked shape: {X_test_leakage_checked.shape}")


        # 5. Scale features
        scaler = StandardScaler()
        X_train_scaled = scaler.fit_transform(X_train_leakage_checked)
        X_test_scaled = scaler.transform(X_test_leakage_checked)

        print("✅ Features prepared (one-hot encoded and scaled).")

        # Log parameters to MLflow
        mlflow.log_param("scaler_type", "StandardScaler")
        mlflow.log_param("model_type", "LogisticRegression")
        mlflow.log_param("num_features_after_preprocessing", X_train_scaled.shape[1])


        # 6. Train Logistic Regression model
        # The train_logistic_regression function now returns y_pred and y_prob
        model, X_train_final_scaled, y_train_final, X_test_final_scaled, y_test_final, y_pred, y_prob = \
            train_logistic_regression(X_train_scaled, y_train, X_test_scaled, y_test)

        # Log ROC AUC metric to MLflow
        auc = roc_auc_score(y_test_final, y_prob)
        mlflow.log_metric("roc_auc", auc)
        print(f"Logged ROC AUC to MLflow: {auc:.4f}")

        # You can also log other metrics from the classification report if desired
        # For example, to log precision, recall, f1-score for class 1 (positive class)
        report = classification_report(y_test_final, y_pred, output_dict=True)
        mlflow.log_metric("precision_1", report['1']['precision'])
        mlflow.log_metric("recall_1", report['1']['recall'])
        mlflow.log_metric("f1_score_1", report['1']['f1-score'])
        mlflow.log_metric("accuracy", report['accuracy'])


        # 7. Save model and scaler
        save_model(model, scaler, output_path=model_save_path_for_run)

        # 8. Log model with input_example for signature inference
        # Use X_train_leakage_checked (DataFrame) for input_example as it preserves column names
        if X_train_leakage_checked.shape[0] > 0:
            mlflow.sklearn.log_model(model, "logreg_model",
                                     input_example=X_train_leakage_checked.head(5)) # using .head(5) for a smaller example
        else:
            mlflow.sklearn.log_model(model, "logreg_model")
        print("Model logged to MLflow with signature.")


        # 9. Explain with SHAP
        # Pass X_test_scaled (numpy array) and X_test_leakage_checked (DataFrame)
        # X_test_leakage_checked is critical for feature names and display data for SHAP.
        explain_predictions(model, X_test_scaled, X_test_leakage_checked, output_dir=run_shap_output_dir)
        print("\nSHAP explanations generated and plots saved.")

        # 10. Log SHAP plots as MLflow artifacts
        shap_plot_files = [f for f in os.listdir(run_shap_output_dir) if f.endswith('.png')]
        for plot_file in shap_plot_files:
            mlflow.log_artifact(os.path.join(run_shap_output_dir, plot_file), artifact_path="shap_plots")
        print(f"Logged {len(shap_plot_files)} SHAP plots as MLflow artifacts.")

print("\n✨ MLflow run completed successfully. Check your MLflow UI for details.")

Starting MLflow Run with ID: eafc352555634fb1885751da6afba93b
SHAP plots will be saved to: /Users/sangeethgeorge/MyProjects/oncoai-patient-outcome-navigator/shap_plots/eafc352555634fb1885751da6afba93b
Model will be saved to: /Users/sangeethgeorge/MyProjects/oncoai-patient-outcome-navigator/models/logreg_model_run_eafc352555634fb1885751da6afba93b.joblib
X_train shape after split: (3264, 13)
X_test shape after split: (817, 13)

One-hot encoding completed and columns aligned.
X_train_ohe shape: (3264, 226)
X_test_ohe shape: (817, 226)

Leakage check completed and columns adjusted.
X_train_leakage_checked shape: (3264, 226)
X_test_leakage_checked shape: (817, 226)


DTypePromotionError: The DType <class 'numpy.dtypes.DateTime64DType'> could not be promoted by <class 'numpy.dtypes.Float64DType'>. This means that no common DType exists for the given inputs. For example they cannot be stored in a single array unless the dtype is `object`. The full list of DTypes is: (<class 'numpy.dtypes.DateTime64DType'>, <class 'numpy.dtypes.DateTime64DType'>, <class 'numpy.dtypes.Float64DType'>, <class 'numpy.dtypes.Float64DType'>, <class 'numpy.dtypes.Float64DType'>, <class 'numpy.dtypes.Float64DType'>, <class 'numpy.dtypes.Float64DType'>, <class 'numpy.dtypes.Float64DType'>, <class 'numpy.dtypes.Float64DType'>, <class 'numpy.dtypes.Float64DType'>, <class 'numpy.dtypes.Float64DType'>, <class 'numpy.dtypes.Float64DType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>, <class 'numpy.dtypes.BoolDType'>)