In [1]:
from typing import Dict, Any, List
import sqlite3
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from causalnex.structure.notears import from_pandas
from sklearn.preprocessing import StandardScaler

with sqlite3.connect("data/bank.db") as conn:
    df = pd.read_sql_query("SELECT * FROM customer_data", conn)

# Encode categorical features
for col in df.select_dtypes(include="object").columns:
    df[col] = df[col].astype("category").cat.codes

scaler = StandardScaler()
df_scaled = pd.DataFrame(scaler.fit_transform(df), columns=df.columns)
df_scaled = df_scaled.fillna(0)

sm = from_pandas(df_scaled, w_threshold=0.01, max_iter=1000)
sm.remove_edges_below_threshold(0.01)
dot_str = sm.to_dot()

AttributeError: 'StructureModel' object has no attribute 'to_dot'

In [2]:
sm

<causalnex.structure.structuremodel.StructureModel at 0x16a17148d60>

In [3]:
dot_str = "digraph {\n"
for src, dst in sm.edges():
    dot_str += f"  {src} -> {dst};\n"
dot_str += "}"
print(dot_str)

digraph {
  income -> age;
  income -> education;
  income -> branch_visits;
  income -> channel_preference;
  income -> promotion_offer;
  income -> customer_engagement;
  education -> age;
  education -> channel_preference;
  education -> promotion_offer;
  branch_visits -> age;
  branch_visits -> education;
  branch_visits -> channel_preference;
  branch_visits -> region_code;
  branch_visits -> promotion_offer;
  channel_preference -> age;
  promotion_offer -> age;
  promotion_offer -> region_code;
  customer_engagement -> age;
  customer_engagement -> education;
  customer_engagement -> branch_visits;
  customer_engagement -> channel_preference;
  customer_engagement -> promotion_offer;
  activated_ib -> age;
  activated_ib -> income;
  activated_ib -> education;
  activated_ib -> branch_visits;
  activated_ib -> channel_preference;
  activated_ib -> region_code;
  activated_ib -> promotion_offer;
  activated_ib -> customer_engagement;
}


In [4]:
sm.edges()

OutEdgeView([('income', 'age'), ('income', 'education'), ('income', 'branch_visits'), ('income', 'channel_preference'), ('income', 'promotion_offer'), ('income', 'customer_engagement'), ('education', 'age'), ('education', 'channel_preference'), ('education', 'promotion_offer'), ('branch_visits', 'age'), ('branch_visits', 'education'), ('branch_visits', 'channel_preference'), ('branch_visits', 'region_code'), ('branch_visits', 'promotion_offer'), ('channel_preference', 'age'), ('promotion_offer', 'age'), ('promotion_offer', 'region_code'), ('customer_engagement', 'age'), ('customer_engagement', 'education'), ('customer_engagement', 'branch_visits'), ('customer_engagement', 'channel_preference'), ('customer_engagement', 'promotion_offer'), ('activated_ib', 'age'), ('activated_ib', 'income'), ('activated_ib', 'education'), ('activated_ib', 'branch_visits'), ('activated_ib', 'channel_preference'), ('activated_ib', 'region_code'), ('activated_ib', 'promotion_offer'), ('activated_ib', 'custo

In [7]:
from typing import Dict, Any, List
from langchain_core.tools import tool
from langchain_openai import ChatOpenAI
import json
import sqlite3
import pandas as pd
from dowhy import CausalModel
from sklearn.preprocessing import LabelEncoder
from causaltune import CausalTune
from causaltune.data_utils import CausalityDataset
from causaltune.dataset_processor import CausalityDatasetProcessor
from causalnex.structure.notears import from_pandas
from sklearn.preprocessing import StandardScaler
from causalnex.structure import StructureModel
from causalnex.structure.notears import from_numpy
from langgraph.graph import MessagesState
from langchain_core.messages import HumanMessage

with sqlite3.connect("data/bank.db") as conn:
    df = pd.read_sql_query("SELECT * FROM customer_data", conn)
for col in df.select_dtypes(include="object").columns:
    df[col] = df[col].astype(str)
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])

cd = CausalityDataset(data=df, treatment="promotion_offer", outcomes=["activated_ib"],
                      common_causes=['age', 'income', 'education', 'branch_visits', 'channel_preference', 'region_code', 'customer_engagement'])
cd.preprocess_dataset()

estimators = ["SLearner", "TLearner", "XLearner"]
base_learners = ["random_forest", "neural_network"]

ct = CausalTune(
    estimator_list=estimators,
    metric="energy_distance",
    verbose=1,
    components_time_budget=10, # in seconds trial for each model
    outcome_model="auto",
)

# run causaltune
ct.fit(data=cd, outcome=cd.outcomes[0])

# return best estimator
print(f"Best estimator: {ct.best_estimator}")
# config of best estimator:
print(f"Best config: {ct.best_config}")
# best score:
print(f"Best score: {ct.best_score}")

Fitting a Propensity-Weighted scoring estimator to be used in scoring tasks
Propensity Model Fitted Successfully
Error instantiating catboost: No module named 'catboost'
Error instantiating catboost: No module named 'catboost'
[flaml.tune.tune: 06-28 19:30:12] {811} INFO - trial 1 config: {'estimator': {'estimator_name': 'backdoor.econml.metalearners.SLearner'}, 'outcome_estimator': {'n_estimators': 100, 'max_features': 1.0, 'max_leaves': 4, 'estimator_name': 'random_forest'}}



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/

[flaml.tune.tune: 06-28 19:30:15] {811} INFO - trial 2 config: {'estimator': {'estimator_name': 'backdoor.econml.metalearners.SLearner'}, 'outcome_estimator': {'alpha': 0.09999999999999991, 'fit_intercept': True, 'eps': 2.220446049250313e-16, 'estimator_name': 'lasso_lars'}}


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
The default of 'normalize' will be set to False in version 1.2 and deprecated in version 1.4.
If you wish to scale the data, use Pipeline with a StandardScaler in a preprocessing stage. To reproduce the previous behavior:

from sklearn.pipeline import make_pipeline

model = make_pipeline(StandardScaler(with_mean=False), LassoLars())

If you wish to pass a sample_weight parameter, you need to pass it as a fit parameter to each step of the pipeline as follows:

kwargs = {s[0] + '__sample_weight': sample_weight for s in model.steps}
model.fit(X, y, **kwargs)

Set parameter alpha to: original_alpha * np.sqrt(n_samples). 


[flaml.tune.tune: 06-28 19:30:16] {811} INFO - trial 3 config: {'estimator': {'estimator_name': 'backdoor.econml.metalearners.SLearner'}, 'outcome_estimator': {'n_estimators': 100, 'num_leaves': 4, 'min_child_samples': 20, 'learning_rate': 0.09999999999999995, 'log_max_bin': 8, 'colsample_bytree': 1.0, 'reg_alpha': 0.0009765625, 'reg_lambda': 1.0, 'estimator_name': 'lgbm'}}


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


[flaml.tune.tune: 06-28 19:30:19] {811} INFO - trial 4 config: {'estimator': {'estimator_name': 'backdoor.econml.metalearners.TLearner'}, 'outcome_estimator': {'alpha': 0.09999999999999991, 'l1_ratio': 0.5, 'selection': 'cyclic', 'estimator_name': 'elastic_net'}}


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().


KeyboardInterrupt: 

In [3]:
from typing import Dict, Any, List
from langchain_core.tools import tool
from langchain_openai import ChatOpenAI
import json
import sqlite3
import pandas as pd
from dowhy import CausalModel
from sklearn.preprocessing import LabelEncoder
from causaltune import CausalTune
from causaltune.data_utils import CausalityDataset
from causaltune.dataset_processor import CausalityDatasetProcessor
from causalnex.structure.notears import from_pandas
from sklearn.preprocessing import StandardScaler
from causalnex.structure import StructureModel
from causalnex.structure.notears import from_numpy
from langgraph.graph import MessagesState
from langchain_core.messages import HumanMessage

with sqlite3.connect("data/bank.db") as conn:
    df = pd.read_sql_query("SELECT * FROM customer_data", conn)
for col in df.select_dtypes(include="object").columns:
    df[col] = df[col].astype(str)
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])

model = CausalModel(
    data=df,
    treatment="promotion_offer",
    outcome="activated_ib",
    common_causes=['age', 'income', 'education', 'branch_visits', 'channel_preference', 'region_code', 'customer_engagement'],
    instruments=None,
    effect_modifiers=None,
)
identified_estimand = model.identify_effect()
estimate = model.estimate_effect(identified_estimand, method_name="backdoor.linear_regression")


# Run refutation methods
refute_results = []
refute_methods = [
    "placebo_treatment_refuter",
    "random_common_cause",
    "data_subset_refuter"
]
for method in refute_methods:
    refute = model.refute_estimate(identified_estimand, estimate, method_name=method)
    refute_results.append({"method": method, "result": str(refute)})

# print(refute_results)

pass_test = all("fail" not in r["result"].lower() for r in refute_results)

In [4]:
refute_results

[{'method': 'placebo_treatment_refuter',
  'result': 'Refute: Use a Placebo Treatment\nEstimated effect:0.23825513913622448\nNew effect:6.943301029743054e-05\np value:0.96\n'},
 {'method': 'random_common_cause',
  'result': 'Refute: Add a random common cause\nEstimated effect:0.23825513913622448\nNew effect:0.23822851960244631\np value:0.8999999999999999\n'},
 {'method': 'data_subset_refuter',
  'result': 'Refute: Use a subset of data\nEstimated effect:0.23825513913622448\nNew effect:0.23826882687612905\np value:0.96\n'}]

In [5]:
pass_test

True