In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from dowhy import CausalModel
import networkx as nx

# --- Step 1: simulate a confounded data set ---------------------------------
# X feeds into both T and Y, so it is a common cause of treatment and outcome.
# By construction the coefficient of T in Y is 3.
np.random.seed(42)
N = 1000
X = np.random.normal(loc=0, scale=1, size=N)
treatment_noise = np.random.normal(loc=0, scale=1, size=N)
T = 2 * X + treatment_noise
outcome_noise = np.random.normal(loc=0, scale=1, size=N)
Y = 3 * T + 1.5 * X + outcome_noise
data = pd.DataFrame(dict(X=X, T=T, Y=Y))

# --- Step 2: declare the causal model ---------------------------------------
# No explicit graph is passed; the roles of the three columns are given by name.
model = CausalModel(data=data, treatment='T', outcome='Y', common_causes=['X'])

# --- Step 3: draw the causal graph ------------------------------------------
# Reaches into the model's private attributes to get the NetworkX graph object.
nx_graph = model._graph._graph

plt.figure(figsize=(8, 5))
pos = nx.spring_layout(nx_graph)
draw_options = dict(
    with_labels=True,
    node_color="lightblue",
    font_size=10,
    font_weight="bold",
    edge_color="gray",
)
nx.draw(nx_graph, pos, **draw_options)
plt.title("Causal Graph")
plt.show()

# --- Step 4: identify the estimand ------------------------------------------
identified_estimand = model.identify_effect()
print("Identified estimand:")
print(identified_estimand)

# --- Step 5: estimate the effect with the backdoor linear-regression method --
estimate = model.estimate_effect(
    identified_estimand, method_name="backdoor.linear_regression"
)
print("Estimated causal effect:", estimate.value)

# --- Step 6: refute the estimate with the placebo-treatment refuter ----------
refutation = model.refute_estimate(
    identified_estimand, estimate, method_name="placebo_treatment_refuter"
)
print(refutation)


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from dowhy import CausalModel
import networkx as nx

# 1. Create synthetic data
# X is a common cause of T and Y. By construction the coefficient of T in Y
# is 3, which is the value the estimate in step 5 can be compared against.
np.random.seed(42)
N = 1000
X = np.random.normal(0, 1, N)
T = 2 * X + np.random.normal(0, 1, N)  # Treatment influenced by X
Y = 3 * T + 1.5 * X + np.random.normal(0, 1, N)  # Outcome influenced by T and X
data = pd.DataFrame({'X': X, 'T': T, 'Y': Y})

# 2. Define the causal model: T is treatment, Y is outcome, X is common cause.
model = CausalModel(
    data=data,
    treatment='T',
    outcome='Y',
    common_causes=['X']
)

# 3. Visualize the underlying NetworkX causal graph
# NOTE(review): `_graph._graph` is a private attribute chain; confirm it still
# exists in the installed DoWhy version.
nx_graph = model._graph._graph  # underlying NetworkX DiGraph
plt.figure(figsize=(8, 5))
pos = nx.spring_layout(nx_graph)
nx.draw(
    nx_graph,
    pos,
    with_labels=True,
    node_color="lightblue",
    font_size=10,
    font_weight="bold",
    edge_color="gray"
)
plt.title("Causal Graph")
plt.show()

# 4. Identify the causal effect
identified_estimand = model.identify_effect()
print("Identified estimand:")
print(identified_estimand)

# 5. Estimate the causal effect using linear regression
estimate = model.estimate_effect(identified_estimand, method_name="backdoor.linear_regression")
print("Estimated causal effect:", estimate.value)

# 6. Refute the estimate using a placebo treatment refuter
refutation = model.refute_estimate(identified_estimand, estimate, method_name="placebo_treatment_refuter")
print(refutation)

# 7. (Optional) Use bootstrapping to compute confidence intervals
# `confidence_intervals=True` only enables interval computation. When no method
# is given, DoWhy uses the estimator's own interval implementation if it has
# one (the regression model's interval here), which would make the
# "Bootstrap-based" label below wrong. Request the bootstrap explicitly.
estimate_bootstrap = model.estimate_effect(
    identified_estimand,
    method_name="backdoor.linear_regression",
    confidence_intervals=True
)
print(
    "Bootstrap-based CI:",
    estimate_bootstrap.get_confidence_intervals(method="bootstrap")
)


In [None]:
# Exoplanet example
# Goal: estimate the causal effect of system age (the treatment) on exoplanet
# radius (the outcome). Age is assumed to have a causal effect on radius.
# Plan:
#   1. Identify the causal effect using the backdoor criterion.
#   2. Estimate the causal effect using the linear regression method.
#   3. Refute the estimate using the placebo treatment refuter.
#   4. Compute confidence intervals using the bootstrapping method.


In [None]:
# Draft causal model for the exoplanet question: treatment is the `st_age`
# column, outcome is the `pl_rade` column.
# NOTE(review): `common_causes=['X']` looks like a leftover from the synthetic
# demo above; no exoplanet column named 'X' appears anywhere in this notebook.
# NOTE(review): the only `data` frame built before this cell has columns
# X, T, Y. The `st_age` / `pl_rade` columns belong to `df`, which is fetched in
# a later cell, so this cell presumably cannot run top-to-bottom. Confirm
# whether it should be removed or moved after the download.
model = CausalModel(
    data=data,
    treatment='st_age',
    outcome='pl_rade',
    common_causes=['X']
)

In [None]:
# Assumed causal structure for the exoplanet analysis, written as a DOT graph.
# Node ids are single letters (A, Y, M, R, F, G, T, P); the labels give their
# meaning. Direct edges into the outcome Y come from A, F, R and P.
# NOTE(review): this cell needs `data` to have columns named after the node ids
# (at least 'A' and 'Y'). In top-to-bottom order `data` is still the synthetic
# X/T/Y frame here; the frame with A..Y columns is only built in a later cell.
# The same graph/model code also appears twice more further down the notebook.
graph = """
digraph {
  A [label="System Age"];
  Y [label="Exoplanet Size"];
  M [label="Stellar Mass"];
  R [label="Stellar Radius"];
  F [label="Stellar Metallicity"];
  G [label="Stellar Surface Gravity"];
  T [label="Stellar Effective Temperature"];
  P [label="Orbital Period"];

  A -> F;
  A -> R;
  A -> T;
  M -> R;
  M -> G;
  M -> T;
  F -> R;
  R -> G;
  A -> Y;
  F -> Y;
  R -> Y;
  P -> Y;
}
"""

model = CausalModel(
    data=data,      # pandas DataFrame; must already use the A/Y/... column names
    treatment='A',  # graph node / column for system age
    outcome='Y',    # graph node / column for exoplanet size
    graph=graph
)

# Render the graph; "causal_model" is passed as the output file name.
model.view_model(layout="dot", size=(8, 6), file_name="causal_model")


In [None]:
from astroquery.ipac.nexsci.nasa_exoplanet_archive import NasaExoplanetArchive
import pandas as pd

# Retrieve all columns of the pscomppars table from the NASA Exoplanet Archive.
# `query_criteria` does not accept a raw ADQL string: it needs the table name
# as `table`, with optional criteria such as `select` / `where`. The previous
# call passed only `query=...`, which leaves the required `table` argument
# missing.
result = NasaExoplanetArchive.query_criteria(table="pscomppars", select="*")

# Convert the returned Astropy table to a pandas DataFrame
df = result.to_pandas()

# Print out the column names so the ones needed downstream can be picked
print(df.columns.tolist())


In [None]:
import pyvo
import pandas as pd

# Open a client for the NASA Exoplanet Archive TAP endpoint.
tap_service = pyvo.dal.TAPService(
    "https://exoplanetarchive.ipac.caltech.edu/TAP"
)

# Ask for every column of the pscomppars table.
query = "SELECT * FROM pscomppars"
results = tap_service.search(query)

# TAP result -> Astropy Table -> pandas DataFrame.
astropy_table = results.to_table()
df = astropy_table.to_pandas()

# List the available column names.
print(list(df.columns))


In [None]:
# Assumed causal structure for the exoplanet analysis, written as a DOT graph
# (node ids A, Y, M, R, F, G, T, P; labels give their meaning).
# NOTE(review): this is a verbatim repeat of an earlier cell, and the same
# graph/model is built again further down. At this point `df` has been
# downloaded, but the `data` frame with A..Y column names is only created in a
# later cell. Confirm the intended execution order, or drop this copy.
graph = """
digraph {
  A [label="System Age"];
  Y [label="Exoplanet Size"];
  M [label="Stellar Mass"];
  R [label="Stellar Radius"];
  F [label="Stellar Metallicity"];
  G [label="Stellar Surface Gravity"];
  T [label="Stellar Effective Temperature"];
  P [label="Orbital Period"];

  A -> F;
  A -> R;
  A -> T;
  M -> R;
  M -> G;
  M -> T;
  F -> R;
  R -> G;
  A -> Y;
  F -> Y;
  R -> Y;
  P -> Y;
}
"""

model = CausalModel(
    data=data,      # pandas DataFrame; must already use the A/Y/... column names
    treatment='A',  # graph node / column for system age
    outcome='Y',    # graph node / column for exoplanet size
    graph=graph
)

# Render the graph; "causal_model" is passed as the output file name.
model.view_model(layout="dot", size=(8, 6), file_name="causal_model")


In [None]:
import numpy as np
# Row filter for the analysis sample:
#   * planets whose discovery method is 'Transit',
#   * orbital period below 100,
#   * stellar mass within [0.8, 1.2],
#   * planet radius at most 10.
mask = (
    (df['discoverymethod'] == 'Transit')
    & (df['pl_orbper'] < 100)
    & (df['st_mass'] >= 0.8)
    & (df['st_mass'] <= 1.2)
    & (df['pl_rade'] <= 10)
)

# Additionally require a finite value in every column used by the causal model.
required_columns = [
    'st_age', 'st_mass', 'st_teff', 'st_rad',
    'st_logg', 'st_met', 'pl_orbper', 'pl_rade',
]
for column in required_columns:
    mask &= np.isfinite(df[column])

# Total number of rows vs. number of rows kept by the filter.
print(len(df), len(df[mask]))

In [None]:
# Disabled diagnostic plot: log-log scatter of `pl_orbper` against `pl_rade`
# for the rows selected by `mask`. Uncomment all lines below to run it.
# import matplotlib.pyplot as plt

# plt.scatter(df['pl_orbper'][mask], df['pl_rade'][mask], alpha=0.2, s=2)
# plt.loglog()
# plt.show()

In [42]:
# Archive column name -> single-letter node id used in the causal graph.
short_names = {
    'st_age': 'A',     # System Age
    'st_mass': 'M',    # Stellar Mass
    'st_teff': 'T',    # Stellar Effective Temperature
    'st_rad': 'R',     # Stellar Radius
    'st_logg': 'G',    # Stellar Surface Gravity
    'st_met': 'F',     # Stellar Metallicity
    'pl_orbper': 'P',  # Orbital Period
    'pl_rade': 'Y',    # Exoplanet Size
}

# Keep only the selected rows and the mapped columns, then apply the short names.
data = df.loc[mask, list(short_names)].rename(columns=short_names)


In [None]:
# Preview the first rows of the renamed analysis frame.
data.head()

In [44]:
from dowhy import CausalModel

# Assumed causal structure, expressed as a DOT graph. Node ids match the
# single-letter column names of `data`; the labels spell out their meaning.
# Direct edges into the outcome Y come from A, F, R and P; M, G and T are only
# connected to other stellar nodes.
graph = """
digraph {
  A [label="System Age"];
  Y [label="Exoplanet Size"];
  M [label="Stellar Mass"];
  R [label="Stellar Radius"];
  F [label="Stellar Metallicity"];
  G [label="Stellar Surface Gravity"];
  T [label="Stellar Effective Temperature"];
  P [label="Orbital Period"];

  A -> F;
  A -> R;
  A -> T;
  M -> R;
  M -> G;
  M -> T;
  F -> R;
  R -> G;
  A -> Y;
  F -> Y;
  R -> Y;
  P -> Y;
}
"""

# Treatment: System Age (A). Outcome: Exoplanet Size (Y).
model = CausalModel(data=data, treatment="A", outcome="Y", graph=graph)


In [None]:
# Render the causal graph with the "dot" layout at size (8, 6).
# A `file_name=...` argument can also be passed, as an earlier cell does.
model.view_model(size=(8, 6), layout="dot")


In [None]:
# Identify the estimand for the effect of A on Y from the model's graph,
# using identify_effect's default settings, and print it.
identified_estimand = model.identify_effect()
print(identified_estimand)

In [None]:
# Estimate the identified effect with the backdoor linear-regression method
# and print the resulting estimate object.
estimation_method = "backdoor.linear_regression"
estimate = model.estimate_effect(identified_estimand, method_name=estimation_method)
print(estimate)


In [None]:
# Robustness check: run the placebo-treatment refuter against the estimate
# and print its report.
refuter_name = "placebo_treatment_refuter"
refutation = model.refute_estimate(identified_estimand, estimate, method_name=refuter_name)
print(refutation)


In [None]:
# Run the placebo-treatment refuter against the estimate and print its report.
# NOTE(review): this cell is an exact copy of the previous one and overwrites
# `refutation`. Confirm whether a second run is intended, or whether a
# different refuter was meant here.
refutation = model.refute_estimate(
    identified_estimand,
    estimate,
    method_name="placebo_treatment_refuter"
)
print(refutation)