# 📚 Private Causal Discovery

Built by **Stu** 🚀

## Section 1: Basics of Causal Discovery

### Exercise 1: Define Causal Discovery

In [1]:
# One-sentence working definition of causal discovery.
causal_discovery_definition = (
    "Learning the causal structure (e.g., DAG) "
    "from observational or experimental data."
)

### Exercise 2: Sketch Privacy Risks in Causal Discovery

In [2]:
# Short statement of the privacy risk posed by a learned causal graph.
causal_privacy_risks = (
    "Sensitive relationships (edges) between variables "
    "could leak user-specific behavior or attributes."
)

## Section 2: Simulate Toy Causal Data

### Exercise 3: Generate Tiny 3-Variable Causal Data

In [3]:
import numpy as np
# Seed NumPy's global generator so the samples below are reproducible.
np.random.seed(42)
n = 300

# Linear chain: Y is built from X and Z is built from Y, each with a small
# Gaussian perturbation (standard deviation 0.1).
X = np.random.normal(loc=0, scale=1, size=n)
Y = np.random.normal(loc=0, scale=0.1, size=n) + 2 * X
Z = np.random.normal(loc=0, scale=0.1, size=n) + 3 * Y

### Exercise 4: Correlation Matrix (No Privacy Yet)

In [4]:
import pandas as pd
# Collect the three simulated variables into one table, one column per variable.
df = pd.DataFrame(dict(X=X, Y=Y, Z=Z))

# Pairwise column correlations (pandas' default method); no noise added yet.
correlation = df.corr()
correlation

## Section 3: Add Noise for Private Discovery

### Exercise 5: Add Laplace Noise to Correlations

In [5]:
def add_laplace_noise_to_corr(corr_matrix, epsilon=1.0):
    """Return a copy of *corr_matrix* with Laplace noise on its off-diagonal.

    One noise value with scale ``1 / epsilon`` is drawn per unordered pair
    of variables and added to both mirrored entries, so a symmetric input
    yields a symmetric output and each pairwise statistic is perturbed
    exactly once. Diagonal entries are left untouched and the input frame
    is not modified.

    Args:
        corr_matrix: Square pandas DataFrame of pairwise correlations.
        epsilon: Positive privacy parameter; smaller values give a larger
            noise scale.

    Returns:
        A new DataFrame with the same index and columns as *corr_matrix*.

    Raises:
        ValueError: If *epsilon* is not strictly positive.
    """
    # `not epsilon > 0` also rejects NaN, which `epsilon <= 0` would let through.
    if not epsilon > 0:
        raise ValueError(f"epsilon must be positive, got {epsilon!r}")

    scale = 1 / epsilon
    noisy_corr = corr_matrix.copy()
    size = len(corr_matrix)
    for i in range(size):
        # Walk the upper triangle only and mirror each draw; drawing
        # separately for (i, j) and (j, i) would break symmetry.
        for j in range(i + 1, size):
            noise = np.random.laplace(0, scale)
            noisy_corr.iloc[i, j] += noise
            noisy_corr.iloc[j, i] += noise
    return noisy_corr

# Perturb the correlation matrix using the function's default epsilon, then
# display the result.
noisy_correlation = add_laplace_noise_to_corr(corr_matrix=correlation)
noisy_correlation

### Exercise 6: Build Noisy Causal Graph

In [6]:
import networkx as nx

# Minimum absolute noisy correlation for a pair of variables to get an edge.
threshold = 0.5

G = nx.DiGraph()
for col in noisy_correlation.columns:
    for row in noisy_correlation.index:
        # A variable is never linked to itself.
        if row == col:
            continue
        strength = abs(noisy_correlation.loc[row, col])
        if strength > threshold:
            G.add_edge(row, col)

nx.draw(G, with_labels=True)

## Section 4: Reflections

### Exercise 7: Reflect on Private Causal Graph Accuracy

In [7]:
# Reflection on how the added noise affects the recovered graph.
causal_graph_accuracy_reflection = (
    "Noise may add false edges or miss real ones; "
    "need careful threshold tuning."
)

### Exercise 8: Sketch Real Applications

In [8]:
# Example application areas for private causal discovery.
causal_real_apps = (
    "Health research, social science, "
    "private marketing experiments."
)

### Exercise 9: Vary Epsilon and Measure False Edges

In [9]:
# Sweep the privacy parameter. For each epsilon, draw one noisy correlation
# matrix and count the off-diagonal entries whose magnitude exceeds
# `threshold`, i.e. how many directed edges the graph-building step would add.
epsilons = [0.1, 0.5, 1.0, 2.0]
false_edges = []
for eps in epsilons:
    noisy_corr = add_laplace_noise_to_corr(correlation, epsilon=eps)
    count = sum(
        1
        for col in noisy_corr.columns
        for row in noisy_corr.index
        if col != row and abs(noisy_corr.loc[row, col]) > threshold
    )
    false_edges.append(count)

# No earlier cell imports pyplot, so bring `plt` into scope here; without
# this the plotting calls below fail with NameError.
import matplotlib.pyplot as plt

plt.plot(epsilons, false_edges)
plt.xlabel('ε')
plt.ylabel('Number of (Potentially False) Edges')
plt.title('Privacy vs Causal Graph Accuracy')
plt.show()

### Exercise 10: Summarize Trade-offs

In [10]:
# Closing summary of the privacy/accuracy trade-off explored above.
causal_discovery_summary = (
    "Lower ε → stronger privacy but more noise → "
    "more false edges; trade-off tuning essential."
)