In [None]:
from sklearn.linear_model import LogisticRegression
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from llr import compute_llr
from sim import gen_fuzzy_rdd, point_plot
from tree import RDDTree

# Simulate the dataset with a fixed seed so reruns give the same frame.
# NOTE(review): the meaning of the 0.8, 0.8, 0.2 arguments is defined by
# sim.gen_fuzzy_rdd -- confirm there before changing them.
n = 1000
df = gen_fuzzy_rdd(n, 0.8, 0.8, 0.2, seed=0)

# Background function: logistic regression of the treatment column `t` on the
# forcing variable `x`. scikit-learn expects a 2-D design matrix, hence the
# single-column reshape.
X = df['x'].to_numpy().reshape(-1, 1)
lr = LogisticRegression(random_state=0).fit(X, df['t'])

# Evaluate the fitted model on an evenly spaced 100-point grid over [0, 1];
# column 1 of predict_proba is the probability of the model's second class.
test_X = np.linspace(0, 1, 100)[:, np.newaxis]
preds = lr.predict_proba(test_X)[:, 1]

# Plot the RDD and the background function.
# NOTE(review): point_plot is a project helper (sim.py). `scale` is forwarded to
# it, and the prediction grid below is rescaled by the same factor so the fitted
# curve lines up with what it draws -- presumably point_plot bins x as
# floor(x * scale); confirm against sim.point_plot.
scale = 100
point_plot('x', 't', df, scale, errwidth=0)
plt.xlabel("Forcing variable")
plt.ylabel("Treatment")
disp_X = np.floor(test_X.flatten() * scale)
# x and y must be passed by keyword: seaborn >= 0.12 makes every lineplot
# argument other than `data` keyword-only, so the positional form raises
# TypeError there (and emits a FutureWarning on 0.11).
sns.lineplot(x=disp_X, y=preds, color='red', label="estimated Pr(T)")
plt.legend()
plt.show()

# Check the LLR computation on a coarse grid of candidate cutoffs.
all_Ps = lr.predict_proba(X)[:, 1]  # fitted probability for every observation
all_Ts = df['t']                    # observed treatment column
cutoffs = np.arange(0.1, 1, 0.1)

# For each cutoff, build a 0/1 indicator of the observations whose x lies
# strictly above it and hand it to the project helper compute_llr together
# with the fitted probabilities and observed treatments.
llrs = [
    compute_llr(all_Ps, all_Ts, (X.ravel() > cutoff).astype(int))
    for cutoff in cutoffs
]

fig, ax = plt.subplots()
ax.plot(cutoffs, llrs)
ax.set_title("Maximum LLR for each candidate cutoff")
ax.set_ylabel("Max LLR")
ax.set_xlabel("cutoff")
# Threshold overlays are disabled: uncorr_thres / corr_thres are not defined in
# the visible cells. Re-enable once they are computed.
#plt.axhline(y=uncorr_thres, ls='--', color='green', label="95% threshold")
#plt.axhline(y=corr_thres, ls='--', color='red', label="Bonferroni 95% threshold")
#plt.legend()
plt.show()

#%% test tree implementation

# Store the observed treatments and the fitted probabilities on the frame under
# the column names 'Ts' and 'Ps', which the RDDTree cell selects.
for column, values in (('Ts', all_Ts), ('Ps', all_Ps)):
    df[column] = values

In [None]:
# Build the tree from the forcing variable, observed treatment and fitted
# probability columns.
# NOTE(review): 3, 2, 5 are positional hyperparameters whose meaning is defined
# by tree.RDDTree -- confirm there and consider passing them by keyword.
# NOTE(review): the stored output of this cell contains warning fragments that
# quote `np.log(1-Ps + (mus * Ps))`; check whether build_tree reaches invalid
# log arguments or a non-converging solver on this data.
tree_input = df[['x', 'Ts', 'Ps']]
tree = RDDTree(tree_input, 3, 2, 5)
tree.build_tree()

  improvement from the last ten iterations.
  - np.log(1-Ps + (mus * Ps))
  - np.log(1-Ps + (mus * Ps))
  - np.log(1-Ps + (mus * Ps))
  - np.log(1-Ps + (mus * Ps))
  - np.log(1-Ps + (mus * Ps))


In [None]:
# Show the root node of the built tree; its printed form is whatever the tree
# module's string conversion produces.
root_node = tree.root
print(root_node)