In [None]:
# Setup
import pandas as pd
import numpy as np
import statsmodels.api as sm
from statsmodels.formula.api import glm
from statsmodels.genmod.families import Binomial
import matplotlib.pyplot as plt

# Read the dummy data (path is relative to the notebook's working directory)
data = pd.read_csv(
    "../TTE/data/data_censored.csv",
)
print("Data shape:", data.shape)
# Last expression of the cell: preview of the first five rows.
data.head(n=5)


In [None]:
# Data Preparation
# Inspect column dtypes / non-null counts and basic summary statistics.
# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called bare; wrapping it in print() would emit an extra "None" line.
data.info()
print(data.describe())

# Independent working copies of the raw frame, one per estimand, so columns
# added for one analysis do not leak into the other or into `data`.
data_pp = data.copy()
data_itt = data.copy()

# Estimand labels ("PP" / "ITT" — presumably per-protocol and intention-to-treat).
estimand_pp = "PP"
estimand_itt = "ITT"


In [None]:
# Weight Models and Censoring
# Response for both weight models: the complement of the `censored` column
# (assumes `censored` is coded 0/1 — TODO confirm against the data dictionary).
data_itt['not_censored'] = 1 - data_itt['censored']

# Numerator model: binomial GLM of remaining uncensored on x2 only.
model_numerator_itt = glm("not_censored ~ x2", data=data_itt, family=Binomial()).fit()
print("ITT Censor Numerator Model Summary")
print(model_numerator_itt.summary())

# Denominator model: same response, with x1 added to the predictors.
model_denominator_itt = glm("not_censored ~ x2 + x1", data=data_itt, family=Binomial()).fit()
# The leading blank line must be written as the "\n" escape: a raw line break
# inside a single-line string literal is a SyntaxError.
print("\nITT Censor Denominator Model Summary")
print(model_denominator_itt.summary())

# Per-row predicted probabilities from each model, and their ratio as the
# censoring weight (numerator / denominator).
data_itt['p_num'] = model_numerator_itt.predict(data_itt)
data_itt['p_den'] = model_denominator_itt.predict(data_itt)
data_itt['censor_ipw'] = data_itt['p_num'] / data_itt['p_den']


In [None]:
# Calculate Weights
# Summary of the censoring weights before truncation.
print(data_itt['censor_ipw'].describe())

# Truncate the upper tail: every weight above the 99th percentile is replaced
# by that percentile; all other weights are left unchanged.
cutoff_99 = data_itt['censor_ipw'].quantile(0.99)
data_itt['censor_ipw'] = data_itt['censor_ipw'].clip(upper=cutoff_99)


In [None]:
# Specify Outcome Model
# Follow-up time is taken directly from the `period` column.
data_itt['followup_time'] = data_itt['period']

# Row weights are the (truncated) censoring weights; the linear predictor has
# treatment, x2 and a quadratic in follow-up time.
weights_itt = data_itt['censor_ipw']
formula_itt = "outcome ~ treatment + x2 + followup_time + I(followup_time**2)"

# Build the weighted binomial GLM first, then fit it.
outcome_glm_itt = glm(
    formula_itt,
    data=data_itt,
    family=Binomial(),
    freq_weights=weights_itt,
)
model_outcome_itt = outcome_glm_itt.fit()

print("ITT Outcome Model Summary")
print(model_outcome_itt.summary())


In [None]:
# Expand Trials
# For every patient id, keep the first row recorded for each non-negative
# period, ordered by id and then by period, with a fresh 0..n-1 index.
#
# This is done with vectorized pandas operations instead of a nested Python
# loop that re-filters each patient's frame once per period: the result has
# the same rows in the same order, but column dtypes are preserved (a
# row -> dict -> DataFrame round trip upcasts integer columns to float when
# all columns are numeric) and an empty input still yields the expected columns.
#
# NOTE(review): as written this step selects existing rows only; it does not
# replicate a patient's follow-up into multiple sequential trials — confirm
# that this is the intended meaning of "expand".
expanded_data_itt = (
    data_itt[data_itt['period'] >= 0]      # periods below 0 are never selected
    .dropna(subset=['id'])                 # rows without an id belong to no patient group
    .drop_duplicates(subset=['id', 'period'], keep='first')  # first row per (id, period)
    .sort_values(['id', 'period'])         # keys are unique here, so ordering is unambiguous
    .reset_index(drop=True)
)
print("Expanded data shape:", expanded_data_itt.shape)


In [None]:
# Sampling from Expanded Data
# Fixed seed so the same rows are dropped on every run.
np.random.seed(1234)

# One uniform draw per row; a row is discarded only when its outcome is 0
# AND its draw exceeds 0.5. Rows with any other outcome value are always kept.
uniform_draws = np.random.rand(len(expanded_data_itt))
outcome_is_zero = expanded_data_itt['outcome'] == 0
mask = outcome_is_zero & (uniform_draws > 0.5)

sampled_data_itt = expanded_data_itt.loc[~mask].copy()
print("Sampled data shape:", sampled_data_itt.shape)
