In [1]:
import pandas as pd
import os
import numpy as np

In [2]:
file_path = "extraction/data_censored.csv"

# Read the CSV file into a DataFrame. Only the read sits inside the `try`;
# the success messages live in `else` so they run only when the read worked.
try:
    data_df = pd.read_csv(file_path)
except FileNotFoundError:
    print(f"File not found at {file_path}")
    # Re-raise instead of continuing: without the file `data_df` is never
    # assigned, and every later cell would fail with an unrelated NameError.
    raise
else:
    print("Data loaded successfully!")
    print(data_df.head())  # Display the first few rows

Data loaded successfully!
   id  period  treatment  x1        x2  x3        x4  age     age_s  outcome  \
0   1       0          1   1  1.146148   0  0.734203   36  0.083333        0   
1   1       1          1   1  0.002200   0  0.734203   37  0.166667        0   
2   1       2          1   0 -0.481762   0  0.734203   38  0.250000        0   
3   1       3          1   0  0.007872   0  0.734203   39  0.333333        0   
4   1       4          1   1  0.216054   0  0.734203   40  0.416667        0   

   censored  eligible  
0         0         1  
1         0         0  
2         0         0  
3         0         0  
4         0         0  


In [3]:
# Result folders, one per analysis, located under the current working directory
trial_pp_dir, trial_itt_dir = (
    os.path.join(os.getcwd(), folder) for folder in ("trial_pp", "trial_itt")
)

# Create both folders; an already-existing folder is left as it is
for results_dir in (trial_pp_dir, trial_itt_dir):
    os.makedirs(results_dir, exist_ok=True)

In [5]:

# Helper that bundles a dataset with the columns playing each trial role
def set_data(trial_name, data, id_col, period_col, treatment_col, outcome_col, eligible_col):
    """Bundle a trial's name, its data, and its key columns into one dict.

    Parameters
    ----------
    trial_name : label stored under ``"trial_name"``.
    data : object indexable by column name (a DataFrame in this notebook);
        stored as-is under ``"data"``, not copied.
    id_col, period_col, treatment_col, outcome_col, eligible_col :
        column names looked up in ``data``.

    Returns
    -------
    dict with keys ``"trial_name"``, ``"data"``, ``"id"``, ``"period"``,
    ``"treatment"``, ``"outcome"``, ``"eligible"`` (in that order), where the
    last five hold ``data[<column name>]``.

    A column name missing from ``data`` surfaces as whatever error the
    ``data[...]`` lookup raises (``KeyError`` for a DataFrame).
    """
    trial = {"trial_name": trial_name, "data": data}

    # Role name in the result -> column name to pull from `data`
    role_to_column = (
        ("id", id_col),
        ("period", period_col),
        ("treatment", treatment_col),
        ("outcome", outcome_col),
        ("eligible", eligible_col),
    )
    for role, column in role_to_column:
        trial[role] = data[column]

    return trial

# Column roles are the same for both estimands, so they are declared once
trial_columns = {
    "id_col": "id",
    "period_col": "period",
    "treatment_col": "treatment",
    "outcome_col": "outcome",
    "eligible_col": "eligible",
}

# Per-Protocol (PP)
# Each trial receives its own copy of the raw frame: set_data stores the
# object it is given, so passing `data_df` itself to both calls would make
# any in-place change to one trial's data show up in the other trial and in
# `data_df` as well.
trial_pp = set_data(trial_name="PP", data=data_df.copy(), **trial_columns)

# Intention-To-Treat (ITT)
trial_itt = set_data(trial_name="ITT", data=data_df.copy(), **trial_columns)

# Print the structured ITT trial data
print(trial_itt)

{'trial_name': 'ITT', 'data':      id  period  treatment  x1        x2  x3        x4  age     age_s  \
0     1       0          1   1  1.146148   0  0.734203   36  0.083333   
1     1       1          1   1  0.002200   0  0.734203   37  0.166667   
2     1       2          1   0 -0.481762   0  0.734203   38  0.250000   
3     1       3          1   0  0.007872   0  0.734203   39  0.333333   
4     1       4          1   1  0.216054   0  0.734203   40  0.416667   
..   ..     ...        ...  ..       ...  ..       ...  ...       ...   
720  99       3          0   0 -0.747906   1  0.575268   68  2.750000   
721  99       4          0   0 -0.790056   1  0.575268   69  2.833333   
722  99       5          1   1  0.387429   1  0.575268   70  2.916667   
723  99       6          1   1 -0.033762   1  0.575268   71  3.000000   
724  99       7          0   0 -1.340497   1  0.575268   72  3.083333   

     outcome  censored  eligible  
0          0         0         1  
1          0         0 