In [33]:
import bioeq
import polars as pl
from bioeq import Crossover2x2

In [2]:
# Fetch the simulated bioequivalence dataset straight from the bioeq GitHub repository

df_simdata = pl.read_csv(
    "https://raw.githubusercontent.com/shaunporwal/bioeq/refs/heads/main/simdata/bioeq_simdata_1.csv"
)

In [4]:
# Show which bioeq release this notebook was run against
bioeq.__version__

'0.1.0.1'

In [5]:
# Inspect the raw CSV headers before they are renamed
df_simdata.columns

['SubjectID',
 'Period',
 'Sequence',
 'Formulation',
 'Time (hr)',
 'Concentration (ng/mL)']

In [38]:
# Rename the raw CSV headers to the snake_case names used by the rest of the notebook.
# Only headers still present are renamed, so re-running this cell is a no-op
# instead of failing on columns that were already renamed.
df_simdata = df_simdata.rename(
    {
        raw_name: clean_name
        for raw_name, clean_name in {
            "SubjectID": "subject_id",
            "Period": "period",
            "Sequence": "sequence",
            "Formulation": "formulation",
            "Time (hr)": "time",
            "Concentration (ng/mL)": "concentration",
        }.items()
        if raw_name in df_simdata.columns
    }
)

In [42]:
# Confirm the headers now carry the snake_case names
df_simdata.columns

['subject_id', 'period', 'sequence', 'formulation', 'time', 'concentration']

In [44]:
# Build the 2x2 crossover analysis object, telling it which renamed column plays each role
crossover = Crossover2x2(
    data=df_simdata,
    subject_col="subject_id",
    period_col="period",
    seq_col="sequence",
    form_col="formulation",
    time_col="time",
    conc_col="concentration",
)

# Exercise the AUC calculation and print whatever it returns
auc_results = crossover.calculate_auc()
print(auc_results)

None


In [40]:
# Preview the first rows of the renamed frame
df_simdata.head()

subject_id,period,sequence,formulation,time,concentration
i64,i64,str,str,f64,f64
1,1,"""TR""","""Reference""",0.0,49.449766
1,1,"""TR""","""Reference""",0.5,39.455472
1,1,"""TR""","""Reference""",1.0,30.800394
1,1,"""TR""","""Reference""",2.0,19.762422
1,1,"""TR""","""Reference""",4.0,5.849937


In [41]:
# Isolate one concentration-time profile: subject 1, period 1, Reference formulation
df_single_case = (
    df_simdata.filter(pl.col("subject_id") == 1)
    .filter(pl.col("period") == 1)
    .filter(pl.col("formulation") == "Reference")
)

# Keep only the two columns that describe the profile itself
df_single_case_small = df_single_case.select("time", "concentration")

# Number of sampling points in the profile
row_num = df_single_case_small.height

# Print each concentration on its own line
for conc in df_single_case_small.get_column("concentration"):
    print(conc)

49.44976551083605
39.45547222602065
30.800393819376495
19.762422181837874
5.849937318012263
2.365206238120429
0.0


In [10]:
# Display the number of sampling points counted in the single-case profile
row_num

7

In [28]:
# List the distinct treatment sequences. The column is addressed by its renamed
# snake_case name, since "Sequence" no longer exists once the rename cell has run.
df_simdata.select("sequence").unique().sort(by="sequence")

Sequence
str
"""TR"""


In [None]:
# Load the raw semicolon-separated test data. It is bound to `df` because both the
# display below and the casting cell that follows read the raw frame from `df`.
df = pl.read_csv(
    source="https://raw.githubusercontent.com/statist-bhfz/bioeq/refs/heads/master/testdata.csv",
    separator=";",
    truncate_ragged_lines=True,
)


df

In [None]:
# with_columns returns a new frame rather than editing `df`, so the result is bound to df_simdata.
# Identifier columns become Int64, the drug label a string, and the decimal-comma
# time/conc values are converted to Float64 after swapping "," for ".".
df_simdata = df.with_columns(
    *[pl.col(id_name).cast(pl.Int64) for id_name in ("subj", "seq", "prd")],
    pl.col("drug").cast(pl.String),
    *[
        pl.col(num_name).cast(pl.String).str.replace(r",", ".").cast(pl.Float64)
        for num_name in ("time", "conc")
    ],
)

In [None]:
# Check which columns the cast test data exposes
df_simdata.columns

In [36]:
# Column names the AUC prototype below reads from the test data
subject_col, time_col, conc_col = "subj", "time", "conc"

In [None]:
# Validate required columns: fail early with a clear message when any column
# needed for the AUC computation is missing from the frame.
required_cols = [subject_col, time_col, conc_col]
missing_cols = [name for name in required_cols if name not in df_simdata.columns]
if missing_cols:
    raise ValueError(f"Missing required columns: {missing_cols}")

# sort returns a new frame, so df_simdata stays untouched without explicit clones
df = df_simdata.sort([subject_col, time_col])

df

In [None]:
# Compute AUC for each group with the linear trapezoidal rule, written as native
# polars expressions: the sum over consecutive points of dt * (c_prev + c_curr) / 2.
# This needs no numpy and no per-row Python lambda.
# Relies on df being sorted by time within each subject. The first row of every
# group has no previous point, so its term is null and sum() skips it.
# NOTE(review): grouping is by subject only — if a subject has several
# periods/formulations their profiles are merged into one curve; confirm intent.
auc_df = df.group_by(subject_col).agg(
    (
        pl.col(time_col).diff()
        * (pl.col(conc_col) + pl.col(conc_col).shift(1))
        / 2
    )
    .sum()
    .alias("AUC")
)

In [None]:
# The workflow below probably has to be developed in parallel to this bioeq package; it is a whole separate entity.

# In doing the data validation, what we want is probably something like:

# Parse

# 1. State all expected raw colnames and types or if applicable, expected categorical values
# 2. State raw col to parsed col relationship
# 3. See if there are problematic values
# 4. Resolve problematic values and create parsed cols
# 5. Check that they have been resolved
# 6. Assign the correct types to all the parsed cols once we're happy with col vals

# Validate

# 7. Check that all col values are reasonable to human judgement (pointblank in R)
# 8. Change those that aren't or filter out, and proceed with cleaned data (get_data_pass() in R)

# Derive

# 9. Create a list of cols to derive and which cols they are derived from
# 10. Derive the columns in the dataframe
# 11. Check that we derived all the columns that we said we would
# 12. Check that the col vals now have reasonable values, and if not either filter or replace (pointblank in R)

In [None]:
# Display the crossover analysis object built earlier (`cross` alone is not a defined name)
crossover

In [None]:
# NOTE(review): called with no arguments, unlike the earlier instantiation that passes
# data and column names — presumably raises unless every parameter has a default; confirm or remove.
Crossover2x2()