In [122]:
import io
import subprocess
import tempfile
from datetime import datetime
from pathlib import Path

import pandas as pd
from mrich import print

In [129]:
# Column order used further down for sorting and for the final MultiIndex.
indices = ["Timepoint", "Treatment", "Rat", "Animal number"]

# Randomisation key, indexed by the blinded "Randomised number".
mdf = pd.read_excel(
    "data/InVivo2/JM_InVivo2_EchoRandomisation_max.xlsx",
    skiprows=1,
    usecols="B:G",
).set_index("Randomised number")

# Rows whose "Rat" entry is the text "NM" are assigned rat number 79.
mdf.loc[mdf["Rat"] == "NM", "Rat"] = 79

# Rat numbers with remainder 0 or 1 modulo 4 are "Control"; the others are "IR".
mdf["Treatment"] = [
    "Control" if rat % 4 < 2 else "IR"
    for rat in mdf["Rat"]
]
mdf

Unnamed: 0_level_0,Timepoint,Animal number,Rat,Ear Notch,Acquisition Date,Treatment
Randomised number,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
7,Baseline,M00681267,64,L,2024-08-31,Control
28,Baseline,M00681268,65,R,2024-08-31,Control
69,Baseline,M00681269,66,LL,2024-08-31,IR
76,Baseline,M00681270,67,NM,2024-09-02,IR
25,Baseline,M00681271,68,L,2024-08-31,Control
...,...,...,...,...,...,...
75,25 wk,M00681278,75,NM,2025-03-28,IR
27,25 wk,M00681279,76,L,2025-03-28,Control
78,25 wk,M00681280,77,R,2025-03-28,Control
29,25 wk,M00681281,78,LL,2025-03-28,IR


In [132]:
data = {}

### PSLAX Bmode

keys = [
    "PSLAX_Bmode",
    "PSLAX_Mmode",
    "PSSAX_Mmode",
]

for key in keys:
    for file in Path("data/InVivo2/blinded").glob(f"202?????_InVivo2_Blinded_*_{key}.csv"):
        
        date, _, _, image_num, _, _ = file.name.split("_")
        
        image_num = int(image_num)
        
        match key:
            case "PSLAX_Bmode":
                search = '"Measurement","Mode","Parameter","Units",'
                n_lines = 11
            case "PSLAX_Mmode" | "PSSAX_Mmode":
                search = '"Measurement","Mode","Parameter","Units","Avg","STD","Instance 1","Instance 2","Instance 3"'
                n_lines = 6
            case _:
                raise ValueError

        cmds = [
            "grep",
            f"-A{n_lines}",
            search,
            file,
        ]

        result = subprocess.run(cmds, stdout=subprocess.PIPE, text=True)
        df = pd.read_csv(io.StringIO(result.stdout), index_col=False)

        d = data.setdefault(image_num, {})
        
        d.update({
            "Randomised number":int(image_num),
            f"{key}: Date": datetime.strptime(date, "%Y%m%d").date(),
        })

        match key:
            case "PSLAX_Bmode":
                for i,row in df.iterrows():
                    param = row["Parameter"]
                    unit = row["Units"]
                    value = row[-1]
                    d[f"{key}: {param} [{unit}]"] = value
            case "PSLAX_Mmode" | "PSSAX_Mmode":
                for i,row in df.iterrows():
                    param = row["Measurement"]
                    unit = row["Units"]
                    d[f"{key}: {param} Avg [{unit}]"] = row["Avg"]
                    d[f"{key}: {param} STD [{unit}]"] = row["STD"]

#### COMBINE

df = pd.DataFrame(data.values())
# df = df.set_index("Randomised number")

#### MAP TO RANDOM NUMBERS

for col in indices:
    df[col] = df["Randomised number"].apply(lambda x: mdf.loc[x,col])

#### SORT

timepoints = ["Baseline", "4 wk", "8 wk", "10 wk", "14 wk", "18 wk", "22 wk", "25 wk"]

df["Timepoint"] = pd.Categorical(df["Timepoint"], categories=timepoints, ordered=True)
    
df = df.sort_values(by=indices)
df = df.set_index(indices + ["Randomised number"])

df

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,PSLAX_Bmode: Date,PSLAX_Bmode: Heart Rate [BPM],PSLAX_Bmode: Area [mm2],PSLAX_Bmode: Area;s [mm2],PSLAX_Bmode: Area;d [mm2],PSLAX_Bmode: Volume [uL],PSLAX_Bmode: Volume;s [uL],PSLAX_Bmode: Volume;d [uL],PSLAX_Bmode: Stroke Volume [uL],PSLAX_Bmode: Ejection Fraction [%],...,PSSAX_Mmode: IVS;s Avg [mm],PSSAX_Mmode: IVS;s STD [mm],PSSAX_Mmode: LVID;d Avg [mm],PSSAX_Mmode: LVID;d STD [mm],PSSAX_Mmode: LVID;s Avg [mm],PSSAX_Mmode: LVID;s STD [mm],PSSAX_Mmode: LVPW;d Avg [mm],PSSAX_Mmode: LVPW;d STD [mm],PSSAX_Mmode: LVPW;s Avg [mm],PSSAX_Mmode: LVPW;s STD [mm]
Timepoint,Treatment,Rat,Animal number,Randomised number,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1
Baseline,Control,64,M00681267,7,2025-04-24,423.280423,44.067844,43.706305,74.757071,142.158018,143.017484,353.343311,210.325827,59.524497,...,2.577174,0.159830,7.004628,0.207131,4.361372,0.117283,1.924621,0.090937,2.981923,0.057939
Baseline,Control,65,M00681268,28,2025-04-26,365.408039,53.579189,56.023075,90.991978,198.943009,212.042219,471.512951,259.470732,55.029397,...,2.746443,0.119364,7.606122,0.097818,4.974114,0.155002,2.250558,0.113670,2.860878,0.048008
Baseline,Control,68,M00681271,25,2025-04-26,393.894633,78.544546,52.105674,91.288323,343.026430,169.321994,448.580589,279.258595,62.253830,...,3.559959,0.182555,7.496761,0.130572,4.169322,0.294466,2.084661,0.157119,3.094920,0.176540
Baseline,Control,69,M00681272,109,2025-05-02,394.347683,81.506940,46.157460,75.709840,397.705903,150.503809,354.862210,204.358401,57.588099,...,2.902489,0.181708,6.943524,0.242772,4.425895,0.320557,1.812051,0.084245,2.581772,0.094598
Baseline,Control,72,M00681275,5,2025-04-24,375.175864,56.852593,53.216054,92.705027,187.359852,175.615414,453.744176,278.128762,61.296382,...,2.794001,0.134961,7.097324,0.129824,4.401605,0.176203,1.972649,0.072549,2.871222,0.146920
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
25 wk,IR,71,M00681274,71,2025-04-28,410.466906,31.007125,31.657742,85.768956,78.204106,79.697147,407.194386,327.497239,80.427739,...,2.707947,0.226240,6.822299,0.204956,3.213759,0.340351,1.554448,0.081086,2.948516,0.156440
25 wk,IR,74,M00681277,88,2025-04-28,409.137402,29.300926,31.122229,71.780201,69.756458,75.803773,313.259298,237.455524,75.801589,...,,,,,,,,,,
25 wk,IR,75,M00681278,75,2025-04-28,367.759730,74.750803,42.096245,76.034566,344.678759,123.777942,350.188742,226.410799,64.653934,...,2.598177,0.130188,7.506733,0.258752,4.444881,0.213796,2.342356,0.146801,3.205751,0.140392
25 wk,IR,78,M00681281,29,2025-04-26,436.760692,54.005644,31.223408,58.180455,204.857642,75.446692,231.247957,155.801265,67.374115,...,2.758321,0.234233,6.311413,0.190923,3.559770,0.335669,1.382500,0.115448,2.504529,0.090496


In [133]:
# Write the combined, un-blinded measurement table to an Excel workbook.
df.to_excel(excel_writer="InVivo2_processed.xlsx")