In [134]:
import io
import subprocess
import tempfile
from datetime import datetime
from pathlib import Path

import pandas as pd
from mrich import print

In [135]:
# Load the randomisation key (spreadsheet columns B:G, first row skipped) and
# index it by the blinded "Randomised number" so rows can be looked up per image.
mdf = pd.read_excel(
    "data/InVivo2/JM_InVivo2_EchoRandomisation_max.xlsx",
    usecols="B:G",
    skiprows=1,
).set_index("Randomised number")

# Metadata columns that are later attached to the measurement table and used
# as its sort order / index levels.
indices = ["Timepoint", "Treatment", "Rat", "Animal number"]

# Rows whose "Rat" entry is the string "NM" are assigned rat number 79.
# NOTE(review): presumably this is the one animal recorded without a number — confirm.
mdf.loc[mdf["Rat"].eq("NM"), "Rat"] = 79

# Treatment group is derived from the rat number: remainder 0 or 1 (mod 4) is
# "Control", remainder 2 or 3 is "IR".
mdf["Treatment"] = mdf["Rat"].map(lambda rat: "Control" if rat % 4 < 2 else "IR")
mdf

Unnamed: 0_level_0,Timepoint,Animal number,Rat,Ear Notch,Acquisition Date,Treatment
Randomised number,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
53,Baseline,M00681267,64,L,2024-08-31,Control
47,Baseline,M00681268,65,R,2024-08-31,Control
128,Baseline,M00681269,66,LL,2024-08-31,IR
52,Baseline,M00681270,67,NM,2024-09-02,IR
40,Baseline,M00681271,68,L,2024-08-31,Control
...,...,...,...,...,...,...
74,25 wk,M00681278,75,NM,2025-03-28,IR
57,25 wk,M00681279,76,L,2025-03-28,Control
28,25 wk,M00681280,77,R,2025-03-28,Control
21,25 wk,M00681281,78,LL,2025-03-28,IR


In [136]:
data = {}

### PSLAX Bmode

keys = [
    "PSLAX_Bmode",
    "PSLAX_Mmode",
    "PSSAX_Mmode",
]

for key in keys:
    for file in Path("data/InVivo2/blinded_echo").glob(f"202?????_InVivo2_Blinded_*_{key}.csv"):
        
        date, _, _, image_num, _, _ = file.name.split("_")
        
        image_num = int(image_num)
        
        match key:
            case "PSLAX_Bmode":
                search = '"Measurement","Mode","Parameter","Units",'
                n_lines = 11
            case "PSLAX_Mmode" | "PSSAX_Mmode":
                search = '"Measurement","Mode","Parameter","Units","Avg","STD","Instance 1","Instance 2","Instance 3"'
                n_lines = 6
            case _:
                raise ValueError

        cmds = [
            "grep",
            f"-A{n_lines}",
            search,
            file,
        ]

        result = subprocess.run(cmds, stdout=subprocess.PIPE, text=True)
        df = pd.read_csv(io.StringIO(result.stdout), index_col=False)

        d = data.setdefault(image_num, {})
        
        d.update({
            "Randomised number":int(image_num),
            f"{key}: Date": datetime.strptime(date, "%Y%m%d").date(),
        })

        match key:
            case "PSLAX_Bmode":
                for i,row in df.iterrows():
                    param = row["Parameter"]
                    unit = row["Units"]
                    value = row[-1]
                    d[f"{key}: {param} [{unit}]"] = value
            case "PSLAX_Mmode" | "PSSAX_Mmode":
                for i,row in df.iterrows():
                    param = row["Measurement"]
                    unit = row["Units"]
                    d[f"{key}: {param} Avg [{unit}]"] = row["Avg"]
                    d[f"{key}: {param} STD [{unit}]"] = row["STD"]

#### COMBINE

df = pd.DataFrame(data.values())
# df = df.set_index("Randomised number")

#### MAP TO RANDOM NUMBERS

for col in indices:
    df[col] = df["Randomised number"].apply(lambda x: mdf.loc[x,col])

#### SORT

timepoints = ["Baseline", "4 wk", "8 wk", "10 wk", "14 wk", "18 wk", "22 wk", "25 wk"]

df["Timepoint"] = pd.Categorical(df["Timepoint"], categories=timepoints, ordered=True)
    
df = df.sort_values(by=indices)
df = df.set_index(indices + ["Randomised number"])

df

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,PSLAX_Bmode: Date,PSLAX_Bmode: Heart Rate [BPM],PSLAX_Bmode: Area [mm2],PSLAX_Bmode: Area;s [mm2],PSLAX_Bmode: Area;d [mm2],PSLAX_Bmode: Volume [uL],PSLAX_Bmode: Volume;s [uL],PSLAX_Bmode: Volume;d [uL],PSLAX_Bmode: Stroke Volume [uL],PSLAX_Bmode: Ejection Fraction [%],...,PSSAX_Mmode: IVS;s Avg [mm],PSSAX_Mmode: IVS;s STD [mm],PSSAX_Mmode: LVID;d Avg [mm],PSSAX_Mmode: LVID;d STD [mm],PSSAX_Mmode: LVID;s Avg [mm],PSSAX_Mmode: LVID;s STD [mm],PSSAX_Mmode: LVPW;d Avg [mm],PSSAX_Mmode: LVPW;d STD [mm],PSSAX_Mmode: LVPW;s Avg [mm],PSSAX_Mmode: LVPW;s STD [mm]
Timepoint,Treatment,Rat,Animal number,Randomised number,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1
Baseline,Control,64,M00681267,53,2025-04-28,428.189117,30.421540,26.736552,69.351570,79.513114,63.852401,328.422373,264.569972,80.557841,...,3.074397,0.170266,6.431793,0.172007,2.823557,0.201880,1.498608,0.056804,2.804262,0.151769
Baseline,Control,65,M00681268,47,2025-04-26,435.808970,32.815279,30.244550,74.158844,80.054812,71.273218,324.980130,253.706912,78.068438,...,2.841309,0.112528,6.982230,0.178742,3.249030,0.202262,1.573550,0.056264,2.924127,0.182116
Baseline,Control,68,M00681271,40,2025-04-26,468.566966,28.106887,26.581895,63.462990,67.110365,62.158107,283.710657,221.552550,78.091021,...,1.885522,0.278139,6.743885,0.129357,4.349272,0.181115,1.363861,0.044085,2.237487,0.103045
Baseline,Control,69,M00681272,117,2025-05-02,418.628990,34.847759,33.943376,64.661061,98.011109,94.396773,286.603548,192.206776,67.063641,...,2.535036,0.156152,6.990950,0.186637,4.109633,0.209524,1.613799,0.045828,2.423965,0.067521
Baseline,Control,72,M00681275,76,2025-04-28,399.600400,29.239489,27.919859,67.405486,73.210195,66.761222,303.929654,237.168432,78.033989,...,2.691846,0.053797,6.723660,0.124638,3.466050,0.106000,1.429299,0.050533,2.352388,0.109359
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
25 wk,IR,71,M00681274,18,2025-04-25,345.622120,35.601831,32.689276,75.423355,96.823374,87.017289,358.328161,271.310872,75.715755,...,3.830247,0.179787,7.443232,0.093702,3.612985,0.107598,2.277227,0.047469,3.299162,0.058470
25 wk,IR,74,M00681277,65,2025-04-28,352.164343,70.887392,24.979777,67.305461,304.118190,53.057409,271.973715,218.916306,80.491714,...,4.742300,0.196340,6.378952,0.208617,2.379134,0.356755,1.724473,0.109233,3.249354,0.158632
25 wk,IR,75,M00681278,74,2025-04-28,370.084811,38.012953,26.633341,64.632273,115.502812,64.342152,274.276512,209.934360,76.541137,...,3.466213,0.128069,6.925451,0.101643,3.954412,0.136881,1.631979,0.102500,2.420072,0.152989
25 wk,IR,78,M00681281,21,2025-04-25,351.699883,25.901006,27.015851,73.009990,64.459078,68.541742,314.595922,246.054180,78.212768,...,,,,,,,,,,


In [137]:
# Export the combined, sorted echo table to an Excel workbook; the path is
# relative, so the file lands in the notebook's working directory.
df.to_excel(excel_writer="InVivo2_echo_processed.xlsx")