In [23]:
import pandas as pd
import numpy as np

In [24]:
def select_last_flytos(image_frames_info, sweepid_column='uuid', tag_column = "tag"):
    ## Select last frame from each flyto sweep -- WE'RE ASSUMING COMPLETE SWEEPS!!!
    # idx = image_frames_info[tag_column].isin(['BPD', 'TCD', 'AC', 'FL', 'CRL', 'HC'])
    """
    BPD: Biparietal Diameter
    TCD: Transcranial doppler
    AC:  Abdominal Circumference
    FL:  Femur Length
    CRL: Crown-Rump Length
    HC:  Head Circumference
    """
    # idx = image_frames_info[tag_column].isin(['FL']) # around 15k 
    idx = image_frames_info[tag_column].isin(['FL', 'AC']) # around 15k 
    # idx = image_frames_info[tag_column].isin(['BPD', 'HC']) #around 16k
    # idx = image_frames_info[tag_column].isin(['AC']) # around 15k
    

    if idx.sum() == 0:
        print("!!WARNING: file has no flytos!!")
        return pd.DataFrame()
    else:
        image_frames_info_flyto_tags = image_frames_info[idx].copy()

        ## Determine frame number from image path
        image_frames_info_flyto_tags['frame_num'] = image_frames_info_flyto_tags['img_path'].map(lambda x: x.split('/')[-1].split('.')[0]).astype(int)

        u = image_frames_info_flyto_tags.groupby(sweepid_column).apply(lambda df: df.frame_num == df.frame_num.max() )
        assert u.shape[0] > 0

        last_flyto_idx = u.reset_index().set_index('level_1').frame_num
        assert set(image_frames_info_flyto_tags.index) == set(last_flyto_idx.index)

        return image_frames_info_flyto_tags[last_flyto_idx]

In [25]:
myCsv = pd.read_parquet("/mnt/raid/C1_ML_Analysis/CSV_files/extract_frames_blind_sweeps_c1_30082022_wscores_1e-4_train.parquet")

In [26]:
print(myCsv.head())

                                            img_path  \
0  extract_frames_blind_sweeps/dataset_C1_cines_m...   
1  extract_frames_blind_sweeps/dataset_C1_cines_m...   
2  extract_frames_blind_sweeps/dataset_C1_cines_m...   
3  extract_frames_blind_sweeps/dataset_C1_cines_m...   
4  extract_frames_blind_sweeps/dataset_C1_cines_m...   

                                   uuid    study_id fetal_presentation_str  \
0  1b5c4639-6f1e-45ef-8b7a-ddd6d5fc8d84  VIL-0495-1               Cephalic   
1  1b5c4639-6f1e-45ef-8b7a-ddd6d5fc8d84  VIL-0495-1               Cephalic   
2  1b5c4639-6f1e-45ef-8b7a-ddd6d5fc8d84  VIL-0495-1               Cephalic   
3  1b5c4639-6f1e-45ef-8b7a-ddd6d5fc8d84  VIL-0495-1               Cephalic   
4  1b5c4639-6f1e-45ef-8b7a-ddd6d5fc8d84  VIL-0495-1               Cephalic   

   ga_boe  ga_expert famli_edd_source famli_edd_source_detail  dvp_crf  \
0   132.0  130.96153         UNC EPIC   LAST MENSTRUAL PERIOD      8.0   
1   132.0  130.96153         UNC EPIC   LAST M

In [27]:
print(myCsv.columns)
print(myCsv.shape)

Index(['img_path', 'uuid', 'study_id', 'fetal_presentation_str', 'ga_boe',
       'ga_expert', 'famli_edd_source', 'famli_edd_source_detail', 'dvp_crf',
       'fetal_presentation', 'ga_absdiff', 'is_anchor', 'tag', 'pidscan',
       'us_plac', 'us_previa', 'placenta_str', 'previa_str', 'Manufacturer',
       'ManufacturerModelName', 'uuid_path_256', 'score', 'pred'],
      dtype='object')
(23804618, 23)


In [28]:
print(myCsv["tag"])

0           AC
1           AC
2           AC
3           AC
4           AC
            ..
23804613    NM
23804614    NM
23804615    NM
23804616    NM
23804617    NM
Name: tag, Length: 23804618, dtype: object


In [29]:
myList = select_last_flytos(myCsv, sweepid_column='uuid', tag_column = "tag")

  return image_frames_info_flyto_tags[last_flyto_idx]


In [30]:
print(myList.shape)
# print(myList.head)
print(myList.columns)

(31125, 24)
Index(['img_path', 'uuid', 'study_id', 'fetal_presentation_str', 'ga_boe',
       'ga_expert', 'famli_edd_source', 'famli_edd_source_detail', 'dvp_crf',
       'fetal_presentation', 'ga_absdiff', 'is_anchor', 'tag', 'pidscan',
       'us_plac', 'us_previa', 'placenta_str', 'previa_str', 'Manufacturer',
       'ManufacturerModelName', 'uuid_path_256', 'score', 'pred', 'frame_num'],
      dtype='object')


In [31]:
myList["No structures visible"] = 0
myList["Head Visible"] = 0
myList["Abdomen Visible"] = np.where(myList["tag"]=="AC", 1, 0)
myList["Femur Visible"] = np.where(myList["tag"]=="FL", 1, 0)
myList["Placenta visible"] = 0

In [32]:
print(myList.shape)

(31125, 29)


In [34]:
print(myList["Abdomen Visible"].value_counts())
print(myList["Femur Visible"].value_counts())

Abdomen Visible
1    15898
0    15227
Name: count, dtype: int64
Femur Visible
0    15898
1    15227
Name: count, dtype: int64


In [39]:
filtered_df_abdomen = myList[myList["Abdomen Visible"] == 1]
filtered_df_femur = myList[myList["Femur Visible"] == 1]
sampledAbdomen = filtered_df_abdomen.sample(n=1000)
sampledFemur = filtered_df_femur.sample(n=2000)

In [40]:
print(sampledAbdomen.shape)

(1000, 29)


In [41]:
dualMerged = pd.concat([sampledAbdomen, sampledFemur], axis=0)

In [42]:
print(dualMerged.shape)

(3000, 29)


In [43]:
dualMerged.to_csv("/mnt/raid/home/ayrisbud/us-famli-pl/src/abdomenFemur.csv", index=False)