In [1]:
# Imports
%load_ext autoreload
%autoreload 2 
# !apt-get install -y xvfb
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
import torch
from tqdm.notebook import tqdm

## Load file with Weibel generation

In [None]:
# Read the e5 airway table. The file loaded here is the original export with
# the Weibel generation column already added.
# Plain export without the Weibel column (swap in if needed):
# orig_df = pd.read_csv(os.path.abspath("/home/sneha/e5lungairwaysvida_20140211.csv"))
orig_df = pd.read_csv(
    filepath_or_buffer=os.path.abspath('/home/sneha/e5lungairwaysvida_20140211_weibel.csv')
)

## Remove scan problems

In [None]:
# REMOVE SCAN PROBLEM
# A participant is dropped entirely as soon as any one of their rows carries a
# non-zero scan_problem value.
# NOTE(review): `!= 0` is also True for NaN, so participants with a missing
# scan_problem value are removed as well -- confirm that this is intended.
ids_w_scan_problem = orig_df.loc[orig_df.scan_problem != 0, 'idno'].unique()
print(f"There are {len(ids_w_scan_problem)} ids with scan problems --> remove them")
orig_df = orig_df.loc[~orig_df.idno.isin(ids_w_scan_problem)]
print(f"There are {orig_df.idno.nunique()} remaining participants")




## Remove nans in startbpid / endbpid

In [None]:
# REMOVE NANS in START + ENDBPID
# Rebind the name instead of using dropna(inplace=True): orig_df was produced
# by a .loc filter, and rebinding keeps every cleaning step as
# "new frame from old frame" rather than mutating a derived frame in place.
orig_df = orig_df.dropna(subset=['startbpid', 'endbpid'])
print(f"There are {orig_df.idno.nunique()} remaining participants after dropping NaNs in start/endbpid")
# Remaining null count per column (shown as the cell output)
orig_df.isnull().sum()

## Remove unnecessary columns

In [None]:
# Restrict the table to the columns listed below; every other column is dropped.
useful_cols = [
    'idno',
    'anatomicalname',
    'centerlinelength',
    'avginnerarea',
    'lobe',
    'sublobe',
    'endbpid',
    'startbpid',
    'angle',
    'dircosx',
    'dircosy',
    'dircosz',
    'weibel_generation',
]

orig_df = orig_df.loc[:, useful_cols]




## QC paths only

In [None]:
# KEEP ONLY QC PATHS
# A name counts as QC'd when it is a real label: a string that does not contain
# 'unnamed' (case-insensitive) and is not the '-' placeholder. Non-string
# entries (e.g. NaN) are skipped instead of crashing on .lower().
qc_names = [
    x for x in orig_df.anatomicalname.unique()
    if isinstance(x, str) and 'unnamed' not in x.lower() and x != '-'
]
print(qc_names)

# Single boolean assignment: True for rows whose name is in qc_names, False otherwise.
orig_df['QC'] = orig_df.anatomicalname.isin(qc_names)
display(orig_df)

def _travel_two(df, extend_names=['LB1', 'LB10', 'RB1', 'RB4', 'RB10'], extend_gen=2):
    """Collect the endbpids of the named segments and of their descendants.

    df: tree rows of one participant; reads the columns anatomicalname,
        startbpid, endbpid and QC.
    extend_names: anatomical names whose segments seed the walk (the default
        list is only read, never mutated).
    extend_gen: how many generations below the seed segments to include.

    Returns a list of unique endbpids: the seed segments first, then each
    further generation in the order it was discovered.
    """
    print('start',df.QC.sum())
    startbpids = df.loc[df.anatomicalname.isin(extend_names), 'endbpid'].to_list()
    print(f"Extending {extend_gen} generations from {extend_names} which have segment ids = {startbpids}")

    # dict.fromkeys de-duplicates while preserving first-seen order.
    qc_children = list(dict.fromkeys(startbpids))
    seen = set(qc_children)
    frontier = list(qc_children)
    for _ in range(extend_gen):
        # Only the newest generation can contribute children that are not yet
        # collected; re-querying older generations would append them again.
        child_ids = df.loc[df.startbpid.isin(frontier), 'endbpid'].to_list()
        frontier = [bpid for bpid in dict.fromkeys(child_ids) if bpid not in seen]
        if not frontier:
            break
        seen.update(frontier)
        qc_children.extend(frontier)
    print('end', len(qc_children))
    return qc_children
    
          
          
# For every participant, also flag as QC the segments up to two generations
# below the listed named segments.
for idno, participant_df in tqdm(orig_df.groupby('idno')):
    print(participant_df.idno.unique())
    qc_children = _travel_two(participant_df,extend_names=['LB1', 'LB10', 'RB1', 'RB4', 'RB10'], extend_gen=2)
    # Use the groupby key directly: int() on the 1-element array returned by
    # .unique() is a deprecated array-to-scalar conversion in recent NumPy.
    orig_df.loc[(orig_df.idno == idno) & (orig_df.endbpid.isin(qc_children)), 'QC'] = True

display(orig_df)


## Remove all orphans

In [None]:
def _remove_orphans(df):
    """Return a boolean keep-mask marking the rows that are NOT orphans.

    Despite the name, nothing is removed here. A row is kept (True) when its
    startbpid equals the endbpid of some row of the same idno, or when its
    startbpid is -1. Everything else is an orphan (False).

    df: frame with the columns idno, startbpid and endbpid.

    Returns a numpy bool array with one entry per row of df, in df's row
    order. Rows that belong to no group (e.g. NaN idno, which groupby skips)
    stay False.
    """
    orphan_check = np.zeros(len(df), dtype=bool)
    startbpid = df.startbpid.to_numpy()
    endbpid = df.endbpid.to_numpy()
    # .indices maps each idno to the positional row numbers of its group, so
    # results are written back to the right rows even when df is not sorted by
    # idno (filling the mask by running offset assumed it was).
    row_positions = df.groupby('idno').indices
    for name, rows in tqdm(row_positions.items(), desc='Iterating ids'):
        group_start = startbpid[rows]
        orphan_check[rows] = np.isin(group_start, endbpid[rows]) | (group_start == -1)
    return orphan_check

In [None]:
def remove_all_orphans(orig_df):
    """Drop orphan rows repeatedly until a pass finds none.

    Removing a row can orphan the rows that hung off it, so the check is
    re-run on the shrunken frame after every removal.

    orig_df: frame accepted by _remove_orphans.

    Returns the frame with all orphan rows filtered out (the input object
    itself when the first pass finds no orphans).
    """
    iteration = 0
    while True:
        iteration += 1

        keep_mask = _remove_orphans(orig_df)
        num_orphan = (keep_mask == 0).sum()

        if num_orphan == 0:
            print(f"Iter {iteration}: THERE ARE NO ORPHANS")
            return orig_df

        print(f"Iter {iteration}:There are {num_orphan} orphans, REMOVING THEM")
        orig_df = orig_df.loc[keep_mask]
        print(f"Df length {len(orig_df)}")


# Run the orphan removal on the cleaned table; the result is stored under a
# new name, so orig_df itself is left bound to the unfiltered frame.
df_no_orphans = remove_all_orphans(orig_df)
    


## Add coords of tree

In [None]:
# ADD COorodinates

def _get_coords(single_test, start_x=0, start_y=0,start_z=0, start_loc=-1, x_sf=1, y_sf=1, z_sf=1):
    '''
    Compute the end-point coordinates of every segment of one participant's tree.

    Each segment adds a displacement of centerlinelength * dircos{x,y,z},
    divided by the matching scale factor, to the end point of its parent
    segment (the row whose endbpid equals this row's startbpid). Segments
    whose startbpid equals start_loc start from (start_x, start_y, start_z).

    single_test: single participant tree dataframe; reads the columns idno,
        startbpid, endbpid, centerlinelength, dircosx, dircosy, dircosz
    start_x/y/z: coordinates of the point the root segment(s) start from
    start_loc: startbpid value that marks a root segment (per the original
        notes: 1 = carina, -1 = top of trachea)
    x/y/z_sf: scale factors based on voxel dimensions (mm); displacements are
        divided by them

    Returns single_test left-merged on endbpid with the new columns x, y, z.
    Rows may appear in any order; a parent does not have to precede its child.

    Raises AssertionError if the smallest startbpid is not start_loc, and
    ValueError if a segment cannot be reached from a root segment or if its
    parent endbpid is not unique.
    '''
    assert single_test.startbpid.min() == start_loc, 'The dataframe entered has rows before start_loc'

    # Per-segment displacement (dx, dy, dz), scaled by the voxel scale factors.
    # y is used as-is (no sign flip).
    length = single_test.centerlinelength
    deltas = np.column_stack([
        (length * single_test.dircosx / x_sf).values,
        (length * single_test.dircosy / y_sf).values,
        (length * single_test.dircosz / z_sf).values,
    ]).astype(float)
    endbpid = single_test.endbpid.values
    idno = single_test.idno.values
    end_ids = endbpid.tolist()
    start_ids = single_test.startbpid.tolist()

    # startbpid -> row numbers of the segments that begin there.
    rows_by_parent = {}
    for row, parent_id in enumerate(start_ids):
        rows_by_parent.setdefault(parent_id, []).append(row)

    # A duplicated endbpid is only a problem when something hangs off it,
    # because its children would then have no unique parent position.
    seen_ends = set()
    for end_id in end_ids:
        if end_id in seen_ends and end_id in rows_by_parent:
            raise ValueError(
                f"endbpid {end_id} appears more than once, so its children have no unique parent"
            )
        seen_ends.add(end_id)

    n_rows = len(end_ids)
    coords = np.zeros((n_rows, 3), dtype=float)
    placed = np.zeros(n_rows, dtype=bool)
    origin = np.array([start_x, start_y, start_z], dtype=float)

    # Walk the tree from the root segment(s) downwards so that every parent is
    # positioned before its children, independent of the row order.
    to_visit = []
    for row in rows_by_parent.get(start_loc, []):
        coords[row] = deltas[row] + origin
        placed[row] = True
        to_visit.append(row)
    while to_visit:
        parent_row = to_visit.pop()
        for row in rows_by_parent.get(end_ids[parent_row], []):
            if placed[row]:
                continue
            coords[row] = deltas[row] + coords[parent_row]
            placed[row] = True
            to_visit.append(row)

    if not placed.all():
        unreachable = [end_ids[row] for row in np.flatnonzero(~placed)]
        raise ValueError(
            f"Segments with endbpid {unreachable} are not connected to a segment starting at {start_loc}"
        )

    # put into dataframe
    coords_df = pd.DataFrame({
        "idno": idno,
        "endbpid": endbpid,
        "x": coords[:, 0],
        "y": coords[:, 1],
        "z": coords[:, 2],
    })
    # NOTE(review): coords_df also carries idno while the merge key is endbpid
    # only, so the result holds idno_x / idno_y instead of idno. Kept as-is so
    # the returned columns do not change; consider merging on ['idno', 'endbpid'].
    return single_test.merge(coords_df, how="left", on='endbpid')




In [None]:
# Peek at the first participant's tree as a sanity check.
groups = df_no_orphans.groupby('idno')

# A GroupBy object is iterable but not an iterator, so next() needs iter()
# around it; calling next(groups) directly raises TypeError.
name, group = next(iter(groups))
print(name, group)


## Save file