In [1]:
!pip install pylidc

Defaulting to user installation because normal site-packages is not writeable
Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com
Collecting pylidc
  Downloading pylidc-0.2.3-py2.py3-none-any.whl.metadata (1.2 kB)
Collecting sqlalchemy>=1.1.5 (from pylidc)
  Downloading sqlalchemy-2.0.42-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (9.6 kB)
Collecting greenlet>=1 (from sqlalchemy>=1.1.5->pylidc)
  Downloading greenlet-3.2.3-cp310-cp310-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl.metadata (4.1 kB)
Downloading pylidc-0.2.3-py2.py3-none-any.whl (5.6 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m5.6/5.6 MB[0m [31m5.9 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hDownloading sqlalchemy-2.0.42-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.2/3.2 MB[0m [31m5.6 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0mm
[?25hDownloading gree

# Obtain LIDC Semantic Features and Nodule Coord

In [11]:
import pandas as pd
import os
import pylidc as pl
from tqdm import tqdm
import numpy as np
np.int = int 

In [12]:
def get_anno(patient_id):
    anno_df = []
    keys_to_include = ['id','scan_id','_nodule_id', 'internalStructure', 
                              'sphericity', 'lobulation', 'texture', 'subtlety', 
                              'calcification', 'margin', 'spiculation', 'malignancy']
    scans = pl.query(pl.Scan).filter(pl.Scan.patient_id == patient_id).all()
    for scan in scans:
        annotations = scan.cluster_annotations()
        for anno_id , annos in enumerate(annotations):
            for anno in annos:
                anno_dict = {key: anno.__dict__[key] for key in keys_to_include}
                anno_dict = pd.DataFrame(anno_dict, index = [0])
                anno_dict['diameter'] = anno.diameter
                anno_dict['pid'] = patient_id
                anno_dict[['x','y','z']] = anno.centroid
                anno_dict['study_instance_uid'] = scan.study_instance_uid
                anno_dict['series_instance_uid'] = scan.series_instance_uid
                anno_dict['nodule_id'] = anno_id
                anno_df.append(anno_dict)
    return anno_df

In [14]:
cases = ['LIDC-IDRI-0001','LIDC-IDRI-0002']
r = [get_anno(c) for c in cases]

In [21]:
# aggregate annotation from radiologists
anno_df = pd.concat([pd.concat(r_i) for r_i in r if r_i]).reset_index(drop = True).drop_duplicates()
keys = ['sphericity','lobulation', 'texture',  'margin', 'spiculation', 'malignancy','diameter','x','y','z']
agg_dict = {i: 'median' for i in keys}
agg_dict['internalStructure'] = pd.Series.mode
agg_dict['calcification'] = pd.Series.mode
anno_df_coord = anno_df.groupby(['pid','nodule_id']).agg(agg_dict).reset_index()
anno_df_coord

Unnamed: 0,pid,nodule_id,sphericity,lobulation,texture,margin,spiculation,malignancy,diameter,x,y,z,internalStructure,calcification
0,LIDC-IDRI-0001,0,3.5,3.0,5.0,3.5,4.5,5.0,32.69729,366.729124,316.205336,89.639908,1,6
1,LIDC-IDRI-0002,0,4.0,1.0,1.5,1.5,1.0,4.5,30.781671,361.162287,345.222282,185.044542,1,6


# Map to UCLA Semantic Features

In [29]:
cyst_like_spaces = []
for i in anno_df_coord.internalStructure:
    if isinstance(i,int):
        if i == 4:
            cyst_like_spaces.append('Present')
        else:
            cyst_like_spaces.append('Absent')
    else:
        if '4' in i:
            cyst_like_spaces.append('Present')
        else:
            cyst_like_spaces.append('Absent')
anno_df_coord['cyst_like_spaces'] = cyst_like_spaces

In [30]:
eccentric_calcification = []
for i in anno_df_coord.calcification:
    if isinstance(i,np.int):
        if i == 4:
            eccentric_calcification.append('Present')
        else:
            eccentric_calcification.append('Absent')
    else:
        if '4' in i:
            eccentric_calcification.append('Present')
        else:
            eccentric_calcification.append('Absent')
anno_df_coord['eccentric_calcification'] = eccentric_calcification

In [32]:
anno_df_coord['nodule_shape'] = ['Round' if i>3 else 'Ovoid' for  i in anno_df_coord.sphericity]
anno_df_coord['nodule_margin_conspicuity'] = ['Well marginated' if i>=3 else 'Poorly marginated' for  i in anno_df_coord.margin]
anno_df_coord['nodule_margins'] = np.nan
anno_df_coord.loc[anno_df_coord.lobulation>=3 , 'nodule_margins'] = 'Lobulated'
anno_df_coord['additional_nodule_margins'] = np.nan
anno_df_coord.loc[anno_df_coord.spiculation>=3 , 'additional_nodule_margins'] = 'Spiculated'
anno_df_coord['nodule_consistency'] = 'Part-solid'
anno_df_coord.loc[anno_df_coord.texture<2 , 'nodule_consistency'] = 'Pure ground glass'
anno_df_coord.loc[anno_df_coord.texture>4 , 'nodule_consistency'] = 'Solid'

In [33]:
anno_df_coord

Unnamed: 0,pid,nodule_id,sphericity,lobulation,texture,margin,spiculation,malignancy,diameter,x,...,z,internalStructure,calcification,cyst_like_spaces,eccentric_calcification,nodule_shape,nodule_margin_conspicuity,nodule_margins,additional_nodule_margins,nodule_consistency
0,LIDC-IDRI-0001,0,3.5,3.0,5.0,3.5,4.5,5.0,32.69729,366.729124,...,89.639908,1,6,Absent,Absent,Round,Well marginated,Lobulated,Spiculated,Solid
1,LIDC-IDRI-0002,0,4.0,1.0,1.5,1.5,1.0,4.5,30.781671,361.162287,...,185.044542,1,6,Absent,Absent,Round,Poorly marginated,,,Pure ground glass
