In [1]:
import pandas as pd
import numpy as np
from Constants import *
import re
from glob import glob
from re import findall, match, sub
import json

In [16]:
class Const():
    """Notebook-local configuration: file locations and GTV label aliases.

    Kept inline for troubleshooting; intended to move to its own file.
    """

    # --- base directories -------------------------------------------------
    data_dir = "../data/"            # private data
    resource_dir = "../resources/"   # public data (can be on github)

    # --- private inputs (under data_dir) ----------------------------------
    mdasi_file = data_dir + "MDASI_72021.xlsx"
    camprt_dir = data_dir + "CAMPRT_Centroids/"

    # --- public resources (under resource_dir) ----------------------------
    organ_info_json = resource_dir + "OrganInfo.json"
    symptom_info_json = resource_dir + "symptoms.json"

    # Raw GTV label -> standardized label.
    tumor_aliases = {
        "GTV node": "GTVn",
        "GTV-N": "GTVn",
        "GTV_n": "GTVn",
        "GTVn1": "GTVn",
        "GTV primary": "GTVp",
        "GTV-P": "GTVp",
        "GTV_p": "GTVp",
        "GTV_P": "GTVp",
        "GTV P": "GTVp",
        "GTV nodes": "GTVn",
        "GTV-N1": "GTVn",
        "GTV_N1": "GTVn",
        "GTV N": "GTVn",
        # Only known to occur for 10144 and 10022; may need a more robust
        # solution later.
        "GTV-NR": "GTVn2",
        "GTV-NL": "GTVn3",
    }

In [5]:
def load_spatial_files(root = None):
    """Collect the spatial CSV files found one directory level below ``root``.

    Two file sets are gathered: ``*distances.csv`` (ROI-tumor distances) and
    ``*centroid*.csv`` (tumor centroids).  Each list is ordered by the largest
    integer that appears in the file's path relative to ``root``.

    Parameters
    ----------
    root : str, optional
        Directory to search.  Defaults to ``Const.camprt_dir``.  A trailing
        path separator is optional.

    Returns
    -------
    dict
        ``{'distances': [file_names], 'doses': [file_names]}``.  A list is
        empty when nothing matches, or when a matched path contains no digits
        to sort on.
    """
    import os  # local import: the notebook's import cell does not provide os

    root = Const.camprt_dir if root is None else root

    def sort_key(path):
        # Only the part below root is inspected, so digits in a caller-supplied
        # root directory cannot dominate the ordering.
        relative = os.path.relpath(path, root)
        return max(int(number) for number in findall("[0-9]+", relative))

    def find_sorted(pattern):
        # glob() is called without recursive=True, so '**' behaves like '*':
        # exactly one sub-directory level is searched.
        matches = glob(os.path.join(root, '**', pattern))
        try:
            return sorted(matches, key = sort_key)
        except ValueError:
            # max() of an empty sequence: some matched path has no digits.
            # Keep the best-effort fallback of returning nothing for this set.
            return []

    return {'distances': find_sorted('*distances.csv'),
            'doses': find_sorted('*centroid*.csv')}
# Index the spatial files once, using the default directory (Const.camprt_dir);
# the result is a dict with 'distances' and 'doses' file lists.
spatial_files = load_spatial_files()

In [23]:
class OrganData():
    """Organ metadata loaded from a JSON file.

    The parsed JSON is stored as ``self.organ_dict``.  It is used as a mapping
    of organ name -> entry, where each entry has an ``'alt_names'`` iterable.
    """

    def __init__(self, organ_info_json = None):
        """Load the organ-info JSON.

        Parameters
        ----------
        organ_info_json : str, optional
            Path to the JSON file.  Defaults to ``Const.organ_info_json``.
        """
        organ_info_json = Const.organ_info_json if organ_info_json is None else organ_info_json
        # Open the resolved path, so a caller-supplied file is actually honored.
        with open(organ_info_json) as f:
            self.organ_dict = json.load(f)

    def organ_rename_dict(self, skip_gtv = False):
        """Build a mapping from alternative organ names to the standard name.

        Parameters
        ----------
        skip_gtv : bool, optional
            When True, organs whose name contains 'gtv' (case-insensitive)
            are left out.

        Returns
        -------
        dict
            ``{alt_name: standard_name}`` for every entry in each organ's
            ``'alt_names'`` list.
        """
        adict = {}
        for oname, oentry in self.organ_dict.items():
            if skip_gtv and 'gtv' in oname.lower():
                continue
            # The alternative names do not include the _L/_R or Lt_/Rt_ parts,
            # so they should be passed as a subpattern to a regex; the
            # positional parts can be fixed later.
            for alt in oentry['alt_names']:
                adict[alt] = oname
        return adict
            
# Shared OrganData instance built with the default argument (organ_info_json=None).
od = OrganData()

In [10]:
def format_patient_distances(pdist_file):
    """Read one distances CSV and return it with standardized tumor labels.

    The file is parsed with ``pd.read_csv`` and the resulting frame is passed
    through ``fix_tumor_names``.
    """
    return fix_tumor_names(pd.read_csv(pdist_file))

def fix_tumor_names(df):
    """Standardize inconsistent GTV labels and return the new frame.

    Two passes are applied to the whole frame:
      1. values equal to a key of ``Const.tumor_aliases`` are replaced with
         the mapped label;
      2. any remaining text matching the regex ``GTV.*N`` is replaced with
         ``GTVn``, as a catch-all for cases with many nodal GTVs.
    """
    aliased = df.replace(Const.tumor_aliases)
    return aliased.replace(to_replace = r'GTV.*N', value = 'GTVn', regex = True)

# Preview: format the first listed distances file; as the cell's last
# expression, the resulting frame is displayed as the cell output.
# NOTE(review): indexing [0] assumes at least one distances file was found.
format_patient_distances(spatial_files['distances'][0])

Unnamed: 0,Reference ROI,Target ROI,Eucledian Distance (mm),Phi (degrees),Theta (degrees),% of Target Overlap,Eucledian Distance (mm) 5th Percentile
0,GTVn,Brainstem,39.13720,11.0494,-157.0113,0.000000,46.49440
1,GTVn,Cricoid_cartilage,17.75500,-3.1798,180.0000,0.000000,20.10820
2,GTVn,Cricopharyngeal_Muscle,20.85330,-8.5625,-162.4744,0.000000,23.45630
3,GTVn,Esophagus,36.54430,-20.1214,-151.5571,0.000000,43.17420
4,GTVn,Extended_Oral_Cavity,15.74660,0.0000,150.2551,0.000000,27.07940
5,GTVn,Genioglossus_M,20.10870,0.0000,150.9454,0.000000,25.69950
6,GTVn,Glottic_Area,32.21180,0.0000,165.9638,0.000000,32.39390
7,GTVn,Hard_Palate,58.87780,22.1673,135.9710,0.000000,62.11040
8,GTVn,Hyoid_bone,15.25380,12.4383,143.9726,0.000000,22.04700
9,GTVn,IPC,11.71880,0.0000,180.0000,0.000000,12.97590
