In [None]:
#libraries
import pandas as pd

import numpy as np
import datetime as dt
# Show every row when a DataFrame is displayed (no truncation of long outputs).
pd.options.display.max_rows = None

In [None]:
#read csv
def _read_dataset(file_name):
    """Load one CSV file from the shared ../datasets folder into a DataFrame."""
    return pd.read_csv('../datasets/' + file_name)


# One DataFrame per input file; variable names mirror the file names.
cohort_filter = _read_dataset('COHORT_FILTER.csv')
concept = _read_dataset('CONCEPT.csv')
condition_occurrence = _read_dataset('CONDITION_OCCURRENCE.csv')
drug_exposure = _read_dataset('DRUG_EXPOSURE.csv')
person = _read_dataset('PERSON.csv')
hierarchy = _read_dataset('hierarchy.csv')
props = _read_dataset('hemonc_component_properties.csv')

In [None]:
#list of valid drug categories from Ivy from RxNorm/HemOnc
# Each entry is matched exactly against props['component_class_name'].
# NOTE(review): 'Aromatase inhibitorsthird generation' and 'Human DNA synthesisinhibitor'
# look like they are missing a space -- confirm against the values actually present in
# props['component_class_name'] before "fixing" them, since matching is exact.
sact = [
    'Alkylating agent', 'Anti-CD38 antibody', 'Anti-CTLA-4 antibody',
    'Anti-TACSTD2 antibody-drug conjugate', 'Anthracycline', 'Antiandrogen', 'Antifolate',
    'Antimetabolite', 'Antitumor antibiotic', 'Anti-CD52 antibody', 'Anti-CD20 antibody',
    'Anti-EGFR antibody', 'Anti-HER2 antibody', 'Anti-PD-1 antibody',
    'Anti-PD-L1 antibody', 'Anti-RANKL antibody', 'Anti-SLAMF7 antibody', 'Anti-VEGF antibody',
    'Aromatase inhibitor', 'Aromatase inhibitorsthird generation',
    'Biosimilar', 'BRAF inhibitor', 'DNA methyltransferase inhibitor', 'Deoxycytidine analog',
    'EGFR inhibitor', 'ERBB 2 inhibitor', 'Estrogen receptor inhibitor',
    'Folic acid analog', 'Fluoropyrimidine', 'GnRH agonist', 'HDAC inhibitor',
    'Human DNA synthesisinhibitor', 'Microtubule inhibitor', 'MTOR inhibitor',
    'Nitrogen mustard', 'Nitrosourea', 'Neutral', 'PARP inhibitor', 'PARP1 inhibitor',
    'PARP2 inhibitor', 'Phenothiazine', 'Platinum agent', 'Proteasome inhibitor',
    'Purine analog', 'Pyrimidine analog', 'RANK ligand inhibitor',
    'Selective estrogen receptor modulator', 'Somatostatin analog', 'T-cell activator',
    'Targeted therapeutic', 'Taxane', 'Topoisomerase I inhibitor', 'Topoisomerase II inhibitor',
    'Triazene', 'Vinca alkaloid', 'Xanthine oxidase inhibitor',
    'WHO Essential Cancer Medicine',
]

# Keep only the component properties whose class is in the list above ...
props = props[props['component_class_name'].isin(sact)]
# ... and collect the concept ids those rows point to (column concept_id_2).
antican = props['concept_id_2']
# Restrict drug exposures to those concepts. The explicit copy makes the result an
# independent frame, so later column assignments on it do not raise
# SettingWithCopyWarning or depend on view-vs-copy behaviour.
drug_exposure = drug_exposure[drug_exposure['drug_concept_id'].isin(antican)].copy()

In [None]:
#concept lookup in concept df
# Maps each concept_id to its concept_name; if an id occurs more than once the
# last row wins.
concept_lookup = dict(zip(concept['concept_id'], concept['concept_name']))

In [None]:
#add labels
def make_labels(df, lookup=None):
    """Return a copy of ``df`` with concept ids replaced by readable labels.

    For every column of ``df``:

    * if its name contains ``'concept_id'``, a new column is appended whose name
      has ``'_id'`` replaced by ``'_label'`` and whose values are the ids mapped
      through ``lookup`` (ids missing from ``lookup`` become NaN);
    * the column itself is then dropped if its name contains ``'concept_id'`` or
      ``'source'``, or if it holds no non-null value at all.

    Parameters
    ----------
    df : pandas.DataFrame
        Table to label. It is never modified; all work happens on a copy.
    lookup : mapping, optional
        Maps concept id -> label. Defaults to the notebook-level
        ``concept_lookup`` dictionary.

    Returns
    -------
    pandas.DataFrame
        The labelled table, with new label columns appended after the
        remaining original columns.
    """
    if lookup is None:
        lookup = concept_lookup

    # Work on a copy: assigning label columns directly on the argument would leak
    # a new column into the caller's frame (and warn when that frame is a slice).
    labelled = df.copy()
    for column in df.columns:
        holds_concept_id = 'concept_id' in column
        if holds_concept_id:
            labelled[column.replace('_id', '_label')] = labelled[column].map(lookup)
        if holds_concept_id or 'source' in column or labelled[column].isna().all():
            labelled = labelled.drop(columns=column)
    return labelled

In [None]:
#label the following dfs
(
    cohort_filter_labelled,
    condition_occurrence_labelled,
    drug_exposure_labelled,
    person_labelled,
) = (
    make_labels(table)
    for table in (cohort_filter, condition_occurrence, drug_exposure, person)
)

# Drop exposures whose label is on the exclusion list.
exclusions = ['dexamethasone']
is_excluded = drug_exposure_labelled['drug_concept_label'].isin(exclusions)
drug_exposure_labelled = drug_exposure_labelled[~is_excluded]

In [None]:
# NOTE: everything below is a bare triple-quoted string, not executable code, so
# none of it runs. It holds a disabled cohort filter that would keep only
# condition_occurrence rows with condition_concept_id 44500236 and collect the
# matching person ids into glottis_patients.
'''# filter only by occurrences of Squamous cell carcinoma, NOS, of glottis
glottis = condition_occurrence[condition_occurrence.condition_concept_id==44500236]
# patient IDs matching this occurrence
glottis_patients = glottis.person_id.tolist()'''

In [None]:
# NOTE: bare triple-quoted string, so nothing here executes. The disabled code would
# restrict drug_exposure_labelled to the ids in glottis_patients, a name that is only
# assigned inside the (equally disabled) string of the previous cell.
'''# mask the drug exposures only by people matching the condition
mask = drug_exposure_labelled['person_id'].isin(glottis_patients)
masked = drug_exposure_labelled[mask]'''

In [None]:
#reduce DF down to relevant variables for the visualization
# Missing values become the literal 'N/A' and exact duplicate rows are removed.
# The explicit .copy() yields an independent frame, so the column assignment
# further down cannot trigger SettingWithCopyWarning.
small = (
    drug_exposure_labelled[['person_id', 'drug_exposure_start_datetime', 'drug_concept_label']]
    .fillna('N/A')
    .drop_duplicates()
    .copy()
)

# Sort by drug label first so drugs given at the same time are always joined in
# the same alphabetical order (never both "A & B" and "B & A").
small_sorted = small.sort_values('drug_concept_label')

# Every row of a (person, start datetime) group receives the group's combined
# label; the result is aligned back onto `small` through the row index.
small['drug_concept_label'] = (
    small_sorted
    .groupby(['person_id', 'drug_exposure_start_datetime'])['drug_concept_label']
    .transform(lambda labels: ' & '.join(labels))
)

# Rows of one group are now identical, so this leaves one row per group.
small_nodup = small.drop_duplicates().copy()
# Put each drug of a combination on its own line in the plot labels.
# regex=False: '& ' is meant literally.
small_nodup['drug_concept_label'] = small_nodup['drug_concept_label'].str.replace(
    '& ', '&<br>', regex=False
)

In [None]:
# add new variable for every new drug administration per person
# Integer series of zeros sharing small_nodup's index; the real per-patient
# counts are filled in by the following cell.
readministrations = pd.Series(0, index=small_nodup.index, dtype=int)

In [None]:
# Number every patient's drug administrations in chronological order (0, 1, 2, ...).
#
# The counter restarts at 0 whenever the gap to the patient's previous
# administration is MAX_GAP_DAYS or longer. 6000 days is roughly 16 years, so in
# practice the counter simply runs on. Keep it that way unless the pivot below is
# changed too: DataFrame.pivot raises when a patient has two rows with the same
# 'readministration' value.
MAX_GAP_DAYS = 6000

start_times = pd.to_datetime(
    small_nodup['drug_exposure_start_datetime'], format='%Y-%m-%d %H:%M:%S'
)
# Chronological view of all administrations; grouping by person below preserves
# this order inside each patient.
timeline = pd.DataFrame(
    {'person_id': small_nodup['person_id'], 'start': start_times}
).sort_values('start', kind='mergesort')

# Time elapsed since the same patient's previous administration (NaT for the first).
gaps = timeline.groupby('person_id', sort=False)['start'].diff()
# NaT < Timedelta evaluates to False, so a patient's first row always opens a run.
opens_new_run = ~(gaps < pd.Timedelta(days=MAX_GAP_DAYS))
# Running number of the current run within each patient.
run_number = (
    opens_new_run.astype(int)
    .groupby(timeline['person_id'], sort=False)
    .cumsum()
    .rename('run')
)
# Position of each administration inside its run = the readministration counter.
readministrations = timeline.groupby(['person_id', run_number], sort=False).cumcount()

# Assignment aligns on the row index, so the time-sorted order does not matter here.
small_nodup['readministration'] = readministrations

In [None]:
#pivot the DF from long to wide: one row per person, one column per administration number
pivoted = small_nodup.pivot(index='person_id', columns='readministration', values='drug_concept_label').reset_index()
# add the prefix 'drug' to every column name (0 -> 'drug0', 1 -> 'drug1', ...)
prefixed = pivoted.add_prefix('drug')
# the prefix was also applied to person_id, so restore that name
renamed = prefixed.rename(columns={"drugperson_id": "person_id"})
# fill all empty cells with a single space " "; the plotting cells look for this
# exact value to make unused steps transparent
fillednones = renamed.fillna(" ")

In [None]:
#add a value of 1 to all data points for sums in the visualization
# Each person row counts once when the charts aggregate the 'value' column.
fillednones = fillednones.assign(value=1)
fillednones.head()

In [None]:
import plotly.express as px
import nbformat
#wrap text:
import textwrap

def customwrap(s, width=30):
    """Wrap ``s`` to lines of at most ``width`` characters, joined by ``<br>``.

    Whitespace-only input yields an empty string.
    """
    # textwrap.fill joins the wrapped lines with "\n"; swap that for an HTML break.
    return textwrap.fill(s, width=width).replace("\n", "<br>")

df = fillednones

# Icicle chart of treatment sequences. Only the first six administration steps
# (drug0 .. drug5) are drawn, below a constant root node 'All'.
fig = px.icicle(
    df,
    path=[px.Constant('All'), 'drug0', 'drug1', 'drug2', 'drug3', 'drug4', 'drug5'],
    values='value',
    color='drug0',
    branchvalues='remainder',
)
fig.update_layout(margin=dict(t=0, l=0, r=0, b=0), hovermode=False)

#set marker colors whose labels are blank to transparent
# Unused steps are stored as " " (a single space) in `df`. Comparing against the
# empty string "" never matched, so whitespace is stripped before the test.
marker_colors = list(fig.data[0].marker['colors'])
marker_labels = list(fig.data[0]['labels'])
marker_colors = [
    "rgba(0,0,0,0)" if str(label).strip() == "" else color
    for color, label in zip(marker_colors, marker_labels)
]

fig.data[0].marker['colors'] = marker_colors
fig.show()

In [None]:
import plotly.express as px
import nbformat
#wrap text:
import textwrap

def customwrap(s, width=30):
    """Break ``s`` into chunks of at most ``width`` characters separated by ``<br>``.

    Same contract as the helper of the same name defined in an earlier cell;
    whitespace-only input yields an empty string.
    """
    wrapped_lines = textwrap.TextWrapper(width=width).wrap(s)
    return "<br>".join(wrapped_lines)

df = fillednones

# Icicle chart over the first six administration steps, rooted at a constant
# 'All' node; parent sizes are taken as totals of their children.
icicle_path = [px.Constant('All')] + ['drug0', 'drug1', 'drug2', 'drug3', 'drug4', 'drug5']
fig = px.icicle(
    df,
    path=icicle_path,
    values='value',
    color='drug0',
    branchvalues='total',
)
fig.update_layout(hovermode=False, margin=dict(t=0, l=0, r=0, b=0))

#set marker colors whose labels are " " to transparent
trace = fig.data[0]
marker_colors = list(trace.marker['colors'])
marker_labels = list(trace['labels'])
new_marker_colors = [
    color if label != " " else "rgba(0,0,0,0)"
    for color, label in zip(marker_colors, marker_labels)
]
marker_colors = new_marker_colors

trace.marker['colors'] = marker_colors
fig.show()

In [None]:
# NOTE: this whole cell is a single string expression and executes no code. It is
# made of three triple-quoted literals written back to back (a closing ''' directly
# followed by an opening ''' ), which Python concatenates into one string.
# The disabled code would wrap the drug0..drug5 labels with customwrap, draw a
# px.sunburst with branchvalues='remainder', blank out markers whose label is ""
# and write the figure to plot.html.
'''
def customwrap(s, width=30):
    return "<br>".join(textwrap.wrap(s,width=width))

''''''df = fillednones

d0=df.drug0.map(customwrap)
d1=df.drug1.map(customwrap)
d2=df.drug2.map(customwrap)
d3=df.drug3.map(customwrap)
d4=df.drug4.map(customwrap)
d5=df.drug5.map(customwrap)
myvalue=df.value

df2 = pd.DataFrame(dict(drug0=d0, drug1=d1, drug2=d2, drug3=d3, drug4=d4, drug5=d5, value=myvalue))''''''

fig = px.sunburst(df, path=['drug0', 'drug1', 'drug2', 'drug3', 'drug4', 'drug5'], values='value', color='drug0', branchvalues='remainder')
fig.update_layout(hovermode=False)

#set marker colors whose labels are " " to transparent
marker_colors=list(fig.data[0].marker['colors'])
marker_labels=list(fig.data[0]['labels'])
new_marker_colors=["rgba(0,0,0,0)" if label=="" else color for (color, label) in zip(marker_colors, marker_labels)]
marker_colors=new_marker_colors

fig.data[0].marker['colors'] = marker_colors
fig.show()
fig.write_html("plot.html")
'''

In [None]:
df = fillednones

# Sunburst over eleven administration steps (drug0 .. drug10) under a constant
# 'All' root. The step columns are listed explicitly, so all eleven must exist in df.
sunburst_path = [px.Constant('All')] + ['drug' + str(step) for step in range(11)]
fig = px.sunburst(df, path=sunburst_path, values='value', color='drug0')
#fig.update_layout(hovermode=False)

#set marker colors whose labels are " " to transparent
trace = fig.data[0]
marker_colors = list(trace.marker['colors'])
marker_labels = list(trace['labels'])
new_marker_colors = [
    color if label != " " else "rgba(0,0,0,0)"
    for color, label in zip(marker_colors, marker_labels)
]
marker_colors = new_marker_colors

trace.marker['colors'] = marker_colors
# Allow zooming with the scroll wheel in the rendered figure.
config = {'scrollZoom': True}
fig.show(config=config)
# Also save a standalone HTML copy next to the notebook.
fig.write_html("sbplot.html")

In [None]:
# Preview the first five rows of the wide table used for plotting.
df.head(n=5)