In [1]:
from IPython.display import display
from IPython.display import HTML
from IPython.display import Image
import IPython.core.display as di # Example: di.display_html('<h3>%s:</h3>' % str, raw=True)

# Override the notebook container width with a fixed 1600px.
container_css = "<style>.container { width:1600px !important; }</style>"
display(HTML(container_css))

# This line will hide code by default when the notebook is exported as HTML
#di.display_html('<script>jQuery(function() {if (jQuery("body.notebook_app").length == 0) { jQuery(".input_area").toggle(); jQuery(".prompt").toggle();}});</script>', raw=True)

# This line will add a button to toggle visibility of code blocks, for use with the HTML export version
#di.display_html('''<button onclick="jQuery('.input_area').toggle(); jQuery('.prompt').toggle();">Toggle code</button>''', raw=True)

<b><font size="+3">Abbvie - Atopic Dermatitis - Analytical Report</font></b>

# About Amplity Insights and the data

![Amplity%20Health%20-%20Data%201.png](attachment:Amplity%20Health%20-%20Data%201.png)

![Amplity%20Health%20-%20Data%202.png](attachment:Amplity%20Health%20-%20Data%202.png)

**Note:** The medical transcription (MT) records are sourced from private practices, clinics, hospitals, and academic institutions across the US.

**This data is NOT EMR or Claims.**

Each MT record relates to one unique patient. There may be multiple MT records pertaining to a unique patient, either from one individual medical provider or from multiple providers.

# Study Introduction

This analytical report covers **5 views**, **10 standard** variables and **9 custom** variables.

The view consists of the following criteria:

* **Inclusion criteria:**
    * Either of the following diagnoses:
        * A. Diagnosis of Atopic Dermatitis (including Besnier's Prurigo or flexural/intrinsic/infantile/atopic Eczema)
        * B. Diagnoses of: [Eczema/Pruritus/Rash] & [Asthma/Allergic rhinitis/Food allergy]*
    * 36 months lookback period
    * Any of the following medications:
        * Dupixent (dupilumab)
        * Rinvoq (upadacitinib)
        * Cibinqo (abrocitinib)
        * Adbry (tralokinumab-ldrm)
        * Opzelura (ruxolitinib)

**(at least one dx in each category)*

We were able to locate **373** patients who meet all the above criteria.

The variables created for the analysis of this view are the following:

* **Standard Variables:**

    * PatientID: anonymized 16+ digit identifier.
    * DocumentID: filename/identifier of the medical transcription.
    * ProviderID: anonymized provider identifier.
    * State: practice state of the provider *(where available)*.
    * Age: patient age during the visit *(where available)*.
    * DOB: date of birth *(where available)*.
    * Gender: gender of the patient *(where available)*.
    * Ethnicity: ethnicity of the patient *(where available)*.
    * Specialty: specialty of the provider that generated the medical transcription *(where available)*.
    * Date: anonymized date of service. This date is shifted up to -365 days consistently per patient. Any document belonging to a given patient is shifted by the same value. Specific date shift values are unknown to Amplity Insights.
    
* **Custom Variables:** these include categorical, numerical and boolean variables with specific characteristics that were extracted using NLP text mining.

    * Taking Tx: one out of 5 treatments being taken *(5 categories)*.
    * Dx Date: year of Atopic Dermatitis diagnosis.
    * Tx Pattern (Action): categorical. Any of the following treatment patterns associated with a drug:
        * "Start"
        * "Stop"
        * "Switch to"
        * "Switch from"
        * "Dose increase"
        * "Dose decrease"
        * "Continue"
    * Tx Pattern (Drug): categorical. Drug associated with the treatment pattern.
    * Comorbidities: categorical *(16 categories)*.
    * Symptoms: categorical *(11 categories)*.
    * Flares: boolean *("Y" or blank)*.
    * Sleep disturbance: boolean *("Y" or blank)*.
    * Concomitant Tx: categorical.
    * Concomitant Oral Contraceptives: boolean *("Y" or blank)*.

**Instructions:**

The below visualizations are all interactive and permit filtering, zooming, re-scaling, panning, etc. Hover over visualizations to see available tools, click and drag to zoom in for a closer view of specific bars.

In [2]:
# Libraries and Parameters
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import re
from matplotlib.cm import get_cmap
from matplotlib import cm
import ipywidgets as widgets

import plotly.io as pio
import plotly.express as px
import plotly.figure_factory as ff
import plotly.graph_objects as go
from plotly.subplots import make_subplots
# Use plotly's 'notebook' renderer for every fig.show() call.
pio.renderers.default = 'notebook'

# Silence all warnings for the rest of the session.
# NOTE(review): presumably done to keep the rendered report free of warning text;
# this also hides deprecation notices, so consider re-enabling while developing.
import warnings
warnings.filterwarnings('ignore')

# Default matplotlib figure size (width, height).
plt.rcParams.update({"figure.figsize": (20, 10)})

# Lift pandas' display limits: no cap on rows, columns or column width.
pd.options.display.max_rows = None
pd.options.display.max_columns = None
pd.options.display.max_colwidth = None
# Floats are shown with thousands separators and two decimals.
pd.set_option('display.float_format', '{:,.2f}'.format)

In [3]:
# Load the two CSV extracts with identical parsing rules: PatientID is read as
# text, and the literal string 'None' is treated as a missing value.
read_options = dict(sep=',', dtype={'PatientID': str}, na_values='None')
data = pd.read_csv("Abbvie - AD - Complete dataset.csv", **read_options)
data_part2 = pd.read_csv("Abbvie - AD - Cohort 2 dataset.csv", **read_options)

In [4]:
# Headline counts: distinct patients and distinct medical transcriptions in `data`.
pats = data.PatientID.nunique()
docs = data.DocumentID.nunique()

# Fixed the "datset" typo in the second message; re-run the cell to refresh its stored output.
print(f'The dataset contains {pats:,.0f} unique patients.')
print(f'The dataset contains {docs:,.0f} unique medical transcriptions (MT).')

The dataset contains 42,045 unique patients.
The datset contains 58,170 unique medical transcriptions (MT).


In [5]:
# Align the "District Of Columbia" spelling with the capitalisation used as the
# key in the state-code dictionary ("District of Columbia").
dc_variant = data["State"] == "District Of Columbia"
data.loc[dc_variant, "State"] = "District of Columbia"

# Lookup from full state name to its two-letter abbreviation
# (50 states plus the District of Columbia), grouped by initial letter.
code = {
    'Alabama': 'AL', 'Alaska': 'AK', 'Arizona': 'AZ', 'Arkansas': 'AR',
    'California': 'CA', 'Colorado': 'CO', 'Connecticut': 'CT',
    'Delaware': 'DE', 'District of Columbia': 'DC',
    'Florida': 'FL',
    'Georgia': 'GA',
    'Hawaii': 'HI',
    'Idaho': 'ID', 'Illinois': 'IL', 'Indiana': 'IN', 'Iowa': 'IA',
    'Kansas': 'KS', 'Kentucky': 'KY',
    'Louisiana': 'LA',
    'Maine': 'ME', 'Maryland': 'MD', 'Massachusetts': 'MA', 'Michigan': 'MI',
    'Minnesota': 'MN', 'Mississippi': 'MS', 'Missouri': 'MO', 'Montana': 'MT',
    'Nebraska': 'NE', 'Nevada': 'NV', 'New Hampshire': 'NH', 'New Jersey': 'NJ',
    'New Mexico': 'NM', 'New York': 'NY', 'North Carolina': 'NC', 'North Dakota': 'ND',
    'Ohio': 'OH', 'Oklahoma': 'OK', 'Oregon': 'OR',
    'Pennsylvania': 'PA',
    'Rhode Island': 'RI',
    'South Carolina': 'SC', 'South Dakota': 'SD',
    'Tennessee': 'TN', 'Texas': 'TX',
    'Utah': 'UT',
    'Vermont': 'VT', 'Virginia': 'VA',
    'Washington': 'WA', 'West Virginia': 'WV', 'Wisconsin': 'WI', 'Wyoming': 'WY',
}

In [6]:
# Treatment label -> hex color. The None key holds the color for rows without a treatment label.
# NOTE(review): the inclusion criteria also list 'Cibinqo (abrocitinib)', which has
# no entry here — confirm whether that omission is intentional.
tx_colors = {
    'Dupixent (dupilumab)': '#6EC7ED',
    'Rinvoq (upadacitinib)': '#FEF086',
    'Opzelura (ruxolitinib)': '#C6DE91',
    'Adbry (tralokinumab-ldrm)': '#DD9492',
    None: '#8998C1',
}

# Two counters, both starting at zero.
# NOTE(review): presumably used to number figures and tables further down — confirm.
figure = table = 0

In [7]:
# Store Age as text.
# NOTE(review): after this cast, comparisons such as max() are lexicographic
# ('9' > '10') and missing ages likely become the string 'nan' — confirm this is intended.
data["Age"] = data["Age"].astype(str)

# Part 1

In [8]:
# Define filter function
def unique_sorted_values_plus_ALL(array):
    """Return the sorted distinct values of ``array`` with 'All' prepended.

    Parameters
    ----------
    array : pandas Series (or any object exposing ``dropna()`` and ``unique()``)
        Values to offer as filter choices.

    Returns
    -------
    list
        ``['All', v1, v2, ...]`` with the distinct non-missing values in ascending order.

    Missing values (NaN/None) are dropped before sorting: a column that mixes
    strings with NaN cannot be ordered and previously raised ``TypeError``.
    """
    distinct = array.dropna().unique().tolist()
    return ['All'] + sorted(distinct)

# Inclusion-criteria selector: three toggle buttons, each with its own hover tooltip.
criteria_buttons = widgets.ToggleButtons(
    options=['All AD', 'No Tx mention', 'AD & Tx mentions'],
    description='Inclusion criteria:',
    tooltips=[
        'All patients with diagnosis of Atopic Dermatitis (with or without Tx mention)',
        'All patients with diagnosis of Atopic Dermatitis excluding Tx mentions',
        'All patients with diagnosis of Atopic Dermatitis and Tx mentions',
    ],
    button_style='info',
    disabled=False,
    style=dict(description_width='auto'),
)

# Create six independent Output widgets: one for the dataset sample and one per plot.
output, plot_age, plot_gender, plot_ethnicity, plot_specialty, plot_state = (
    widgets.Output() for _ in range(6)
)


# Dashboard renderer
def criteria_filtering(criteria):
    """Redraw every dashboard panel for the selected inclusion criteria.

    ``criteria`` selects the rows of ``data`` to show: 'All AD' keeps every row,
    'No Tx mention' keeps rows whose ``Tx`` is missing, and any other value keeps
    rows whose ``Tx`` is present. Each Output widget is cleared and then
    refilled; nothing is returned.
    """
    # Wipe whatever the previous selection rendered.
    for panel in (output, plot_age, plot_gender, plot_ethnicity, plot_specialty, plot_state):
        panel.clear_output()

    # Select the rows belonging to the chosen cohort.
    if criteria == 'All AD':
        cohort = data
    else:
        tx_missing = data.Tx.isna()
        cohort = data.loc[tx_missing] if criteria == 'No Tx mention' else data.loc[~tx_missing]

    # Dataset sample: first ten rows of the cohort.
    with output:
        display(cohort.head(10))

    # Age: one value per patient (the per-patient maximum), shown as a histogram.
    # NOTE(review): Age is cast to str earlier in the notebook, so this max() is
    # a string comparison — confirm that is intended.
    with plot_age:
        age_per_patient = cohort.groupby('PatientID').Age.max().reset_index()
        age_fig = px.histogram(age_per_patient, x='Age')
        age_fig.update_layout(
            xaxis_title="Age",
            yaxis_title="Number of Patients",
            width=900,
            height=600,
        )
        age_fig.show()

    # Gender: distinct patients per gender (rows with missing Gender are dropped by groupby).
    with plot_gender:
        patients_by_gender = cohort.groupby("Gender").PatientID.nunique().reset_index()
        gender_fig = px.bar(
            patients_by_gender,
            x="Gender",
            y="PatientID",
            color="Gender",
            text='PatientID',
        )
        gender_fig.update_layout(
            xaxis_title="",
            yaxis_title="Number of patients",
            width=900,
            height=600,
        )
        gender_fig.show()

    # Ethnicity: distinct patients per ethnicity; missing ethnicity is kept as its own group.
    with plot_ethnicity:
        patients_by_ethnicity = (
            cohort.groupby("Ethnicity", dropna=False).PatientID.nunique().reset_index()
        )
        ethnicity_fig = px.pie(
            patients_by_ethnicity,
            values='PatientID',
            names='Ethnicity',
            color_discrete_sequence=px.colors.sequential.Blues_r,
        )
        ethnicity_fig.update_layout(width=900, height=600)
        ethnicity_fig.show()

    # Specialty: distinct MTs per specialty, keeping specialties with at least 5 MTs.
    with plot_specialty:
        docs_by_specialty = (
            cohort.groupby(["Specialty"]).DocumentID.nunique().reset_index()
            .loc[lambda counts: counts.DocumentID >= 5]
        )
        specialty_fig = px.bar(
            docs_by_specialty,
            y='Specialty',
            x='DocumentID',
            color='Specialty',
            orientation='h',
        )
        specialty_fig.update_layout(barmode='stack', yaxis={'categoryorder': 'total ascending'})
        specialty_fig.update_layout(
            xaxis_title="Number of MTs",
            yaxis_title="Specialty",
            width=1000,
            height=800,
        )
        specialty_fig.show()

    # State: distinct patients per state on a US choropleth.
    with plot_state:
        patients_by_state = (
            cohort.groupby("State").PatientID.nunique().reset_index()
            # State names missing from the `code` dictionary get no Code value.
            .assign(Code=lambda counts: counts["State"].map(code))
        )
        state_fig = px.choropleth(
            patients_by_state,
            locations="Code",
            locationmode="USA-states",
            color="PatientID",
            color_continuous_scale="sunset_r",
            hover_name="State",
            scope='usa',
        )
        state_fig.update_layout(width=800, height=600)
        state_fig.show()
        
# Event handler
def criteria_buttons_eventhandler(change):
    """Observer callback: redraw the dashboard for the newly selected criteria.

    ``change`` is the notification object handed over by ``observe``; only its
    ``new`` attribute (the value just selected) is read.
    """
    selected_criteria = change.new
    criteria_filtering(selected_criteria)
    
# Bind handler to the ToggleButtons widget (called whenever its value changes)
criteria_buttons.observe(criteria_buttons_eventhandler, names='value')
# Layout
item_layout = widgets.Layout(margin='0 0 50px 0', justify_content='center')
# Inputs
input_widgets = widgets.Box([criteria_buttons], layout=item_layout)
# Tabs: one per Output widget, in the same order as the titles below
tab = widgets.Tab([output, plot_age, plot_gender, plot_ethnicity, plot_specialty, plot_state], layout=item_layout)
tab.set_title(0, 'Dataset sample')
tab.set_title(1, 'Age distribution')
tab.set_title(2, 'Gender distribution')
tab.set_title(3, 'Ethnicity distribution')
tab.set_title(4, 'Specialty distribution')
tab.set_title(5, 'State distribution')
# Dashboard
dashboard = widgets.VBox([input_widgets, tab])
display(dashboard)
# Initial render: the observer only fires when the selection changes, so without
# this call every tab stays empty until the user clicks a different button.
criteria_filtering(criteria_buttons.value)

VBox(children=(Box(children=(ToggleButtons(button_style='info', description='Inclusion criteria:', options=('A…