# Patient trajectory and condition analysis

## Required libraries and packages

In [1]:
import pandas as pd
import numpy as np

import glob
import pprint 
import plotly
import plotly.express as px
import plotly.graph_objs as go


import folium
from folium.plugins import MarkerCluster
from itertools import chain
import math
from collections import Counter
from prettytable import PrettyTable
from ast import literal_eval
from IPython import display


import warnings
warnings.filterwarnings("ignore")

print("Pandas version:", pd.__version__)
print("Numpy version:", np.__version__)
print("plotly version:", plotly.__version__)
print("folium version:", folium.__version__)

Pandas version: 1.5.3
Numpy version: 1.22.4
plotly version: 5.13.1
folium version: 0.14.0


# Utilities

In [2]:
def count_missing_values(data):
    """
    Counts the number of missing values in each column of a DataFrame and returns the results as a new DataFrame.
    """
    missing_values = data.isnull().sum()
    missing_df = pd.DataFrame({'column_name': missing_values.index, 'missing_count': missing_values.values})
    return missing_df

In [3]:
def is_float(string):
    """
    Check either is a string number or not
    """
    
    try:
        float(string)
        return True
    except ValueError:
        return False

def is_number(string):
    return string.isdigit() or is_float(string)


def count_values(value_list, topk=10, verbose=True, key_name="KEY", val_name="VALUE", ignore_number=False):
    # remove nan values
    values = [[v] if not isinstance(v, list) else v for v in value_list if v == v]
    # flatten the list of list into a list
    values = list(chain(*values))
    # ignore numbers
    if ignore_number:
        values = list(filter(lambda x: not is_float(x), values))
    
    counter = Counter()
    counter.update(values)
    most_common = counter.most_common(topk) if topk > 0 else counter.most_common()
    
    if verbose:
        tb = PrettyTable([key_name, val_name])
        for item in most_common:
            tb.add_row([item[0], item[1]])
        print(tb)
    

In [4]:
def contains_x(value, x):
    if value != value:
        return False
    
    if isinstance(value, str) and value == x:
        return False
    
    if isinstance(value, list) and x in value:
        return True
    
    return False

In [5]:
def fix_data_type(row):
    new_row = {}
    for key, value in row.to_dict().items():
        
        try:
            value = literal_eval(value)
        except (ValueError, SyntaxError):
            pass
        
        if "DATE" in key or "START" in key or "STOP" in key:
            if isinstance(value, list):
                value = [pd.to_datetime(v, errors='coerce') for v in value]
            else:
                value = pd.to_datetime(value, errors='coerce')
            
        new_row[key] = value
        
    return pd.Series(new_row)

# Load final dataset

In [7]:
data = pd.read_csv('final_data.csv', sep='\t') 
data = data.apply(fix_data_type, axis=1)
data.head(1)

Unnamed: 0,PATIENT_Id,PATIENT_BIRTHDATE,PATIENT_DEATHDATE,PATIENT_SSN,PATIENT_FIRST,PATIENT_LAST,PATIENT_MARITAL,PATIENT_RACE,PATIENT_ETHNICITY,PATIENT_GENDER,...,ALLERGY_DESCRIPTION,DEVICE_START,DEVICE_STOP,DEVICE_CODE,DEVICE_DESCRIPTION,DEVICE_UDI,IMMUN_DATE,IMMUN_CODE,IMMUN_DESCRIPTION,IMMUN_BASE_COST
0,034e9e3b-2def-4559-bb2a-7850888ae060,1983-11-14,NaT,999-73-5361,Milo271,Feil794,M,white,nonhispanic,M,...,,NaT,NaT,,,,NaT,,,


In [8]:
count_missing_values(data)

Unnamed: 0,column_name,missing_count
0,PATIENT_Id,0
1,PATIENT_BIRTHDATE,0
2,PATIENT_DEATHDATE,39059
3,PATIENT_SSN,0
4,PATIENT_FIRST,0
...,...,...
123,DEVICE_UDI,53268
124,IMMUN_DATE,42978
125,IMMUN_CODE,42978
126,IMMUN_DESCRIPTION,42978


In [9]:
print(data.dtypes.value_counts())

object                 91
float64                18
int64                  14
datetime64[ns]          3
datetime64[ns, UTC]     2
dtype: int64


# Patient trjectory 

Patient trajectory refers to the path that a patient takes through the healthcare system over time, including their interactions with different healthcare providers and the healthcare services they receive. The patient trajectory can be complex and may involve multiple transitions between different levels of care, such as hospitalization, outpatient care, rehabilitation, and long-term care.

In [10]:
data.groupby("PATIENT_Id").size().reset_index(name='count').sort_values('count', ascending=False).head(20)

Unnamed: 0,PATIENT_Id,count
924,cae10920-f977-48b4-a0d3-4d70ad561fd1,2006
120,19d2cfb8-439b-454a-b47e-5274c219005b,1606
268,3f336702-bf73-4fc8-bd59-3ba77fd65d0d,1538
253,3acf9313-1874-4dff-ab2a-3187516d92d6,826
942,cecb7ece-fb70-4a7f-b51d-6e8dd1506ad9,452
261,3dd2dd29-7cd0-48f7-b859-136c6e9d36e0,385
470,714b9c18-783d-4f52-aa64-cc3a05a286d9,375
199,2c71dd97-7085-416a-aa07-d675bbe3adf2,367
461,6ec18ddf-e9ee-421a-9033-456f558c7b4b,367
89,137acc1b-dbca-473e-84bb-fe1baaf98819,346


In [11]:
patient_id = '034e9e3b-2def-4559-bb2a-7850888ae060'
patient = data[data['PATIENT_Id'] == patient_id]
enconter_columns = list(filter(lambda x: "ENCOUNTER" in x, patient.columns.tolist()))
display.display(patient[enconter_columns].head())
if not patient.empty:
    fig = go.Figure()

    fig.add_trace(go.Scatter(x=patient['ENCOUNTER_START'], y=patient['ENCOUNTER_Id'], text=patient['ENCOUNTER_ENCOUNTERCLASS'], mode='markers', name='Start'))
    fig.add_trace(go.Scatter(x=patient['ENCOUNTER_STOP'], y=patient['ENCOUNTER_Id'], text=patient['ENCOUNTER_ENCOUNTERCLASS'], mode='markers', name='Stop'))

    for _, row in patient.iterrows():
        fig.add_shape(
            type='line',
            x0=row['ENCOUNTER_START'], x1=row['ENCOUNTER_STOP'],
            y0=row['ENCOUNTER_Id'], y1=row['ENCOUNTER_Id'],
            yref='y', xref='x',
            line=dict(color='black', width=1)
        )

    fig.update_layout(
        title=f'Trajectory for patient {patient_id}',
        xaxis_title='Timeline',
        yaxis_title='',
        yaxis=dict(tickmode='linear')
    )

    # set the x-axis range
    min_date = patient['ENCOUNTER_START'].min() - pd.Timedelta(days=30)
    max_date = patient['ENCOUNTER_STOP'].max() + pd.Timedelta(days=30)

    fig.update_xaxes(range=[min_date, max_date])
    fig.update_yaxes(title_text='', showticklabels=False)
    
    fig.show()
else:
    print("No encounters found")

Unnamed: 0,ENCOUNTER_Id,ENCOUNTER_START,ENCOUNTER_STOP,ENCOUNTER_ENCOUNTERCLASS,ENCOUNTER_CODE,ENCOUNTER_DESCRIPTION,ENCOUNTER_BASE_ENCOUNTER_COST,ENCOUNTER_TOTAL_CLAIM_COST,ENCOUNTER_PAYER_COVERAGE,ENCOUNTER_REASONCODE,ENCOUNTER_REASONDESCRIPTION,PAYER_COVERED_ENCOUNTERS,PAYER_UNCOVERED_ENCOUNTERS
0,d0c40d10-8d87-447e-836e-99d26ad52ea5,2010-01-23 17:45:28+00:00,2010-01-23 18:10:28+00:00,ambulatory,185345009,Encounter for symptom,129.16,129.16,54.16,10509002.0,Acute bronchitis (disorder),16168,0
1,e88bc3a9-007c-405e-aabc-792a38f4aa2b,2012-01-23 17:45:28+00:00,2012-01-23 18:00:28+00:00,wellness,162673000,General examination of patient (procedure),129.16,129.16,129.16,,,16168,0
14,e3639187-450a-4aa9-9fc9-3ea3cafa672b,2015-01-26 17:45:28+00:00,2015-01-26 18:15:28+00:00,wellness,162673000,General examination of patient (procedure),129.16,129.16,129.16,,,16168,0
26,3b639086-5fbc-4720-8c31-e8c8c0f1d660,2016-12-29 17:45:28+00:00,2016-12-29 18:00:28+00:00,ambulatory,185345009,Encounter for symptom,129.16,129.16,54.16,10509002.0,Acute bronchitis (disorder),16168,0
35,4607e1a1-f2bb-42c8-a2fd-8709331a935d,2017-01-09 17:45:28+00:00,2017-01-09 18:00:28+00:00,outpatient,185349003,Encounter for check up (procedure),129.16,129.16,54.16,,,16168,0


In [12]:
patient_id = "9fda53d4-6fcc-4ef5-a1fe-16e007182ec2"
patient = data[data["PATIENT_Id"] == patient_id]
enconter_columns = list(filter(lambda x: "ENCOUNTER" in x, patient.columns.tolist()))
display.display(patient[enconter_columns].head())
if not patient.empty:
    fig = go.Figure()

    fig.add_trace(go.Scatter(x=patient['ENCOUNTER_START'], y=patient['ENCOUNTER_Id'], text=patient['ENCOUNTER_ENCOUNTERCLASS'], mode='markers', name='Start'))
    fig.add_trace(go.Scatter(x=patient['ENCOUNTER_STOP'], y=patient['ENCOUNTER_Id'], text=patient['ENCOUNTER_ENCOUNTERCLASS'], mode='markers', name='Stop'))

    for _, row in patient.iterrows():
        fig.add_shape(
            type='line',
            x0=row['ENCOUNTER_START'], x1=row['ENCOUNTER_STOP'],
            y0=row['ENCOUNTER_Id'], y1=row['ENCOUNTER_Id'],
            yref='y', xref='x',
            line=dict(color='black', width=1)
        )

    fig.update_layout(
        title=f'Trajectory for patient {patient_id}',
        xaxis_title='Timeline',
        yaxis_title='',
        yaxis=dict(tickmode='linear')
    )

    # set the x-axis range
    min_date = patient['ENCOUNTER_START'].min() - pd.Timedelta(days=30)
    max_date = patient['ENCOUNTER_STOP'].max() + pd.Timedelta(days=30)

    fig.update_xaxes(range=[min_date, max_date])
    fig.update_yaxes(title_text='', showticklabels=False)
    
    fig.show()
else:
    print("No encounters found")

Unnamed: 0,ENCOUNTER_Id,ENCOUNTER_START,ENCOUNTER_STOP,ENCOUNTER_ENCOUNTERCLASS,ENCOUNTER_CODE,ENCOUNTER_DESCRIPTION,ENCOUNTER_BASE_ENCOUNTER_COST,ENCOUNTER_TOTAL_CLAIM_COST,ENCOUNTER_PAYER_COVERAGE,ENCOUNTER_REASONCODE,ENCOUNTER_REASONDESCRIPTION,PAYER_COVERED_ENCOUNTERS,PAYER_UNCOVERED_ENCOUNTERS
12191,6bf0fde0-52f9-463c-8aa8-64fe948d5171,2000-02-25 11:19:49+00:00,2000-02-25 11:34:49+00:00,ambulatory,185347001,Encounter for problem,129.16,129.16,0.0,24079001.0,Atopic dermatitis,0,10116
12192,da2fa819-307a-410d-9b95-6ea21a2f19ae,2000-03-10 11:19:49+00:00,2000-03-10 12:06:49+00:00,ambulatory,185347001,Encounter for problem,129.16,129.16,0.0,,,0,10116
12193,c5523cfc-caba-4404-a7d2-1265b5ebe274,2001-05-30 11:19:49+00:00,2001-06-13 11:19:49+00:00,ambulatory,185345009,Encounter for symptom,129.16,129.16,0.0,232353008.0,Perennial allergic rhinitis with seasonal vari...,0,10116
12194,06c240ca-8175-4e6e-83de-8f1ceb2677fd,2003-05-30 11:19:49+00:00,2003-05-30 11:49:49+00:00,ambulatory,185345009,Encounter for symptom,129.16,129.16,0.0,233678006.0,Childhood asthma,0,10116
12195,9946bd5c-fd8d-41d8-9222-400c9b064fb2,2004-05-03 11:19:49+00:00,2004-05-03 11:34:49+00:00,wellness,410620009,Well child visit (procedure),129.16,129.16,74.16,,,6085,7422


In [13]:
patient_id = "3acf9313-1874-4dff-ab2a-3187516d92d6"
patient = data[data["PATIENT_Id"] == patient_id]
enconter_columns = list(filter(lambda x: "ENCOUNTER" in x, patient.columns.tolist()))
display.display(patient[enconter_columns].head())
if not patient.empty:
    fig = go.Figure()

    fig.add_trace(go.Scatter(x=patient['ENCOUNTER_START'], y=patient['ENCOUNTER_Id'], text=patient['ENCOUNTER_ENCOUNTERCLASS'], mode='markers', name='Start'))
    fig.add_trace(go.Scatter(x=patient['ENCOUNTER_STOP'], y=patient['ENCOUNTER_Id'], text=patient['ENCOUNTER_ENCOUNTERCLASS'], mode='markers', name='Stop'))

    for _, row in patient.iterrows():
        fig.add_shape(
            type='line',
            x0=row['ENCOUNTER_START'], x1=row['ENCOUNTER_STOP'],
            y0=row['ENCOUNTER_Id'], y1=row['ENCOUNTER_Id'],
            yref='y', xref='x',
            line=dict(color='black', width=1)
        )

    fig.update_layout(
        title=f'Trajectory for patient {patient_id}',
        xaxis_title='Timeline',
        yaxis_title='',
        yaxis=dict(tickmode='linear')
    )

    # set the x-axis range
    min_date = patient['ENCOUNTER_START'].min() - pd.Timedelta(days=30)
    max_date = patient['ENCOUNTER_STOP'].max() + pd.Timedelta(days=30)

    fig.update_xaxes(range=[min_date, max_date])
    fig.update_yaxes(title_text='', showticklabels=False)
    
    fig.show()
else:
    print("No encounters found")

Unnamed: 0,ENCOUNTER_Id,ENCOUNTER_START,ENCOUNTER_STOP,ENCOUNTER_ENCOUNTERCLASS,ENCOUNTER_CODE,ENCOUNTER_DESCRIPTION,ENCOUNTER_BASE_ENCOUNTER_COST,ENCOUNTER_TOTAL_CLAIM_COST,ENCOUNTER_PAYER_COVERAGE,ENCOUNTER_REASONCODE,ENCOUNTER_REASONDESCRIPTION,PAYER_COVERED_ENCOUNTERS,PAYER_UNCOVERED_ENCOUNTERS
41442,9911370d-0d61-475f-8982-50d8d9ec0f51,1934-06-25 13:57:11+00:00,1934-06-25 14:27:11+00:00,wellness,410620009,Well child visit (procedure),129.16,129.16,69.16,,,11971,1745
41443,d7fe5ec7-27b4-4ab2-b9cc-3d6fea8a2e05,1935-07-01 13:57:11+00:00,1935-07-01 14:12:11+00:00,wellness,162673000,General examination of patient (procedure),129.16,129.16,69.16,,,11971,1745
41444,30d7c24e-64a8-42d0-a282-10f2a453129a,1936-07-06 13:57:11+00:00,1936-07-06 14:27:11+00:00,wellness,162673000,General examination of patient (procedure),129.16,129.16,69.16,,,11971,1745
41445,95913884-94cd-45b7-bafe-1faa88413e05,1937-07-12 13:57:11+00:00,1937-07-12 14:27:11+00:00,wellness,162673000,General examination of patient (procedure),129.16,129.16,69.16,,,11971,1745
41446,bf3eb140-5545-4964-888f-f2e28b1b24b0,1938-07-18 13:57:11+00:00,1938-07-18 14:12:11+00:00,wellness,162673000,General examination of patient (procedure),129.16,129.16,69.16,,,11971,1745


# Top conditions analysis

**Analysis**

- **Symptoms:** there is some similarity between these three disorders as all of them are caused by viruses and affect the respiratory system. Acute bronchitis is an inflammation of the bronchial tubes, Acute viral pharyngitis is an inflammation of the pharynx (back of the throat), and Viral sinusitis is an inflammation of the sinuses. They may share similar symptoms like cough, sore throat, congestion, and fatigue.
- **Location:** Most of patients in three top conditions are located in Massachusetts. In Massachusetts, the climate is generally humid, with warm summers and cold winters. The humidity levels can vary throughout the year, with higher levels in the summer months. The climate is influenced by the state's location on the East Coast of the United States, with the Atlantic Ocean to the east and the Appalachian Mountains to the west.This can lead to increased moisture in the sinuses, which can exacerbate sinusitis symptoms. Additionally, temperature changes and barometric pressure changes, which can occur frequently in Massachusetts, can also trigger infections symptoms in some individuals.
- **Severity:** In all three conditions most of patients are able to walk and move around without assistance or support.
- **Weather:** They are more likely to occur during the colder months of the year when people spend more time indoors in close proximity to one another, which increases the risk of transmission of the viruses that cause these infections. Beased on the information condition one is mostly happened in spring and winter,, condition two happend in winter mostly and condition three happend in spring and summer. 
- **Immune system:** Unfortunetly, there are not enough information for conclusion.
- **Life-style:** Other factors that can increase the risk of these infections are exposuring to irritants such as cigarette smoke or pollution.
Most of the patients are not smoker. But based on the infromation we could not make the conclusion that the life style is not matter in these conditions.
- **Allergy:** Unfortunetly, there are not enough information for conclusion.

# Preprocessing stage

The information for patient contains:
- Admission / discharge time
- Dermographic infomration 
- Hospital / insurance information
- Vital sign observation
- Medication duration and type of medication
- Careplan timeline (in or outside of hospital)
- Device and immunization related to surgery / tracking or patient

In [14]:
data.iloc[0].to_dict()

{'PATIENT_Id': '034e9e3b-2def-4559-bb2a-7850888ae060',
 'PATIENT_BIRTHDATE': Timestamp('1983-11-14 00:00:00'),
 'PATIENT_DEATHDATE': NaT,
 'PATIENT_SSN': '999-73-5361',
 'PATIENT_FIRST': 'Milo271',
 'PATIENT_LAST': 'Feil794',
 'PATIENT_MARITAL': 'M',
 'PATIENT_RACE': 'white',
 'PATIENT_ETHNICITY': 'nonhispanic',
 'PATIENT_GENDER': 'M',
 'PATIENT_BIRTHPLACE': 'Danvers  Massachusetts  US',
 'PATIENT_ADDRESS': '422 Farrell Path Unit 69',
 'PATIENT_CITY': 'Somerville',
 'PATIENT_STATE': 'Massachusetts',
 'PATIENT_COUNTY': 'Middlesex County',
 'PATIENT_LAT': 42.36069732777065,
 'PATIENT_LON': -71.12653095133149,
 'PATIENT_HEALTHCARE_EXPENSES': 793946.01,
 'PATIENT_HEALTHCARE_COVERAGE': 3204.49,
 'PATIENT_AGE': 39,
 'ENCOUNTER_Id': 'd0c40d10-8d87-447e-836e-99d26ad52ea5',
 'ENCOUNTER_START': Timestamp('2010-01-23 17:45:28+0000', tz='UTC'),
 'ENCOUNTER_STOP': Timestamp('2010-01-23 18:10:28+0000', tz='UTC'),
 'ENCOUNTER_ENCOUNTERCLASS': 'ambulatory',
 'ENCOUNTER_CODE': 185345009,
 'ENCOUNTER_DE

# Summary of patient condition

According to result top three conditions are:

- **Viral sinusitis (disorder)**
- **Acute viral pharyngitis (disorder)**
- **Acute bronchitis (disorder)** 

In [15]:
print('Encounter description: ')
_ = count_values(data['ENCOUNTER_DESCRIPTION'], topk=10)
print()
print('Careplan description: ')
_ = count_values(data['CAREPLAN_DESCRIPTION'], topk=10)
print()
print('Condition description: ')
_ = count_values(data['COND_DESCRIPTION'], topk=10)
print()

Encounter description: 
+--------------------------------------------+-------+
|                    KEY                     | VALUE |
+--------------------------------------------+-------+
| General examination of patient (procedure) | 14946 |
|     Encounter for problem (procedure)      |  5196 |
|     Encounter for check up (procedure)     |  4515 |
|        Well child visit (procedure)        |  4144 |
|           Encounter for symptom            |  3929 |
|               Prenatal visit               |  2676 |
|       Urgent care clinic (procedure)       |  2373 |
|           Encounter for problem            |  2291 |
|            Follow-up encounter             |  2282 |
|        Patient encounter procedure         |  1624 |
+--------------------------------------------+-------+

Careplan description: 
+--------------------------------------------+-------+
|                    KEY                     | VALUE |
+--------------------------------------------+-------+
|            Resp

# Invistigate first condition: Viral sinusitis (disorder)

Sinusitis is an inflammation of the sinuses, which can be caused by several factors, including infections, allergies, and environmental irritants. Weather conditions, such as humidity, temperature changes, and barometric pressure changes, can also affect sinusitis symptoms.


**Analysis**

```bash
Medication: 
+----------------------------------------------------------------------------------------+-------+
|                                          KEY                                           | VALUE |
+----------------------------------------------------------------------------------------+-------+
|                                          None                                          |  946  |
|                  Amoxicillin 250 MG / Clavulanate 125 MG Oral Tablet                   |  247  |
|         120 ACTUAT Fluticasone propionate 0.044 MG/ACTUAT Metered Dose Inhaler         |   20  |
|           NDA020503 200 ACTUAT Albuterol 0.09 MG/ACTUAT Metered Dose Inhaler           |   20  |
|                   Atenolol 50 MG / Chlorthalidone 25 MG Oral Tablet                    |   16  |
+----------------------------------------------------------------------------------------+-------+

Median age: 
AGE (median): 50

Patient state: 
+---------------+-------+
|      KEY      | VALUE |
+---------------+-------+
| Massachusetts |  1248 |
+---------------+-------+

Visit months:
+-------+-------+
| Month | VALUE |
+-------+-------+
|   4   |  126  |
|   7   |  114  |
|   11  |  110  |
|   5   |  105  |
+-------+-------+

Type of condition-admission:
+------------+-------+
|    KEY     | VALUE |
+------------+-------+
| ambulatory |  1248 |
+------------+-------+

Life-style patients:
+--------------------------+-------+
|           KEY            | VALUE |
+--------------------------+-------+
|       Never smoker       |  108  |
|      Former smoker       |   34  |
| Current every day smoker |   1   |
+--------------------------+-------+

Allergy:
+-----+-------+
| KEY | VALUE |
+-----+-------+
+-----+-------+

```

In [16]:
condition_one = data[data["COND_DESCRIPTION"].apply(lambda x: contains_x(x, "Viral sinusitis (disorder)"))]
condition_one.head(1)

Unnamed: 0,PATIENT_Id,PATIENT_BIRTHDATE,PATIENT_DEATHDATE,PATIENT_SSN,PATIENT_FIRST,PATIENT_LAST,PATIENT_MARITAL,PATIENT_RACE,PATIENT_ETHNICITY,PATIENT_GENDER,...,ALLERGY_DESCRIPTION,DEVICE_START,DEVICE_STOP,DEVICE_CODE,DEVICE_DESCRIPTION,DEVICE_UDI,IMMUN_DATE,IMMUN_CODE,IMMUN_DESCRIPTION,IMMUN_BASE_COST
7,8d4c4326-e9de-4f45-9a4c-f8c36bff89ae,1978-05-27,NaT,999-85-4926,Mariana775,Rutherford999,M,white,nonhispanic,F,...,,,NaT,,,,,,,


In [17]:
print("Medication: ")
_ = count_values(condition_one['MEDICATION_DESCRIPTION'], topk=10)

print()
print(" Median age: ")
print(f"AGE (median): {int(condition_one['PATIENT_AGE'].median())}")

print()
print(" Patient state: ")
_ = count_values(condition_one['PATIENT_STATE'], topk=1)

Medication: 
+----------------------------------------------------------------------------------------+-------+
|                                          KEY                                           | VALUE |
+----------------------------------------------------------------------------------------+-------+
|                  Amoxicillin 250 MG / Clavulanate 125 MG Oral Tablet                   |  247  |
|         120 ACTUAT Fluticasone propionate 0.044 MG/ACTUAT Metered Dose Inhaler         |   20  |
|           NDA020503 200 ACTUAT Albuterol 0.09 MG/ACTUAT Metered Dose Inhaler           |   20  |
|                   Atenolol 50 MG / Chlorthalidone 25 MG Oral Tablet                    |   16  |
|                       Nitroglycerin 0.4 MG/ACTUAT Mucosal Spray                        |   12  |
|                         Hydrochlorothiazide 25 MG Oral Tablet                          |   11  |
| amLODIPine 5 MG / Hydrochlorothiazide 12.5 MG / Olmesartan medoxomil 20 MG Oral Tablet |   11 

In [18]:
fig = px.histogram(condition_one, x="PATIENT_AGE", nbins=200, marginal="box", color_discrete_sequence=["#FF1493"])

fig.update_layout(
    title="Age Distribution",
    xaxis_title="Age",
    yaxis_title="Count",
    font=dict(
        family="Arial",
        size=14,
        color="#6495ED"
    ),
    bargap=0.1
)

fig.show()

In [19]:
print("Visit months:")
_ = count_values(condition_one["ENCOUNTER_START"].dt.month, 4, key_name="Month")
print()

print("Immunization:")
_ = count_values(condition_one["IMMUN_DESCRIPTION"], 4)
print()

print("Type of condition-admission:")
_ = count_values(condition_one["ENCOUNTER_ENCOUNTERCLASS"], ignore_number=True)
print()

print("Type of condition-admission:")
_ = count_values(condition_one["ENCOUNTER_ENCOUNTERCLASS"], ignore_number=True)
print()

print("Life-style patients:")
_ = count_values(condition_one["OBS_VALUE"], ignore_number=True)
print()

print("Allergy:")
_ = count_values(condition_one["ALLERGY_DESCRIPTION"], ignore_number=True)
print()

Visit months:
+-------+-------+
| Month | VALUE |
+-------+-------+
|   4   |  126  |
|   7   |  114  |
|   11  |  110  |
|   5   |  105  |
+-------+-------+

Immunization:
+-----+-------+
| KEY | VALUE |
+-----+-------+
+-----+-------+

Type of condition-admission:
+------------+-------+
|    KEY     | VALUE |
+------------+-------+
| ambulatory |  1248 |
+------------+-------+

Type of condition-admission:
+------------+-------+
|    KEY     | VALUE |
+------------+-------+
| ambulatory |  1248 |
+------------+-------+

Life-style patients:
+--------------------------+-------+
|           KEY            | VALUE |
+--------------------------+-------+
|       Never smoker       |  108  |
|      Former smoker       |   34  |
| Current every day smoker |   1   |
+--------------------------+-------+

Allergy:
+-----+-------+
| KEY | VALUE |
+-----+-------+
+-----+-------+



# Invistigate second condition: Acute viral pharyngitis (disorder)

Acute viral pharyngitis is a medical condition commonly known as a sore throat. It is an inflammation of the pharynx, the back of the throat, usually caused by a viral infection. Symptoms include a scratchy or sore throat, pain or difficulty when swallowing, swollen glands in the neck, and fever. In most cases, it is a self-limited illness that will resolve on its own within a few days without specific treatment, but symptomatic treatment such as pain relief and hydration can help to alleviate symptoms.

```bash
Medications:
+------------------------------------------------------------------------------------------------------+-------+
|                                                 KEY                                                  | VALUE |
+------------------------------------------------------------------------------------------------------+-------+
|                                                 None                                                 |  630  |
|                              Penicillin V Potassium 250 MG Oral Tablet                               |   9   |
|                                Hydrochlorothiazide 25 MG Oral Tablet                                 |   4   |
|                120 ACTUAT Fluticasone propionate 0.044 MG/ACTUAT Metered Dose Inhaler                |   3   |
+------------------------------------------------------------------------------------------------------+-------+


Median age: 
AGE (median): 46

 Patient state: 
+---------------+-------+
|      KEY      | VALUE |
+---------------+-------+
| Massachusetts |  653  |
+---------------+-------+

Visit months:
+-------+-------+
| Month | VALUE |
+-------+-------+
|   1   |   67  |
|   7   |   66  |
|   10  |   61  |
|   2   |   56  |
+-------+-------+

Immunization:
+------+-------+
| KEY  | VALUE |
+------+-------+
| None |  653  |
+------+-------+

Type of condition-admission:
+------------+-------+
|    KEY     | VALUE |
+------------+-------+
| ambulatory |  653  |
+------------+-------+

Life-style patients:
+--------------------------+-------+
|           KEY            | VALUE |
+--------------------------+-------+
|       Never smoker       |   31  |
|      Former smoker       |   6   |
| Current every day smoker |   1   |
+--------------------------+-------+

Allergy:
+-----+-------+
| KEY | VALUE |
+-----+-------+
+-----+-------+

```

In [20]:
condition_two = data[data["COND_DESCRIPTION"].apply(lambda x: contains_x(x, "Acute viral pharyngitis (disorder)"))]
condition_two.head(2)

Unnamed: 0,PATIENT_Id,PATIENT_BIRTHDATE,PATIENT_DEATHDATE,PATIENT_SSN,PATIENT_FIRST,PATIENT_LAST,PATIENT_MARITAL,PATIENT_RACE,PATIENT_ETHNICITY,PATIENT_GENDER,...,ALLERGY_DESCRIPTION,DEVICE_START,DEVICE_STOP,DEVICE_CODE,DEVICE_DESCRIPTION,DEVICE_UDI,IMMUN_DATE,IMMUN_CODE,IMMUN_DESCRIPTION,IMMUN_BASE_COST
8,8d4c4326-e9de-4f45-9a4c-f8c36bff89ae,1978-05-27,NaT,999-85-4926,Mariana775,Rutherford999,M,white,nonhispanic,F,...,,,NaT,,,,,,,
12,f5dcd418-09fe-4a2f-baa0-3da800bd8c3a,1996-10-18,NaT,999-60-7372,Gregorio366,Auer97,,white,nonhispanic,M,...,,,NaT,,,,,,,


In [21]:
print("Medication: ")
_ = count_values(condition_two['MEDICATION_DESCRIPTION'], topk=10)

print()
print(" Median age: ")
print(f"AGE (median): {int(condition_two['PATIENT_AGE'].median())}")

print()
print(" Patient state: ")
_ = count_values(condition_two['PATIENT_STATE'], topk=1)

print()
print("Visit months:")
_ = count_values(condition_two["ENCOUNTER_START"].dt.month, 4, key_name="Month")
print()

print("Immunization:")
_ = count_values(condition_two["IMMUN_DESCRIPTION"], 4)
print()

print("Type of condition-admission:")
_ = count_values(condition_two["ENCOUNTER_ENCOUNTERCLASS"], ignore_number=True)
print()

print("Life-style patients:")
_ = count_values(condition_two["OBS_VALUE"], ignore_number=True)
print()

print("Allergy:")
_ = count_values(condition_two["ALLERGY_DESCRIPTION"], ignore_number=True)
print()

Medication: 
+------------------------------------------------------------------------------------------------------+-------+
|                                                 KEY                                                  | VALUE |
+------------------------------------------------------------------------------------------------------+-------+
|                              Penicillin V Potassium 250 MG Oral Tablet                               |   9   |
|                                Hydrochlorothiazide 25 MG Oral Tablet                                 |   4   |
|                120 ACTUAT Fluticasone propionate 0.044 MG/ACTUAT Metered Dose Inhaler                |   3   |
|                  NDA020503 200 ACTUAT Albuterol 0.09 MG/ACTUAT Metered Dose Inhaler                  |   3   |
|                          Atenolol 50 MG / Chlorthalidone 25 MG Oral Tablet                           |   3   |
|                              Nitroglycerin 0.4 MG/ACTUAT Mucosal Spray           

# Invistigate Third condition: Acute bronchitis (disorder) 

Acute bronchitis is an inflammation of the bronchial tubes, which are the airways that carry air to the lungs. It is usually caused by a viral infection, although bacterial infections, exposure to air pollution, and other factors can also contribute to the condition. Symptoms of acute bronchitis include cough, chest discomfort or pain, fever, and shortness of breath. In most cases, acute bronchitis clears up on its own within a few weeks.

```bash
Medications:
+-------------------------------------------------------------------------------------------------------------------+-------+
|                                                        KEY                                                        | VALUE |
+-------------------------------------------------------------------------------------------------------------------+-------+
|                                          Acetaminophen 325 MG Oral Tablet                                         |  434  |
| Acetaminophen 21.7 MG/ML / Dextromethorphan Hydrobromide 1 MG/ML / doxylamine succinate 0.417 MG/ML Oral Solution |  126  |
|                                 Atenolol 50 MG / Chlorthalidone 25 MG Oral Tablet                                 |   7   |
+-------------------------------------------------------------------------------------------------------------------+-------+

Median age: 
AGE (median): 47

 Patient state: 
+---------------+-------+
|      KEY      | VALUE |
+---------------+-------+
| Massachusetts |  563  |
+---------------+-------+

Visit months:
+-------+-------+
| Month | VALUE |
+-------+-------+
|   7   |   55  |
|   8   |   54  |
|   4   |   52  |
|   6   |   51  |
+-------+-------+

Immunization:
+------+-------+
| KEY  | VALUE |
+------+-------+
| None |  563  |
+------+-------+

Type of condition-admission:
+------------+-------+
|    KEY     | VALUE |
+------------+-------+
| ambulatory |  562  |
| outpatient |   1   |
+------------+-------+

Life-style patients:
+---------------------+-------+
|         KEY         | VALUE |
+---------------------+-------+
|     Never smoker    |   49  |
|    Former smoker    |   13  |
|     heterosexual    |   1   |
|   not HIV positive  |   1   |
|  No signs/symptoms  |   1   |
| Patient is homeless |   1   |
|          No         |   1   |
+---------------------+-------+

Allergy:
+-----+-------+
| KEY | VALUE |
+-----+-------+
+-----+-------+
```

In [22]:
condition_three = data[data["COND_DESCRIPTION"].apply(lambda x: contains_x(x, "Acute bronchitis (disorder)"))]
condition_three.head(2)

Unnamed: 0,PATIENT_Id,PATIENT_BIRTHDATE,PATIENT_DEATHDATE,PATIENT_SSN,PATIENT_FIRST,PATIENT_LAST,PATIENT_MARITAL,PATIENT_RACE,PATIENT_ETHNICITY,PATIENT_GENDER,...,ALLERGY_DESCRIPTION,DEVICE_START,DEVICE_STOP,DEVICE_CODE,DEVICE_DESCRIPTION,DEVICE_UDI,IMMUN_DATE,IMMUN_CODE,IMMUN_DESCRIPTION,IMMUN_BASE_COST
10,10339b10-3cd1-4ac3-ac13-ec26728cb592,1992-06-02,NaT,999-27-3385,Jayson808,Fadel536,M,white,nonhispanic,M,...,,,NaT,,,,,,,
19,f5dcd418-09fe-4a2f-baa0-3da800bd8c3a,1996-10-18,NaT,999-60-7372,Gregorio366,Auer97,,white,nonhispanic,M,...,,,NaT,,,,,,,


In [23]:
print("Medication: ")
_ = count_values(condition_three['MEDICATION_DESCRIPTION'], topk=10)

print()
print(" Median age: ")
print(f"AGE (median): {int(condition_three['PATIENT_AGE'].median())}")

print()
print(" Patient state: ")
_ = count_values(condition_three['PATIENT_STATE'], topk=1)

print()
print("Visit months:")
_ = count_values(condition_three["ENCOUNTER_START"].dt.month, 4, key_name="Month")
print()

print("Immunization:")
_ = count_values(condition_three["IMMUN_DESCRIPTION"], 4)
print()

print("Type of condition-admission:")
_ = count_values(condition_three["ENCOUNTER_ENCOUNTERCLASS"], ignore_number=True)
print()

print("Life-style patients:")
_ = count_values(condition_three["OBS_VALUE"], ignore_number=True)
print()

print("Allergy:")
_ = count_values(condition_three["ALLERGY_DESCRIPTION"], ignore_number=True)
print()

Medication: 
+-------------------------------------------------------------------------------------------------------------------+-------+
|                                                        KEY                                                        | VALUE |
+-------------------------------------------------------------------------------------------------------------------+-------+
|                                          Acetaminophen 325 MG Oral Tablet                                         |  434  |
| Acetaminophen 21.7 MG/ML / Dextromethorphan Hydrobromide 1 MG/ML / doxylamine succinate 0.417 MG/ML Oral Solution |  126  |
|                                 Atenolol 50 MG / Chlorthalidone 25 MG Oral Tablet                                 |   7   |
|                                       Hydrochlorothiazide 25 MG Oral Tablet                                       |   6   |
|                                           Verapamil Hydrochloride 40 MG                                