In [1]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px

%matplotlib inline

In [21]:
# Load the three UNOS wait-list snapshots (by gender, by age group, by
# ethnicity). The files are read in the same order as the names on the left.

unos_gender_df, unos_age_df, unos_ethnicity_df = (
    pd.read_csv(csv_path)
    for csv_path in (
        'data/unos_wait_by_gender_072524.csv',
        'data/unos_wait_by_age_072524.csv',
        'data/unos_wait_by_ethnicity_072524.csv',
    )
)

In [22]:
# Wait-list counts by gender, one row per organ.
# Left as the cell's last expression so the notebook renders the frame.

unos_gender_df

Unnamed: 0,Organ,All Genders,Male,Female
0,Kidney,89487,55529,33967
1,Liver,9510,5602,3908
2,Pancreas,860,426,434
3,Kidney / Pancreas,2181,1182,999
4,Heart,3410,2574,836
5,Lung,912,441,471
6,Heart / Lung,42,27,15
7,Intestine,192,97,95
8,Abdominal Wall,2,2,0
9,VCA - head and neck,3,2,1


In [23]:
# Dimensions of the gender table: row count first, then column count.

print('Gender Data Rows: ', len(unos_gender_df))
print('Gender Data Columns: ', len(unos_gender_df.columns))

Gender Data Rows:  12
Gender Data Columns:  4


In [24]:
# Column dtypes and non-null counts for the gender table.
# The count columns are reported as `object` (strings) in the output below,
# so they have to be converted before any arithmetic is done on them.

unos_gender_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12 entries, 0 to 11
Data columns (total 4 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   Organ        12 non-null     object
 1   All Genders  12 non-null     object
 2   Male         12 non-null     object
 3   Female       12 non-null     object
dtypes: object(4)
memory usage: 516.0+ bytes


In [25]:
def string_to_int(df, cols):
    '''
    Change the data type of columns that contain the wait list numbers
    from string to a numeric data type, modifying the dataframe in place.

    Commas used as thousands separators are removed first, so a value
    such as '89,487' becomes 89487.

    IN:
        df: dataframe to modify
        cols: a column name, or a list of column names, to change
    OUT:
        the same dataframe object that was passed in (returned so the
        call can be chained or assigned; the change is still in place)
    RAISES:
        KeyError if a requested column does not exist
        ValueError if a value is still not numeric once commas are removed
    '''

    # A bare string would otherwise be iterated one character at a time
    # below and fail with a KeyError on the first letter.
    if isinstance(cols, str):
        cols = [cols]

    for col in cols:
        # regex=True turns this into a substring replacement, so every comma
        # inside a value is dropped rather than only values equal to ','.
        without_commas = df[col].replace({',': ''}, regex=True)
        df[col] = pd.to_numeric(without_commas)
    return df

In [26]:
# All three count columns were reported as `object` by .info() above.
# string_to_int assigns the converted columns back onto unos_gender_df itself.
cols_gender = ['All Genders', 'Male', 'Female']
string_to_int(unos_gender_df, cols_gender)
# Re-check the dtypes to confirm the conversion took effect.
unos_gender_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12 entries, 0 to 11
Data columns (total 4 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   Organ        12 non-null     object
 1   All Genders  12 non-null     int64 
 2   Male         12 non-null     int64 
 3   Female       12 non-null     int64 
dtypes: int64(3), object(1)
memory usage: 516.0+ bytes


In [27]:
# Share of the overall wait list held by each organ, as a percentage of the
# summed 'All Genders' column.

unos_gender_df['Total Wait Pct.'] = (
    unos_gender_df['All Genders']
    .div(unos_gender_df['All Genders'].sum())
    .mul(100)
)

unos_gender_df

Unnamed: 0,Organ,All Genders,Male,Female,Total Wait Pct.
0,Kidney,89487,55529,33967,83.937868
1,Liver,9510,5602,3908,8.92028
2,Pancreas,860,426,434,0.806671
3,Kidney / Pancreas,2181,1182,999,2.045755
4,Heart,3410,2574,836,3.198544
5,Lung,912,441,471,0.855446
6,Heart / Lung,42,27,15,0.039396
7,Intestine,192,97,95,0.180094
8,Abdominal Wall,2,2,0,0.001876
9,VCA - head and neck,3,2,1,0.002814


> **Approximately 84% of those awaiting an organ transplant are waiting for a kidney.**

In [28]:
# Wait-list counts by age group, one row per age band and one column per organ.
# Left as the cell's last expression so the notebook renders the frame.

unos_age_df

Unnamed: 0,Age Group,All Organs,Kidney,Liver,Pancreas,Kidney / Pancreas,Heart,Lung,Heart / Lung,Intestine,Abdominal Wall,VCA - head and neck,VCA - upper limb,VCA - uterus
0,< 1 Year,105,0,33,1,0,72,0,0,1,0,0,0,0
1,1-5 Years,549,242,122,18,0,173,3,0,31,0,0,0,0
2,6-10 Years,469,269,59,8,1,134,7,1,16,0,0,0,0
3,11-17 Years,958,725,112,21,3,113,7,1,33,0,0,0,0
4,18-34 Years,8413,7167,566,164,448,330,53,6,36,2,0,1,4
5,35-49 Years,23189,20122,1714,416,1127,613,78,17,39,0,2,2,5
6,50-64 Years,43652,37763,4267,223,581,1433,362,16,32,0,1,0,0
7,65 +,26484,23199,2637,9,21,542,402,1,4,0,0,0,0


In [29]:
# Dimensions of the age table: row count first, then column count.

print('Age Data Rows: ', len(unos_age_df))
print('Age Data Columns: ', len(unos_age_df.columns))

Age Data Rows:  8
Age Data Columns:  14


In [30]:
# Column dtypes and non-null counts for the age table.
# In the output below, 'All Organs', 'Kidney', 'Liver', 'Kidney / Pancreas'
# and 'Heart' are `object` (strings); the remaining count columns are int64.

unos_age_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8 entries, 0 to 7
Data columns (total 14 columns):
 #   Column               Non-Null Count  Dtype 
---  ------               --------------  ----- 
 0   Age Group            8 non-null      object
 1   All Organs           8 non-null      object
 2   Kidney               8 non-null      object
 3   Liver                8 non-null      object
 4   Pancreas             8 non-null      int64 
 5   Kidney / Pancreas    8 non-null      object
 6   Heart                8 non-null      object
 7   Lung                 8 non-null      int64 
 8   Heart / Lung         8 non-null      int64 
 9   Intestine            8 non-null      int64 
 10  Abdominal Wall       8 non-null      int64 
 11  VCA - head and neck  8 non-null      int64 
 12  VCA - upper limb     8 non-null      int64 
 13  VCA - uterus         8 non-null      int64 
dtypes: int64(8), object(6)
memory usage: 1.0+ KB


In [31]:
# Only the columns that .info() reported as `object` need converting;
# the other count columns were already read as int64.
cols_age = ['All Organs', 'Kidney', 'Liver', 'Kidney / Pancreas', 'Heart']
string_to_int(unos_age_df, cols_age)
# Re-check the dtypes to confirm the conversion took effect.
unos_age_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8 entries, 0 to 7
Data columns (total 14 columns):
 #   Column               Non-Null Count  Dtype 
---  ------               --------------  ----- 
 0   Age Group            8 non-null      object
 1   All Organs           8 non-null      int64 
 2   Kidney               8 non-null      int64 
 3   Liver                8 non-null      int64 
 4   Pancreas             8 non-null      int64 
 5   Kidney / Pancreas    8 non-null      int64 
 6   Heart                8 non-null      int64 
 7   Lung                 8 non-null      int64 
 8   Heart / Lung         8 non-null      int64 
 9   Intestine            8 non-null      int64 
 10  Abdominal Wall       8 non-null      int64 
 11  VCA - head and neck  8 non-null      int64 
 12  VCA - upper limb     8 non-null      int64 
 13  VCA - uterus         8 non-null      int64 
dtypes: int64(13), object(1)
memory usage: 1.0+ KB


In [32]:
# Share of the wait list in each age group, as a percentage of the summed
# 'All Organs' column.

unos_age_df['Age Waiting Pct.'] = (
    unos_age_df['All Organs']
    .div(unos_age_df['All Organs'].sum())
    .mul(100)
)

unos_age_df

Unnamed: 0,Age Group,All Organs,Kidney,Liver,Pancreas,Kidney / Pancreas,Heart,Lung,Heart / Lung,Intestine,Abdominal Wall,VCA - head and neck,VCA - upper limb,VCA - uterus,Age Waiting Pct.
0,< 1 Year,105,0,33,1,0,72,0,0,1,0,0,0,0,0.101138
1,1-5 Years,549,242,122,18,0,173,3,0,31,0,0,0,0,0.528805
2,6-10 Years,469,269,59,8,1,134,7,1,16,0,0,0,0,0.451748
3,11-17 Years,958,725,112,21,3,113,7,1,33,0,0,0,0,0.92276
4,18-34 Years,8413,7167,566,164,448,330,53,6,36,2,0,1,4,8.103526
5,35-49 Years,23189,20122,1714,416,1127,613,78,17,39,0,2,2,5,22.335989
6,50-64 Years,43652,37763,4267,223,581,1433,362,16,32,0,1,0,0,42.046254
7,65 +,26484,23199,2637,9,21,542,402,1,4,0,0,0,0,25.509781


> **About 42% of those waiting for an organ are between 50 and 64 years old.**

In [33]:
# Wait-list counts by ethnicity, one row per group and one column per organ.
# Left as the cell's last expression so the notebook renders the frame.

unos_ethnicity_df

Unnamed: 0,Ethnicity,All Organs,Kidney,Liver,Pancreas,Kidney / Pancreas,Heart,Lung,Heart / Lung,Intestine,Abdominal Wall,VCA - head and neck,VCA - upper limb,VCA - uterus
0,"White, Non-Hispanic",40470,31856,6037,458,895,1790,598,23,108,1,2,2,8
1,"Black, Non-Hispanic",28574,26616,638,191,644,970,134,6,28,0,1,1,1
2,Hispanic/Latino,23633,20821,2100,156,461,478,128,9,45,0,0,0,0
3,Unknown,485,310,120,5,11,37,17,1,0,0,0,0,0
4,"Asian, Non-Hispanic",8656,8071,455,33,111,90,30,2,9,0,0,0,0
5,"American Indian/Alaska Native, Non-Hispanic",896,785,88,6,28,10,3,1,1,1,0,0,0
6,"Pacific Islander, Non-Hispanic",598,566,15,3,7,11,0,0,1,0,0,0,0
7,"Multiracial, Non-Hispanic",963,857,64,9,28,25,2,0,1,0,0,0,0


In [34]:
# Dimensions of the ethnicity table: row count first, then column count.

print('Ethnicity Data Rows: ', len(unos_ethnicity_df))
print('Ethnicity Data Columns: ', len(unos_ethnicity_df.columns))

Ethnicity Data Rows:  8
Ethnicity Data Columns:  14


In [35]:
# Column dtypes and non-null counts for the ethnicity table.
# In the output below, 'All Organs', 'Kidney', 'Liver' and 'Heart' are
# `object` (strings); the remaining count columns are int64.

unos_ethnicity_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8 entries, 0 to 7
Data columns (total 14 columns):
 #   Column               Non-Null Count  Dtype 
---  ------               --------------  ----- 
 0   Ethnicity            8 non-null      object
 1   All Organs           8 non-null      object
 2   Kidney               8 non-null      object
 3   Liver                8 non-null      object
 4   Pancreas             8 non-null      int64 
 5   Kidney / Pancreas    8 non-null      int64 
 6   Heart                8 non-null      object
 7   Lung                 8 non-null      int64 
 8   Heart / Lung         8 non-null      int64 
 9   Intestine            8 non-null      int64 
 10  Abdominal Wall       8 non-null      int64 
 11  VCA - head and neck  8 non-null      int64 
 12  VCA - upper limb     8 non-null      int64 
 13  VCA - uterus         8 non-null      int64 
dtypes: int64(9), object(5)
memory usage: 1.0+ KB


In [36]:
# Only the columns that .info() reported as `object` need converting;
# the other count columns were already read as int64.
cols_ethn = ['All Organs', 'Kidney', 'Liver', 'Heart']
string_to_int(unos_ethnicity_df, cols_ethn)
# Re-check the dtypes to confirm the conversion took effect.
unos_ethnicity_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8 entries, 0 to 7
Data columns (total 14 columns):
 #   Column               Non-Null Count  Dtype 
---  ------               --------------  ----- 
 0   Ethnicity            8 non-null      object
 1   All Organs           8 non-null      int64 
 2   Kidney               8 non-null      int64 
 3   Liver                8 non-null      int64 
 4   Pancreas             8 non-null      int64 
 5   Kidney / Pancreas    8 non-null      int64 
 6   Heart                8 non-null      int64 
 7   Lung                 8 non-null      int64 
 8   Heart / Lung         8 non-null      int64 
 9   Intestine            8 non-null      int64 
 10  Abdominal Wall       8 non-null      int64 
 11  VCA - head and neck  8 non-null      int64 
 12  VCA - upper limb     8 non-null      int64 
 13  VCA - uterus         8 non-null      int64 
dtypes: int64(13), object(1)
memory usage: 1.0+ KB


In [37]:
# Share of the wait list in each ethnic group, as a percentage of the summed
# 'All Organs' column.

unos_ethnicity_df['Ethnicity Wait Pct.'] = (
    unos_ethnicity_df['All Organs']
    .div(unos_ethnicity_df['All Organs'].sum())
    .mul(100)
)

unos_ethnicity_df

Unnamed: 0,Ethnicity,All Organs,Kidney,Liver,Pancreas,Kidney / Pancreas,Heart,Lung,Heart / Lung,Intestine,Abdominal Wall,VCA - head and neck,VCA - upper limb,VCA - uterus,Ethnicity Wait Pct.
0,"White, Non-Hispanic",40470,31856,6037,458,895,1790,598,23,108,1,2,2,8,38.810837
1,"Black, Non-Hispanic",28574,26616,638,191,644,970,134,6,28,0,1,1,1,27.402541
2,Hispanic/Latino,23633,20821,2100,156,461,478,128,9,45,0,0,0,0,22.664109
3,Unknown,485,310,120,5,11,37,17,1,0,0,0,0,0,0.465116
4,"Asian, Non-Hispanic",8656,8071,455,33,111,90,30,2,9,0,0,0,0,8.301127
5,"American Indian/Alaska Native, Non-Hispanic",896,785,88,6,28,10,3,1,1,1,0,0,0,0.859266
6,"Pacific Islander, Non-Hispanic",598,566,15,3,7,11,0,0,1,0,0,0,0,0.573484
7,"Multiracial, Non-Hispanic",963,857,64,9,28,25,2,0,1,0,0,0,0,0.92352


In [38]:
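# Let's sort the data by total waiting.
# NOTE: this cell is disabled. `unos_df` is not defined in any of the cells
# shown in this notebook, so the table displayed below is output saved from
# an earlier run, not a result of the code above.

#unos_df.sort_values(by = ['Total Waiting'], ascending = False, inplace = True)
#unos_df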

Unnamed: 0,Organ,Total Waiting,Male,Female,Waiting Pct.
0,Kidney,96400,60000,36400,84.776319
1,Liver,9622,5669,3953,8.461802
4,Heart,3424,2588,836,3.011142
3,Kidney / Pancreas,2251,1206,1045,1.97958
5,Lung,896,431,465,0.787962
2,Pancreas,864,425,439,0.759821
7,Intestine,196,100,96,0.172367
6,Heart / Lung,41,26,15,0.036056
11,VCA - uterus,9,0,9,0.007915
9,VCA - head and neck,3,2,1,0.002638


In [42]:
# Bar chart of the total number waiting for each organ (the 'All Genders'
# column); the male and female counts are added to the hover tooltip.

fig = px.bar(
    unos_gender_df,
    x='Organ',
    y=['All Genders'],
    text_auto='.2s',
    title='UNOS Organ Wait List as of July 25, 2024',
    hover_data=['Male', 'Female'],
)
fig.show()

In [54]:
# Breakdown of the wait list for each organ by gender: one bar trace for
# 'Male' and one for 'Female', both plotted against the 'Organ' column.

# NOTE(review): consider moving this import to the imports cell at the top.
import plotly.graph_objects as go

fig = go.Figure(data=[
    go.Bar(name=gender, x=unos_gender_df['Organ'], y=unos_gender_df[gender])
    for gender in ('Male', 'Female')
])

fig.update_layout(
    title='UNOS Organ Wait List by Gender as of July 25, 2024',
    xaxis=dict(title='Organ'),
    yaxis=dict(title='Wait List Count'),
)

fig.show()

# Disabled alternative using plotly express. Its title still says
# "by age group" and would need updating before this is re-enabled.
#fig = px.bar(unos_gender_df, x = 'Organ', y = ['Male', 'Female'],
#           text_auto='.2s',
#            title = 'UNOS organ waitlist by age group - all age groups',
#            )

#fig.show()

In [60]:
# Bar chart of the number waiting for any organ ('All Organs'), per age group.

fig = px.bar(
    unos_age_df,
    x='Age Group',
    y='All Organs',
    text_auto='.2s',
    title='UNOS Organ Wait List by Age Group as of July 25, 2024',
)
fig.show()

In [62]:
# Bar chart of the number waiting for a kidney, per age group.

fig = px.bar(
    unos_age_df,
    x='Age Group',
    y='Kidney',
    text_auto='.2s',
    title='UNOS Kidney Wait List by Age Group as of July 25, 2024',
)
fig.show()

In [64]:
# Bar chart of the number waiting for any organ ('All Organs'), per ethnic group.

fig = px.bar(
    unos_ethnicity_df,
    x='Ethnicity',
    y='All Organs',
    text_auto='.2s',
    title='UNOS Organ Wait List by Ethnic Group as of July 25, 2024',
)
fig.show()

In [66]:
# Bar chart of the number waiting for a kidney, per ethnic group.
# The title previously said "by Age Group" (copied from the age chart) even
# though the x axis is 'Ethnicity'; it now matches the data being plotted.

fig = px.bar(unos_ethnicity_df, x = 'Ethnicity', y = 'Kidney',  
             text_auto='.2s',
             title = 'UNOS Kidney Wait List by Ethnic Group as of July 25, 2024',
            )
fig.show()