In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')
import numpy as np

%matplotlib inline

In [2]:
# Read the cleaned CDC file (relative path) into the frame every later cell works from.
cdc = pd.read_csv(filepath_or_buffer='../data/cdc_clean.csv')

In [3]:
# Split respondents on the `trans` label. Rows whose label is missing or has
# any other value land in neither subset.
cdc_trans = cdc[cdc['trans'] == 'Transgender']
cdc_cis = cdc[cdc['trans'] == 'Cisgender']
cdc_trans.head()

Unnamed: 0,_STATE,_AGE_G,_SEX,BIRTHSEX,SOMALE,SOFEMALE,TRNSGNDR,_EDUCAG,EMPLOY1,_INCOMG,...,PERSDOC2,MEDCOST,CHECKUP1,region,trans,gender,sexual orientation,education,employment,income
7347,Alaska,Age 45 to 54,Female,,,Straight,"Yes, female to male",Graduated from College or Technical School,Employed for Wages,"50,000 or more",...,"Yes, only one",No,Within past year (anytime less than 12 months ...,West,Transgender,Male,Straight,Post-secondary education,Employed,"$35,000 or more"
7635,Alaska,Age 35 to 44,Male,,Straight,,"Yes, male to female",Graduated High School,Out of work for 1 year or more,"15,000 to less than 25,000",...,"Yes, only one",Yes,Within past year (anytime less than 12 months ...,West,Transgender,Female,Straight,No post-secondary education,Unemployed,"Less than $35,000"
7709,Alaska,Age 65 and older,Male,,Gay,,"Yes, male to female",Did not graduate High School,Retired,"50,000 or more",...,"Yes, only one",No,Within past year (anytime less than 12 months ...,West,Transgender,Female,Gay,No post-secondary education,,"$35,000 or more"
7993,Alaska,Age 35 to 44,Male,,Straight,,"Yes, male to female",Graduated High School,Self-employed,"50,000 or more",...,"Yes, only one",No,Within past year (anytime less than 12 months ...,West,Transgender,Female,Straight,No post-secondary education,Employed,"$35,000 or more"
8102,Alaska,Age 65 and older,Male,,Something Else,,"Yes, gender nonconforming",Graduated from College or Technical School,Retired,"35,000 to less than 50,000",...,"Yes, only one",No,Within past year (anytime less than 12 months ...,West,Transgender,Gender nonconforming,,Post-secondary education,,"$35,000 or more"


In [4]:
# Transgender respondents that have a recorded BIRTHSEX value.
# The mask is built from cdc_trans itself (not from the full cdc frame) so the
# filter does not depend on pandas aligning a mask from a different, larger
# frame onto cdc_trans's index.
cdc_trans.loc[cdc_trans['BIRTHSEX'].notnull()]

Unnamed: 0,_STATE,_AGE_G,_SEX,BIRTHSEX,SOMALE,SOFEMALE,TRNSGNDR,_EDUCAG,EMPLOY1,_INCOMG,...,PERSDOC2,MEDCOST,CHECKUP1,region,trans,gender,sexual orientation,education,employment,income
86175,Hawaii,Age 55 to 64,Female,Female,,Straight,"Yes, female to male",Graduated from College or Technical School,Employed for Wages,"50,000 or more",...,"Yes, only one",No,Within past year (anytime less than 12 months ...,West,Transgender,Male,Straight,Post-secondary education,Employed,"$35,000 or more"
86485,Hawaii,Age 65 and older,Male,Male,Gay,,"Yes, female to male",Attended College or Technical School,Retired,"25,000 to less than 35,000",...,"Yes, only one",No,Within past year (anytime less than 12 months ...,West,Transgender,Male,Gay,Post-secondary education,,"Less than $35,000"
86799,Hawaii,Age 55 to 64,Male,Male,Gay,,"Yes, male to female",Graduated High School,Unable to work,"Less than 15,000",...,More than one,No,Within past year (anytime less than 12 months ...,West,Transgender,Female,Gay,No post-secondary education,,"Less than $35,000"
86912,Hawaii,Age 65 and older,Female,Female,,Straight,"Yes, female to male",Attended College or Technical School,Retired,"25,000 to less than 35,000",...,"Yes, only one",No,Within past year (anytime less than 12 months ...,West,Transgender,Male,Straight,Post-secondary education,,"Less than $35,000"
86921,Hawaii,Age 65 and older,Female,Female,,Straight,"Yes, female to male",Graduated from College or Technical School,Retired,,...,"Yes, only one",No,Within past year (anytime less than 12 months ...,West,Transgender,Male,Straight,Post-secondary education,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
371179,Vermont,Age 25 to 34,Female,Female,,Something Else,"Yes, gender nonconforming",Graduated High School,A student,"25,000 to less than 35,000",...,"Yes, only one",No,Within past year (anytime less than 12 months ...,Northeast,Transgender,Gender nonconforming,,No post-secondary education,,"Less than $35,000"
371243,Vermont,Age 45 to 54,Male,Male,Bisexual,,"Yes, gender nonconforming",Attended College or Technical School,A homemaker,"50,000 or more",...,"Yes, only one",No,Within past year (anytime less than 12 months ...,Northeast,Transgender,Gender nonconforming,Bisexual,Post-secondary education,,"$35,000 or more"
371466,Vermont,Age 65 and older,Male,Male,Straight,,"Yes, male to female",Graduated from College or Technical School,Employed for Wages,"25,000 to less than 35,000",...,"Yes, only one",No,Within past year (anytime less than 12 months ...,Northeast,Transgender,Female,Straight,Post-secondary education,Employed,"Less than $35,000"
371653,Vermont,Age 65 and older,Male,Male,Something Else,,"Yes, male to female",Attended College or Technical School,Retired,,...,More than one,No,Within past year (anytime less than 12 months ...,Northeast,Transgender,Female,,Post-secondary education,,


## 1. Are there health disparities between transgender and cisgender people in the U.S.?

In [5]:
# Share of respondents reporting 'Poor' general health, by `trans` label.
# Column-wise boolean masks replace the per-row iterrows() loop. The counts are
# unchanged because each comparison is False for a missing value, exactly as the
# row-wise `==` checks were.
# NOTE(review): the denominators count every row carrying the label, including
# any whose GENHLTH value is missing -- confirm that is the intended base.
is_poor = cdc['GENHLTH'] == 'Poor'
is_trans = cdc['trans'] == 'Transgender'
is_cis = cdc['trans'] == 'Cisgender'

# int() keeps plain Python ints, so an empty group still raises
# ZeroDivisionError below instead of silently producing nan.
total_trans = int(is_trans.sum())
trans_poor_health = int((is_trans & is_poor).sum())
total_cis = int(is_cis.sum())
cis_poor_health = int((is_cis & is_poor).sum())

trans_poor_pct = round((trans_poor_health / total_trans) * 100, 2)
cis_poor_pct = round((cis_poor_health / total_cis) * 100, 2)

print('Percent of Trans People with Poor Health: {}%'.format(trans_poor_pct))
print('Percent of Cis People with Poor Health: {}%'.format(cis_poor_pct))
# The gap is taken between the two rounded figures so it agrees with the numbers
# printed above; the outer round() trims floating-point noise from the subtraction.
print('Point Difference: {} points'.format(round(trans_poor_pct - cis_poor_pct, 4)))

Percent of Trans People with Poor Health: 9.01%
Percent of Cis People with Poor Health: 5.23%
Point Difference: 3.78 points


## 2. If so, what regions have the greatest health disparities between transgender and cisgender people?

In [6]:
# Distinct region labels among transgender respondents, in order of first appearance.
regions = cdc_trans['region'].unique().tolist()

In [7]:
# Two separate, empty result lists for the per-region percentages.
pct_trans_poor_reg, pct_cis_poor_reg = [], []

In [8]:
# Per-region share of transgender and cisgender respondents reporting 'Poor'
# general health, collected into `region_breakdown` (one row per entry of `regions`).
#
# The result lists are (re)created here so that re-running this cell starts from
# empty lists instead of appending a second set of values, which would make the
# lists longer than `regions` and break the DataFrame construction below.
pct_trans_poor_reg = []
pct_cis_poor_reg = []

# Masks over the whole frame are computed once, replacing the row-by-row
# iterrows() test of every respondent in every region.
is_poor = cdc['GENHLTH'] == 'Poor'
is_trans = cdc['trans'] == 'Transgender'
is_cis = cdc['trans'] == 'Cisgender'

for item in regions:
    in_region = cdc['region'] == item

    # Plain Python ints: a region with no respondents in a group still raises
    # ZeroDivisionError rather than yielding nan.
    total_trans = int((in_region & is_trans).sum())
    trans_poor_health = int((in_region & is_trans & is_poor).sum())
    total_cis = int((in_region & is_cis).sum())
    cis_poor_health = int((in_region & is_cis & is_poor).sum())

    pct_trans_poor_reg.append((trans_poor_health / total_trans) * 100)
    pct_cis_poor_reg.append((cis_poor_health / total_cis) * 100)

region_breakdown = pd.DataFrame({'region': regions,
                                 'pct_trans_poor': pct_trans_poor_reg,
                                 'pct_cis_poor': pct_cis_poor_reg})

In [9]:
# Number of transgender respondents per region (sample size behind each regional rate).
cdc_trans['region'].value_counts()

South        345
West         281
Midwest      180
Northeast    149
Name: region, dtype: int64

In [10]:
# Percentage-point gap per region: transgender rate minus cisgender rate.
region_breakdown['diff'] = region_breakdown['pct_trans_poor'].sub(region_breakdown['pct_cis_poor'])

In [11]:
# Regions ordered from the largest gap to the smallest.
health_diff = region_breakdown.sort_values(by='diff', ascending=False)

In [12]:
# Display the regional breakdown, sorted by the trans-minus-cis gap.
health_diff

Unnamed: 0,region,pct_trans_poor,pct_cis_poor,diff
3,Midwest,8.888889,4.310947,4.577942
1,Northeast,8.724832,4.561153,4.163679
0,West,8.540925,4.534692,4.006233
2,South,9.565217,6.462991,3.102226


In [13]:
# Write the sorted regional breakdown to disk, leaving out the index column.
health_diff.to_csv(path_or_buf='../data/health_diff.csv', index=False)

## 3. Among transgender people, what demographics are most likely to have poor health?

### Age

In [14]:
# Poor-general-health rate among transgender respondents, by age group.
# value_counts() tallies each `_AGE_G` label in one pass (missing labels are
# dropped by default), replacing the 12-branch iterrows() chain.
trans_age = cdc_trans['_AGE_G']
age_totals = trans_age.value_counts()
age_poor = trans_age[cdc_trans['GENHLTH'] == 'Poor'].value_counts()

# .get(..., 0) covers a label that never occurs; int() keeps plain Python ints so
# an empty group still raises ZeroDivisionError when its rate is computed.
total_1824 = int(age_totals.get('Age 18 to 24', 0))
poor_health_1824 = int(age_poor.get('Age 18 to 24', 0))
total_2534 = int(age_totals.get('Age 25 to 34', 0))
poor_health_2534 = int(age_poor.get('Age 25 to 34', 0))
total_3544 = int(age_totals.get('Age 35 to 44', 0))
poor_health_3544 = int(age_poor.get('Age 35 to 44', 0))
total_4554 = int(age_totals.get('Age 45 to 54', 0))
poor_health_4554 = int(age_poor.get('Age 45 to 54', 0))
total_5564 = int(age_totals.get('Age 55 to 64', 0))
poor_health_5564 = int(age_poor.get('Age 55 to 64', 0))
total_65 = int(age_totals.get('Age 65 and older', 0))
poor_health_65 = int(age_poor.get('Age 65 and older', 0))

# (printed label, poor-health count, group size), in reporting order.
age_summary = [
    ('18-24', poor_health_1824, total_1824),
    ('25-34', poor_health_2534, total_2534),
    ('35-44', poor_health_3544, total_3544),
    ('45-54', poor_health_4554, total_4554),
    ('55-64', poor_health_5564, total_5564),
    ('65 and Older', poor_health_65, total_65),
]

# All group sizes are printed first, then all rates.
for label, poor, total in age_summary:
    print('Total {}:'.format(label), total)
for label, poor, total in age_summary:
    print('Percent of People Age {} with Poor General Health: {}%'.format(
        label, round((poor / total) * 100, 2)))

Total 18-24: 168
Total 25-34: 165
Total 35-44: 128
Total 45-54: 111
Total 55-64: 155
Total 65 and Older: 228
Percent of People Age 18-24 with Poor General Health: 5.95%
Percent of People Age 25-34 with Poor General Health: 7.27%
Percent of People Age 35-44 with Poor General Health: 7.81%
Percent of People Age 45-54 with Poor General Health: 9.01%
Percent of People Age 55-64 with Poor General Health: 12.26%
Percent of People Age 65 and Older with Poor General Health: 10.96%


### Race/Ethnicity

In [15]:
# Poor-general-health rate among transgender respondents: white non-Hispanic
# versus every other listed `_RACE` label. Rows with a missing or unlisted
# label count toward neither group.
# The label list is written once and used through isin(), instead of repeating
# the seven-way `or` chain in two branches of a row-by-row loop.
poc_labels = [
    'Black only, non-Hispanic',
    'American Indian or Alaskan Native only, Non-Hispanic',
    'Asian only, non-Hispanic',
    'Native Hawaiian or other Pacific Islander only, Non-Hispanic',
    'Other race only, non-Hispanic',
    'Multiracial, non-Hispanic',
    'Hispanic',
]

trans_race = cdc_trans['_RACE']
trans_is_poor = cdc_trans['GENHLTH'] == 'Poor'
race_is_poc = trans_race.isin(poc_labels)
race_is_white = trans_race == 'White only, non-Hispanic'

# int() keeps plain Python ints so an empty group still raises ZeroDivisionError.
total_poc = int(race_is_poc.sum())
poc_poor_health = int((race_is_poc & trans_is_poor).sum())
total_white = int(race_is_white.sum())
white_poor_health = int((race_is_white & trans_is_poor).sum())

print('Total White, Non-Hispanic People:', total_white)
print('Total Hispanic and/or POC:', total_poc)
print('Percent of White, Non-Hispanic Trans People with Poor General Health: {}%'.format(
    round((white_poor_health / total_white) * 100, 2)))
print('Percent of Trans Hispanic and/or POC with Poor General Health: {}%'.format(
    round((poc_poor_health / total_poc) * 100, 2)))

Total White, Non-Hispanic People: 581
Total Hispanic and/or POC: 348
Percent of White, Non-Hispanic Trans People with Poor General Health: 8.61%
Percent of Trans Hispanic and/or POC with Poor General Health: 8.05%


### Gender

In [16]:
# Poor-general-health rate among transgender respondents, by `gender` label.
# value_counts() replaces the row-by-row iterrows() tally; rows with a missing
# or other label are not counted in any group.
trans_gender = cdc_trans['gender']
gender_totals = trans_gender.value_counts()
gender_poor = trans_gender[cdc_trans['GENHLTH'] == 'Poor'].value_counts()

# .get(..., 0) covers a label that never occurs; int() keeps plain Python ints so
# an empty group still raises ZeroDivisionError when its rate is computed.
total_male = int(gender_totals.get('Male', 0))
male_poor_health = int(gender_poor.get('Male', 0))
total_female = int(gender_totals.get('Female', 0))
female_poor_health = int(gender_poor.get('Female', 0))
total_gnc = int(gender_totals.get('Gender nonconforming', 0))
gnc_poor_health = int(gender_poor.get('Gender nonconforming', 0))

print('Total Male:', total_male)
print('Total Female:', total_female)
print('Total GNC:', total_gnc)
print('Percent of Male People with Poor General Health: {}%'.format(
    round((male_poor_health / total_male) * 100, 2)))
print('Percent of Female People with Poor General Health: {}%'.format(
    round((female_poor_health / total_female) * 100, 2)))
print('Percent of GNC People with Poor General Health: {}%'.format(
    round((gnc_poor_health / total_gnc) * 100, 2)))

Total Male: 358
Total Female: 371
Total GNC: 226
Percent of Male People with Poor General Health: 6.15%
Percent of Female People with Poor General Health: 10.51%
Percent of GNC People with Poor General Health: 11.06%


### Sexual Orientation

In [17]:
# Poor-general-health rate among transgender respondents, by 'sexual orientation'.
# Only the Straight, Gay and Bisexual labels are reported; rows with a missing
# or other label are not counted. value_counts() replaces the iterrows() tally.
trans_orientation = cdc_trans['sexual orientation']
orientation_totals = trans_orientation.value_counts()
orientation_poor = trans_orientation[cdc_trans['GENHLTH'] == 'Poor'].value_counts()

# .get(..., 0) covers a label that never occurs; int() keeps plain Python ints so
# an empty group still raises ZeroDivisionError when its rate is computed.
total_s = int(orientation_totals.get('Straight', 0))
s_poor_health = int(orientation_poor.get('Straight', 0))
total_gay = int(orientation_totals.get('Gay', 0))
gay_poor_health = int(orientation_poor.get('Gay', 0))
total_bi = int(orientation_totals.get('Bisexual', 0))
bi_poor_health = int(orientation_poor.get('Bisexual', 0))

print('Total Straight:', total_s)
print('Total Gay:', total_gay)
print('Total Bi:', total_bi)
print('Percent of Straight People with Poor General Health: {}%'.format(
    round((s_poor_health / total_s) * 100, 2)))
print('Percent of Gay People with Poor General Health: {}%'.format(
    round((gay_poor_health / total_gay) * 100, 2)))
print('Percent of Bisexual People with Poor General Health: {}%'.format(
    round((bi_poor_health / total_bi) * 100, 2)))

Total Straight: 478
Total Gay: 100
Total Bi: 178
Percent of Straight People with Poor General Health: 7.95%
Percent of Gay People with Poor General Health: 10.0%
Percent of Bisexual People with Poor General Health: 6.18%


### Education

In [18]:
# Poor-general-health rate among transgender respondents, by `education` label.
# value_counts() replaces the row-by-row iterrows() tally; rows with a missing
# or other label are not counted in either group.
trans_education = cdc_trans['education']
education_totals = trans_education.value_counts()
education_poor = trans_education[cdc_trans['GENHLTH'] == 'Poor'].value_counts()

# .get(..., 0) covers a label that never occurs; int() keeps plain Python ints so
# an empty group still raises ZeroDivisionError when its rate is computed.
total_ps = int(education_totals.get('Post-secondary education', 0))
ps_poor_health = int(education_poor.get('Post-secondary education', 0))
total_nops = int(education_totals.get('No post-secondary education', 0))
nops_poor_health = int(education_poor.get('No post-secondary education', 0))

print('Total Post-secondary Education:', total_ps)
print('Total No Post-secondary Education:', total_nops)
print('Percent of People with a Post-Secondary Education with Poor General Health: {}%'.format(
    round((ps_poor_health / total_ps) * 100, 2)))
print('Percent of People with No Post-Secondary Education with Poor General Health: {}%'.format(
    round((nops_poor_health / total_nops) * 100, 2)))

Total Post-secondary Education: 518
Total No Post-secondary Education: 432
Percent of People with a Post-Secondary Education with Poor General Health: 6.56%
Percent of People with No Post-Secondary Education with Poor General Health: 12.04%


### Employment

In [19]:
# Poor-general-health rate among transgender respondents, by `employment` label.
# Only rows labelled 'Employed' or 'Unemployed' are counted; rows with a missing
# or other label fall outside both groups. value_counts() replaces the
# row-by-row iterrows() tally.
trans_employment = cdc_trans['employment']
employment_totals = trans_employment.value_counts()
employment_poor = trans_employment[cdc_trans['GENHLTH'] == 'Poor'].value_counts()

# .get(..., 0) covers a label that never occurs; int() keeps plain Python ints so
# an empty group still raises ZeroDivisionError when its rate is computed.
total_e = int(employment_totals.get('Employed', 0))
e_poor_health = int(employment_poor.get('Employed', 0))
total_une = int(employment_totals.get('Unemployed', 0))
une_poor_health = int(employment_poor.get('Unemployed', 0))

print('Total Employed:', total_e)
print('Total Unemployed and Looking for Work:', total_une)
print('Percent of Employed People with Poor General Health: {}%'.format(
    round((e_poor_health / total_e) * 100, 2)))
print('Percent of Unemployed People Looking for Work with Poor General Health: {}%'.format(
    round((une_poor_health / total_une) * 100, 2)))

Total Employed: 465
Total Unemployed and Looking for Work: 40
Percent of Employed People with Poor General Health: 3.23%
Percent of Unemployed People Looking for Work with Poor General Health: 12.5%


### Income

In [20]:
# Poor-general-health rate among transgender respondents, by `income` bracket.
# value_counts() replaces the row-by-row iterrows() tally; rows with a missing
# or other bracket are not counted in either group.
trans_income = cdc_trans['income']
income_totals = trans_income.value_counts()
income_poor = trans_income[cdc_trans['GENHLTH'] == 'Poor'].value_counts()

# .get(..., 0) covers a label that never occurs; int() keeps plain Python ints so
# an empty group still raises ZeroDivisionError when its rate is computed.
total_less35 = int(income_totals.get('Less than $35,000', 0))
less35_poor_health = int(income_poor.get('Less than $35,000', 0))
total_more = int(income_totals.get('$35,000 or more', 0))
more_poor_health = int(income_poor.get('$35,000 or more', 0))

print('Total Less than $35,000:', total_less35)
print('Total $35,000 or More:', total_more)
print('Percent of People Who Make Less Than $35,000 with Poor General Health: {}%'.format(
    round((less35_poor_health / total_less35) * 100, 2)))
print('Percent of People Who Make $35,000 or More with Poor General Health: {}%'.format(
    round((more_poor_health / total_more) * 100, 2)))

Total Less than $35,000: 405
Total $35,000 or More: 371
Percent of People Who Make Less Than $35,000 with Poor General Health: 10.62%
Percent of People Who Make $35,000 or More with Poor General Health: 4.85%


## 4. How might the GMLA help reduce these disparities?

In [21]:
# Keep only respondents whose general health is reported as 'Poor'.
cdc_poor = cdc[cdc['GENHLTH'] == 'Poor']

In [22]:
# (rows, columns) of the poor-health subset.
cdc_poor.shape

(22105, 25)

### Healthcare Coverage

In [23]:
# Among respondents in poor general health (cdc_poor), the share of transgender
# and of cisgender respondents flagged as having no health care coverage.
# Boolean masks replace the row-by-row iterrows() loop over cdc_poor.
# NOTE(review): the denominators count every row carrying the `trans` label, so
# rows with a missing or other `_HCVU651` value are treated as not lacking
# coverage -- confirm against the variable's definition that this is intended.
lacks_coverage = cdc_poor['_HCVU651'] == 'Do not have health care coverage'
poor_is_trans = cdc_poor['trans'] == 'Transgender'
poor_is_cis = cdc_poor['trans'] == 'Cisgender'

# int() keeps plain Python ints so an empty group still raises ZeroDivisionError.
total_trans = int(poor_is_trans.sum())
trans_nocov = int((poor_is_trans & lacks_coverage).sum())
total_cis = int(poor_is_cis.sum())
cis_nocov = int((poor_is_cis & lacks_coverage).sum())

print('Total Trans:', total_trans)
print('Total Cis:', total_cis)
print('Percent of Trans People with Poor Health Who Dont Have Healthcare Coverage: {}%'.format(
    round((trans_nocov / total_trans) * 100, 2)))
print('Percent of Cis People with Poor Health Who Dont Have Healthcare Coverage: {}%'.format(
    round((cis_nocov / total_cis) * 100, 2)))

Total Trans: 86
Total Cis: 12203
Percent of Trans People with Poor Health Who Dont Have Healthcare Coverage: 13.95%
Percent of Cis People with Poor Health Who Dont Have Healthcare Coverage: 7.6%
