<a href="https://colab.research.google.com/github/oimartin/Older-and-Wiser/blob/main/medicare_population_65_plus.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Injest Data
* Source - Centers for Medicare and Medicaid Services
* https://data.cms.gov/summary-statistics-on-beneficiary-enrollment/medicare-and-medicaid-reports/medicare-newly-enrolled

In [90]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go

# Area Data

In [38]:
# load data
total_area_20 = pd.read_csv('https://raw.githubusercontent.com/oimartin/Older-and-Wiser/main/data/2020_total_area_medicare_enrolled.csv')
state_pop = pd.read_csv('https://raw.githubusercontent.com/oimartin/Older-and-Wiser/main/data/us_census_2020_population.csv')

In [39]:
# function to modify Medicare area excel files

def area_df(df):
  df.drop(columns='Unnamed: 13', inplace=True)
  df.iloc[:,1:] = df.iloc[:, 1:].replace('  ', np.nan, regex=True)
  df.dropna(axis=0,inplace=True)
  df.columns = df.iloc[0]

  clean = df.iloc[1:,0:2]
  clean.iloc[:,:] = clean.replace(',', '', regex=True)
  clean.iloc[:,:] = clean.replace('*', 0)
  clean.iloc[:,1] = clean.iloc[:,1].astype(int)
  return clean

In [40]:
# Check shape
clean_area_20 = area_df(total_area_20)
display(clean_area_20.shape)

(60, 2)

In [41]:
# function to modify US population excel file
def pop_df(df):
  df = df.iloc[:, 0:2].dropna(axis=0)
  df.iloc[:,:] = df.replace(',', '', regex=True)
  df['Total:'] = df['Total:'].astype(int)

  return df

In [42]:
# Clean and add state population to clean_area_20 df
clean_pop = pop_df(state_pop)
clean_area_20['State_Pop'] = 1
clean_area_20.iloc[2:54,2] = clean_pop['Total:']
clean_area_20['A.B_aged_state_pop_porp'] = clean_area_20.iloc[2:54,1] / clean_area_20.iloc[2:54,2]
clean_area_20['AB_aged_medicare_aged_prorp'] = clean_area_20.iloc[:,1]/clean_area_20.iloc[0,1]

In [43]:
clean_area_20.iloc[0,1]

3793542

In [44]:
clean_area_20

2,Area of Residence,Part A and/or Part B Total,State_Pop,A.B_aged_state_pop_porp,AB_aged_medicare_aged_prorp
4,All Areas,3793542,1,,1.0
5,United States,3739508,1,,0.985756
7,Alabama,58719,5024279,0.011687,0.015479
8,Alaska,8263,733391,0.011267,0.002178
9,Arizona,78342,7151502,0.010955,0.020651
10,Arkansas,35173,3011524,0.011679,0.009272
11,California,403418,39538223,0.010203,0.106343
12,Colorado,61866,5773714,0.010715,0.016308
13,Connecticut,42950,3605944,0.011911,0.011322
14,Delaware,12877,989948,0.013008,0.003394


In [45]:
# Create regions
clean_area_20['Region'] = clean_area_20['Area of Residence']
clean_area_20['Region'].replace(['Connecticut','Maine','Massachusetts',
                                 'New Hampshire','Rhode Island', 'Vermont'],
                                'New England',inplace=True)

clean_area_20['Region'].replace(['New Jersey','New York','Pennsylvania'],
                                'Middle Atlantic',inplace=True)

clean_area_20['Region'].replace(['Indiana','Illinois', 'Michigan',
                                 'Ohio', 'Wisconsin'],
                                'East North Central',inplace=True)

clean_area_20['Region'].replace(['Iowa', 'Nebraska', 'Kansas', 'North Dakota',
                                 'Minnesota', 'South Dakota', 'Missouri'],
                                'West North Central',inplace=True)

clean_area_20['Region'].replace(['Delaware', 'District of Columbia', 'Florida',
                                 'Georgia', 'Maryland', 'North Carolina',
                                 'South Carolina', 'Virginia', 'West Virginia'],
                                'South Atlantic',inplace=True)

clean_area_20['Region'].replace(['Alabama', 'Louisiana', 'Oklahoma', 
                                 'Texas'],
                                'West South Central',inplace=True)

clean_area_20['Region'].replace(['Arkansas', 'Kentucky', 'Mississippi', 
                                 'Tennessee'],
                                'East South Central',inplace=True)

clean_area_20['Region'].replace(['Arizona', 'Colorado', 'Idaho','New Mexico',
                                 'Montana', 'Utah', 'Nevada', 'Wyoming'],
                                'Mountain',inplace=True)

clean_area_20['Region'].replace(['Alaska', 'California', 'Hawaii', 'Oregon',
                                 'Washington'],
                                'Pacific',inplace=True)

clean_area_20['Region'].replace(['Puerto Rico', 'Virgin Islands',
                                 'American Samoa', 'Guam', 'Northern Mariana Islands'],
                                'Territories',inplace=True)


In [46]:
clean_area_20.sort_values(by='AB_aged_medicare_aged_prorp', ascending=False).head(7)

2,Area of Residence,Part A and/or Part B Total,State_Pop,A.B_aged_state_pop_porp,AB_aged_medicare_aged_prorp,Region
4,All Areas,3793542,1,,1.0,All Areas
5,United States,3739508,1,,0.985756,United States
11,California,403418,39538223,0.010203,0.106343,Pacific
50,Texas,282244,29145505,0.009684,0.074401,West South Central
16,Florida,266853,21538187,0.01239,0.070344,South Atlantic
39,New York,224189,20201249,0.011098,0.059098,Middle Atlantic
45,Pennsylvania,163778,13002700,0.012596,0.043173,Middle Atlantic


In [47]:
clean_area_20.sort_values(by='State_Pop', ascending=False).head()

2,Area of Residence,Part A and/or Part B Total,State_Pop,A.B_aged_state_pop_porp,AB_aged_medicare_aged_prorp,Region
11,California,403418,39538223,0.010203,0.106343,Pacific
50,Texas,282244,29145505,0.009684,0.074401,West South Central
16,Florida,266853,21538187,0.01239,0.070344,South Atlantic
39,New York,224189,20201249,0.011098,0.059098,Middle Atlantic
45,Pennsylvania,163778,13002700,0.012596,0.043173,Middle Atlantic


In [48]:
clean_area_20.sort_values(by='A.B_aged_state_pop_porp', ascending=False).head()

2,Area of Residence,Part A and/or Part B Total,State_Pop,A.B_aged_state_pop_porp,AB_aged_medicare_aged_prorp,Region
26,Maine,19549,1362359,0.014349,0.005153,New England
36,New Hampshire,19679,1377529,0.014286,0.005188,New England
52,Vermont,9156,643077,0.014238,0.002414,New England
57,Wyoming,7919,576851,0.013728,0.002087,Mountain
33,Montana,14752,1084225,0.013606,0.003889,Mountain


In [49]:
clean_area_20[clean_area_20['Region'] == 'South Atlantic']

2,Area of Residence,Part A and/or Part B Total,State_Pop,A.B_aged_state_pop_porp,AB_aged_medicare_aged_prorp,Region
14,Delaware,12877,989948,0.013008,0.003394,South Atlantic
15,District of Columbia,5352,689545,0.007762,0.001411,South Atlantic
16,Florida,266853,21538187,0.01239,0.070344,South Atlantic
17,Georgia,111772,10711908,0.010434,0.029464,South Atlantic
27,Maryland,67217,6177224,0.010881,0.017719,South Atlantic
40,North Carolina,118936,10439388,0.011393,0.031352,South Atlantic
47,South Carolina,63173,5118425,0.012342,0.016653,South Atlantic
53,Virginia,96106,8631393,0.011134,0.025334,South Atlantic
55,West Virginia,23131,1793716,0.012896,0.006097,South Atlantic


In [50]:
clean_area_20.sort_values(by=['A.B_aged_state_pop_porp'], ascending=False)

2,Area of Residence,Part A and/or Part B Total,State_Pop,A.B_aged_state_pop_porp,AB_aged_medicare_aged_prorp,Region
26,Maine,19549,1362359,0.014349,0.005153,New England
36,New Hampshire,19679,1377529,0.014286,0.005188,New England
52,Vermont,9156,643077,0.014238,0.002414,New England
57,Wyoming,7919,576851,0.013728,0.002087,Mountain
33,Montana,14752,1084225,0.013606,0.003889,Mountain
48,South Dakota,11682,886667,0.013175,0.003079,West North Central
14,Delaware,12877,989948,0.013008,0.003394,South Atlantic
55,West Virginia,23131,1793716,0.012896,0.006097,South Atlantic
56,Wisconsin,74544,5893718,0.012648,0.01965,East North Central
45,Pennsylvania,163778,13002700,0.012596,0.043173,Middle Atlantic


In [51]:
# Only select regions and not entire US
regions = clean_area_20[(clean_area_20['Region'] != 'All Areas') &
              (clean_area_20['Region'] != 'United States') &
              (clean_area_20['Region'] != 'Unknown')].groupby('Region').sum().reset_index()

In [52]:
# Check regions df
regions['Region'].replace({'Foreign Countries and Other Outyling Areas': 'Outside US'}, inplace=True)
regions

2,Region,Part A and/or Part B Total,State_Pop,A.B_aged_state_pop_porp,AB_aged_medicare_aged_prorp
0,East North Central,560799,47368533,0.05958,0.14783
1,East South Central,203517,17389479,0.046925,0.053648
2,Outside US,20349,1,0.0,0.005364
3,Middle Atlantic,489856,42492943,0.034662,0.129129
4,Mountain,270236,24919150,0.091681,0.071236
5,New England,185837,15116205,0.078653,0.048988
6,Pacific,567234,53669422,0.056851,0.149526
7,South Atlantic,765417,66089734,0.10224,0.201768
8,Territories,33684,3285878,0.009289,0.008879
9,West North Central,255159,21616921,0.083567,0.067261


## Regions

### Regions named


> Northeast
>>New England
1. Connecticut
2. Maine
3. Massachusetts
4. New Hampshire
5. Rhode Island
6. Vermont

>> Middle Atlantic
7. New Jersey
8. New York
9. Pennsylvania

> Midwest
>>East North Central
1. Indiana
2. Illinois
3. Michigan
4. Ohio
5. Wisconsin

>>West North Central
6. Iowa
7. Nebraska
8. Kansas
9. North Dakota
10. Minnesota
11. South Dakota
12. Missouri

> South
>> South Atlantic
1. Delaware
2. District of Columbia
3. Florida
5. Georgia
6. Maryland
8. North Carolina
9. South Carolina
10. Virginia
11. West Virginia

>> East South Central
12. Alabama
13. Kentucky
14. Mississippi
15. Tennessee

>> West South Central
16. Arkansas
17. Louisiana
18. Oklahoma
19. Texas

>West
>> Mountain
1. Arizona
2. Colorado
3. Idaho
4. New Mexico
5. Montana
6. Utah
7. Nevada
8. Wyoming

>> Pacific
9. Alaska
10. California
11. Hawaii
12. Oregon
13. Washington




### Region Graphs

In [85]:
fig = px.bar(regions.sort_values('Part A and/or Part B Total'), x="Region",
             y='Part A and/or Part B Total', color="A.B_aged_state_pop_porp",
             color_continuous_scale=['#ffefd6', '#db5022', '#44131a'])
fig.update_layout(template="plotly_white")
fig.update_coloraxes(colorbar_orientation='h')
fig.show()

In [99]:
regions

2,Region,Part A and/or Part B Total,State_Pop,A.B_aged_state_pop_porp,AB_aged_medicare_aged_prorp
0,East North Central,560799,47368533,0.05958,0.14783
1,East South Central,203517,17389479,0.046925,0.053648
2,Outside US,20349,1,0.0,0.005364
3,Middle Atlantic,489856,42492943,0.034662,0.129129
4,Mountain,270236,24919150,0.091681,0.071236
5,New England,185837,15116205,0.078653,0.048988
6,Pacific,567234,53669422,0.056851,0.149526
7,South Atlantic,765417,66089734,0.10224,0.201768
8,Territories,33684,3285878,0.009289,0.008879
9,West North Central,255159,21616921,0.083567,0.067261


In [98]:
states = clean_area_20[(clean_area_20['Region'] != 'All Areas') &
              (clean_area_20['Region'] != 'United States') &
              (clean_area_20['Region'] != 'Unknown')].copy()

In [100]:
states['Region_porp'] = states['Region'].replace({'East North Central': regions.iloc[0,4],
                                                  'East South Central': regions.iloc[1,4],
                                                  'Outside US':regions.iloc[2,4],
                                                  'Middle Atlantic':regions.iloc[3,4],
                                                  'Mountain':regions.iloc[4,4],
                                                  'New England':regions.iloc[5,4],
                                                  'Pacific':regions.iloc[6,4],
                                                  'South Atlantic':regions.iloc[7,4],
                                                  'Territories':regions.iloc[8,4],
                                                  'West North Central':regions.iloc[9,4],
                                                  'West South Central':regions.iloc[10,4]})


Unnamed: 0,fips,unemp
0,01001,5.3
1,01003,5.4
2,01005,8.6
3,01007,6.6
4,01009,5.5
...,...,...
3214,72145,13.9
3215,72147,10.6
3216,72149,20.2
3217,72151,16.9


In [102]:
fig = px.choropleth(df,
                    locations='state_code', 
                    locationmode="USA-states", 
                    scope="usa",
                    color='Median Sales Price ($)',
                    color_continuous_scale="Viridis_r", 
                    
                    )
fig.show()

### All States Graphs

In [None]:
fig = px.bar(clean_area_20.iloc[2:, :], y="Area of Residence", x="AB_aged_medicare_aged_prorp", color="Part A and/or Part B Total")
fig.show()

In [None]:
fig = px.bar(clean_area_20.iloc[2:54, :], y="Area of Residence", x="A.B_aged_pop_porp", color="Part A and/or Part B Total")
fig.show()

ValueError: ignored

In [None]:
fig = px.bar(clean_area_20.iloc[2:54, :], x="Area of Residence", y="A.B_aged_pop_porp", color="Part A and/or Part B Total",
             color_continuous_scale='Turbo', title='Medicare 65 Years and Older Population Proportion of US State')
fig.update_layout(yaxis_title='Proportion of Medicare Population to State Population',
                  xaxis_title='State', legend=dict(title='Medicare Population'))
fig.show()

In [None]:
fig = px.bar(clean_area_20.iloc[2:54, :], x="Area of Residence", y="A.B_aged_pop_porp", color="State_Pop",
             color_continuous_scale='Turbo')
fig.show()

# Demo data

## takein

In [None]:
total_demo_15 = pd.read_csv('https://raw.githubusercontent.com/oimartin/Older-and-Wiser/medicare/data/2015_total_demo_medicare_enrolled.csv')
total_demo_16 = pd.read_csv('https://raw.githubusercontent.com/oimartin/Older-and-Wiser/medicare/data/2016_total_demo_medicare_enrolled.csv')
total_demo_17 = pd.read_csv('https://raw.githubusercontent.com/oimartin/Older-and-Wiser/medicare/data/2017_total_demo_medicare_enrolled.csv')
total_demo_18 = pd.read_csv('https://raw.githubusercontent.com/oimartin/Older-and-Wiser/medicare/data/2018_total_demo_medicare_enrolled.csv')
total_demo_19 = pd.read_csv('https://raw.githubusercontent.com/oimartin/Older-and-Wiser/medicare/data/2019_total_demo_medicare_enrolled.csv')
total_demo_20 = pd.read_csv('https://raw.githubusercontent.com/oimartin/Older-and-Wiser/medicare/data/2020_total_demo_medicare_enrolled.csv')

In [None]:
display(total_demo_15.shape,
        total_demo_16.shape,
        total_demo_17.shape,
        total_demo_18.shape,
        total_demo_19.shape,
        total_demo_20.shape)

In [None]:
def demo_df(df, year):
  df.drop(columns=['Unnamed: 5', 'Unnamed: 6', 'Unnamed: 7',
                            'Unnamed: 8', 'Unnamed: 9'],inplace=True)
  df.dropna(axis=0,inplace=True)
  df.columns = df.iloc[0]
  clean = df.iloc[1:, :]
  cleaner = clean.replace(',', '', regex=True)
  v_clean = cleaner[(clean['Demographic Characteristic'] != 'Age') &
      (clean['Demographic Characteristic'] != 'Sex') &
      (clean['Demographic Characteristic'] != 'Race')]
  for column in list(v_clean.columns)[1:-1]:
    v_clean[column] = v_clean[column].astype(int)
  v_clean['Year'] = year

  return v_clean

In [None]:
clean_demo_15 = demo_df(total_demo_15, '2015')
clean_demo_16 = demo_df(total_demo_16, '2016')
clean_demo_17 = demo_df(total_demo_17, '2017')
clean_demo_18 = demo_df(total_demo_18, '2018')
clean_demo_19 = demo_df(total_demo_19, '2019')
clean_demo_20 = demo_df(total_demo_20, '2020')

In [None]:
display(clean_demo_15.shape,
        clean_demo_16.shape,
        clean_demo_17.shape,
        clean_demo_18.shape,
        clean_demo_19.shape,
        clean_demo_20.shape)

## Reorganize demo data

In [None]:
index_65_2015 = clean_demo_15[clean_demo_15['Demographic Characteristic'] == '65-74 Years'].iloc[0,1]

In [None]:
def choose_65(df):
  new_df = df[df['Demographic Characteristic'] == '65-74 Years']
  return new_df

In [None]:
d65_20 = choose_65(clean_demo_20)
d65_19 = choose_65(clean_demo_19)
d65_18 = choose_65(clean_demo_18)
d65_17 = choose_65(clean_demo_17)
d65_16 = choose_65(clean_demo_16)
d65_15 = choose_65(clean_demo_15)

d65_all = pd.concat([d65_15, d65_16, d65_17, d65_18, d65_19, d65_20])

In [None]:
def choose_75_plus(df):
  new_df = df[(df['Demographic Characteristic'] == '75-84 Years') |
              (df['Demographic Characteristic'] == '85-94 Years') |
              (df['Demographic Characteristic'] == '95 Years and Over')]
  
  return new_df

In [None]:
d75_plus_20 = choose_75_plus(clean_demo_20)
d75_plus_19 = choose_75_plus(clean_demo_19)
d75_plus_18 = choose_75_plus(clean_demo_18)
d75_plus_17 = choose_75_plus(clean_demo_17)
d75_plus_16 = choose_75_plus(clean_demo_16)
d75_plus_15 = choose_75_plus(clean_demo_15)

d75_plus_all = pd.concat([d75_plus_15, d75_plus_16,
                          d75_plus_17, d75_plus_18,
                          d75_plus_19, d75_plus_20])

d75_plus_all

## Graphs

### Bar graphs

In [None]:
fig = px.bar(d75_plus_all, x="Demographic Characteristic", y="Part A and/or Part B", color="Year", barmode="group")
fig.show()

In [None]:
fig = px.bar(d65_all, x="Demographic Characteristic", y="Part A and/or Part B", color="Year", barmode="group")
fig.show()

#### Box Whisker Plot

In [None]:
fig = px.box(d65_all, y="Demographic Characteristic", x="Part A and/or Part B",
             orientation='h')
fig.show()

In [None]:
sns.boxplot(data=d65_all, y='Demographic Characteristic', x="Part A and/or Part B",
            orient='h')

In [None]:
fig = px.box(d75_plus_all, y="Demographic Characteristic", x="Part A and/or Part B",
             orientation='h', color='Demographic Characteristic')
fig.show()