# EDA For Physician Availability and Graduation Rates (Supply-side factors)

In [13]:
# Import dependencies
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

In [14]:
# Read the semicolon-separated physician supply table from the datasets
# folder and preview its first rows.
master_path = "datasets"
physicians_csv_path = f"{master_path}/supply_side_physicians.csv"
supply_side_phys_df = pd.read_csv(physicians_csv_path, sep=";")
supply_side_phys_df.head(5)

Unnamed: 0,State,Total Active Physicians,Percentage 65 and older,Percentage under 40,MD Residents/Fellows,Population
0,Alabama,11646,26.2,17.5,1160.0,5117673
1,Alaska,2143,23.0,14.8,12.0,736510
2,Arizona,18998,24.0,13.6,1303.0,7473027
3,Arkansas,6979,,,530.0,3069463
4,California,120957,25.0,15.3,10415.0,39198693


In [15]:
# Count the missing values in each column.
supply_side_phys_df.isnull().sum()

State                       0
Total Active Physicians     0
Percentage 65 and older    14
Percentage under 40        14
MD Residents/Fellows        1
Population                  0
dtype: int64

In [25]:
# Impute the missing age-share percentages with each column's mean.
# The filled Series is assigned back to the frame: calling
# fillna(..., inplace=True) on a column selection is chained assignment, which
# pandas deprecates and which leaves the frame unchanged under Copy-on-Write.
for age_share_col in ('Percentage 65 and older', 'Percentage under 40'):
    supply_side_phys_df[age_share_col] = supply_side_phys_df[age_share_col].fillna(
        supply_side_phys_df[age_share_col].mean()
    )

# Fill the missing resident count (Wyoming's) with the state's share of the
# total population applied to the total resident count. Both totals are
# computed once for the whole frame instead of once per row. sum() skips NaN,
# so the resident total covers only the states that have a value.
state_population = supply_side_phys_df['Population']
reported_residents = supply_side_phys_df['MD Residents/Fellows']
estimated_residents = state_population / state_population.sum() * reported_residents.sum()
supply_side_phys_df['MD Residents/Fellows'] = reported_residents.fillna(estimated_residents)

In [17]:
# Derive density columns: head counts divided by population, scaled to a
# per-1,000 rate. Each new column maps to the count column it is built from.
rate_sources = {
    'Physicians per 1K': 'Total Active Physicians',
    'Residents per 1K': 'MD Residents/Fellows',
}
for rate_col, count_col in rate_sources.items():
    supply_side_phys_df[rate_col] = (
        supply_side_phys_df[count_col].div(supply_side_phys_df['Population']).mul(1000)
    )
supply_side_phys_df.head(5)

Unnamed: 0,State,Total Active Physicians,Percentage 65 and older,Percentage under 40,MD Residents/Fellows,Population,Physicians per 1K,Residents per 1K
0,Alabama,11646,26.2,17.5,1160.0,5117673,2.275644,0.226666
1,Alaska,2143,23.0,14.8,12.0,736510,2.909669,0.016293
2,Arizona,18998,24.0,13.6,1303.0,7473027,2.542209,0.17436
3,Arkansas,6979,22.827027,16.605405,530.0,3069463,2.273688,0.172669
4,California,120957,25.0,15.3,10415.0,39198693,3.085741,0.265698


In [18]:
# Summary statistics for the numeric columns, with the quartiles spelled out
# explicitly (these are the values describe() uses by default).
supply_side_phys_df.describe(percentiles=[0.25, 0.5, 0.75])

Unnamed: 0,Total Active Physicians,Percentage 65 and older,Percentage under 40,MD Residents/Fellows,Population,Physicians per 1K,Residents per 1K
count,51.0,51.0,51.0,51.0,51.0,51.0,51.0
mean,19612.705882,22.827027,16.605405,1812.888811,6604044.0,3.005727,0.275996
std,22889.276289,1.91067,1.751222,2278.329698,7516975.0,1.051063,0.240421
min,1281.0,18.4,11.8,12.0,585067.0,1.925807,0.016293
25%,5223.0,22.1,15.8,459.0,1870808.0,2.456159,0.168498
50%,13244.0,22.827027,16.605405,1160.0,4550595.0,2.860513,0.232013
75%,24675.5,23.7,17.45,1985.5,7665174.0,3.190242,0.314294
max,120957.0,27.4,21.5,10415.0,39198690.0,9.12088,1.697889


In [19]:
# Inspect Wyoming's row, including its imputed values.
is_wyoming = supply_side_phys_df['State'].eq('Wyoming')
supply_side_phys_df.loc[is_wyoming]

Unnamed: 0,State,Total Active Physicians,Percentage 65 and older,Percentage under 40,MD Residents/Fellows,Population,Physicians per 1K,Residents per 1K
50,Wyoming,1281,22.827027,16.605405,160.329364,585067,2.189493,0.274036


In [20]:
# Top and bottom states for physician and resident density
top_physician_states = supply_side_phys_df.nlargest(5, "Physicians per 1K")[["State", "Physicians per 1K"]]
bottom_physician_states = supply_side_phys_df.nsmallest(5, "Physicians per 1K")[["State", "Physicians per 1K"]]

top_resident_states = supply_side_phys_df.nlargest(5, "Residents per 1K")[["State", "Residents per 1K"]]
bottom_resident_states = supply_side_phys_df.nsmallest(5, "Residents per 1K")[["State", "Residents per 1K"]]

In [21]:
# Five states with the highest number of active physicians per 1,000 population.
top_physician_states

Unnamed: 0,State,Physicians per 1K
8,District of Columbia,9.12088
21,Massachusetts,4.810963
32,New York,4.096392
45,Vermont,4.001801
20,Maryland,3.997547


In [22]:
# Five states with the lowest number of active physicians per 1,000 population.
bottom_physician_states

Unnamed: 0,State,Physicians per 1K
12,Idaho,1.925807
24,Mississippi,2.051868
36,Oklahoma,2.118664
28,Nevada,2.174925
50,Wyoming,2.189493


In [23]:
# Five states with the highest number of MD residents/fellows per 1,000 population.
top_resident_states

Unnamed: 0,State,Residents per 1K
8,District of Columbia,1.697889
21,Massachusetts,0.64883
39,Rhode Island,0.641636
32,New York,0.511922
38,Pennsylvania,0.42711


In [24]:
# Five states with the lowest number of MD residents/fellows per 1,000 population.
bottom_resident_states

Unnamed: 0,State,Residents per 1K
1,Alaska,0.016293
26,Montana,0.030054
12,Idaho,0.050733
41,South Dakota,0.087117
28,Nevada,0.109508
