Data from https://healthdata.gov/Health/COVID-19-Public-Therapeutic-Locator/rxn6-qnx8

In [1]:
#import library
import pandas as pd
import numpy as np
import folium
import matplotlib

### Metadata

DataframeA (`df_A`) holds the latest stock data, while DataframeB (`df_B`) holds the older stock data it is compared against.

In [2]:
#define data to input

#A is the newest month, B will be the older month
#dates are written as YYYYMMDD and double as the suffix of the downloaded file names
date_A = 20230201 #this is the latest date of the report download
date_B = 20220729

#single place to edit when the downloads live in a different folder
DATA_DIR = 'C:/Users/Sydney/Desktop/apa_stock_data'

#the file name is built from the date so the file that is read and the date used
#to label its columns cannot drift apart
df_A = pd.read_csv(DATA_DIR + '/COVID-19_Public_Therapeutic_Locator_' + str(date_A) + '.csv')
df_B = pd.read_csv(DATA_DIR + '/COVID-19_Public_Therapeutic_Locator_' + str(date_B) + '.csv')

therapy = "Paxlovid"

### Data Exploration

In [3]:
#quick look at the newest snapshot; a cell only renders its last expression,
#so the column index is what gets displayed here (the head() result is not shown)
df_A.head()
df_A.columns

Index(['Provider Name', 'Address1', 'Address2', 'City', 'County', 'State Code',
       'Zip', 'National Drug Code', 'Order Label', 'Courses Available',
       'Geocoded Address', 'NPI', 'Last Report Date', 'Provider Status',
       'Provider Note'],
      dtype='object')

In [4]:
#what does our provider status tell us
#(number of rows per distinct "Provider Status" value in the newest snapshot)
df_A['Provider Status'].value_counts()

ACTIVE               98982
UNKNOWN INVENTORY     4246
Name: Provider Status, dtype: int64

In [5]:
#what are the unique order labels - anything we don't care about?
df_A["Order Label"].unique()

array(['Paxlovid', 'Renal Paxlovid', 'Lagevrio (molnupiravir)',
       'Commercial Veklury (remdesivir)'], dtype=object)

### Clean and prepare the data

In [6]:
def clean_df(df, date):
    """Reduce a raw Therapeutic Locator frame to the columns and rows used in this report.

    Steps, in order:
      1. keep only rows whose "Provider Status" is "ACTIVE";
      2. drop the provider-identifying columns that the report does not use;
      3. drop rows that have no "Geocoded Address";
      4. rewrite "Geocoded Address" from "POINT (<long> <lat>)" to "<long> <lat>"
         and copy the two parts into new "long" and "lat" columns (kept as text);
      5. rename "Courses Available" to "Courses Available<date>".

    Parameters
    ----------
    df : pandas.DataFrame
        Raw frame with the column layout of the locator download.
    date : int or str
        Suffix appended to the "Courses Available" column name.

    Returns
    -------
    pandas.DataFrame
        A new frame; the frame passed in is left untouched. A geocoded value
        that does not split into two space-separated parts gets a missing
        value in "lat" (and keeps whatever single token it has in "long").
    """
    unused_cols = ['Provider Name', 'Address1', 'Address2', 'City',
                   'National Drug Code', 'NPI', 'Provider Note']

    #drop all unnecessary columns for this report and limit to only where the provider status is active
    active = df[df["Provider Status"] == "ACTIVE"].drop(labels=unused_cols, axis=1)

    #drop rows without coordinates; .copy() gives an independent frame so the column
    #assignments below cannot trigger pandas' SettingWithCopyWarning
    active = active.dropna(subset=['Geocoded Address']).copy()

    #clean up the geocoded address to be the coordinates only.
    #str.strip(chars) removes any of the given *characters* from both ends, not a literal
    #prefix, so the "POINT (" wrapper is removed with anchored patterns instead.
    coords = (active["Geocoded Address"].astype("string")
              .str.replace(r"^POINT\s*\(", "", regex=True)
              .str.replace(r"\)$", "", regex=True))
    active["Geocoded Address"] = coords

    #first token is the longitude, second the latitude
    parts = coords.str.split(' ')
    active['long'] = parts.str[0]
    active['lat'] = parts.str[1]

    #tag the stock column with the snapshot date so two snapshots can sit side by side
    new_col = "Courses Available" + str(date)
    return active.rename(columns={"Courses Available": new_col})

In [7]:
#clean both snapshots; each call tags the "Courses Available" column with that snapshot's date
#NOTE: df_A/df_B are rebound to the cleaned frames here, so the raw frames are no longer
#reachable afterwards - re-run the loading cell before running this cell again
df_A = clean_df(df_A, date_A)
df_B = clean_df(df_B, date_B)

In [8]:
#preview the cleaned newest snapshot
df_A.head()

Unnamed: 0,County,State Code,Zip,Order Label,Courses Available20230201,Geocoded Address,Last Report Date,Provider Status,long,lat
0,Adams,CO,80233,Paxlovid,55.0,-104.955542 39.91345,01/25/2023 12:00:00 AM,ACTIVE,-104.955542,39.91345
1,Faulkner,AR,72032,Paxlovid,35.0,-92.437225 35.09223,01/30/2023 12:00:00 AM,ACTIVE,-92.437225,35.09223
2,Cook,IL,60302,Paxlovid,6.0,-87.79498 41.8795,01/30/2023 12:00:00 AM,ACTIVE,-87.79498,41.8795
3,Sullivan,MO,63556,Paxlovid,9.0,-93.12537 40.202935,01/19/2023 12:00:00 AM,ACTIVE,-93.12537,40.202935
4,Brown,WI,54303,Paxlovid,36.0,-88.047111 44.51736,01/30/2023 12:00:00 AM,ACTIVE,-88.047111,44.51736


## All Therapies

### Find the Differences in Stock Over Time

In [9]:
#df_A.merge(df_B, on=["Zip", "Order Label"], how='left')

In [10]:
def zip_change(df1, df2, date1, date2, how="inner"):
    """Summarise the change in available courses per Zip and Order Label.

    Each frame's "Courses Available<date>" column is summed within every
    (Zip, Order Label) group, the two summaries are joined on those keys and
    "Delta" is computed as the ``date1`` total minus the ``date2`` total.

    Parameters
    ----------
    df1, df2 : pandas.DataFrame
        Frames holding "Zip", "Order Label" and, respectively, the columns
        "Courses Available<date1>" and "Courses Available<date2>"
        (the layout returned by ``clean_df``).
    date1, date2 : int or str
        Date suffixes identifying each frame's courses column.
    how : str, default "inner"
        Join type handed to ``pandas.merge``. The default keeps only groups
        present in both frames (the previous behaviour). "outer", "left" or
        "right" also keeps groups found in a single frame; their missing
        total and "Delta" are NaN.

    Returns
    -------
    pandas.DataFrame
        Indexed by (Zip, Order Label) with both totals and "Delta".
    """
    keys = ["Zip", "Order Label"]
    col1 = 'Courses Available' + str(date1)
    col2 = 'Courses Available' + str(date2)

    # Select the stock column explicitly instead of dropping every other column by
    # name, so the summary does not depend on which unrelated columns exist.
    totals1 = df1.groupby(keys)[[col1]].sum()
    totals2 = df2.groupby(keys)[[col2]].sum()

    df3 = pd.merge(totals1, totals2, on=keys, how=how)
    df3['Delta'] = df3[col1] - df3[col2]

    return df3

In [11]:
#zip-level change for every therapy (df_A totals minus df_B totals)
zip_df = zip_change(df_A, df_B, date_A, date_B)

In [12]:
def cnty_change(df1, df2, date1, date2, how="inner", by_state=False):
    """Summarise the change in available courses per County and Order Label.

    Each frame's "Courses Available<date>" column is summed within every
    group, the two summaries are joined on the grouping keys and "Delta" is
    computed as the ``date1`` total minus the ``date2`` total.

    WARNING: with the default ``by_state=False`` the grouping key is the
    county *name* only, so counties that share a name in different states are
    added together into one row. Pass ``by_state=True`` to keep them apart.

    Parameters
    ----------
    df1, df2 : pandas.DataFrame
        Frames holding "County", "Order Label" (plus "State Code" when
        ``by_state`` is true) and, respectively, the columns
        "Courses Available<date1>" and "Courses Available<date2>"
        (the layout returned by ``clean_df``).
    date1, date2 : int or str
        Date suffixes identifying each frame's courses column.
    how : str, default "inner"
        Join type handed to ``pandas.merge``. The default keeps only groups
        present in both frames (the previous behaviour). "outer", "left" or
        "right" also keeps groups found in a single frame; their missing
        total and "Delta" are NaN.
    by_state : bool, default False
        When true, group by ("State Code", "County", "Order Label") instead
        of ("County", "Order Label"). The default preserves the previous
        output layout.

    Returns
    -------
    pandas.DataFrame
        Indexed by the grouping keys with both totals and "Delta".
    """
    keys = ["County", "Order Label"]
    if by_state:
        # county names are only unique within a state
        keys = ["State Code"] + keys
    col1 = 'Courses Available' + str(date1)
    col2 = 'Courses Available' + str(date2)

    # Select the stock column explicitly instead of dropping every other column by
    # name, so the summary does not depend on which unrelated columns exist.
    totals1 = df1.groupby(keys)[[col1]].sum()
    totals2 = df2.groupby(keys)[[col2]].sum()

    df3 = pd.merge(totals1, totals2, on=keys, how=how)
    df3['Delta'] = df3[col1] - df3[col2]

    return df3

In [13]:
#county-level change for every therapy (df_A totals minus df_B totals)
cnty_df = cnty_change(df_A, df_B, date_A, date_B)

In [14]:
def state_change(df1, df2, date1, date2, how="inner"):
    """Summarise the change in available courses per State Code and Order Label.

    Each frame's "Courses Available<date>" column is summed within every
    (State Code, Order Label) group, the two summaries are joined on those
    keys and "Delta" is computed as the ``date1`` total minus the ``date2``
    total.

    Parameters
    ----------
    df1, df2 : pandas.DataFrame
        Frames holding "State Code", "Order Label" and, respectively, the
        columns "Courses Available<date1>" and "Courses Available<date2>"
        (the layout returned by ``clean_df``).
    date1, date2 : int or str
        Date suffixes identifying each frame's courses column.
    how : str, default "inner"
        Join type handed to ``pandas.merge``. The default keeps only groups
        present in both frames (the previous behaviour). "outer", "left" or
        "right" also keeps groups found in a single frame; their missing
        total and "Delta" are NaN.

    Returns
    -------
    pandas.DataFrame
        Indexed by (State Code, Order Label) with both totals and "Delta".
    """
    keys = ["State Code", "Order Label"]
    col1 = 'Courses Available' + str(date1)
    col2 = 'Courses Available' + str(date2)

    # Select the stock column explicitly instead of dropping every other column by
    # name, so the summary does not depend on which unrelated columns exist.
    totals1 = df1.groupby(keys)[[col1]].sum()
    totals2 = df2.groupby(keys)[[col2]].sum()

    df3 = pd.merge(totals1, totals2, on=keys, how=how)
    df3['Delta'] = df3[col1] - df3[col2]

    return df3

In [15]:
#state-level change for every therapy (df_A totals minus df_B totals)
state_df = state_change(df_A, df_B, date_A, date_B)

In [16]:
def nat_change(df1, df2, date1, date2, how="inner"):
    """Summarise the change in available courses per Order Label over all rows.

    Each frame's "Courses Available<date>" column is summed per Order Label,
    the two summaries are joined on that label and "Delta" is computed as the
    ``date1`` total minus the ``date2`` total.

    Parameters
    ----------
    df1, df2 : pandas.DataFrame
        Frames holding "Order Label" and, respectively, the columns
        "Courses Available<date1>" and "Courses Available<date2>"
        (the layout returned by ``clean_df``).
    date1, date2 : int or str
        Date suffixes identifying each frame's courses column.
    how : str, default "inner"
        Join type handed to ``pandas.merge``. The default keeps only order
        labels present in both frames (the previous behaviour), so a therapy
        that appears in just one snapshot is left out of the result. "outer",
        "left" or "right" keeps such labels; their missing total and "Delta"
        are NaN.

    Returns
    -------
    pandas.DataFrame
        Indexed by Order Label with both totals and "Delta".
    """
    keys = ["Order Label"]
    col1 = 'Courses Available' + str(date1)
    col2 = 'Courses Available' + str(date2)

    # Select the stock column explicitly instead of dropping every other column by
    # name, so the summary does not depend on which unrelated columns exist.
    totals1 = df1.groupby(keys)[[col1]].sum()
    totals2 = df2.groupby(keys)[[col2]].sum()

    df3 = pd.merge(totals1, totals2, on=keys, how=how)
    df3['Delta'] = df3[col1] - df3[col2]

    return df3

In [17]:
#national change per therapy (df_A totals minus df_B totals)
nat_df = nat_change(df_A, df_B, date_A, date_B)

In [18]:
#preview the national summary
nat_df.head()

Unnamed: 0_level_0,Courses Available20230201,Courses Available20220729,Delta
Order Label,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Lagevrio (molnupiravir),1126283.0,1449110.0,-322827.0
Paxlovid,1566066.0,1237893.0,328173.0
Renal Paxlovid,233780.0,187670.0,46110.0


### Export the data summaries

In [19]:
# Write each summary table to its own workbook; index=True keeps the grouping
# keys (the index) in the sheet.
for summary, target in [
    (nat_df, r'C:\Users\Sydney\Desktop\apa_stock_data\APA_stock_change_National.xlsx'),
    (state_df, r'C:\Users\Sydney\Desktop\apa_stock_data\APA_stock_change_State.xlsx'),
    (zip_df, r'C:\Users\Sydney\Desktop\apa_stock_data\APA_stock_change_Zip.xlsx'),
    (cnty_df, r'C:\Users\Sydney\Desktop\apa_stock_data\APA_stock_change_Cnty.xlsx'),
]:
    summary.to_excel(target, index=True)

## Select a Single Therapy

In [20]:
# Keep only the rows of each cleaned snapshot whose order label is the selected therapy.
df_A2 = df_A.loc[lambda frame: frame["Order Label"] == therapy]
df_B2 = df_B.loc[lambda frame: frame["Order Label"] == therapy]

In [21]:
def zip_change_single(df1, df2, date1, date2, how="inner"):
    """Summarise the change in available courses per Zip.

    Each frame's "Courses Available<date>" column is summed per Zip, the two
    summaries are joined on Zip and "Delta" is computed as the ``date1``
    total minus the ``date2`` total. Order labels are not part of the key,
    so every row of a frame counts towards its Zip total.

    Parameters
    ----------
    df1, df2 : pandas.DataFrame
        Frames holding "Zip" and, respectively, the columns
        "Courses Available<date1>" and "Courses Available<date2>".
    date1, date2 : int or str
        Date suffixes identifying each frame's courses column.
    how : str, default "inner"
        Join type handed to ``pandas.merge``. The default keeps only zips
        present in both frames (the previous behaviour). "outer", "left" or
        "right" also keeps zips found in a single frame; their missing total
        and "Delta" are NaN.

    Returns
    -------
    pandas.DataFrame
        Indexed by Zip with both totals and "Delta".
    """
    keys = ["Zip"]
    col1 = 'Courses Available' + str(date1)
    col2 = 'Courses Available' + str(date2)

    # Select the stock column explicitly instead of dropping every other column by
    # name, so the summary does not depend on which unrelated columns exist.
    totals1 = df1.groupby(keys)[[col1]].sum()
    totals2 = df2.groupby(keys)[[col2]].sum()

    df3 = pd.merge(totals1, totals2, on=keys, how=how)
    df3['Delta'] = df3[col1] - df3[col2]

    return df3

In [22]:
#zip-level change for the selected therapy only
zip_df2 = zip_change_single(df_A2, df_B2, date_A, date_B)

In [23]:
def cnty_change_single(df1, df2, date1, date2, how="inner", by_state=False):
    """Summarise the change in available courses per County.

    Each frame's "Courses Available<date>" column is summed within every
    group, the two summaries are joined on the grouping keys and "Delta" is
    computed as the ``date1`` total minus the ``date2`` total. Order labels
    are not part of the key, so every row of a frame counts towards its
    group total.

    WARNING: with the default ``by_state=False`` the grouping key is the
    county *name* only, so counties that share a name in different states are
    added together into one row. Pass ``by_state=True`` to keep them apart.

    Parameters
    ----------
    df1, df2 : pandas.DataFrame
        Frames holding "County" (plus "State Code" when ``by_state`` is true)
        and, respectively, the columns "Courses Available<date1>" and
        "Courses Available<date2>".
    date1, date2 : int or str
        Date suffixes identifying each frame's courses column.
    how : str, default "inner"
        Join type handed to ``pandas.merge``. The default keeps only groups
        present in both frames (the previous behaviour). "outer", "left" or
        "right" also keeps groups found in a single frame; their missing
        total and "Delta" are NaN.
    by_state : bool, default False
        When true, group by ("State Code", "County") instead of "County"
        alone. The default preserves the previous output layout.

    Returns
    -------
    pandas.DataFrame
        Indexed by the grouping keys with both totals and "Delta".
    """
    keys = ["County"]
    if by_state:
        # county names are only unique within a state
        keys = ["State Code"] + keys
    col1 = 'Courses Available' + str(date1)
    col2 = 'Courses Available' + str(date2)

    # Select the stock column explicitly instead of dropping every other column by
    # name, so the summary does not depend on which unrelated columns exist.
    totals1 = df1.groupby(keys)[[col1]].sum()
    totals2 = df2.groupby(keys)[[col2]].sum()

    df3 = pd.merge(totals1, totals2, on=keys, how=how)
    df3['Delta'] = df3[col1] - df3[col2]

    return df3

In [24]:
#county-level change for the selected therapy only
cnty_df2 = cnty_change_single(df_A2, df_B2, date_A, date_B)

In [25]:
def state_change_single(df1, df2, date1, date2, how="inner"):
    """Summarise the change in available courses per State Code.

    Each frame's "Courses Available<date>" column is summed per State Code,
    the two summaries are joined on State Code and "Delta" is computed as the
    ``date1`` total minus the ``date2`` total. Order labels are not part of
    the key, so every row of a frame counts towards its state total.

    Parameters
    ----------
    df1, df2 : pandas.DataFrame
        Frames holding "State Code" and, respectively, the columns
        "Courses Available<date1>" and "Courses Available<date2>".
    date1, date2 : int or str
        Date suffixes identifying each frame's courses column.
    how : str, default "inner"
        Join type handed to ``pandas.merge``. The default keeps only states
        present in both frames (the previous behaviour). "outer", "left" or
        "right" also keeps states found in a single frame; their missing
        total and "Delta" are NaN.

    Returns
    -------
    pandas.DataFrame
        Indexed by State Code with both totals and "Delta".
    """
    keys = ["State Code"]
    col1 = 'Courses Available' + str(date1)
    col2 = 'Courses Available' + str(date2)

    # Select the stock column explicitly instead of dropping every other column by
    # name, so the summary does not depend on which unrelated columns exist.
    totals1 = df1.groupby(keys)[[col1]].sum()
    totals2 = df2.groupby(keys)[[col2]].sum()

    df3 = pd.merge(totals1, totals2, on=keys, how=how)
    df3['Delta'] = df3[col1] - df3[col2]

    return df3

In [26]:
#state-level change for the selected therapy only
state_df2 = state_change_single(df_A2, df_B2, date_A, date_B)

In [27]:
#preview the state summary for the selected therapy
state_df2.head()

Unnamed: 0_level_0,Courses Available20230201,Courses Available20220729,Delta
State Code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
AK,2038.0,2068.0,-30.0
AL,17872.0,16468.0,1404.0
AR,9382.0,8995.0,387.0
AS,1657.0,1594.0,63.0
AZ,26425.0,23148.0,3277.0


In [28]:
def nat_change_single(df1, df2, date1, date2, how="inner"):
    """Summarise the change in available courses per Order Label over all rows.

    Unlike the other ``*_single`` helpers this one still groups by
    "Order Label"; it performs the same computation as ``nat_change``. Each
    frame's "Courses Available<date>" column is summed per Order Label, the
    two summaries are joined on that label and "Delta" is computed as the
    ``date1`` total minus the ``date2`` total.

    Parameters
    ----------
    df1, df2 : pandas.DataFrame
        Frames holding "Order Label" and, respectively, the columns
        "Courses Available<date1>" and "Courses Available<date2>".
    date1, date2 : int or str
        Date suffixes identifying each frame's courses column.
    how : str, default "inner"
        Join type handed to ``pandas.merge``. The default keeps only order
        labels present in both frames (the previous behaviour). "outer",
        "left" or "right" also keeps labels found in a single frame; their
        missing total and "Delta" are NaN.

    Returns
    -------
    pandas.DataFrame
        Indexed by Order Label with both totals and "Delta".
    """
    keys = ["Order Label"]
    col1 = 'Courses Available' + str(date1)
    col2 = 'Courses Available' + str(date2)

    # Select the stock column explicitly instead of dropping every other column by
    # name, so the summary does not depend on which unrelated columns exist.
    totals1 = df1.groupby(keys)[[col1]].sum()
    totals2 = df2.groupby(keys)[[col2]].sum()

    df3 = pd.merge(totals1, totals2, on=keys, how=how)
    df3['Delta'] = df3[col1] - df3[col2]

    return df3

In [29]:
#national change for the selected therapy only
nat_df2 = nat_change_single(df_A2, df_B2, date_A, date_B)

In [30]:
#preview the national summary for the selected therapy
nat_df2.head()

Unnamed: 0_level_0,Courses Available20230201,Courses Available20220729,Delta
Order Label,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Paxlovid,1566066.0,1237893.0,328173.0
