# Deliverable Notebook

In [2]:
# pandas is used for all loading, aggregation and CSV export in the cells below.
import pandas as pd
# NOTE(review): `re` is not referenced in the cells visible here — confirm it is still needed.
import re
# Lift pandas' row-display limit so DataFrames shown as cell output are not truncated.
pd.set_option('display.max_rows', None)

Drug Classes:
- ALLERGY, SYSTEMIC & NASAL
- DERMATOLOGICS
- GI PRODUCTS
- IMMUNOLOGY
- MULTIPLE SCLEROSIS
- NERVOUS SYSTEM DISORDERS
- ONCOLOGICS
- OPHTHALMOLOGY, GENERAL
- OTHER CNS
- OTHER HAEMATOLOGICALS
- VACCINES (PURE, COMB, OTHER)

In [10]:
# Major drug classes for which a deliverable file is produced (one entry per line
# so additions and removals show up cleanly in diffs).
classes = [
    'ALLERGY, SYSTEMIC & NASAL',
    'DERMATOLOGICS',
    'GI PRODUCTS',
    'IMMUNOLOGY',
    'MULTIPLE SCLEROSIS',
    'NERVOUS SYSTEM DISORDERS',
    'ONCOLOGICS',
    'OPHTHALMOLOGY, GENERAL',
    'OTHER CNS',
    'OTHER HAEMATOLOGICALS',
    'VACCINES (PURE, COMB, OTHER)',
]

# Sanity check: number of classes that will be processed
len(classes)

11

In [13]:
# Helper function (the misspelled name is kept so existing references keep working)
def weigthed_average(data, quantity, weights):
    """Return the weighted mean of ``data[quantity]`` weighted by ``data[weights]``.

    Parameters
    ----------
    data : pandas.DataFrame
        Rows to average (below: the rows of one ``Date`` group).
    quantity : str
        Name of the column to average.
    weights : str
        Name of the column holding the weights.

    Returns
    -------
    float
        ``sum(quantity * weights) / sum(weights)``, or NaN when the weights
        sum to zero.
    """
    total_weight = data[weights].sum()
    if total_weight == 0:
        # Dividing pandas/NumPy sums by zero yields NaN/inf instead of raising
        # ZeroDivisionError, so check explicitly and always return a scalar
        # (returning a Series here would break the groupby/apply result shape).
        return float('nan')
    return (data[quantity] * data[weights]).sum() / total_weight


########################
# LOADING CLASS-INDEPENDENT INPUTS (once, not once per class)
########################

# Monthly data for every drug class; filtered per class inside the loop
monthly_all = pd.read_csv('../2 Processed_Data/drugs_pct_changes_monthly.csv')

# Year over year percent change data for every drug class
yy_data = pd.read_csv('../1 Data Preprocessing/Year Over Year/df_yearoveryear_class.csv')
yy_data['Date'] = pd.to_datetime(yy_data['Date'], format='%Y_%m')

for drug_class in classes:

    ########################
    # RETRIEVING DATA
    ########################

    # Rows of the current drug class; .copy() so the column assignments below
    # modify an independent frame instead of a slice of `monthly_all`
    data = monthly_all[monthly_all['Major Class'] == drug_class].copy()
    data['Date'] = pd.to_datetime(data['Date'], format='%Y_%m')

    # Keep only NDCs present in both the first (2014-08) and the last (2020-07) month.
    # Note: intermediate months are not checked.
    last_period = data[(data.Year == 2020) & (data.Month == 7)].NDC.unique()
    first_period = data[(data.Year == 2014) & (data.Month == 8)].NDC.unique()
    selected_NDC = list(set(last_period).intersection(first_period))
    data = data[data.NDC.isin(selected_NDC)].copy()

    # NDCs without a LOE date get a placeholder LOE date so they parse as dates
    data.loc[data['Estimated LOE Date'] == 'Unspecified', 'Estimated LOE Date'] = 'May-24'

    # Keep only NDCs whose LOE date is on or after 2020-07-01
    data['Estimated LOE Date'] = pd.to_datetime(data['Estimated LOE Date'], format='%b-%y')
    data = data[data['Estimated LOE Date'] >= pd.to_datetime('2020-07-01')].copy()

    # Retrieving computed predictions
    prophet_1 = pd.read_csv(f'../4 Predictive Models/Results/Method 1/{drug_class}.csv')
    prophet_1 = prophet_1.rename(columns={'Unnamed: 0':'date', 'Number':'# price changes prediction', 'Percentage of drugs change price':'# price changes prediction (%)'})
    prophet_1['date'] = pd.to_datetime(prophet_1['date'])

    prophet_2 = pd.read_csv(f'../4 Predictive Models/Results/Method 2/{drug_class}.csv')
    prophet_2['date'] = pd.to_datetime(prophet_2['date'])


    ########################
    # BUILDING THE DELIVERABLE DATASET
    ########################

    # Extracting number of unique drugs per month
    nb_drugs = data.groupby('Date')['NDC'].nunique().to_frame(name='# drugs').reset_index().rename(columns={'Date': 'date'})

    # Extracting year over year percent change in Sales weighted WAC
    yy_change = yy_data[yy_data['Major Class'] == drug_class][['Date', 'Class_wght_pct_change_y/y']].reset_index(drop=True).rename(columns={'Date': 'date', 'Class_wght_pct_change_y/y': 'Y/Y Percent change'})

    # Extracting number of changes per month
    nb_price_change = data.groupby('Date')['Changed'].sum().to_frame(name='# price changes').reset_index().rename(columns={'Date': 'date'})

    # Extracting Sales weighted WAC per month
    data['Sales'] = data['WAC'] * data['TRx']
    sales_wac = data.groupby('Date').apply(weigthed_average, 'WAC', 'Sales').to_frame(name = 'Sales weighted WAC').reset_index().rename(columns={'Date': 'date'})

    # Extracting TRx weighted WAC per month
    trx_wac = data.groupby('Date').apply(weigthed_average, 'WAC', 'TRx').to_frame(name = 'TRx weighted WAC').reset_index().rename(columns={'Date': 'date'})

    # Extracting Prophet 2 predictions
    prophet_2 = prophet_2[['date', 'yhat']].rename(columns={'yhat': 'Sales weighted WAC predictions'})

    # Merging everything on the month (outer join keeps months present in any input)
    deliverable = nb_drugs
    for part in (yy_change, nb_price_change, sales_wac, trx_wac, prophet_1, prophet_2):
        deliverable = deliverable.merge(part, how='outer', on='date')

    # pct_change() below compares each row with the previous one, so make the
    # chronological order explicit instead of relying on the merge's row order
    deliverable = deliverable.sort_values('date').reset_index(drop=True)

    # Formatting date
    deliverable['Year'] = deliverable['date'].dt.year
    deliverable['Month'] = deliverable['date'].dt.month
    deliverable = deliverable.drop(columns='date')

    # Creating handy class column
    deliverable['Class'] = drug_class

    # Computing the share of drugs that had a price change
    deliverable['# price changes (%)'] = deliverable['# price changes'] / deliverable['# drugs']

    # Computing M/M percent change.
    # fill_method=None: months without a value stay NaN instead of being
    # forward-filled, which would report a spurious 0% change for them
    # (e.g. rows that only carry predictions, if the inputs contain any).
    deliverable['M/M Percent change'] = deliverable['Sales weighted WAC'].pct_change(fill_method=None)
    deliverable['M/M Percent change predictions'] = deliverable['Sales weighted WAC predictions'].pct_change(fill_method=None)

    # Dropping year 2014 (.copy() so the new column below is not set on a slice)
    deliverable = deliverable[deliverable.Year != 2014].copy()

    # NOTE(review): the original cell also built a 'Sales weighted WAC actual predictions'
    # column (actuals, falling back to predictions) but never exported it; it is
    # omitted here to keep the CSV schema unchanged — confirm whether it should be added.

    # Creating date column - handier
    deliverable['Date'] = deliverable.Year.astype(str) + '-' + deliverable.Month.astype(str)
    deliverable['Date'] = pd.to_datetime(deliverable.Date)

    # Selecting and ordering the exported columns
    deliverable = deliverable[['Class', 'Date', 'Year', 'Month', 'Y/Y Percent change', '# drugs','# price changes', '# price changes (%)', '# price changes prediction', '# price changes prediction (%)',  'TRx weighted WAC', 'Sales weighted WAC', 'M/M Percent change', 'Sales weighted WAC predictions', 'M/M Percent change predictions']]

    deliverable.to_csv(f'Final deliverable Data/deliverable_{drug_class}.csv', index=False)


In [14]:
# Show the deliverable left over from the loop's final iteration (the last entry of `classes`).
deliverable

Unnamed: 0,Class,Date,Year,Month,Y/Y Percent change,# drugs,# price changes,# price changes (%),# price changes prediction,# price changes prediction (%),TRx weighted WAC,Sales weighted WAC,M/M Percent change,Sales weighted WAC predictions,M/M Percent change predictions
5,"VACCINES (PURE, COMB, OTHER)",2015-01-01,2015,1,,42.0,0.0,0.0,0.0,0.0,181.647703,211.979795,0.038649,240.613581,
6,"VACCINES (PURE, COMB, OTHER)",2015-02-01,2015,2,,42.0,0.0,0.0,0.0,0.0,182.768568,217.957136,0.028198,240.026982,-0.002438
7,"VACCINES (PURE, COMB, OTHER)",2015-03-01,2015,3,,42.0,3.0,0.071429,2.0,0.046875,196.235034,231.375602,0.061565,234.83032,-0.02165
8,"VACCINES (PURE, COMB, OTHER)",2015-04-01,2015,4,,42.0,0.0,0.0,0.0,0.0,195.698141,231.92488,0.002374,226.819326,-0.034114
9,"VACCINES (PURE, COMB, OTHER)",2015-05-01,2015,5,,42.0,0.0,0.0,0.0,0.0,190.960344,228.86052,-0.013213,216.931874,-0.043592
10,"VACCINES (PURE, COMB, OTHER)",2015-06-01,2015,6,,42.0,2.0,0.047619,2.0,0.046154,188.190005,226.286949,-0.011245,216.657028,-0.001267
11,"VACCINES (PURE, COMB, OTHER)",2015-07-01,2015,7,,42.0,0.0,0.0,0.0,0.0,184.242014,224.009493,-0.010064,212.067283,-0.021184
12,"VACCINES (PURE, COMB, OTHER)",2015-08-01,2015,8,0.090608,42.0,0.0,0.0,0.0,0.0,188.442203,230.133032,0.027336,225.14017,0.061645
13,"VACCINES (PURE, COMB, OTHER)",2015-09-01,2015,9,0.102192,42.0,0.0,0.0,0.0,0.0,225.205772,259.633697,0.12819,262.472435,0.165818
14,"VACCINES (PURE, COMB, OTHER)",2015-10-01,2015,10,0.106534,42.0,13.0,0.309524,16.0,0.371429,240.232997,268.674221,0.03482,274.78065,0.046893
