# Deliverable Notebook

In [3]:
import pandas as pd
import re
# Display every row of a DataFrame instead of truncating long outputs.
pd.set_option('display.max_rows', None)

Available drug classes (values that can be assigned to `drug_class` below):
- ANTIDIABETICS
- PAIN
- IMMUNOLOGY
- MENTAL HEALTH
- ADHD
- OTHER CNS
- OSTEOPOROSIS
- ERECTILE DYSFUNCTION
- ONCOLOGICS
- ANTICOAGULANTS
- VIRAL HEPATITIS
- IMMUNOSUPPRESSANTS
- HIV ANTIVIRALS
- ANTIBACTERIALS
- LIPID REGULATORS
- ANTIVIRALS, HERPES
- NERVOUS SYSTEM DISORDERS
- ANTI-OBESITY PREPS, EXCL. DIETETICS
- COUGH COLD, INCL FLU ANTIVIRALS
- VACCINES (PURE, COMB, OTHER)
- ANTIHYPERTENSIVES, PLAIN & COMBO
- ANTI-PARASITICS, ANTIMALARIALS, INSECTICIDES
- DERMATOLOGICS
- BPH (BENIGN PROSTATIC HYPERTROPHY)
- CANCER DETOX AG, ANTI-NAUSEANTS
- RESPIRATORY AGENTS
- SPEC IMMUNOGLOBULINS & ANTITOXIC SERA
- MIOTICS+ANTIGLAUCOMA PREPS
- SYST ANTIFUNGALS
- DIAGNOSTIC EQUIP & ACCESSORIES & ALLERGEN TESTS
- THROMBOPOIETIN AGONISTS
- ANTI-ULCERANTS
- SEX HORMONES (ANDROGENS, OESTROGENS, PROGESTOGENS,
- ANTITUBERCULARS
- OTHER CARDIOVASCULARS
- CORTICOSTEROIDS, PLAIN & COMBO
- HORMONAL CONTRACEPTION, SYSTEMIC&TOPICAL
- GENITO-URINARY ANTIBACTERIALS & ANTISEPTICS & ANTI
- URINARY INCONTINENCE
- HYPOTHALAMIC HORMONES
- GI PRODUCTS
- BLOOD COAGULATION
- OPHTHALMOLOGY, GENERAL
- ANTIGOUT PREPS
- HEMATOPOIETIC GROWTH FACTORS
- NASAL PREPS, TOPICAL
- ANTI-INFLAMMATORY AND DIGESTIVE ENZYMES AND MODULA
- ALLERGY, SYSTEMIC & NASAL
- ALL OTHER THERAPEUTICS
- J07D6 TUBERCULOSIS VACCINES
- EAR TREATMENTS&EYE-EAR COMBOS
- ERYTHROPOIETINS
- MULTIPLE SCLEROSIS
- THYROID ANTI-THYROID AND IODINE PREPS
- OTHER HORMONES
- ANTI-ALZHEIMERS
- BISPHOSPHONATES TUMOR-RELATED & BONY METASTASES
- OCULAR ANTINEOVASCULARISATION
- INTERFERONS, EXCL MS MARKET
- HOSPITAL SOLUTIONS
- ALL OTHER UROLOGICAL
- IMAGING
- OTHER ALIMENTARY TRACT AND METABOLISM PRODUCTS
- ANTI-ANAEMICS, IRON AND ALL COMBINATIONS
- ALL OTHER RESPIRATORY
- POLYVAL IMMUNOGLOBLULINS IV&IM
- GROWTH HORMONES
- LABOUR INDUCERS
- VITAMINS & MINERALS
- OTHER WOMEN'S HEALTH
- OTHER HAEMATOLOGICALS
- A05A1 CHOLERETICS+CHOLEKINETIC
- DIETETICS
- RESP ANTIVIRALS EX FLU
- A.C.T.H.

In [4]:
# Drug class this deliverable is built for: it is matched against the
# 'Major Class' column when filtering, and interpolated into the names of the
# prediction files that are read and of the CSV that is written.
drug_class = 'VACCINES (PURE, COMB, OTHER)'

In [7]:
# Load the monthly percent-change data and keep only the selected drug class.
# `.copy()` detaches the filtered rows from the full frame, so the column
# assignments that follow act on an independent DataFrame (no
# SettingWithCopyWarning, no view-versus-copy ambiguity).
data = pd.read_csv('../Processed_Data/drugs_pct_changes_monthly.csv')
data = data[data['Major Class'] == drug_class].copy()
data['Date'] = pd.to_datetime(data['Date'], format='%Y_%m')

# Keep only NDCs observed in both the first (2014-08) and the last (2020-07)
# month. Note: only these two endpoints are checked, not every month between.
last_period = data[(data.Year == 2020) & (data.Month == 7)].NDC.unique()
first_period = data[(data.Year == 2014) & (data.Month == 8)].NDC.unique()
selected_NDC = list(set(last_period).intersection(first_period))
# Copy again: a later cell adds a column to `data`.
data = data[data.NDC.isin(selected_NDC)].copy()

# Year-over-year percent change data (all classes; filtered later)
yy_data = pd.read_csv('../Year Over Year/df_yearoveryear_class.csv')
yy_data['Date'] = pd.to_datetime(yy_data['Date'], format='%Y_%m')

# Pre-computed predictions, method 1: rename the columns to the labels used
# in the deliverable and parse the date column.
prophet_1 = pd.read_csv(f'../Predictive Models/Results/Method 1/{drug_class}.csv')
prophet_1 = prophet_1.rename(columns={
    'Unnamed: 0': 'date',
    'Number': '# price changes prediction',
    'Percentage of drugs change price': 'Percent # price changes prediction',
})
prophet_1['date'] = pd.to_datetime(prophet_1['date'])

# Pre-computed predictions, method 2
prophet_2 = pd.read_csv(f'../Predictive Models/Results/Method 2/{drug_class}.csv')
prophet_2['date'] = pd.to_datetime(prophet_2['date'])

In [8]:
# Number of distinct NDCs observed in each month
nb_drugs = (
    data.groupby('Date')['NDC']
    .nunique()
    .to_frame(name='# drugs')
    .reset_index()
    .rename(columns={'Date': 'date'})
)

# Year over year percent change in Sales weighted WAC for the selected class
yy_change = (
    yy_data.loc[
        yy_data['Major Class'] == drug_class,
        ['Date', 'Class_wght_pct_change_y/y'],
    ]
    .reset_index(drop=True)
    .rename(columns={'Date': 'date', 'Class_wght_pct_change_y/y': 'Y/Y Percent change'})
)

# Monthly total of the 'Changed' column, i.e. the number of price changes
nb_price_change = (
    data.groupby('Date')['Changed']
    .sum()
    .to_frame(name='# price changes')
    .reset_index()
    .rename(columns={'Date': 'date'})
)

# Helper function
def weigthed_average(data, quantity, weights):
    """Return the weighted average of one column of ``data``.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame (for example a groupby sub-frame) holding both columns.
    quantity : str
        Name of the column to average.
    weights : str
        Name of the column used as weights.

    Returns
    -------
    float
        ``sum(quantity * weights) / sum(weights)``, or NaN when the weights
        sum to zero (which includes an empty frame), because the weighted
        average is undefined in that case.
    """
    total_weight = data[weights].sum()
    # Numeric pandas/numpy sums do not raise ZeroDivisionError when divided by
    # zero (they yield nan/inf with a RuntimeWarning), so the zero-weight case
    # is tested explicitly and always yields a scalar.
    if total_weight == 0:
        return float('nan')
    return (data[quantity] * data[weights]).sum() / total_weight
    
# Sales weighted WAC per month (Sales = WAC * TRx is used as the weight)
data['Sales'] = data['WAC'] * data['TRx']
sales_wac = (
    data.groupby('Date')
    .apply(weigthed_average, 'WAC', 'Sales')
    .to_frame(name='Sales weighted WAC')
    .reset_index()
    .rename(columns={'Date': 'date'})
)

# TRx weighted WAC per month
trx_wac = (
    data.groupby('Date')
    .apply(weigthed_average, 'WAC', 'TRx')
    .to_frame(name='TRx weighted WAC')
    .reset_index()
    .rename(columns={'Date': 'date'})
)

# Prophet 2 predictions. The rename happens before the column selection so
# that this cell can be re-run: on a second run 'yhat' has already been
# renamed, `rename` is then a no-op, and the selection still succeeds
# (selecting 'yhat' first would raise KeyError on re-run).
prophet_2 = prophet_2.rename(columns={'yhat': 'Sales weighted WAC predictions'})
prophet_2 = prophet_2[['date', 'Sales weighted WAC predictions']]

# Outer-merge every monthly series on 'date' so that no month present in any
# of the inputs is lost.
deliverable = (
    nb_drugs
    .merge(yy_change, how='outer', on='date')
    .merge(nb_price_change, how='outer', on='date')
    .merge(sales_wac, how='outer', on='date')
    .merge(trx_wac, how='outer', on='date')
    .merge(prophet_1, how='outer', on='date')
    .merge(prophet_2, how='outer', on='date')
)

# Replace the timestamp by separate Year / Month columns
deliverable['Year'] = deliverable['date'].dt.year
deliverable['Month'] = deliverable['date'].dt.month
deliverable = deliverable.drop(columns='date')

# Creating handy class column
deliverable['Class'] = drug_class

# Share of drugs that had a price change in the month
deliverable['Percent # price changes'] = deliverable['# price changes'] / deliverable['# drugs']

# Final column order of the deliverable
deliverable = deliverable[['Class', 'Year', 'Month', 'Y/Y Percent change', '# drugs','# price changes', '# price changes prediction', 'Percent # price changes', 'Percent # price changes prediction',  'TRx weighted WAC', 'Sales weighted WAC', 'Sales weighted WAC predictions']]
# Exclude 2014 rows (presumably because 2014 is only partially covered -- TODO confirm)
deliverable = deliverable[deliverable['Year'] != 2014]

In [9]:
# Display the assembled deliverable table for the selected drug class
deliverable

Unnamed: 0,Class,Year,Month,Y/Y Percent change,# drugs,# price changes,# price changes prediction,Percent # price changes,Percent # price changes prediction,TRx weighted WAC,Sales weighted WAC,Sales weighted WAC predictions
5,"VACCINES (PURE, COMB, OTHER)",2015,1,,42.0,0.0,0.0,0.0,0.0,181.647703,211.979795,240.613581
6,"VACCINES (PURE, COMB, OTHER)",2015,2,,42.0,0.0,0.0,0.0,0.0,182.768568,217.957136,240.026982
7,"VACCINES (PURE, COMB, OTHER)",2015,3,,42.0,3.0,2.0,0.071429,0.046875,196.235034,231.375602,234.83032
8,"VACCINES (PURE, COMB, OTHER)",2015,4,,42.0,0.0,0.0,0.0,0.0,195.698141,231.92488,226.819326
9,"VACCINES (PURE, COMB, OTHER)",2015,5,,42.0,0.0,0.0,0.0,0.0,190.960344,228.86052,216.931874
10,"VACCINES (PURE, COMB, OTHER)",2015,6,,42.0,2.0,2.0,0.047619,0.046154,188.190005,226.286949,216.657028
11,"VACCINES (PURE, COMB, OTHER)",2015,7,,42.0,0.0,0.0,0.0,0.0,184.242014,224.009493,212.067283
12,"VACCINES (PURE, COMB, OTHER)",2015,8,0.090608,42.0,0.0,0.0,0.0,0.0,188.442203,230.133032,225.14017
13,"VACCINES (PURE, COMB, OTHER)",2015,9,0.102192,42.0,0.0,0.0,0.0,0.0,225.205772,259.633697,262.472435
14,"VACCINES (PURE, COMB, OTHER)",2015,10,0.106534,42.0,13.0,16.0,0.309524,0.371429,240.232997,268.674221,274.78065


In [10]:
# Write the deliverable to a per-class CSV file (row index not included)
output_path = f'deliverable_{drug_class}.csv'
deliverable.to_csv(output_path, index=False)