# Deliverable Notebook

In [7]:
import pandas as pd
import re
# Disable row truncation so displayed DataFrames show every row
pd.set_option('display.max_rows', None)

Drug Classes:
- A.C.T.H.
- A05A1 CHOLERETICS+CHOLEKINETIC
- ADHD
- ALL OTHER RESPIRATORY
- ALL OTHER THERAPEUTICS
- ALL OTHER UROLOGICAL
- ALLERGY, SYSTEMIC & NASAL
- ANTI-ALZHEIMERS
- ANTI-ANAEMICS, IRON AND ALL COMBINATIONS
- ANTI-INFLAMMATORY AND DIGESTIVE ENZYMES AND MODULA
- ANTI-OBESITY PREPS, EXCL. DIETETICS
- ANTI-PARASITICS, ANTIMALARIALS, INSECTICIDES
- ANTI-ULCERANTS
- ANTIBACTERIALS
- ANTICOAGULANTS
- ANTIDIABETICS
- ANTIGOUT PREPS
- ANTIHYPERTENSIVES, PLAIN & COMBO
- ANTITUBERCULARS
- ANTIVIRALS, HERPES
- BISPHOSPHONATES TUMOR-RELATED & BONY METASTASES
- BLOOD COAGULATION
- BPH (BENIGN PROSTATIC HYPERTROPHY)
- CANCER DETOX AG, ANTI-NAUSEANTS
- CORTICOSTEROIDS, PLAIN & COMBO
- COUGH COLD, INCL FLU ANTIVIRALS
- DERMATOLOGICS
- DIAGNOSTIC EQUIP & ACCESSORIES & ALLERGEN TESTS
- DIETETICS
- EAR TREATMENTS&EYE-EAR COMBOS
- ERECTILE DYSFUNCTION
- ERYTHROPOIETINS
- GENITO-URINARY ANTIBACTERIALS & ANTISEPTICS & ANTI
- GI PRODUCTS
- GROWTH HORMONES
- HEMATOPOIETIC GROWTH FACTORS
- HIV ANTIVIRALS
- HORMONAL CONTRACEPTION, SYSTEMIC&TOPICAL
- HOSPITAL SOLUTIONS
- HYPOTHALAMIC HORMONES
- IMAGING
- IMMUNOLOGY
- IMMUNOSUPPRESSANTS
- INTERFERONS, EXCL MS MARKET
- J07D6 TUBERCULOSIS VACCINES
- LABOUR INDUCERS
- LIPID REGULATORS
- MENTAL HEALTH
- MIOTICS+ANTIGLAUCOMA PREPS
- MULTIPLE SCLEROSIS
- NASAL PREPS, TOPICAL
- NERVOUS SYSTEM DISORDERS
- OCULAR ANTINEOVASCULARISATION
- ONCOLOGICS
- OPHTHALMOLOGY, GENERAL
- OSTEOPOROSIS
- OTHER ALIMENTARY TRACT AND METABOLISM PRODUCTS
- OTHER CARDIOVASCULARS
- OTHER CNS
- OTHER HAEMATOLOGICALS
- OTHER HORMONES
- OTHER WOMEN'S HEALTH
- PAIN
- POLYVAL IMMUNOGLOBLULINS IV&IM
- RESP ANTIVIRALS EX FLU
- RESPIRATORY AGENTS
- SEX HORMONES (ANDROGENS, OESTROGENS, PROGESTOGENS,
- SPEC IMMUNOGLOBULINS & ANTITOXIC SERA
- SYST ANTIFUNGALS
- THROMBOPOIETIN AGONISTS
- THYROID ANTI-THYROID AND IODINE PREPS
- URINARY INCONTINENCE
- VACCINES (PURE, COMB, OTHER)
- VIRAL HEPATITIS
- VITAMINS & MINERALS

In [8]:
# Drug class to build the deliverable for: used to filter 'Major Class',
# to pick the prediction CSVs, and to name the output file
drug_class = 'ONCOLOGICS'

In [33]:
# Retrieving all data about the drug class
# NOTE: every filtered frame is explicitly copied so that the column
# assignments that follow write to an independent DataFrame instead of a
# slice of its parent (avoids SettingWithCopyWarning / ambiguous writes).
data = pd.read_csv('../Processed_Data/drugs_pct_changes_monthly.csv')
data = data[data['Major Class'] == drug_class].copy()
data['Date'] = pd.to_datetime(data['Date'], format='%Y_%m')

# Only NDCs present in both the first (2014-08) and the last (2020-07) month.
# Months in between are not checked here.
last_period = data[(data.Year == 2020) & (data.Month == 7)].NDC.unique()
first_period = data[(data.Year == 2014) & (data.Month == 8)].NDC.unique()
selected_NDC = list(set(last_period).intersection(first_period))
data = data[data.NDC.isin(selected_NDC)].copy()

# Updating NDC that do not have LOE date to a future LOE date
# ('May-24' is parsed below with '%b-%y', i.e. May 2024)
data.loc[data['Estimated LOE Date'] == 'Unspecified', 'Estimated LOE Date'] = 'May-24'

# Keeping only NDCs whose estimated LOE date is on or after 2020-07-01,
# i.e. dropping NDCs whose LOE falls before the last observed month
data['Estimated LOE Date'] = pd.to_datetime(data['Estimated LOE Date'], format='%b-%y')
data = data[data['Estimated LOE Date'] >= pd.to_datetime('2020-07-01')].copy()

# Retrieving year over year percent change data
yy_data = pd.read_csv('../Year Over Year/df_yearoveryear_class.csv')
yy_data['Date'] = pd.to_datetime(yy_data['Date'], format='%Y_%m')

# Retrieving computed predictions
# Method 1: the unnamed first CSV column holds the date; the count and
# percentage columns are renamed to the deliverable's column names
prophet_1 = pd.read_csv(f'../Predictive Models/Results/Method 1/{drug_class}.csv')
prophet_1 = prophet_1.rename(columns={'Unnamed: 0':'date', 'Number':'# price changes prediction', 'Percentage of drugs change price':'Percent # price changes prediction'})
prophet_1['date'] = pd.to_datetime(prophet_1['date'])

# Method 2: already has a 'date' column
prophet_2 = pd.read_csv(f'../Predictive Models/Results/Method 2/{drug_class}.csv')
prophet_2['date'] = pd.to_datetime(prophet_2['date'])

In [34]:
# All per-month aggregates are keyed on a 'date' column so they can be merged later
monthly_groups = data.groupby('Date')
date_rename = {'Date': 'date'}

# Number of distinct NDCs observed in each month
nb_drugs = (
    monthly_groups['NDC']
    .nunique()
    .to_frame(name='# drugs')
    .reset_index()
    .rename(columns=date_rename)
)

# Year over year percent change in Sales weighted WAC for the selected class
is_selected_class = yy_data['Major Class'] == drug_class
yy_change = (
    yy_data.loc[is_selected_class, ['Date', 'Class_wght_pct_change_y/y']]
    .reset_index(drop=True)
    .rename(columns={'Date': 'date', 'Class_wght_pct_change_y/y': 'Y/Y Percent change'})
)

# Sum of the 'Changed' column in each month
nb_price_change = (
    monthly_groups['Changed']
    .sum()
    .to_frame(name='# price changes')
    .reset_index()
    .rename(columns=date_rename)
)

# Helper function
def weigthed_average(data, quantity, weights):
    """Return the weighted average of one column using another as weights.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame (typically one groupby group) holding both columns.
    quantity : str
        Name of the column to average.
    weights : str
        Name of the column providing the weights.

    Returns
    -------
    scalar
        ``sum(quantity * weights) / sum(weights)``. When the weights sum to
        zero the unweighted mean of ``quantity`` is returned instead (NaN for
        an empty frame).
    """
    total_weight = data[weights].sum()
    # pandas sums are NumPy scalars: dividing by a zero sum yields NaN/inf
    # with a RuntimeWarning instead of raising ZeroDivisionError, so the
    # zero-weight case has to be checked explicitly rather than caught.
    if total_weight == 0:
        return data[quantity].mean()
    return (data[quantity] * data[weights]).sum() / total_weight
    
# Extracting Sales weighted WAC per month
# Sales is approximated per row as WAC * TRx and used as the weight
data['Sales'] = data['WAC'] * data['TRx']
sales_wac = (
    data.groupby('Date')
    .apply(weigthed_average, 'WAC', 'Sales')
    .to_frame(name='Sales weighted WAC')
    .reset_index()
    .rename(columns={'Date': 'date'})
)

# Extracting TRx weighted WAC per month
trx_wac = (
    data.groupby('Date')
    .apply(weigthed_average, 'WAC', 'TRx')
    .to_frame(name='TRx weighted WAC')
    .reset_index()
    .rename(columns={'Date': 'date'})
)

# Extracting Prophet 2 predictions
# Rename first, then select: renaming is a no-op when 'yhat' is already
# renamed, so this cell can be re-run without a KeyError on 'yhat'.
prophet_2 = prophet_2.rename(columns={'yhat': 'Sales weighted WAC predictions'})[['date', 'Sales weighted WAC predictions']]

# Merging all and formatting
# Outer joins keep months that exist only in the predictions
deliverable = nb_drugs
for monthly_frame in (yy_change, nb_price_change, sales_wac, trx_wac, prophet_1, prophet_2):
    deliverable = deliverable.merge(monthly_frame, how='outer', on='date')

# Formatting date: split into Year / Month columns and drop the timestamp
deliverable['Year'] = deliverable['date'].dt.year
deliverable['Month'] = deliverable['date'].dt.month
deliverable = deliverable.drop(columns='date')

# Creating handy class column
deliverable['Class'] = drug_class

# Computing percentage nb of drugs that have price change
deliverable['Percent # price changes'] = deliverable['# price changes'] / deliverable['# drugs']

# Cleaning year 2014
# .copy() so the column added below is written to an independent frame
deliverable = deliverable[deliverable.Year != 2014].copy()

# Creating actual + prediction column: actual value where available,
# otherwise the Prophet 2 prediction
deliverable['Sales weighted WAC actual predictions'] = deliverable['Sales weighted WAC'].fillna(deliverable['Sales weighted WAC predictions'])

# Cleaning order column
deliverable = deliverable[['Class', 'Year', 'Month', 'Y/Y Percent change', '# drugs','# price changes', '# price changes prediction', 'Percent # price changes', 'Percent # price changes prediction',  'TRx weighted WAC', 'Sales weighted WAC', 'Sales weighted WAC predictions', 'Sales weighted WAC actual predictions']]


In [35]:
# Display the assembled deliverable table for visual inspection
deliverable

Unnamed: 0,Class,Year,Month,Y/Y Percent change,# drugs,# price changes,# price changes prediction,Percent # price changes,Percent # price changes prediction,TRx weighted WAC,Sales weighted WAC,Sales weighted WAC predictions,Sales weighted WAC actual predictions
5,ONCOLOGICS,2015,1,,101.0,38.0,3377.0,0.376238,0.387283,418.283366,773.54425,750.069985,773.54425
6,ONCOLOGICS,2015,2,,101.0,1.0,193.0,0.009901,0.022099,415.088713,781.351537,772.333942,781.351537
7,ONCOLOGICS,2015,3,,101.0,18.0,1389.0,0.178218,0.159341,424.112657,777.334335,772.605919,777.334335
8,ONCOLOGICS,2015,4,,101.0,5.0,715.0,0.049505,0.081967,420.885857,782.359787,787.883042,782.359787
9,ONCOLOGICS,2015,5,,101.0,4.0,238.0,0.039604,0.027322,419.627985,765.668346,779.870701,765.668346
10,ONCOLOGICS,2015,6,,101.0,32.0,1585.0,0.316832,0.181818,426.332098,771.678365,783.618877,771.678365
11,ONCOLOGICS,2015,7,,101.0,17.0,1476.0,0.168317,0.169312,414.383324,747.994231,765.327566,747.994231
12,ONCOLOGICS,2015,8,0.092108,101.0,0.0,46.0,0.0,0.005291,418.658213,766.391428,781.166914,766.391428
13,ONCOLOGICS,2015,9,0.099199,101.0,9.0,643.0,0.089109,0.073684,434.048781,795.135125,798.744696,795.135125
14,ONCOLOGICS,2015,10,0.101094,101.0,25.0,1461.0,0.247525,0.167513,445.660378,806.421294,803.6973,806.421294


In [6]:
# Write the deliverable next to the notebook, one CSV per drug class, without the index column
output_path = f'deliverable_{drug_class}.csv'
deliverable.to_csv(output_path, index=False)