# Computing percent changes for each drug per month

In [None]:
# Importing basic libraries
import pandas as pd
import numpy as np
import time
import datetime

# Plotting libraries
import matplotlib.pyplot as plt
# Notebook magic: render matplotlib figures inline in the cell output
%matplotlib inline
import seaborn as sns
# Default figure size (width, height in inches) for plots in this notebook
sns.set(rc={'figure.figsize':(12,10)})
# Applied after sns.set, so any rcParams this style defines take precedence
plt.style.use('fivethirtyeight')
# Progress-bar wrapper for long-running loops
from tqdm import tqdm

### Loading Data

In [None]:
# Loading the dataset.
# Only the columns this notebook uses are parsed, which keeps the read fast and
# avoids dtype guessing on columns that are discarded anyway.
columns_to_keep = ['NDC', 'Product', 'Major Class', 'Acute/Chronic', 'Prod Form', 'Pack Size',
                   'Pack Quantity', 'WAC', 'Month', 'Year', 'TRx']

df = pd.read_csv('../Processed_Data/merge_with_iqvia_2014_01_2020_09_monthly_V3.csv',
                 usecols=columns_to_keep)

# read_csv(usecols=...) keeps the file's column order; re-select to get the order above
df = df[columns_to_keep]

# Drop rows whose WAC is missing
df = df.dropna(subset=['WAC'])

# Build a 'YYYY_MM' key with a zero-padded month, so that sorting the strings
# sorts the rows chronologically.
# assign() returns a new frame instead of writing into the dropna() result, which
# avoids a chained-assignment warning on older pandas versions.
# NOTE(review): assumes Month is stored as integers 1-12 - confirm against the CSV.
df = df.assign(Date=df.Year.astype(str) + '_' + df.Month.astype(str).str.zfill(2))
df.head()

### 1. Creating percent changes per month for each drug

Be careful when dropping duplicates: some drugs share the same name and the same form but have **different NDCs and different prices!**

In [None]:
# Sorting values so that, within each NDC, rows are in chronological order
df = df.sort_values(['NDC', 'Date']).reset_index(drop=True)

# List of unique NDC
NDCs = df.NDC.unique()

# Percent change of WAC relative to the previous row of the same NDC, computed in
# a single vectorized pass instead of concatenating one slice per NDC in a loop
# (which copies the growing frame on every iteration).
# The first row of each NDC has no previous price, so its Pct_change is NaN.
# Rows with a missing NDC are excluded: an equality filter on NDC never matched
# them, so they were never part of the result.
df_pct = df[df.NDC.notna()].copy()
df_pct['Pct_change'] = df_pct.groupby('NDC')['WAC'].pct_change()

print("Shape: ", df_pct.shape)
df_pct.head(10)

### 2. Adding a boolean flag for whether the price has changed (increase or decrease)

In [None]:
# 1 when the price moved in either direction, 0 otherwise.
# A NaN Pct_change (no previous price) fails both comparisons and is flagged 0.
df_pct['changed'] = (df_pct.Pct_change.gt(0) | df_pct.Pct_change.lt(0)).astype('int64')
df_pct.head(20)

In [None]:
# Spot check: show every row belonging to one major class
df_pct.loc[df_pct['Major Class'].eq('A05A1 CHOLERETICS+CHOLEKINETIC')]

### 3. Saving

In [None]:
# Write the per-NDC percent-change table to disk; the row index is not saved
df_pct.to_csv(path_or_buf='drugs_pct_changes_TRX_monthly.csv', index=False)

## Another method

In [None]:
# Reshape to one row per drug and one column per 'YYYY_MM' date, cells holding WAC.
# pivot_table aggregates with the mean when a drug has several rows for one date.
df_pivot = (
    df.pivot_table(values=['WAC'],
                   index=['NDC', 'Product', 'Major Class', 'Acute/Chronic', 'Prod Form'],
                   columns='Date')
      .droplevel(0, axis=1)        # drop the outer 'WAC' column level
      .rename_axis(None, axis=1)   # clear the leftover 'Date' name on the columns
      .reset_index()               # turn the identifier index back into columns
)
df_pivot.head()

In [None]:
# Computing percent changes between two consecutive date columns.
# Split the pivot into the identifier columns and the price columns.
identifiers = df_pivot[['NDC', 'Product', 'Major Class', 'Acute/Chronic', 'Prod Form']]
prices = df_pivot.drop(columns=identifiers.columns).copy()

# Pair every price column with the column immediately before it. The earlier
# columns are relabelled with the later column's name so the frames align
# element by element and each result is stored under the later date.
current = prices.iloc[:, 1:]
previous = prices.iloc[:, :-1].copy()
previous.columns = current.columns

changes = (current - previous) / previous
changes.columns = [f'{label}' for label in current.columns]

# The first date has no predecessor, so it has no percent-change column
percent_changes = pd.concat([identifiers.copy(), changes], axis=1)

percent_changes.head()

Be careful when computing the average pct_change for the different drugs: some drugs share the same name and the same form but have **different NDCs and different prices!**

In [None]:
# Averaging percent change over all duplicated drugs (multiple NDC for same drug)
id_columns = ['NDC', 'Product', 'Major Class', 'Acute/Chronic', 'Prod Form']
date_columns = [col for col in percent_changes.columns if col not in id_columns]

# Identifier values (NDC, class, form, ...) come from the first row of each Product
first_ids = percent_changes[id_columns].drop_duplicates(subset='Product')

# Average only the date columns. Calling mean() on the whole frame would also hit
# the string identifier columns, which raises TypeError on pandas >= 2.0, and the
# follow-up drop of 'NDC' would fail whenever NDC is not numeric.
# NOTE(review): grouping on Product alone also merges rows that differ in
# 'Prod Form' - confirm this is intended.
mean_changes = percent_changes.groupby('Product')[date_columns].mean().reset_index()

percent_changes = pd.merge(first_ids, mean_changes, on='Product', how='left')
percent_changes.head()

In [None]:
# percent_changes.to_csv('drugs_pct_changes_monthly.csv', index=False)

### Creating boolean for positive pct changes

In [None]:
# The first five columns are identifiers; every column after them is a date.
n_id_columns = 5

# Build a separate frame: 1 where the percent change is strictly positive, 0
# otherwise (NaN compares as not positive, so it becomes 0).
# A plain `bool_changes = percent_changes` would only alias the frame, and writing
# the flags would then overwrite the percent values in percent_changes as well.
bool_changes = pd.concat(
    [
        percent_changes.iloc[:, :n_id_columns],
        percent_changes.iloc[:, n_id_columns:].gt(0).astype(int),
    ],
    axis=1,
)

bool_changes.head()

In [None]:
# bool_changes.to_csv('drugs_bool_changes_monthly.csv', index=False)

# The issue

In [None]:
# Pull the rows for a single product, form and month
df.loc[
    df['Product'].eq('ZYVOX 04/2000 PFZ')
    & df['Prod Form'].eq('ORALS')
    & df['Date'].eq('2017_02')
]