# Computing percent changes for each drug per month

In [1]:
# Importing basic libraries
import pandas as pd
import numpy as np
import time
import datetime

# Plotting libraries
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
sns.set(rc={'figure.figsize':(12,10)})
# sns.set_style('white')
plt.style.use('fivethirtyeight')
from tqdm import tqdm


### Data

In [5]:
df = pd.read_csv('../Processed_Data/merge_with_iqvia_2017_01_2020_09_monthly.csv')
df = df[['NDC', 'Product','Major Class', 'Acute/Chronic', 'Prod Form', 'WAC', 'Month', 'Year', 'TRx']]
df = df.dropna(subset=['WAC'])
df['Date'] = df.Year.astype(str) + '_' + df.Month.astype(str).apply(lambda x: '0' + x if int(x)<10 else x)
df.head()

Unnamed: 0,NDC,Product,Major Class,Acute/Chronic,Prod Form,WAC,Month,Year,TRx,Date
0,2143301,TRULICITY 10/2014 LLY,ANTIDIABETICS,CHRONIC,INJECTABLES,338.0,12,2017,240,2017_12
1,2143301,TRULICITY 10/2014 LLY,ANTIDIABETICS,CHRONIC,INJECTABLES,338.0,11,2017,258,2017_11
2,2143301,TRULICITY 10/2014 LLY,ANTIDIABETICS,CHRONIC,INJECTABLES,338.0,10,2017,175,2017_10
3,2143301,TRULICITY 10/2014 LLY,ANTIDIABETICS,CHRONIC,INJECTABLES,338.0,9,2017,190,2017_09
4,2143301,TRULICITY 10/2014 LLY,ANTIDIABETICS,CHRONIC,INJECTABLES,338.0,8,2017,176,2017_08


In [9]:
df_unique = df.drop_duplicates(subset=['Product', 'Month', 'Year']).sort_values(['Product', 'Date']).reset_index(drop=True)

drugs = df_unique.Product.unique()
df_pct = pd.DataFrame(columns=df_unique.columns)
for drug in tqdm(drugs):
    df_temp = df_unique[df_unique.Product == drug]
    df_temp['Pct_change'] = df_temp.WAC.pct_change()
    df_pct = pd.concat((df_pct, df_temp))
    
df_pct.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_temp['Pct_change'] = df_temp.WAC.pct_change()
100%|██████████| 1491/1491 [00:20<00:00, 72.63it/s]


Unnamed: 0,NDC,Product,Major Class,Acute/Chronic,Prod Form,WAC,Month,Year,TRx,Date,Pct_change
0,187065142,8-MOP 01/1991 B.U,DERMATOLOGICS,ACUTE,ORALS,29.5008,1,2017,0,2017_01,
1,187065142,8-MOP 01/1991 B.U,DERMATOLOGICS,ACUTE,ORALS,29.5008,2,2017,2,2017_02,0.0
2,187065142,8-MOP 01/1991 B.U,DERMATOLOGICS,ACUTE,ORALS,29.5008,3,2017,0,2017_03,0.0
3,187065142,8-MOP 01/1991 B.U,DERMATOLOGICS,ACUTE,ORALS,29.5008,4,2017,0,2017_04,0.0
4,187065142,8-MOP 01/1991 B.U,DERMATOLOGICS,ACUTE,ORALS,29.5008,5,2017,0,2017_05,0.0


In [10]:
# df_pct.to_csv('drugs_pct_changes_TRX_monthly.csv', index=False)

### Percent changes

In [None]:
# Pivoting to get prices per date
df_pivot = df.pivot_table(values=['WAC'], index=['NDC', 'Product', 'Major Class', 'Acute/Chronic', 'Prod Form'], columns='date')
df_pivot.columns = df_pivot.columns.droplevel(0)
df_pivot = df_pivot.rename_axis(None, axis=1)
df_pivot = df_pivot.reset_index()
df_pivot.head()

In [None]:
# Computing percent changes between two consecutive months  
percent_changes = df_pivot[['NDC', 'Product', 'Major Class', 'Acute/Chronic', 'Prod Form']]
prices = df_pivot.drop(labels=percent_changes.columns, axis=1)

for i in range(len(prices.columns) - 1):
    pct_change = (prices.iloc[:, i+1] - prices.iloc[:, i]) / prices.iloc[:, i]
    percent_changes[f'{prices.iloc[:, i+1].name}'] = pct_change
    
percent_changes.head()

In [None]:
# Correcting the different NDC for unique product
percent_changes = pd.merge(percent_changes.groupby(['Product']).mean().reset_index().drop('NDC', axis=1), 
                           percent_changes[['NDC', 'Product', 'Major Class', 'Acute/Chronic', 'Prod Form']].drop_duplicates(subset='Product'),
                           left_on='Product', right_on='Product', how='left')
percent_changes = percent_changes[['NDC', 'Product', 'Major Class', 'Acute/Chronic', 'Prod Form'] + list(percent_changes.columns[1: 43])]
percent_changes.head()

In [None]:
# percent_changes.to_csv('drugs_pct_changes_monthly.csv', index=False)

### Creating boolean for positive pct changes

In [None]:
bool_changes = percent_changes
for column in range(5, percent_changes.shape[1]):
    bool_changes.iloc[:, column] = bool_changes.iloc[:, column].apply(lambda x: 1 if x > 0 else 0)
    
bool_changes.head()

In [None]:
# bool_changes.to_csv('drugs_bool_changes_monthly.csv', index=False)