# Computing percent changes for each drug per month

In [36]:
# Importing basic libraries
import pandas as pd
import numpy as np
import time
import datetime

# Plotting libraries
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
sns.set(rc={'figure.figsize':(12,10)})
# sns.set_style('white')
plt.style.use('fivethirtyeight')
from tqdm import tqdm


# Data

In [37]:
df = pd.read_csv('../../Data/Processed_data/merge_with_iqvia_2017_01_2020_09_monthly.csv')
df = df[['NDC', 'Product','Major Class', 'Acute/Chronic', 'Prod Form', 'WAC', 'Month', 'Year', 'TRx']]
df = df.dropna(subset=['WAC'])
df['date'] = df.Year.astype(str) + '_' + df.Month.astype(str).apply(lambda x: '0' + x if int(x)<10 else x)
df.head()

Unnamed: 0,NDC,Product,Major Class,Acute/Chronic,Prod Form,WAC,Month,Year,TRx,date
0,2143301,TRULICITY 10/2014 LLY,ANTIDIABETICS,CHRONIC,INJECTABLES,338.0,12,2017,240,2017_12
1,2143301,TRULICITY 10/2014 LLY,ANTIDIABETICS,CHRONIC,INJECTABLES,338.0,11,2017,258,2017_11
2,2143301,TRULICITY 10/2014 LLY,ANTIDIABETICS,CHRONIC,INJECTABLES,338.0,10,2017,175,2017_10
3,2143301,TRULICITY 10/2014 LLY,ANTIDIABETICS,CHRONIC,INJECTABLES,338.0,9,2017,190,2017_09
4,2143301,TRULICITY 10/2014 LLY,ANTIDIABETICS,CHRONIC,INJECTABLES,338.0,8,2017,176,2017_08


# Percent changes

In [38]:
# Pivoting to get prices per date
df_pivot = df.pivot_table(values=['WAC'], index=['NDC', 'Product', 'Major Class', 'Acute/Chronic', 'Prod Form'], columns='date')
df_pivot.columns = df_pivot.columns.droplevel(0)
df_pivot = df_pivot.rename_axis(None, axis=1)
df_pivot = df_pivot.reset_index()
df_pivot.head()

Unnamed: 0,NDC,Product,Major Class,Acute/Chronic,Prod Form,2017_01,2017_02,2017_03,2017_04,2017_05,...,2019_10,2019_11,2019_12,2020_01,2020_02,2020_03,2020_04,2020_05,2020_06,2020_07
0,2143301,TRULICITY 10/2014 LLY,ANTIDIABETICS,CHRONIC,INJECTABLES,313.0,313.0,313.0,313.0,313.0,...,379.7,379.7,379.7,379.7,379.7,398.65,398.65,398.65,398.65,398.65
1,2143380,TRULICITY 10/2014 LLY,ANTIDIABETICS,CHRONIC,INJECTABLES,313.0,313.0,313.0,313.0,313.0,...,379.7,379.7,379.7,379.7,379.7,398.65,398.65,398.65,398.65,398.65
2,2143401,TRULICITY 10/2014 LLY,ANTIDIABETICS,CHRONIC,INJECTABLES,313.0,313.0,313.0,313.0,313.0,...,379.7,379.7,379.7,379.7,379.7,398.65,398.65,398.65,398.65,398.65
3,2143480,TRULICITY 10/2014 LLY,ANTIDIABETICS,CHRONIC,INJECTABLES,313.0,313.0,313.0,313.0,313.0,...,379.7,379.7,379.7,379.7,379.7,398.65,398.65,398.65,398.65,398.65
4,2143601,EMGALITY 10/2018 LLY,PAIN,ACUTE,INJECTABLES,,,,,,...,575.0,575.0,575.0,575.0,575.0,603.6,603.6,603.6,603.6,603.6


In [39]:
# Computing percent changes between two consecutive months  
percent_changes = df_pivot[['NDC', 'Product', 'Major Class', 'Acute/Chronic', 'Prod Form']]
prices = df_pivot.drop(labels=percent_changes.columns, axis=1)

for i in range(len(prices.columns) - 1):
    pct_change = (prices.iloc[:, i+1] - prices.iloc[:, i]) / prices.iloc[:, i]
    percent_changes[f'{prices.iloc[:, i+1].name}'] = pct_change
    
percent_changes.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  percent_changes[f'{prices.iloc[:, i+1].name}'] = pct_change


Unnamed: 0,NDC,Product,Major Class,Acute/Chronic,Prod Form,2017_02,2017_03,2017_04,2017_05,2017_06,...,2019_10,2019_11,2019_12,2020_01,2020_02,2020_03,2020_04,2020_05,2020_06,2020_07
0,2143301,TRULICITY 10/2014 LLY,ANTIDIABETICS,CHRONIC,INJECTABLES,0.0,0.0,0.0,0.0,0.079872,...,0.0,0.0,0.0,0.0,0.0,0.049908,0.0,0.0,0.0,0.0
1,2143380,TRULICITY 10/2014 LLY,ANTIDIABETICS,CHRONIC,INJECTABLES,0.0,0.0,0.0,0.0,0.079872,...,0.0,0.0,0.0,0.0,0.0,0.049908,0.0,0.0,0.0,0.0
2,2143401,TRULICITY 10/2014 LLY,ANTIDIABETICS,CHRONIC,INJECTABLES,0.0,0.0,0.0,0.0,0.079872,...,0.0,0.0,0.0,0.0,0.0,0.049908,0.0,0.0,0.0,0.0
3,2143480,TRULICITY 10/2014 LLY,ANTIDIABETICS,CHRONIC,INJECTABLES,0.0,0.0,0.0,0.0,0.079872,...,0.0,0.0,0.0,0.0,0.0,0.049908,0.0,0.0,0.0,0.0
4,2143601,EMGALITY 10/2018 LLY,PAIN,ACUTE,INJECTABLES,,,,,,...,0.0,0.0,0.0,0.0,0.0,0.049739,0.0,0.0,0.0,0.0


In [40]:
# Correcting the different NDC for unique product
percent_changes = pd.merge(percent_changes.groupby(['Product']).mean().reset_index().drop('NDC', axis=1), 
                           percent_changes[['NDC', 'Product', 'Major Class', 'Acute/Chronic', 'Prod Form']].drop_duplicates(subset='Product'),
                           left_on='Product', right_on='Product', how='left')
percent_changes = percent_changes[['NDC', 'Product', 'Major Class', 'Acute/Chronic', 'Prod Form'] + list(percent_changes.columns[1: 43])]
percent_changes

Unnamed: 0,NDC,Product,Major Class,Acute/Chronic,Prod Form,2017_02,2017_03,2017_04,2017_05,2017_06,...,2019_10,2019_11,2019_12,2020_01,2020_02,2020_03,2020_04,2020_05,2020_06,2020_07
0,187065142,8-MOP 01/1991 B.U,DERMATOLOGICS,ACUTE,ORALS,0.000000,0.000000,0.0,0.000000,,...,,,,,,,,,,
1,59148000613,ABILIFY 11/2002 OTS,MENTAL HEALTH,CHRONIC,ORALS,0.000000,0.000000,0.0,0.000000,0.000000e+00,...,0.0,0.0,0.0,0.00000,0.000000,0.000000,0.0,0.0,0.0,0.0
2,59148001871,ABILIFY MAINTENA 03/2013 OTS,MENTAL HEALTH,CHRONIC,INJECTABLES,0.044999,0.000000,0.0,0.000000,0.000000e+00,...,0.0,0.0,0.0,0.00000,0.000000,0.000000,0.0,0.0,0.0,0.0
3,59148003085,ABILIFY MYCITE 03/2019 OTS,MENTAL HEALTH,CHRONIC,ORALS,,,,,,...,,,,,,0.000000,0.0,0.0,0.0,0.0
4,68817013450,ABRAXANE 02/2005 CC5,ONCOLOGICS,ACUTE,INJECTABLES,0.017503,0.000000,0.0,0.017504,0.000000e+00,...,0.0,0.0,0.0,0.00000,0.050000,0.000000,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1486,2759701,ZYPREXA INTRAMUSCULAR 04/2004 LLY,MENTAL HEALTH,CHRONIC,INJECTABLES,0.000000,0.000000,0.0,0.000000,0.000000e+00,...,0.0,0.0,0.0,0.00000,0.000000,0.029083,0.0,0.0,0.0,0.0
1487,2763511,ZYPREXA RELPREVV 03/2010 LLY,MENTAL HEALTH,CHRONIC,OTHER SYSTEMICS,0.000000,0.000000,0.0,0.000000,0.000000e+00,...,0.0,0.0,0.0,0.00000,0.000000,0.000000,0.0,0.0,0.0,0.0
1488,2445385,ZYPREXA ZYDIS 09/2000 LLY,MENTAL HEALTH,CHRONIC,ORALS,0.000000,0.000000,0.0,0.000000,0.000000e+00,...,0.0,0.0,0.0,0.00000,0.000000,0.029047,0.0,0.0,0.0,0.0
1489,57894015012,ZYTIGA 05/2011 JAN,ONCOLOGICS,CHRONIC,ORALS,0.000000,0.088995,0.0,0.000000,0.000000e+00,...,0.0,0.0,0.0,0.00000,0.000000,0.000000,0.0,0.0,0.0,0.0


In [None]:
# percent_changes.to_csv('drugs_pct_changes_monthly.csv', index=False)

## Creating boolean for positive pct changes

In [49]:
bool_changes = percent_changes
for column in range(5, percent_changes.shape[1]):
    bool_changes.iloc[:, column] = bool_changes.iloc[:, column].apply(lambda x: 1 if x > 0 else 0)
    
bool_changes

Unnamed: 0,NDC,Product,Major Class,Acute/Chronic,Prod Form,2017_02,2017_03,2017_04,2017_05,2017_06,...,2019_10,2019_11,2019_12,2020_01,2020_02,2020_03,2020_04,2020_05,2020_06,2020_07
0,187065142,8-MOP 01/1991 B.U,DERMATOLOGICS,ACUTE,ORALS,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,59148000613,ABILIFY 11/2002 OTS,MENTAL HEALTH,CHRONIC,ORALS,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,59148001871,ABILIFY MAINTENA 03/2013 OTS,MENTAL HEALTH,CHRONIC,INJECTABLES,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,59148003085,ABILIFY MYCITE 03/2019 OTS,MENTAL HEALTH,CHRONIC,ORALS,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,68817013450,ABRAXANE 02/2005 CC5,ONCOLOGICS,ACUTE,INJECTABLES,1,0,0,1,0,...,0,0,0,0,1,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1486,2759701,ZYPREXA INTRAMUSCULAR 04/2004 LLY,MENTAL HEALTH,CHRONIC,INJECTABLES,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
1487,2763511,ZYPREXA RELPREVV 03/2010 LLY,MENTAL HEALTH,CHRONIC,OTHER SYSTEMICS,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1488,2445385,ZYPREXA ZYDIS 09/2000 LLY,MENTAL HEALTH,CHRONIC,ORALS,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
1489,57894015012,ZYTIGA 05/2011 JAN,ONCOLOGICS,CHRONIC,ORALS,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [50]:
# bool_changes.to_csv('drugs_bool_changes_monthly.csv', index=False)