# Computing percent changes month to month

In [1]:
# Importing basic libraries
import pandas as pd
import numpy as np
import time
import datetime

# Importing plotting libraries
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
sns.set(rc={'figure.figsize':(12,10)})
plt.style.use('fivethirtyeight')
from tqdm import tqdm

## Loading Data

In [4]:
# Loading the dataset
df = pd.read_csv('../Processed_Data/merge_with_iqvia_2014_01_2020_09_monthly_V3.csv')

# Extracting only useful columns
df = df[['NDC', 'Product', 'Major Class', 'Acute/Chronic', 'Prod Form', 'Pack Size', 
         'Pack Quantity', 'WAC', 'Month', 'Year', 'TRx']]

# Droping all records for which we do not have any price data (WAC)
df = df.dropna(subset=['WAC'])

# Creating a YYYY-MM column to make things easier
df['Date'] = df.Year.astype(str) + '_' + df.Month.astype(str).apply(lambda x: '0' + x if int(x)<10 else x)
print("Shape:", df.shape)
df.head()

Shape: (272280, 12)


Unnamed: 0,NDC,Product,Major Class,Acute/Chronic,Prod Form,Pack Size,Pack Quantity,WAC,Month,Year,TRx,Date
0,2143301,TRULICITY 10/2014 LLY,ANTIDIABETICS,CHRONIC,INJECTABLES,1,0.5,244.16,10,2014,,2014_10
1,2143301,TRULICITY 10/2014 LLY,ANTIDIABETICS,CHRONIC,INJECTABLES,1,0.5,244.16,11,2014,6.0,2014_11
2,2143301,TRULICITY 10/2014 LLY,ANTIDIABETICS,CHRONIC,INJECTABLES,1,0.5,244.16,12,2014,22.0,2014_12
3,2143301,TRULICITY 10/2014 LLY,ANTIDIABETICS,CHRONIC,INJECTABLES,1,0.5,244.16,1,2015,21.0,2015_01
4,2143301,TRULICITY 10/2014 LLY,ANTIDIABETICS,CHRONIC,INJECTABLES,1,0.5,244.16,2,2015,15.0,2015_02


## 1. Creating percent changes month to month for each drug

In [3]:
# Sorting by drug identifier (NDC) and date
df = df.sort_values(['NDC', 'Date']).reset_index(drop=True)

# Retrieving List of unique drugs 
NDCs = df.NDC.unique()

# Empty Dataframe to store new data
df_pct = pd.DataFrame(columns=df.columns)

# For each drug, compute pct change month to month and append to dataframe
for NDC in tqdm(NDCs):
    
    # Extracting drug
    df_temp = df[df.NDC == NDC].copy()
    
    # Already sorted by date so we can use pct_change() method
    df_temp['Pct_change'] = df_temp.WAC.pct_change()
    
    # Appending to dataframe
    df_pct = pd.concat((df_pct, df_temp))

print("Shape: ", df_pct.shape)
df_pct.head(10)

100%|██████████| 4633/4633 [04:44<00:00, 16.29it/s]

Shape:  (272280, 13)





Unnamed: 0,NDC,Product,Major Class,Acute/Chronic,Prod Form,Pack Size,Pack Quantity,WAC,Month,Year,TRx,Date,Pct_change
0,2143301,TRULICITY 10/2014 LLY,ANTIDIABETICS,CHRONIC,INJECTABLES,1,0.5,244.16,10,2014,,2014_10,
1,2143301,TRULICITY 10/2014 LLY,ANTIDIABETICS,CHRONIC,INJECTABLES,1,0.5,244.16,11,2014,6.0,2014_11,0.0
2,2143301,TRULICITY 10/2014 LLY,ANTIDIABETICS,CHRONIC,INJECTABLES,1,0.5,244.16,12,2014,22.0,2014_12,0.0
3,2143301,TRULICITY 10/2014 LLY,ANTIDIABETICS,CHRONIC,INJECTABLES,1,0.5,244.16,1,2015,21.0,2015_01,0.0
4,2143301,TRULICITY 10/2014 LLY,ANTIDIABETICS,CHRONIC,INJECTABLES,1,0.5,244.16,2,2015,15.0,2015_02,0.0
5,2143301,TRULICITY 10/2014 LLY,ANTIDIABETICS,CHRONIC,INJECTABLES,1,0.5,244.16,3,2015,33.0,2015_03,0.0
6,2143301,TRULICITY 10/2014 LLY,ANTIDIABETICS,CHRONIC,INJECTABLES,1,0.5,244.16,4,2015,47.0,2015_04,0.0
7,2143301,TRULICITY 10/2014 LLY,ANTIDIABETICS,CHRONIC,INJECTABLES,1,0.5,265.9,5,2015,36.0,2015_05,0.08904
8,2143301,TRULICITY 10/2014 LLY,ANTIDIABETICS,CHRONIC,INJECTABLES,1,0.5,265.9,6,2015,54.0,2015_06,0.0
9,2143301,TRULICITY 10/2014 LLY,ANTIDIABETICS,CHRONIC,INJECTABLES,1,0.5,265.9,7,2015,55.0,2015_07,0.0


## 2. Adding boolean: wether or not the price did change from month to month

In [5]:
df_pct['Changed'] = df_pct.Pct_change.apply(lambda x: 1 if (x>0 or x<0) else 0)
df_pct.head(10)

Unnamed: 0,NDC,Product,Major Class,Acute/Chronic,Prod Form,Pack Size,Pack Quantity,WAC,Month,Year,TRx,Date,Pct_change,Changed
0,2143301,TRULICITY 10/2014 LLY,ANTIDIABETICS,CHRONIC,INJECTABLES,1,0.5,244.16,10,2014,,2014_10,,0
1,2143301,TRULICITY 10/2014 LLY,ANTIDIABETICS,CHRONIC,INJECTABLES,1,0.5,244.16,11,2014,6.0,2014_11,0.0,0
2,2143301,TRULICITY 10/2014 LLY,ANTIDIABETICS,CHRONIC,INJECTABLES,1,0.5,244.16,12,2014,22.0,2014_12,0.0,0
3,2143301,TRULICITY 10/2014 LLY,ANTIDIABETICS,CHRONIC,INJECTABLES,1,0.5,244.16,1,2015,21.0,2015_01,0.0,0
4,2143301,TRULICITY 10/2014 LLY,ANTIDIABETICS,CHRONIC,INJECTABLES,1,0.5,244.16,2,2015,15.0,2015_02,0.0,0
5,2143301,TRULICITY 10/2014 LLY,ANTIDIABETICS,CHRONIC,INJECTABLES,1,0.5,244.16,3,2015,33.0,2015_03,0.0,0
6,2143301,TRULICITY 10/2014 LLY,ANTIDIABETICS,CHRONIC,INJECTABLES,1,0.5,244.16,4,2015,47.0,2015_04,0.0,0
7,2143301,TRULICITY 10/2014 LLY,ANTIDIABETICS,CHRONIC,INJECTABLES,1,0.5,265.9,5,2015,36.0,2015_05,0.08904,1
8,2143301,TRULICITY 10/2014 LLY,ANTIDIABETICS,CHRONIC,INJECTABLES,1,0.5,265.9,6,2015,54.0,2015_06,0.0,0
9,2143301,TRULICITY 10/2014 LLY,ANTIDIABETICS,CHRONIC,INJECTABLES,1,0.5,265.9,7,2015,55.0,2015_07,0.0,0


## Saving

In [7]:
# Saving dataset
# df_pct.to_csv('drugs_pct_changes_monthly.csv', index=False)