## Clinical Dosage Baseline



In [1]:
import pandas as pd
warf = pd.read_csv('../data/warfarin.csv')

Filter out Unknown entries

In [2]:
warf = warf.fillna('Unknown')

In [3]:
warf = warf.loc[warf['Age'] != 'Unknown']
warf = warf.loc[warf['Height (cm)'] != 'Unknown']
warf = warf.loc[warf['Weight (kg)'] != 'Unknown']
warf = warf.loc[warf['Carbamazepine (Tegretol)'] != 'Unknown']
warf = warf.loc[warf['Phenytoin (Dilantin)'] != 'Unknown']
warf = warf.loc[warf['Rifampin or Rifampicin'] != 'Unknown']
warf = warf.loc[warf['Amiodarone (Cordarone)'] != 'Unknown']

In [4]:
warf['Carbamazepine (Tegretol)'] = warf['Carbamazepine (Tegretol)'].map({1.0: True, 0.0: False})
warf['Phenytoin (Dilantin)'] = warf['Phenytoin (Dilantin)'].map({1.0: True, 0.0:False})
warf['Rifampin or Rifampicin'] = warf['Rifampin or Rifampicin'].map({1.0: True, 0.0:False})

Convert ages to decades

In [5]:
warf['Age'] = warf['Age'].map({'10 - 19': 1, '20 - 29': 2, '30 - 39': 3, '40 - 49': 4, '50 - 59': 5, '60 - 69': 6, '70 - 79': 7, '80 - 89' : 8, '90+' : 9})

Convert Race to One-Hots

In [6]:
warf = pd.get_dummies(warf, prefix=['Race'],columns=['Race'])

Get Enzyme Inducer Status

In [7]:
warf['Enzyme inducer status'] = warf["Carbamazepine (Tegretol)"] & warf["Phenytoin (Dilantin)"] & warf["Rifampin or Rifampicin"]
warf['Enzyme inducer status'].value_counts()

False    2122
Name: Enzyme inducer status, dtype: int64

Calculate Clinical Dose

In [8]:
warf['Clinical Dose'] = 4.0376 - 0.2546*warf['Age'] + 0.0118*warf['Height (cm)'] + 0.0134*warf['Weight (kg)'] - 0.6752*warf['Race_Asian'] + 0.4060*warf['Race_Black or African American'] + 0.0443*warf['Race_Unknown'] + 1.2799*warf['Enzyme inducer status'] - 0.5695*warf['Amiodarone (Cordarone)']
warf['Clinical Dose'] =  warf['Clinical Dose']*warf['Clinical Dose']

Convert everything to the 0-21, 21-49 and 49-200 ranges

In [9]:
bins = pd.IntervalIndex.from_tuples([(0, 20.9999), (20.9999, 49), (49, 20000)])
warf["Therapeutic Dose of Warfarin"] = pd.cut(warf["Therapeutic Dose of Warfarin"], bins)
warf['Clinical Dose'] = pd.cut(warf['Clinical Dose'], bins)

In [10]:
warf

Unnamed: 0,PharmGKB Subject ID,Gender,Ethnicity,Age,Height (cm),Weight (kg),Indication for Warfarin Treatment,Comorbidities,Diabetes,Congestive Heart Failure and/or Cardiomyopathy,...,VKORC1 -4451 consensus,Unnamed: 63,Unnamed: 64,Unnamed: 65,Race_Asian,Race_Black or African American,Race_Unknown,Race_White,Enzyme inducer status,Clinical Dose
432,PA126718538,female,not Hispanic or Latino,6,168.58,83.6,7,No Cardiomyopathy; No Diabetes; No Hyperlipide...,0,0,...,Unknown,Unknown,Unknown,Unknown,0,0,0,1,False,"(20.9999, 49.0]"
438,PA126718583,female,not Hispanic or Latino,5,153.49,70.7,1,No Cardiomyopathy; No Diabetes; No Hyperlipide...,0,0,...,Unknown,Unknown,Unknown,Unknown,0,0,0,1,False,"(20.9999, 49.0]"
445,PA126718400,female,not Hispanic or Latino,2,163.55,88.6,1,No Cardiomyopathy; No Diabetes; No Hyperlipide...,0,0,...,Unknown,Unknown,Unknown,Unknown,0,0,0,1,False,"(20.9999, 49.0]"
446,PA126718415,male,not Hispanic or Latino,4,183.67,123,1,No Cardiomyopathy; No Diabetes; No Hyperlipide...,0,0,...,Unknown,Unknown,Unknown,Unknown,0,0,0,1,False,"(20.9999, 49.0]"
449,PA126718534,male,not Hispanic or Latino,6,169.98,81.3,1,No Cardiomyopathy; No Diabetes; No Hyperlipide...,0,0,...,Unknown,Unknown,Unknown,Unknown,0,0,0,1,False,"(20.9999, 49.0]"
456,PA151378990,female,not Hispanic or Latino,6,166.07,97.5,1,Hyperlipidemia,Unknown,Unknown,...,Unknown,Unknown,Unknown,Unknown,0,0,0,1,False,"(20.9999, 49.0]"
457,PA151378992,male,not Hispanic or Latino,3,176.13,88.2,1,No Cardiomyopathy; No Diabetes; No Hyperlipide...,0,0,...,A/C,Unknown,Unknown,Unknown,0,0,0,1,False,"(20.9999, 49.0]"
458,PA126718511,female,not Hispanic or Latino,5,169.85,123.4,2,No Cardiomyopathy; No Diabetes; No Hyperlipide...,0,0,...,Unknown,Unknown,Unknown,Unknown,0,0,0,1,False,"(20.9999, 49.0]"
460,PA151378986,male,not Hispanic or Latino,3,183.67,166.9,2,No Cardiomyopathy; No Diabetes; No Hyperlipide...,0,0,...,C/C,Unknown,Unknown,Unknown,0,1,0,0,False,"(49.0, 20000.0]"
470,PA126718498,female,not Hispanic or Latino,7,176.12,72.3,3,No Cardiomyopathy; No Diabetes; No Hyperlipide...,0,0,...,Unknown,Unknown,Unknown,Unknown,0,0,0,1,False,"(20.9999, 49.0]"


Calculate number of correct predictions

In [11]:
warf['Correct'] = warf['Clinical Dose'] == warf["Therapeutic Dose of Warfarin"]
warf['Correct'].value_counts()

True     1403
False     719
Name: Correct, dtype: int64

In [13]:
1403/(1403+719)

0.6611687087653157