Loading and Processing The Data

In [1]:
import pandas as pd
warf = pd.read_csv('data/warfarin.csv')

In [2]:
warf = warf.fillna('Unknown')

In [3]:
warf = warf.loc[warf['Age'] != 'Unknown']
warf = warf.loc[warf['Height (cm)'] != 'Unknown']
warf = warf.loc[warf['Weight (kg)'] != 'Unknown']
warf = warf.loc[warf['Carbamazepine (Tegretol)'] != 'Unknown']
warf = warf.loc[warf['Phenytoin (Dilantin)'] != 'Unknown']
warf = warf.loc[warf['Rifampin or Rifampicin'] != 'Unknown']
warf = warf.loc[warf['Amiodarone (Cordarone)'] != 'Unknown']

In [4]:
warf['Carbamazepine (Tegretol)'] = warf['Carbamazepine (Tegretol)'].map({1.0: True, 0.0: False})
warf['Phenytoin (Dilantin)'] = warf['Phenytoin (Dilantin)'].map({1.0: True, 0.0:False})
warf['Rifampin or Rifampicin'] = warf['Rifampin or Rifampicin'].map({1.0: True, 0.0:False})

In [5]:
cols = ['Cyp2C9 genotypes', 'VKORC1 genotype: -1639 G>A (3673); chr16:31015190; rs9923231; C/T']
warf = pd.get_dummies(warf, prefix = cols, columns=cols)

In [6]:
warf['Age'] = warf['Age'].map({'10 - 19': 1, '20 - 29': 2, '30 - 39': 3, '40 - 49': 4, '50 - 59': 5, '60 - 69': 6, '70 - 79': 7, '80 - 89' : 8, '90+' : 9})

In [7]:
warf = pd.get_dummies(warf, prefix=['Race'],columns=['Race'])

In [8]:
warf['Enzyme inducer status'] = warf["Carbamazepine (Tegretol)"] & warf["Phenytoin (Dilantin)"] & warf["Rifampin or Rifampicin"]
warf['Enzyme inducer status'].value_counts()

False    2122
Name: Enzyme inducer status, dtype: int64

Using Pharmacogenetic Dosing Algorithm to Predict Dosage

In [9]:
warf['Pharmacogenetic Dose'] = \
5.6044 \
- 0.2614*warf['Age']\
+ 0.0087*warf['Height (cm)']\
+ 0.0128*warf['Weight (kg)']\
- 0.1092*warf['Race_Asian']\
- 0.2760*warf['Race_Black or African American']\
- 0.1032*warf['Race_Unknown']\
+ 1.1816*warf['Enzyme inducer status']\
-   0.5503*warf['Amiodarone (Cordarone)']\
- 0.5211*warf['Cyp2C9 genotypes_*1/*2']\
- 0.9357*warf['Cyp2C9 genotypes_*1/*3']\
- 1.0616*warf['Cyp2C9 genotypes_*2/*2']\
- 1.9206*warf['Cyp2C9 genotypes_*2/*3']\
- 2.3312*warf['Cyp2C9 genotypes_*3/*3']\
- 0.2188*warf['Cyp2C9 genotypes_Unknown']\
- 1.6974*warf['VKORC1 genotype: -1639 G>A (3673); chr16:31015190; rs9923231; C/T_A/A']\
- 0.8677*warf['VKORC1 genotype: -1639 G>A (3673); chr16:31015190; rs9923231; C/T_A/G']\
- 0.4854*warf['VKORC1 genotype: -1639 G>A (3673); chr16:31015190; rs9923231; C/T_Unknown']
warf['Pharmacogenetic Dose'] =  warf['Pharmacogenetic Dose']*warf['Pharmacogenetic Dose']

In [10]:
bins = pd.IntervalIndex.from_tuples([(0, 20.9999), (20.9999, 49), (49, 20000)])
warf["Therapeutic Dose of Warfarin"] = pd.cut(warf["Therapeutic Dose of Warfarin"], bins)
warf['Pharmacogenetic Dose'] = pd.cut(warf['Pharmacogenetic Dose'], bins)

In [11]:
warf['Correct'] = warf['Pharmacogenetic Dose'] == warf["Therapeutic Dose of Warfarin"]
warf['Correct'].value_counts()

True     1459
False     663
Name: Correct, dtype: int64

Accuracy

In [13]:
print("Accuracy: ", 1459/(1459+663))

Accuracy:  0.6875589066918002
