In [1]:
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import KFold, cross_val_score

In [2]:
# Read the drug dataset from the CSV in the working directory and preview
# the first three rows to sanity-check the parsed columns.
df = pd.read_csv(filepath_or_buffer="drug.csv")

df.head(n=3)

Unnamed: 0,Age,Sex,BP,Cholesterol,Na_to_K,Drug
0,23,F,HIGH,HIGH,25.355,DrugY
1,47,M,LOW,HIGH,13.093,drugC
2,47,M,LOW,HIGH,10.114,drugC


In [3]:
# Label-encode every object-dtype (text) column of `df` in place.
#
# A separate fitted LabelEncoder is kept per column in `encoders`, so each
# column's category -> integer mapping stays available afterwards via
# `encoders[column].classes_`, and encoded values can be decoded again with
# `encoders[column].inverse_transform(...)`. Re-fitting one shared encoder on
# every column would overwrite all mappings except the last one.
label_encoder = LabelEncoder()

categorical_features = [feature for feature in df.columns if df[feature].dtypes == 'O']

encoders = {}
for feature in categorical_features:
    # Fresh encoder per column; `label_encoder` still ends up bound to the
    # encoder fitted on the last categorical column, as before.
    label_encoder = LabelEncoder()
    df[feature] = label_encoder.fit_transform(df[feature])
    encoders[feature] = label_encoder

In [4]:
# Select the model inputs explicitly, in the same order predict_drug() passes
# them positionally to model.predict(): Age, Sex, BP, Cholesterol, Na_to_K.
# Choosing columns by name (instead of "everything except Drug") keeps any
# stray column out of the feature matrix.
# NOTE(review): the preview above shows an "Unnamed: 0" header; if that is a
# real CSV column rather than a rendering of the index, this also stops it
# from being used as a feature -- confirm against drug.csv.
FEATURE_COLUMNS = ["Age", "Sex", "BP", "Cholesterol", "Na_to_K"]

X = df[FEATURE_COLUMNS]
y = df["Drug"]

In [5]:
# Fit an entropy-criterion decision tree on the full dataset; this fitted
# estimator is the one that gets pickled further down.
# random_state is fixed (same seed as the KFold splitter) so that re-running
# the notebook rebuilds the same tree: without it, scikit-learn may pick a
# different split whenever several candidate splits are equally good.
model = DecisionTreeClassifier(criterion="entropy", random_state=42)
model.fit(X, y)

In [6]:
# Score the model with shuffled, seeded K-fold cross-validation and report
# the mean and standard deviation of the per-fold accuracy.
kfold = KFold(shuffle=True, random_state=42)
cv_results = cross_val_score(
    estimator=model,
    X=X,
    y=y,
    scoring="accuracy",
    cv=kfold,
)
print(cv_results.mean(), cv_results.std())

0.99 0.012247448713915901


In [7]:
import pickle

# Persist the fitted model to disk.
# The file is opened with 'wb' so each run replaces the previous contents.
# In append mode ('ab') every run adds another pickle to the end of the file,
# while a single pickle.load() only returns the first (oldest) object, so a
# retrained model would never be the one that gets loaded.
# The `with` block closes the file even if pickle.dump() raises.
with open('model.pkl', 'wb') as pickle_file:
    pickle.dump(model, pickle_file)

In [11]:
# Print, for every text column, its raw categories next to the integer codes
# LabelEncoder assigns to them (used to hand-write the lookup dictionaries).
#
# The mappings are derived from a fresh read of the CSV: the text columns of
# `df` were already replaced by integer codes in the encoding cell above, so
# scanning `df` for object-dtype columns finds nothing on a top-to-bottom run
# and this cell would print no output.
raw_df = pd.read_csv("drug.csv")

label_encoder = LabelEncoder()

categorical_features = [feature for feature in raw_df.columns if raw_df[feature].dtypes == 'O']
for feature in categorical_features:
    categories = raw_df[feature].unique()
    # .tolist() converts the encoded array to plain Python ints for printing.
    codes = label_encoder.fit_transform(categories).tolist()
    print(feature, list(categories), codes, "\n")

In [1]:
# Dictionaries that map the raw text values to their encoded (integer)
# equivalents, plus a small helper that makes an individual prediction
# from raw, human-readable inputs.
import pickle

gender_map = {"F": 0, "M": 1}
bp_map = {"HIGH": 0, "LOW": 1, "NORMAL": 2}
cholestol_map = {"HIGH": 0, "NORMAL": 1}
drug_map = {0: "DrugY", 3: "drugC", 4: "drugX", 1: "drugA", 2: "drugB"}

def predict_drug(Age, 
                 Sex, 
                 BP, 
                 Cholesterol, 
                 Na_to_K,
                 model_path='model.pkl'):
    """Predict the drug name for one patient from raw (unencoded) values.

    Parameters
    ----------
    Age : number
        Passed to the model unchanged.
    Sex : str
        A key of ``gender_map`` ("F" or "M").
    BP : str
        A key of ``bp_map`` ("HIGH", "LOW" or "NORMAL").
    Cholesterol : str
        A key of ``cholestol_map`` ("HIGH" or "NORMAL").
    Na_to_K : number
        Passed to the model unchanged.
    model_path : str, optional
        Path of the pickled model to load; defaults to 'model.pkl'.

    Returns
    -------
    str
        The drug name that ``drug_map`` associates with the predicted code.

    Raises
    ------
    KeyError
        If Sex, BP or Cholesterol is not a key of its map, or the predicted
        code is not a key of ``drug_map``.
    OSError
        If the model file cannot be opened.
    """
    # 1. Read the machine learning model from its saved state. The `with`
    #    block guarantees the file handle is closed on every call.
    #    SECURITY: pickle.load() can execute arbitrary code while loading;
    #    only point model_path at files you created or otherwise trust.
    with open(model_path, 'rb') as pickle_file:
        model = pickle.load(pickle_file)

    # 2. Transform the raw values passed into the function to their encoded
    #    numerical values using the maps / dictionaries.
    Sex = gender_map[Sex]
    BP = bp_map[BP]
    Cholesterol = cholestol_map[Cholesterol]

    # 3. Make an individual prediction: a single row in, first result out.
    y_predict = model.predict([[Age, Sex, BP, Cholesterol, Na_to_K]])[0]

    # 4. Return the "raw" version of the prediction, i.e. the actual name of
    #    the drug rather than the numerical encoded version.
    return drug_map[y_predict]

In [3]:
# Silence UserWarning messages so that only the predictions are printed.
import warnings
warnings.filterwarnings(action="ignore", category=UserWarning)

# Two sample patients, each given as (Age, Sex, BP, Cholesterol, Na_to_K).
for patient in [(47, "F", "LOW", "HIGH", 14), (60, "F", "LOW", "HIGH", 20)]:
    print(predict_drug(*patient))

drugC
DrugY
