In [1]:
#Load Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
#Read the drug dataset from the CSV file in the working directory and display it
df = pd.read_csv('./drugdataset.csv')
df

Unnamed: 0,Age,Sex,BP,Cholesterol,Na_to_K,Drug
0,23,1,2,1,25.355,drugY
1,47,0,1,1,13.093,drugC
2,47,0,1,1,10.114,drugC
3,28,1,0,1,7.798,drugX
4,61,1,1,1,18.043,drugY
...,...,...,...,...,...,...
195,56,1,1,1,11.567,drugC
196,16,0,1,1,12.006,drugC
197,52,0,0,1,9.894,drugX
198,23,0,0,0,14.020,drugX


In [3]:
#Check the number of rows and columns; .shape is a (n_rows, n_columns) tuple
df.shape

(200, 6)

In [4]:
#Count how many rows carry each Drug label (class balance check)
df['Drug'].value_counts()

drugY    91
drugX    54
drugA    23
drugB    16
drugC    16
Name: Drug, dtype: int64

In [5]:
#Check columns in dataset (returns the Index of column labels)
df.columns

Index(['Age', 'Sex', 'BP', 'Cholesterol', 'Na_to_K', 'Drug'], dtype='object')

In [6]:
#Check the data types of each column (one dtype per column, indexed by column name)
df.dtypes

Age              int64
Sex              int64
BP               int64
Cholesterol      int64
Na_to_K        float64
Drug            object
dtype: object

In [7]:
#List the distinct classes (i.e. Drug labels) in order of first appearance
df['Drug'].unique()

array(['drugY', 'drugC', 'drugX', 'drugA', 'drugB'], dtype=object)

In [8]:
#Checking for missing values: isna() flags NaN/None cells, sum() counts them per column
df.isna().sum()

Age            0
Sex            0
BP             0
Cholesterol    0
Na_to_K        0
Drug           0
dtype: int64

In [9]:
#Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns
df.describe()

Unnamed: 0,Age,Sex,BP,Cholesterol,Na_to_K
count,200.0,200.0,200.0,200.0,200.0
mean,44.315,0.48,1.09,0.515,16.084485
std,16.544315,0.500854,0.821752,0.501029,7.223956
min,15.0,0.0,0.0,0.0,6.269
25%,31.0,0.0,0.0,0.0,10.4455
50%,45.0,0.0,1.0,1.0,13.9365
75%,58.0,1.0,2.0,1.0,19.38
max,74.0,1.0,2.0,1.0,38.247


In [10]:
#Tools for splitting and scaling
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

#Features are every column except the target; the target is the Drug label
X = df.drop(columns=['Drug']).to_numpy()
y = df['Drug'].to_numpy()

#Hold out 20% for testing; stratify on y so both parts keep the class proportions
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=100,
)

#Learn the scaling parameters from the training part only, then apply them to both parts
sc = StandardScaler()
x_train2 = sc.fit_transform(X_train)
x_test2 = sc.transform(X_test)

In [11]:
#Script for Logistic Regression
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.linear_model import LogisticRegression

#Estimator label and model
name = 'Logistic Regression'
method = LogisticRegression(solver='liblinear', random_state=100)

#Train on the scaled training data, then predict the scaled test data
method.fit(x_train2, y_train)
predict = method.predict(x_test2)

#Confusion matrix (rows = true class, columns = predicted class), then per-class metrics
print(confusion_matrix(y_test, predict))
print(classification_report(y_test, predict))

[[ 5  0  0  0  0]
 [ 0  3  0  0  0]
 [ 0  0  2  0  1]
 [ 0  0  0 10  1]
 [ 0  0  0  1 17]]
              precision    recall  f1-score   support

       drugA       1.00      1.00      1.00         5
       drugB       1.00      1.00      1.00         3
       drugC       1.00      0.67      0.80         3
       drugX       0.91      0.91      0.91        11
       drugY       0.89      0.94      0.92        18

    accuracy                           0.93        40
   macro avg       0.96      0.90      0.93        40
weighted avg       0.93      0.93      0.92        40



In [12]:
#Script for Neural Network
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.neural_network import MLPClassifier

#Three hidden layers with 5, 4 and 5 units respectively
mlp = MLPClassifier(
    hidden_layer_sizes=(5, 4, 5),
    activation='relu',
    solver='adam',
    max_iter=10000,
    random_state=100,
)
mlp.fit(x_train2, y_train)
predictions = mlp.predict(x_test2)

#Evaluation Report and Matrix
#target_names are listed in sorted label order, the order the report uses for its rows
target_names = ['drugA', 'drugB', 'drugC', 'drugX', 'drugY']
print(confusion_matrix(y_test, predictions))
print(classification_report(y_test, predictions, target_names=target_names))

[[ 5  0  0  0  0]
 [ 0  2  0  0  1]
 [ 0  0  3  0  0]
 [ 0  0  0 11  0]
 [ 0  0  0  1 17]]
              precision    recall  f1-score   support

       drugA       1.00      1.00      1.00         5
       drugB       1.00      0.67      0.80         3
       drugC       1.00      1.00      1.00         3
       drugX       0.92      1.00      0.96        11
       drugY       0.94      0.94      0.94        18

    accuracy                           0.95        40
   macro avg       0.97      0.92      0.94        40
weighted avg       0.95      0.95      0.95        40



In [13]:
#Script for Decision Tree
#confusion_matrix and classification_report come from the sklearn.metrics import in an earlier cell
from sklearn.tree import DecisionTreeClassifier

#Estimator label and model
name = 'DT'
method = DecisionTreeClassifier(random_state=100)

#Train on the scaled training data, then predict the scaled test data
method.fit(x_train2, y_train)
predict = method.predict(x_test2)

#Row labels for the report, in sorted label order
target_names = ['drugA', 'drugB', 'drugC', 'drugX', 'drugY']

print('\nEstimator: {}'.format(name))
print(confusion_matrix(y_test, predict))
print(classification_report(y_test, predict, target_names=target_names))


Estimator: DT
[[ 4  1  0  0  0]
 [ 0  3  0  0  0]
 [ 0  0  3  0  0]
 [ 0  0  0 10  1]
 [ 0  0  0  0 18]]
              precision    recall  f1-score   support

       drugA       1.00      0.80      0.89         5
       drugB       0.75      1.00      0.86         3
       drugC       1.00      1.00      1.00         3
       drugX       1.00      0.91      0.95        11
       drugY       0.95      1.00      0.97        18

    accuracy                           0.95        40
   macro avg       0.94      0.94      0.93        40
weighted avg       0.96      0.95      0.95        40



In [14]:
#Script for Neural Network (single hidden layer of 100 neurons)
from sklearn.neural_network import MLPClassifier
#hidden_layer_sizes takes one entry per hidden layer; (100,) is a one-element tuple,
#whereas a bare (100) is just the int 100 and silently stops being "a list of layers"
mlp = MLPClassifier(hidden_layer_sizes=(100,),
                    activation='relu',solver='adam',
                    max_iter=10000,random_state=100)
#Train on the scaled training data, then predict the scaled test data
mlp.fit(x_train2, y_train)
predictions = mlp.predict(x_test2)

#Evaluation Report and Matrix
from sklearn.metrics import classification_report, confusion_matrix
#target_names are listed in sorted label order, the order the report uses for its rows
target_names=['drugA', 'drugB','drugC','drugX','drugY']
print(confusion_matrix(y_test,predictions))
print(classification_report(y_test,predictions,target_names=target_names))

[[ 5  0  0  0  0]
 [ 0  3  0  0  0]
 [ 0  0  3  0  0]
 [ 0  0  0 11  0]
 [ 0  0  0  1 17]]
              precision    recall  f1-score   support

       drugA       1.00      1.00      1.00         5
       drugB       1.00      1.00      1.00         3
       drugC       1.00      1.00      1.00         3
       drugX       0.92      1.00      0.96        11
       drugY       1.00      0.94      0.97        18

    accuracy                           0.97        40
   macro avg       0.98      0.99      0.99        40
weighted avg       0.98      0.97      0.98        40

