# Build an ANN Model for Drug Classification

This project aims to analyze the relationship between various medical parameters and drug effectiveness. The dataset consists of patient information,
including age, sex, blood pressure level (BP), cholesterol level, and sodium-to-potassium ratio (Na_to_K), together with the drug type, which serves as the class label. The goal is to
develop a model that can accurately predict the drug class for a given patient based on these features.

Dataset Link: https://www.kaggle.com/datasets/prathamtripathi/drug-classification

### Task 1: Read the dataset and do data pre-processing

### Task 2: Build the ANN model with (input layer, min 3 hidden layers & output layer)

### Task 3: Test the model with random data


#Name: Shruti Goyal
#Register Number: 20MIC0059
#Mail ID: shruti.goyal2020@vitstudent.ac.in

In [1]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

In [2]:
# Load the drug-classification CSV into a DataFrame and preview its first five rows.
df = pd.read_csv(filepath_or_buffer='/content/drug200.csv')
df.head(n=5)

Unnamed: 0,Age,Sex,BP,Cholesterol,Na_to_K,Drug
0,23,F,HIGH,HIGH,25.355,DrugY
1,47,M,LOW,HIGH,13.093,drugC
2,47,M,LOW,HIGH,10.114,drugC
3,28,F,NORMAL,HIGH,7.798,drugX
4,61,F,LOW,HIGH,18.043,DrugY


In [3]:
# Count the missing values in each column (isna is the same check as isnull).
df.isna().sum()

Age            0
Sex            0
BP             0
Cholesterol    0
Na_to_K        0
Drug           0
dtype: int64

In [4]:
# Distinct class labels found in the Drug column.
df["Drug"].unique()

array(['DrugY', 'drugC', 'drugX', 'drugA', 'drugB'], dtype=object)

In [5]:
# Distinct values found in the Sex column.
df["Sex"].unique()

array(['F', 'M'], dtype=object)

In [6]:
# Distinct values found in the BP (blood pressure) column.
df["BP"].unique()

array(['HIGH', 'LOW', 'NORMAL'], dtype=object)

In [7]:
# Distinct values found in the Cholesterol column.
df["Cholesterol"].unique()

array(['HIGH', 'NORMAL'], dtype=object)

In [8]:
# Number of rows per drug class, most frequent first.
df["Drug"].value_counts()

DrugY    91
drugX    54
drugA    23
drugC    16
drugB    16
Name: Drug, dtype: int64

In [9]:
# Feature matrix: the first five columns (Age, Sex, BP, Cholesterol, Na_to_K).
# The remaining column, Drug, is the target.

x = df.iloc[:, :5]
x.head(n=5)

Unnamed: 0,Age,Sex,BP,Cholesterol,Na_to_K
0,23,F,HIGH,HIGH,25.355
1,47,M,LOW,HIGH,13.093
2,47,M,LOW,HIGH,10.114
3,28,F,NORMAL,HIGH,7.798
4,61,F,LOW,HIGH,18.043


In [10]:
# One-hot encode the target: one indicator column per drug class.
# The dtype is pinned to uint8 so the indicators are 0/1 integers on every
# pandas version; since pandas 2.0 the default would be bool (True/False).
y = pd.get_dummies(df.Drug, dtype=np.uint8)
y.head()

Unnamed: 0,DrugY,drugA,drugB,drugC,drugX
0,1,0,0,0,0
1,0,0,0,1,0
2,0,0,0,1,0
3,0,0,0,0,1
4,1,0,0,0,0


In [11]:
# Hold out 25% of the rows as a test set; random_state makes the shuffle repeatable.

xtrain, xtest, ytrain, ytest = train_test_split(
    x,
    y,
    test_size=0.25,
    random_state=11,
)

In [12]:
# Shapes of the four splits, in the order xtrain, xtest, ytrain, ytest.
tuple(split.shape for split in (xtrain, xtest, ytrain, ytest))

((150, 5), (50, 5), (150, 5), (50, 5))

In [14]:
# Encode the categorical BP column as integer codes with a label encoder.
# The encoder is fitted on the training split only and reused on the test split.
le = LabelEncoder()
xtrain["BP"] = le.fit_transform(xtrain["BP"])
xtest["BP"] = le.transform(xtest["BP"])
xtrain["BP"].head(), xtest["BP"].head()

(42     2
 154    1
 123    2
 194    0
 150    0
 Name: BP, dtype: int64,
 174    0
 33     0
 173    1
 186    0
 22     1
 Name: BP, dtype: int64)

In [15]:
# Encode the categorical Cholesterol column as integer codes.
# The encoder is deliberately not named `len`: that would shadow Python's
# built-in len() for every cell that runs afterwards.
le_chol = LabelEncoder()
xtrain.Cholesterol = le_chol.fit_transform(xtrain.Cholesterol)
xtest.Cholesterol = le_chol.transform(xtest.Cholesterol)
xtrain.Cholesterol.head(), xtest.Cholesterol.head()

(42     1
 154    1
 123    0
 194    0
 150    1
 Name: Cholesterol, dtype: int64,
 174    1
 33     1
 173    1
 186    0
 22     1
 Name: Cholesterol, dtype: int64)

In [16]:
# Encode the categorical Sex column as integer codes.
# As with the other columns, fit on the training split and reuse on the test split.
leb = LabelEncoder()
xtrain["Sex"] = leb.fit_transform(xtrain["Sex"])
xtest["Sex"] = leb.transform(xtest["Sex"])
xtrain["Sex"].head(), xtest["Sex"].head()

(42     1
 154    1
 123    0
 194    0
 150    1
 Name: Sex, dtype: int64,
 174    1
 33     0
 173    0
 186    1
 22     1
 Name: Sex, dtype: int64)

In [17]:
# Preview the first eight encoded training rows.
xtrain.head(n=8)

Unnamed: 0,Age,Sex,BP,Cholesterol,Na_to_K
42,50,1,2,1,15.79
154,37,1,1,1,16.724
123,36,0,2,0,16.753
194,46,0,0,0,34.686
150,49,1,0,1,8.7
192,72,1,1,0,16.31
112,35,1,1,1,9.17
156,31,1,0,1,11.227


In [19]:
# Preview the first eight encoded test rows.
xtest.head(n=8)

Unnamed: 0,Age,Sex,BP,Cholesterol,Na_to_K
174,42,1,0,1,12.766
33,65,0,0,1,31.876
173,41,0,1,1,18.739
186,70,1,0,0,9.849
22,47,1,1,1,30.568
53,24,0,0,1,18.457
134,42,0,0,0,21.036
56,65,1,0,1,11.34


In [22]:
# Preview the first eight one-hot training targets.
ytrain.head(n=8)

Unnamed: 0,DrugY,drugA,drugB,drugC,drugX
42,1,0,0,0,0
154,1,0,0,0,0
123,1,0,0,0,0
194,1,0,0,0,0
150,0,1,0,0,0
192,1,0,0,0,0
112,0,0,0,0,1
156,0,1,0,0,0


In [23]:
# Preview the first eight one-hot test targets.
ytest.head(n=8)

Unnamed: 0,DrugY,drugA,drugB,drugC,drugX
174,0,1,0,0,0
33,1,0,0,0,0
173,1,0,0,0,0
186,0,0,1,0,0
22,1,0,0,0,0
53,1,0,0,0,0
134,1,0,0,0,0
56,0,0,1,0,0


In [26]:
#ANN Model

# Imported here so this cell brings everything it newly needs into scope.
from tensorflow.keras.layers import Input

# Fully connected classifier: 5 input features -> 5 -> 26 -> 18 -> 10 -> 5 classes.
model = Sequential([
    # Explicit input declaration, one value per feature column
    # (Age, Sex, BP, Cholesterol, Na_to_K). With it the model is built
    # immediately instead of on the first call to fit/predict.
    Input(shape=(5,)),

    # First dense layer, fed directly by the five input features.
    Dense(5, activation='relu'),

    # Three further hidden layers.
    Dense(26, activation='relu'),
    Dense(18, activation='relu'),
    Dense(10, activation='relu'),

    # Output layer: softmax over the 5 drug classes
    # (DrugY, drugA, drugB, drugC, drugX).
    Dense(5, activation='softmax'),
])


In [27]:
# Configure training: Adam optimizer, categorical cross-entropy loss for the
# one-hot targets, and accuracy reported as the metric.
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [28]:
# Train for 18 epochs in mini-batches of 5 rows; the test split is passed as
# validation data so its loss and accuracy are reported after every epoch.
model.fit(
    x=xtrain,
    y=ytrain,
    batch_size=5,
    epochs=18,
    validation_data=(xtest, ytest),
)

Epoch 1/18
Epoch 2/18
Epoch 3/18
Epoch 4/18
Epoch 5/18
Epoch 6/18
Epoch 7/18
Epoch 8/18
Epoch 9/18
Epoch 10/18
Epoch 11/18
Epoch 12/18
Epoch 13/18
Epoch 14/18
Epoch 15/18
Epoch 16/18
Epoch 17/18
Epoch 18/18


<keras.callbacks.History at 0x7f32c2ff0550>

In [29]:
# Test the model on one hand-written patient record.
# Feature order matches the training columns: Age, Sex, BP, Cholesterol, Na_to_K,
# with the categorical fields given as their label-encoded integers.
# The record is wrapped in a 2-D NumPy array (1 row x 5 features) because a
# nested Python list is ambiguous to predict(): lists denote multi-input models.
# The result holds one probability per class: DrugY, drugA, drugB, drugC, drugX.
model.predict(np.array([[23, 1, 0, 1, 48.32]]))



array([[9.5245594e-01, 1.1689170e-02, 3.6406794e-03, 5.4649607e-04,
        3.1667668e-02]], dtype=float32)

In [30]:
# Test the model on a second hand-written patient record.
# Feature order matches the training columns: Age, Sex, BP, Cholesterol, Na_to_K,
# with the categorical fields given as their label-encoded integers.
# The record is wrapped in a 2-D NumPy array (1 row x 5 features) because a
# nested Python list is ambiguous to predict(): lists denote multi-input models.
# The result holds one probability per class: DrugY, drugA, drugB, drugC, drugX.
model.predict(np.array([[35, 0, 2, 0, 12.32]]))



array([[0.4620252 , 0.15716761, 0.08230219, 0.07542893, 0.2230761 ]],
      dtype=float32)