# Drug Classification


In [1]:
# Import the essential libraries
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

df = pd.read_csv('/content/drive/MyDrive/Drug_Dataset.csv')
df.head()

Unnamed: 0,Age,Sex,BP,Cholesterol,Na_to_K,Drug
0,23,F,HIGH,HIGH,25.355,DrugY
1,47,M,LOW,HIGH,13.093,drugC
2,47,M,LOW,HIGH,10.114,drugC
3,28,F,NORMAL,HIGH,7.798,drugX
4,61,F,LOW,HIGH,18.043,DrugY


In [2]:
# Checking for null values
df.isna().sum()

Age            0
Sex            0
BP             0
Cholesterol    0
Na_to_K        0
Drug           0
dtype: int64

In [3]:
# Checking for unique categories
df['Drug'].unique()

array(['DrugY', 'drugC', 'drugX', 'drugA', 'drugB'], dtype=object)

In [4]:
df['Sex'].unique()

array(['F', 'M'], dtype=object)

In [5]:
df['BP'].unique()

array(['HIGH', 'LOW', 'NORMAL'], dtype=object)

In [6]:
df['Cholesterol'].unique()

array(['HIGH', 'NORMAL'], dtype=object)

In [7]:
# Checking the shape of dataset
df.shape

(200, 6)

In [8]:
# Splitting the dataset into dependent and independent 
x = df.iloc[:, :5].values
x

array([[23, 'F', 'HIGH', 'HIGH', 25.355],
       [47, 'M', 'LOW', 'HIGH', 13.093],
       [47, 'M', 'LOW', 'HIGH', 10.114],
       [28, 'F', 'NORMAL', 'HIGH', 7.798],
       [61, 'F', 'LOW', 'HIGH', 18.043],
       [22, 'F', 'NORMAL', 'HIGH', 8.607],
       [49, 'F', 'NORMAL', 'HIGH', 16.275],
       [41, 'M', 'LOW', 'HIGH', 11.037],
       [60, 'M', 'NORMAL', 'HIGH', 15.171],
       [43, 'M', 'LOW', 'NORMAL', 19.368],
       [47, 'F', 'LOW', 'HIGH', 11.767],
       [34, 'F', 'HIGH', 'NORMAL', 19.199],
       [43, 'M', 'LOW', 'HIGH', 15.376],
       [74, 'F', 'LOW', 'HIGH', 20.942],
       [50, 'F', 'NORMAL', 'HIGH', 12.703],
       [16, 'F', 'HIGH', 'NORMAL', 15.516],
       [69, 'M', 'LOW', 'NORMAL', 11.455],
       [43, 'M', 'HIGH', 'HIGH', 13.972],
       [23, 'M', 'LOW', 'HIGH', 7.298],
       [32, 'F', 'HIGH', 'NORMAL', 25.974],
       [57, 'M', 'LOW', 'NORMAL', 19.128],
       [63, 'M', 'NORMAL', 'HIGH', 25.917],
       [47, 'M', 'LOW', 'NORMAL', 30.568],
       [48, 'F', 'LOW',

In [9]:
object_cols = df.select_dtypes(include=['object']).columns
df_encoded = pd.get_dummies(df, columns=object_cols)

x = df_encoded
x = df_encoded.values
x

array([[23.   , 25.355,  1.   , ...,  0.   ,  0.   ,  0.   ],
       [47.   , 13.093,  0.   , ...,  0.   ,  1.   ,  0.   ],
       [47.   , 10.114,  0.   , ...,  0.   ,  1.   ,  0.   ],
       ...,
       [52.   ,  9.894,  0.   , ...,  0.   ,  0.   ,  1.   ],
       [23.   , 14.02 ,  0.   , ...,  0.   ,  0.   ,  1.   ],
       [40.   , 11.349,  1.   , ...,  0.   ,  0.   ,  1.   ]])

In [10]:
y = pd.get_dummies(df.iloc[:, 5:]).values
y

array([[1, 0, 0, 0, 0],
       [0, 0, 0, 1, 0],
       [0, 0, 0, 1, 0],
       [0, 0, 0, 0, 1],
       [1, 0, 0, 0, 0],
       [0, 0, 0, 0, 1],
       [1, 0, 0, 0, 0],
       [0, 0, 0, 1, 0],
       [1, 0, 0, 0, 0],
       [1, 0, 0, 0, 0],
       [0, 0, 0, 1, 0],
       [1, 0, 0, 0, 0],
       [1, 0, 0, 0, 0],
       [1, 0, 0, 0, 0],
       [0, 0, 0, 0, 1],
       [1, 0, 0, 0, 0],
       [0, 0, 0, 0, 1],
       [0, 1, 0, 0, 0],
       [0, 0, 0, 1, 0],
       [1, 0, 0, 0, 0],
       [1, 0, 0, 0, 0],
       [1, 0, 0, 0, 0],
       [1, 0, 0, 0, 0],
       [1, 0, 0, 0, 0],
       [1, 0, 0, 0, 0],
       [1, 0, 0, 0, 0],
       [1, 0, 0, 0, 0],
       [0, 0, 0, 0, 1],
       [1, 0, 0, 0, 0],
       [1, 0, 0, 0, 0],
       [0, 0, 0, 0, 1],
       [0, 0, 1, 0, 0],
       [0, 0, 0, 0, 1],
       [1, 0, 0, 0, 0],
       [0, 0, 0, 0, 1],
       [0, 0, 0, 0, 1],
       [0, 1, 0, 0, 0],
       [0, 0, 0, 0, 1],
       [0, 0, 0, 0, 1],
       [0, 0, 0, 0, 1],
       [1, 0, 0, 0, 0],
       [0, 0, 1,

In [11]:
from sklearn.model_selection import train_test_split


# Split the data into training and testing sets
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=21)

In [12]:
x_train.shape, x_test.shape, y_train.shape, y_test.shape

((160, 14), (40, 14), (160, 5), (40, 5))

In [13]:
from keras.models import Sequential
from keras.layers import Dense

model = Sequential()
model.add(Dense(14, activation='relu')) # Input layer

model.add(Dense(64, activation='relu')) # Hidden layer 1
model.add(Dense(32, activation='relu'))  # Hidden layer 2
model.add(Dense(16, activation='relu'))  # Hidden layer 3


model.add(Dense(5, activation='softmax'))  # Output layer

model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])




In [14]:
y_train

array([[0, 1, 0, 0, 0],
       [0, 0, 0, 0, 1],
       [0, 0, 0, 0, 1],
       [0, 0, 0, 0, 1],
       [0, 0, 0, 0, 1],
       [0, 0, 1, 0, 0],
       [0, 0, 0, 0, 1],
       [0, 0, 0, 1, 0],
       [1, 0, 0, 0, 0],
       [0, 0, 0, 0, 1],
       [0, 1, 0, 0, 0],
       [0, 1, 0, 0, 0],
       [0, 0, 1, 0, 0],
       [1, 0, 0, 0, 0],
       [0, 0, 0, 0, 1],
       [1, 0, 0, 0, 0],
       [1, 0, 0, 0, 0],
       [0, 0, 0, 0, 1],
       [1, 0, 0, 0, 0],
       [1, 0, 0, 0, 0],
       [0, 1, 0, 0, 0],
       [0, 0, 0, 0, 1],
       [1, 0, 0, 0, 0],
       [1, 0, 0, 0, 0],
       [0, 0, 0, 0, 1],
       [0, 0, 0, 0, 1],
       [0, 0, 0, 1, 0],
       [0, 0, 0, 0, 1],
       [0, 1, 0, 0, 0],
       [1, 0, 0, 0, 0],
       [0, 1, 0, 0, 0],
       [1, 0, 0, 0, 0],
       [1, 0, 0, 0, 0],
       [1, 0, 0, 0, 0],
       [1, 0, 0, 0, 0],
       [0, 0, 0, 0, 1],
       [1, 0, 0, 0, 0],
       [0, 0, 0, 0, 1],
       [0, 0, 0, 0, 1],
       [0, 0, 0, 0, 1],
       [1, 0, 0, 0, 0],
       [1, 0, 0,

In [15]:
model.fit(x_train, y_train, epochs=20, batch_size=4, validation_data=(x_test, y_test))


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x7f310c141bd0>

In [16]:
model.predict(x_test)



array([[6.99081495e-02, 4.69126523e-01, 5.85123152e-02, 4.80077602e-02,
        3.54445249e-01],
       [9.87041175e-01, 1.14948442e-02, 1.00746518e-03, 2.19529647e-05,
        4.34485759e-04],
       [4.66489047e-01, 4.21116233e-01, 2.43884027e-02, 5.29231550e-03,
        8.27139989e-02],
       [9.40604031e-01, 5.47341220e-02, 2.79345410e-03, 5.77371575e-05,
        1.81076513e-03],
       [8.38208780e-06, 3.38763185e-02, 3.77342745e-04, 2.51034461e-02,
        9.40634489e-01],
       [8.66815209e-01, 1.24665417e-01, 2.90401676e-03, 1.33875859e-04,
        5.48151601e-03],
       [9.98675227e-01, 1.23588648e-03, 7.88156831e-05, 1.88967206e-07,
        9.92087644e-06],
       [9.74478364e-01, 2.27895007e-02, 2.06223968e-03, 3.42728745e-05,
        6.35667529e-04],
       [5.89508621e-04, 1.19149171e-01, 9.34416836e-04, 2.83525065e-02,
        8.50974381e-01],
       [9.65645671e-01, 3.01763341e-02, 2.48155883e-03, 9.86113082e-05,
        1.59773184e-03],
       [9.98253286e-01, 1.6368

In [17]:
classes = ['DrugY', 'drugC', 'drugX', 'drugA', 'drugB']

y_pred = model.predict(x_test)


# Convert y_test to a 1D array
y_test = np.array([np.argmax(val) for val in y_test])
y_test

y_pred = np.array([np.argmax(val) for val in y_pred])
y_pred

y_test.shape, y_pred.shape



((40,), (40,))

In [18]:
y_pred

array([1, 0, 0, 0, 4, 0, 0, 0, 4, 0, 0, 0, 4, 4, 0, 4, 0, 0, 1, 0, 2, 0,
       0, 4, 1, 0, 0, 4, 4, 0, 0, 0, 1, 0, 4, 0, 2, 0, 0, 2])

In [19]:
y_test

array([1, 0, 1, 0, 4, 0, 0, 0, 4, 0, 0, 0, 4, 4, 0, 4, 2, 0, 1, 0, 2, 0,
       0, 4, 1, 0, 0, 4, 3, 0, 0, 0, 1, 0, 3, 0, 2, 0, 2, 2])

In [20]:
from sklearn.metrics import accuracy_score

accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy * 100}")

Accuracy: 87.5
