In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

**Task 1: Read the dataset and do data pre-processing**

In [2]:
df = pd.read_csv("/content/drug200.csv")
df.head()

Unnamed: 0,Age,Sex,BP,Cholesterol,Na_to_K,Drug
0,23,F,HIGH,HIGH,25.355,DrugY
1,47,M,LOW,HIGH,13.093,drugC
2,47,M,LOW,HIGH,10.114,drugC
3,28,F,NORMAL,HIGH,7.798,drugX
4,61,F,LOW,HIGH,18.043,DrugY


In [3]:
df.tail()

Unnamed: 0,Age,Sex,BP,Cholesterol,Na_to_K,Drug
195,56,F,LOW,HIGH,11.567,drugC
196,16,M,LOW,HIGH,12.006,drugC
197,52,M,NORMAL,HIGH,9.894,drugX
198,23,M,NORMAL,NORMAL,14.02,drugX
199,40,F,LOW,NORMAL,11.349,drugX


In [4]:
df.shape

(200, 6)

In [5]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 200 entries, 0 to 199
Data columns (total 6 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   Age          200 non-null    int64  
 1   Sex          200 non-null    object 
 2   BP           200 non-null    object 
 3   Cholesterol  200 non-null    object 
 4   Na_to_K      200 non-null    float64
 5   Drug         200 non-null    object 
dtypes: float64(1), int64(1), object(4)
memory usage: 9.5+ KB


In [6]:
df.isnull().any

<bound method NDFrame._add_numeric_operations.<locals>.any of        Age    Sex     BP  Cholesterol  Na_to_K   Drug
0    False  False  False        False    False  False
1    False  False  False        False    False  False
2    False  False  False        False    False  False
3    False  False  False        False    False  False
4    False  False  False        False    False  False
..     ...    ...    ...          ...      ...    ...
195  False  False  False        False    False  False
196  False  False  False        False    False  False
197  False  False  False        False    False  False
198  False  False  False        False    False  False
199  False  False  False        False    False  False

[200 rows x 6 columns]>

In [7]:
df.isnull().sum()

Age            0
Sex            0
BP             0
Cholesterol    0
Na_to_K        0
Drug           0
dtype: int64

In [8]:
df.describe()

Unnamed: 0,Age,Na_to_K
count,200.0,200.0
mean,44.315,16.084485
std,16.544315,7.223956
min,15.0,6.269
25%,31.0,10.4455
50%,45.0,13.9365
75%,58.0,19.38
max,74.0,38.247


**Task 1: Data Preprocessing**

In [9]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler

In [10]:
label_encoder = LabelEncoder()
df['Sex'] = label_encoder.fit_transform(df['Sex'])
df['BP'] = label_encoder.fit_transform(df['BP'])
df['Cholesterol'] = label_encoder.fit_transform(df['Cholesterol'])
df['Drug'] = label_encoder.fit_transform(df['Drug'])

In [11]:
print('Sex : ',df['Sex'],'\n')
print('BP : \n',df['BP'],'\n')
print('Cholesterol : \n',df['Cholesterol'],'\n')
print('Drug : \n',df['Drug'],'\n')

Sex :  0      0
1      1
2      1
3      0
4      0
      ..
195    0
196    1
197    1
198    1
199    0
Name: Sex, Length: 200, dtype: int64 

BP : 
 0      0
1      1
2      1
3      2
4      1
      ..
195    1
196    1
197    2
198    2
199    1
Name: BP, Length: 200, dtype: int64 

Cholesterol : 
 0      0
1      0
2      0
3      0
4      0
      ..
195    0
196    0
197    0
198    1
199    1
Name: Cholesterol, Length: 200, dtype: int64 

Drug : 
 0      0
1      3
2      3
3      4
4      0
      ..
195    3
196    3
197    4
198    4
199    4
Name: Drug, Length: 200, dtype: int64 



In [12]:
x=df.drop('Drug', axis=1)
y=df['Drug']

In [13]:
df

Unnamed: 0,Age,Sex,BP,Cholesterol,Na_to_K,Drug
0,23,0,0,0,25.355,0
1,47,1,1,0,13.093,3
2,47,1,1,0,10.114,3
3,28,0,2,0,7.798,4
4,61,0,1,0,18.043,0
...,...,...,...,...,...,...
195,56,0,1,0,11.567,3
196,16,1,1,0,12.006,3
197,52,1,2,0,9.894,4
198,23,1,2,1,14.020,4


In [14]:
x.shape,y.shape

((200, 5), (200,))

In [15]:
xtrain,xtest,ytrain,ytest = train_test_split(x,y,test_size=0.2, random_state=10)

In [16]:
xtrain.shape, xtest.shape

((160, 5), (40, 5))

In [17]:
ytrain.shape, ytest.shape

((160,), (40,))

**Task 2: Building an ANN Model**

In [18]:
#pip install keras
#ip install tensorflow

In [19]:
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

In [20]:
model = keras.Sequential()
model.add(Dense(64, activation="relu", input_shape =(xtrain.shape[1],)))
model.add(layers.Dense(32, activation="relu"))
model.add(layers.Dense(16, activation="relu"))
model.add(layers.Dense(8, activation="relu"))
model.add(layers.Dense(5, activation="softmax"))

In [21]:
x=df.iloc[:,0:5]
y=df.iloc[:,5:]
x, y

(     Age  Sex  BP  Cholesterol  Na_to_K
 0     23    0   0            0   25.355
 1     47    1   1            0   13.093
 2     47    1   1            0   10.114
 3     28    0   2            0    7.798
 4     61    0   1            0   18.043
 ..   ...  ...  ..          ...      ...
 195   56    0   1            0   11.567
 196   16    1   1            0   12.006
 197   52    1   2            0    9.894
 198   23    1   2            1   14.020
 199   40    0   1            1   11.349
 
 [200 rows x 5 columns],
      Drug
 0       0
 1       3
 2       3
 3       4
 4       0
 ..    ...
 195     3
 196     3
 197     4
 198     4
 199     4
 
 [200 rows x 1 columns])

In [22]:
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

In [23]:
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 64)                384       
                                                                 
 dense_1 (Dense)             (None, 32)                2080      
                                                                 
 dense_2 (Dense)             (None, 16)                528       
                                                                 
 dense_3 (Dense)             (None, 8)                 136       
                                                                 
 dense_4 (Dense)             (None, 5)                 45        
                                                                 
Total params: 3,173
Trainable params: 3,173
Non-trainable params: 0
_________________________________________________________________


In [24]:
#Train the model

In [25]:
ytrain_encoded = label_encoder.fit_transform(ytrain)
ytest_encoded = label_encoder.transform(ytest)

In [26]:
model.fit(xtrain,ytrain,epochs=20, batch_size=16, validation_data=(xtest,ytest))

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x7fa51108a4a0>

In [27]:
model

<keras.engine.sequential.Sequential at 0x7fa514155690>

In [28]:
ypred = model.predict(xtest)
ypred



array([[8.89063835e-01, 2.36197822e-02, 6.88375731e-04, 3.88187245e-02,
        4.78093810e-02],
       [3.66582990e-01, 1.04136460e-01, 2.85621043e-02, 1.02139585e-01,
        3.98578823e-01],
       [4.14020061e-01, 1.71278939e-01, 7.08532427e-03, 7.60913566e-02,
        3.31524312e-01],
       [6.14862800e-01, 4.27593701e-02, 4.39838925e-03, 6.74772933e-02,
        2.70502150e-01],
       [8.52034330e-01, 2.43673362e-02, 1.25224091e-04, 2.56112535e-02,
        9.78617445e-02],
       [9.94101107e-01, 4.97012108e-04, 9.45039574e-06, 2.98709376e-03,
        2.40527606e-03],
       [2.23135829e-01, 1.66830063e-01, 3.31607386e-02, 8.19960758e-02,
        4.94877249e-01],
       [7.18844593e-01, 4.85854372e-02, 3.66329867e-03, 7.19396621e-02,
        1.56967029e-01],
       [9.96804178e-01, 2.29176949e-04, 1.02249169e-05, 1.53721101e-03,
        1.41918589e-03],
       [1.36978224e-01, 1.63420767e-01, 1.94977328e-01, 1.21692270e-01,
        3.82931501e-01],
       [9.99916375e-01, 2.5160

In [29]:
loss, accuracy = model.evaluate(xtest, ytest)
print(f"Test Loss: {loss:.4f}")
print(f"Test Accuracy: {accuracy:.4f}")

Test Loss: 1.1678
Test Accuracy: 0.5000


**Task 3: Test the model with random data**

In [31]:
import numpy as np
random_sample = np.array([[40, 0, 1, 1, 15]])
predictions = model.predict(random_sample)
predicted_class = label_encoder.inverse_transform(np.argmax(predictions, axis=1))
print("Predicted Drug Class:", predicted_class)

Predicted Drug Class: [0]


In [32]:
ypred = model.predict([[40, 0, 1, 1, 15]])
ypred



array([[0.46453828, 0.11864559, 0.00900595, 0.08394153, 0.32386857]],
      dtype=float32)

In [33]:
ypred= np.argmax(ypred)

In [34]:
ypred

0

In [35]:
comp = pd.DataFrame(ytest)
comp.columns = ['Actual Value']
comp['Predicted values'] = ypred
comp

Unnamed: 0,Actual Value,Predicted values
59,0,0
5,4,0
20,0,0
198,4,0
52,0,0
19,0,0
162,4,0
55,3,0
69,0,0
2,3,0
