## Task :  Build an ANN model for Drug classification

Import libraries


In [1]:
# importing all the necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

Load Dataset

In [3]:
# Load drug200.csv (path relative to the working directory) and display the frame
drugs_df = pd.read_csv(filepath_or_buffer='drug200.csv')
drugs_df

Unnamed: 0,Age,Sex,BP,Cholesterol,Na_to_K,Drug
0,23,F,HIGH,HIGH,25.355,DrugY
1,47,M,LOW,HIGH,13.093,drugC
2,47,M,LOW,HIGH,10.114,drugC
3,28,F,NORMAL,HIGH,7.798,drugX
4,61,F,LOW,HIGH,18.043,DrugY
...,...,...,...,...,...,...
195,56,F,LOW,HIGH,11.567,drugC
196,16,M,LOW,HIGH,12.006,drugC
197,52,M,NORMAL,HIGH,9.894,drugX
198,23,M,NORMAL,NORMAL,14.020,drugX


Display Head

In [None]:
# first five rows of the frame
drugs_df.head(n=5)

Display Tail

In [None]:
# last five rows of the frame
drugs_df.tail(n=5)

Size of dataset

In [None]:
# size of the dataset as a (rows, columns) tuple
drugs_df.shape

Checking Data Types of Dataset

In [None]:
# data type of each column in the dataset
drugs_df.dtypes

In [None]:
# distinct labels in the Drug column, listed in ascending order
drugs_df["Drug"].sort_values().unique()
# five types of drugs

In [None]:
# Replace 'DrugY' with 'drugY' so the label casing matches the other classes.
# The result is assigned back to the column: replace(inplace=True) on a column
# selection acts on an intermediate object and is not guaranteed to update
# drugs_df (it is a no-op under pandas Copy-on-Write).
drugs_df["Drug"] = drugs_df["Drug"].replace({'DrugY':'drugY'})

In [None]:
# summary statistics of the dataset (output of DataFrame.describe)
drugs_df.describe()

Checking missing Values

In [None]:
# number of missing values per column
drugs_df.isna().sum()

In [None]:
# Checking for duplicates: count the fully duplicated rows (0 means none),
# rather than showing one True/False flag per row
drugs_df.duplicated().sum()

In [None]:
# column labels of the dataset
drugs_df.columns

In [None]:
# number of rows per drug label
drugs_df["Drug"].value_counts()

Checking Frequency

In [None]:
# bar chart with one bar per drug label, height = number of rows
sns.countplot(data=drugs_df, x="Drug")

Visualization

In [None]:
# 2x2 grid of scatter plots: one feature per panel on the x axis, drug on the y axis.
# The fourth panel previously repeated BP; it now shows Na_to_K so every panel
# carries a different feature, and each panel is labelled to make that visible.
plt.figure(figsize=(10,6))

for panel, feature in enumerate(["Age", "Cholesterol", "BP", "Na_to_K"], start=1):
    plt.subplot(2,2,panel)
    plt.scatter(y=drugs_df["Drug"],x=drugs_df[feature])
    plt.xlabel(feature)

In [None]:
# boolean mask of the rows labelled drugY, then the matching rows
drug = drugs_df["Drug"].eq("drugY")
drugs_df[drug]

Scatter Plot

In [None]:
# drug label (x axis) against cholesterol level (y axis)
plt.scatter(drugs_df["Drug"], drugs_df["Cholesterol"])

Box Plot

In [None]:
# Box plot of the Na_to_K ratio. The series is passed explicitly as x=: the
# meaning of the first positional argument differs between seaborn releases.
sns.boxplot(x=drugs_df["Na_to_K"])

In [None]:
# print how often each category occurs in the three categorical feature columns
for column_name in ("Sex", "BP", "Cholesterol"):
    print(drugs_df[column_name].value_counts())

In [None]:
# Na_to_K ratio (x axis) against age (y axis)
plt.scatter(drugs_df["Na_to_K"], drugs_df["Age"])

In [None]:
# all Na_to_K values in ascending order, as a NumPy array
drugs_df["Na_to_K"].sort_values().to_numpy()


In [None]:
# rows whose Na_to_K ratio is 31 or higher
outliers = drugs_df["Na_to_K"].ge(31)
drugs_df[outliers]


In [None]:
# Remove the rows with these index labels.
# NOTE(review): presumably these are the Na_to_K >= 31 rows — confirm.
# Re-running this cell raises KeyError once the labels are gone.
drugs_df = drugs_df.drop(index=[131, 96, 98, 184, 188, 194, 24, 128, 65, 23])


In [None]:
# Box plot of the Na_to_K ratio after the rows were dropped. The series is
# passed explicitly as x=: the meaning of the first positional argument
# differs between seaborn releases.
sns.boxplot(x=drugs_df["Na_to_K"])

In [None]:
# Rows with Na_to_K >= 30 AND Age == 20.
# Each comparison needs its own parentheses: `&` binds tighter than `>=`, so
# the unparenthesised form evaluated `30 & (Age == 20)` first and then compared
# Na_to_K against that result instead of combining the two conditions.
data = (drugs_df["Na_to_K"] >= 30) & (drugs_df["Age"] == 20)
drugs_df.loc[data]

In [None]:
# print min / mean / max of Na_to_K, then min / max of Age, one value per line
ratio_column = drugs_df["Na_to_K"]
age_column = drugs_df["Age"]
for summary_value in (
    ratio_column.min(),
    ratio_column.mean(),
    ratio_column.max(),
    age_column.min(),
    age_column.max(),
):
    print(summary_value)

In [None]:
# Blood-pressure category (x axis) against Na_to_K ratio (y axis).
# No print() wrapper: it only printed the artist object returned by scatter.
plt.scatter(x=drugs_df["BP"],y=drugs_df["Na_to_K"])


In [None]:
# number of rows per value of the Sex column
drugs_df["Sex"].value_counts()

In [None]:
# Count plot of the Sex column. The series is passed explicitly as x=: the
# meaning of the first positional argument differs between seaborn releases.
sns.countplot(x=drugs_df["Sex"])

In [None]:
# encoding the categorical variables: Sex and Drug are one-hot encoded, BP and Cholesterol are mapped to ordinal integer codes
from sklearn.preprocessing import OneHotEncoder ,LabelEncoder


In [None]:
# one-hot encode Sex and Drug; the new columns are named "<column>_<value>"
drugs_df = pd.get_dummies(data=drugs_df, columns=["Sex", "Drug"])

In [None]:
# display the frame after one-hot encoding Sex and Drug
drugs_df

In [None]:
# Map the ordered categories to integers (LOW=1, NORMAL=2, HIGH=3).
# Both columns are assigned back explicitly: replace(inplace=True) on a column
# selection acts on an intermediate object and is not guaranteed to update
# drugs_df (it is a no-op under pandas Copy-on-Write), which would leave
# Cholesterol as strings.
level_codes = {'LOW':1,'NORMAL':2,'HIGH':3}
drugs_df["BP"]=drugs_df["BP"].replace(level_codes)
drugs_df["Cholesterol"]=drugs_df["Cholesterol"].replace(level_codes)


In [None]:
# display the frame after BP and Cholesterol were mapped to integer codes
drugs_df

In [None]:
# Splitting the data into dependent and independent variables.
# X holds every column except the one-hot "Drug_*" target columns. Selecting
# by column name instead of the positional slice [:, :-5] keeps the features
# correct even if the number or order of columns in drugs_df changes.
X = drugs_df[[col for col in drugs_df.columns if not str(col).startswith("Drug_")]]
X

In [None]:
# y holds the one-hot "Drug_*" target columns. Selecting by column name
# instead of the positional slice [:, 6:] keeps the targets correct even if
# the number or order of columns in drugs_df changes.
y = drugs_df[[col for col in drugs_df.columns if str(col).startswith("Drug_")]]
y

In [None]:
# (rows, columns) of the feature frame and of the target frame
X.shape,y.shape

Splitting the data into training and test set

In [None]:
# Hold out 20% of the rows as the test set; random_state=0 fixes the shuffle
from sklearn.model_selection import train_test_split
split_parts = train_test_split(X, y, test_size=0.2, random_state=0)
X_train, X_test, y_train, y_test = split_parts

Scaling the Data

In [None]:
# Scaling the data
from sklearn.preprocessing import StandardScaler
sc=StandardScaler()
# Fit the scaler on the training split only ...
X_train=sc.fit_transform(X_train)
# ... and apply those same statistics to the test split. Calling
# fit_transform here would re-fit on the test data, so train and test would be
# scaled differently and test information would leak into preprocessing.
X_test=sc.transform(X_test)

In [None]:
# display the scaled training features
X_train

In [None]:
# display the scaled test features
X_test

Build ANN Model

In [None]:
# Building the ANN model
# All Keras symbols come from tensorflow.keras: mixing the standalone `keras`
# package with `tensorflow.keras` in one model can combine incompatible
# implementations of the same classes.
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.losses import categorical_crossentropy


In [None]:
# empty Sequential model; layers are added in a later cell
model = Sequential()

In [None]:
# (rows, columns) of the feature frame
X.shape

In [None]:
# Four ReLU Dense layers (6, 20, 20, 20 units) followed by a 5-unit softmax
# output layer. No explicit input layer is declared here.
for layer_units in (6, 20, 20, 20):
    model.add(
        Dense(
            units=layer_units,
            activation='relu',
            kernel_initializer='random_uniform',
        )
    )
model.add(
    Dense(
        units=5,
        activation='softmax',
        kernel_initializer='random_uniform',
    )
)
# categorical cross-entropy is used with the one-hot encoded targets
model.compile(
    loss="categorical_crossentropy",
    optimizer='adam',
    metrics=['accuracy'],
)

In [None]:
# shapes of the training features and training targets
X_train.shape,y_train.shape

In [None]:
# train for 200 epochs with mini-batches of 32 samples
model.fit(x=X_train, y=y_train, epochs=200, batch_size=32)

In [None]:
# predict on the test features, then keep the index of the largest value in each row
class_scores = model.predict(X_test)
ypred = class_scores.argmax(axis=1)


In [None]:
# convert the test targets to a NumPy array and display it
y_test=np.array(y_test)
y_test

In [None]:
# display the test targets
y_test

In [None]:
# display the predicted class indices
ypred

In [None]:
# Side-by-side table of predicted and true class indices.
# The true index is derived here from the one-hot test rows, so this cell no
# longer reads a variable that is only created in a later cell (which raised
# NameError when the notebook ran top to bottom).
df ={
    'y_pred' :ypred,
    'y_test' :np.argmax(np.asarray(y_test), axis=1)
}
pd.DataFrame(df)

In [None]:
# For every row of y_test, collect the column positions whose value equals 1
# (row by row, left to right), then print the resulting list.
indices_of_ones = [
    position
    for row in y_test
    for position, flag in enumerate(row)
    if flag == 1
]

print(indices_of_ones)

In [None]:
# first row of the encoded frame
drugs_df.head(1)

In [None]:
# (rows, columns) of the feature frame
X.shape

In [None]:
# first row of the (unscaled) feature frame
X.iloc[0]

In [None]:
# Predicted class index for the first test sample. ypred already holds class
# indices (argmax was applied right after predict), so taking argmax of the
# single value again would always return 0.
ypred[0]

In [None]:
# y_test.iloc[0:1]

In [None]:
# shape of the predicted class index array
ypred.shape

In [None]:
# shapes of the training features and training targets
X_train.shape,y_train.shape

In [None]:
# shapes of the test features and test targets
X_test.shape,y_test.shape

In [None]:
# Accuracy of the predicted class indices against the true class indices.
# Despite its name, `cm` holds the accuracy score, not a confusion matrix.
from sklearn.metrics import accuracy_score
cm = accuracy_score(y_true=indices_of_ones, y_pred=ypred)
cm