In [None]:

import numpy as np 
import pandas as pd 
import os
# Walk the Kaggle input directory and print the full path of every file in it.
for root, _, names in os.walk('/kaggle/input'):
    for name in names:
        print(os.path.join(root, name))

In [None]:

import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, LabelEncoder,StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report
from sklearn import naive_bayes

# Loading the Dataset

In [None]:

# Read the drug-classification CSV into a DataFrame and preview its first rows.
csv_path = '/kaggle/input/drug-classification/drug200.csv'
data = pd.read_csv(csv_path)
data.head()

In [None]:
# Dimensions of the loaded frame as (rows, columns).
data.shape

In [None]:
# Show the distinct values found in each of the listed columns.
for column in ('Sex', 'BP', 'Cholesterol', 'Drug'):
    print(data[column].unique())

In [None]:
# Summary of the frame: column dtypes, non-null counts and memory usage.
data.info()

# Data Preprocessing

In [None]:


# Separate the predictors from the 'Drug' target column.
X = data.drop(columns='Drug')
y = data['Drug']
print(X.shape, y.shape)

In [None]:


# One-hot encode the listed columns; remainder='passthrough' keeps every other
# column unchanged instead of dropping it.
categorical_columns = ['Sex', 'BP', 'Cholesterol']
ct = ColumnTransformer(
    transformers=[('cat', OneHotEncoder(), categorical_columns)],
    remainder='passthrough',
)
X = ct.fit_transform(X)

In [None]:


# Remove three columns of X in a single call. Passing the positions together
# means they all refer to the current layout of X, which is what deleting one
# column at a time in descending order achieved.
# NOTE(review): presumably one redundant indicator per one-hot encoded feature
# (dummy-variable trap) — confirm against the encoder's output column order.
redundant_columns = [1, 4, 6]
X = np.delete(X, redundant_columns, axis=1)

In [None]:
# Peek at the first two rows of X.
X[:2]

In [None]:
# Map the target labels to integer class codes and show the first ten.
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(y)
y[:10]

In [None]:

# Hold out 20% of the samples for testing; random_state fixes the shuffle.
split = train_test_split(X, y, test_size=0.2, random_state=0)
X_train, X_test, y_train, y_test = split
print(y_train.shape, y_test.shape)

# Applying ML Algorithms

In [None]:

# Depth-limited decision tree. random_state is pinned because scikit-learn
# permutes the candidate features at every split, so an unseeded tree can
# differ between runs when several splits are equally good.
clf = DecisionTreeClassifier(max_depth=5, random_state=0)
clf.fit(X_train, y_train)

# Mean accuracy on the training and held-out splits.
print('Training Score: {:.2f}'.format(clf.score(X_train, y_train)))
print('Testing Score: {:.2f}'.format(clf.score(X_test, y_test)))

In [None]:
# Standardised copy of the test features; the scaler is fitted on the training
# split only. Neither clf nor nb is trained on standardised inputs, so
# x_test_scaled must not be passed to their predict/score methods.
scaler = StandardScaler()
scaler.fit(X_train)
x_test_scaled = scaler.transform(X_test)

# Gaussian Naive Bayes, fitted on the unscaled training features.
nb = naive_bayes.GaussianNB()
nb.fit(X_train, y_train)

# Predictions must come from nb on the same (unscaled) feature space it was
# fitted on. Previously they came from the decision tree applied to scaled
# inputs, so the report below described neither model.
y_pred_n = nb.predict(X_test)
nb_acc = 100*nb.score(X_test, y_test)
print('Naive Bayes Predictions: \n', y_pred_n, '\n Accuracy:', nb_acc, '%')

# Per-class precision / recall / F1 for the Naive Bayes predictions.
print(classification_report(y_test, y_pred_n))

# Dummy-encode the categorical columns (dropping the first level of each) so
# every column can take part in the correlation matrix.
# NOTE: this rebinds `data`; the original categorical columns are no longer
# available under that name after this cell runs.
data = pd.get_dummies(data,drop_first=True)

# Compute the correlation matrix once and reuse it for both the column index
# and the heatmap.
corr_values = data.corr()
matrix_corr = corr_values.index

plt.figure(figsize=(15,15))
sns.heatmap(corr_values,annot=True,cmap='YlGn_r')
plt.title('Correlation matrix of the dummy-encoded dataset')
plt.show()

# Model Evaluation

In [None]:


# Per-class precision / recall / F1 of the decision tree on the test split.
y_pred_tree = clf.predict(X_test)
print(classification_report(y_test, y_pred_tree))