In [None]:
#Libs
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from sklearn.model_selection import train_test_split, GridSearchCV
import warnings
warnings.filterwarnings('ignore')

In [None]:
# Read the drug-classification CSV into a DataFrame and preview its first rows.
csv_path = "../input/drug-classification/drug200.csv"
data = pd.read_csv(csv_path)
data.head()

In [None]:
# True if at least one row is flagged as a duplicate by DataFrame.duplicated().
data.duplicated().any()

> No duplicate data

In [None]:
# Per-column flag: True where the column contains at least one missing value.
data.isna().any()

> No Null Values

In [None]:
# Summary statistics (count, mean, std, quartiles, ...) as produced by DataFrame.describe().
data.describe()

In [None]:
# Print the DataFrame.info() summary of the frame's columns.
data.info()

***Inference***

> The dataset has 2 **numerical** columns and 4 **categorical** (object) columns.

 Numerical Cols

1. Age
2. Sodium-to-potassium ratio in blood (Na_to_K)

Categorical Cols

1. Sex
2. Blood Pressure Levels (BP)
3. Cholesterol Levels (Cholesterol)
4. Drug Type (Drug)

# **Data Visualization**

In [None]:
# Count plot of patients per sex, with every bar annotated by its height.
plt.figure(figsize=(12, 7))
plt.title("No. of Gender of the patients", fontsize=20)
ax = sns.countplot(data=data, x="Sex", palette="hls")
for patch in ax.patches:
    height = patch.get_height()
    # Label sits at the top-centre of the bar and reuses the bar's fill colour.
    ax.text(
        patch.get_x() + patch.get_width() / 2,
        patch.get_y() + height,
        f'{height:,}',
        ha='center',
        va='bottom',
        color=patch.get_facecolor(),
        size=12,
    )

In [None]:
# Count plot of patients per blood-pressure level, with every bar annotated by its height.
plt.figure(figsize=(12, 7))
ax = sns.countplot(data=data, x="BP", palette="hls")
plt.title("No. of Blood Pressure Levels", fontsize=20)
for patch in ax.patches:
    height = patch.get_height()
    # Label sits at the top-centre of the bar and reuses the bar's fill colour.
    ax.text(
        patch.get_x() + patch.get_width() / 2,
        patch.get_y() + height,
        f'{height:,}',
        ha='center',
        va='bottom',
        color=patch.get_facecolor(),
        size=12,
    )

In [None]:
# Count plot of patients per cholesterol level, with every bar annotated by its height.
plt.figure(figsize=(12, 7))
ax = sns.countplot(data=data, x="Cholesterol", palette="hls")
plt.title("No. of Cholesterol Levels", fontsize=20)
for patch in ax.patches:
    height = patch.get_height()
    # Label sits at the top-centre of the bar and reuses the bar's fill colour.
    ax.text(
        patch.get_x() + patch.get_width() / 2,
        patch.get_y() + height,
        f'{height:,}',
        ha='center',
        va='bottom',
        color=patch.get_facecolor(),
        size=12,
    )

In [None]:
# Count plot of patients per drug type, with every bar annotated by its height.
plt.figure(figsize=(12, 7))
ax = sns.countplot(data=data, x="Drug", palette="hls")
plt.title("No. of Drug Type", fontsize=20)
for patch in ax.patches:
    height = patch.get_height()
    # Label sits at the top-centre of the bar and reuses the bar's fill colour.
    ax.text(
        patch.get_x() + patch.get_width() / 2,
        patch.get_y() + height,
        f'{height:,}',
        ha='center',
        va='bottom',
        color=patch.get_facecolor(),
        size=12,
    )

In [None]:
# Distribution of the Na_to_K column: density-normalised histogram with a KDE overlay.
plt.figure(figsize=(12,6))
plt.title("Distribution of Sodium to potassium Ration in Blood", fontsize=20)
# sns.distplot is deprecated (the notice is hidden by the notebook's global
# warnings filter); histplot with kde=True and stat="density" is its
# documented replacement for a histogram-plus-density figure.
sns.histplot(data['Na_to_K'], kde=True, stat="density")
plt.show()

In [None]:
# Distribution of the Age column: density-normalised histogram with a KDE overlay.
plt.figure(figsize=(12,6))
plt.title("Distribution of Ages", fontsize=20)
# sns.distplot is deprecated (the notice is hidden by the notebook's global
# warnings filter); histplot with kde=True and stat="density" is its
# documented replacement for a histogram-plus-density figure.
sns.histplot(data['Age'], kde=True, stat="density")
plt.show()

In [None]:
# Horizontal box plot comparing the Age distribution between the Sex groups.
plt.figure(figsize=(12, 6))
age_by_sex_ax = sns.boxplot(data=data, x='Age', y='Sex', palette="hls")
age_by_sex_ax.set_title("Distribution of Gender Ages", fontsize=20)
plt.show()

In [None]:
# Count plot of patients per drug type, split by sex, with each bar annotated by its height.
plt.figure(figsize=(12,7))
plt.title("No. of Gender of the patients per. Drug type", fontsize=20)
ax = sns.countplot(data=data, x="Drug", hue='Sex', palette="hls")
for bar in ax.patches:
    bar_value = bar.get_height()
    # Depending on the seaborn/matplotlib version, ax.patches can also hold
    # zero-size rectangles that only serve as legend handles, and empty
    # category/hue combinations can have a NaN height. Neither is a real bar,
    # so they must not receive a label.
    if bar.get_width() == 0 or not np.isfinite(bar_value):
        continue
    text = f'{bar_value:,}'
    # Anchor the label at the top-centre of the bar.
    text_x = bar.get_x() + bar.get_width() / 2
    text_y = bar.get_y() + bar_value
    bar_color = bar.get_facecolor()
    ax.text(text_x, text_y, text, ha='center', va='bottom', color=bar_color,size=12)

In [None]:
# Count plot of patients per blood-pressure level, split by sex, with each bar annotated by its height.
plt.figure(figsize=(12,7))
plt.title("No. of Gender of the patients per. Blood Pressure Levels", fontsize=20)
ax = sns.countplot(data=data, x="BP", hue='Sex', palette="hls")
for bar in ax.patches:
    bar_value = bar.get_height()
    # Depending on the seaborn/matplotlib version, ax.patches can also hold
    # zero-size rectangles that only serve as legend handles, and empty
    # category/hue combinations can have a NaN height. Neither is a real bar,
    # so they must not receive a label.
    if bar.get_width() == 0 or not np.isfinite(bar_value):
        continue
    text = f'{bar_value:,}'
    # Anchor the label at the top-centre of the bar.
    text_x = bar.get_x() + bar.get_width() / 2
    text_y = bar.get_y() + bar_value
    bar_color = bar.get_facecolor()
    ax.text(text_x, text_y, text, ha='center', va='bottom', color=bar_color,size=12)

In [None]:
# Count plot of patients per cholesterol level, split by sex, with each bar annotated by its height.
plt.figure(figsize=(12,7))
plt.title("No. of Gender of the patients per. Cholesterol Levels", fontsize=20)
ax = sns.countplot(data=data, x="Cholesterol", hue='Sex', palette="hls")
for bar in ax.patches:
    bar_value = bar.get_height()
    # Depending on the seaborn/matplotlib version, ax.patches can also hold
    # zero-size rectangles that only serve as legend handles, and empty
    # category/hue combinations can have a NaN height. Neither is a real bar,
    # so they must not receive a label.
    if bar.get_width() == 0 or not np.isfinite(bar_value):
        continue
    text = f'{bar_value:,}'
    # Anchor the label at the top-centre of the bar.
    text_x = bar.get_x() + bar.get_width() / 2
    text_y = bar.get_y() + bar_value
    bar_color = bar.get_facecolor()
    ax.text(text_x, text_y, text, ha='center', va='bottom', color=bar_color,size=12)

# **Data Processing**

In [None]:
# One-hot encode the categorical predictors; the target column is handled separately below.
data = pd.get_dummies(data, columns=["Sex", "BP", "Cholesterol"])

# Integer-encode the target with an explicit label -> code mapping.
# Series.map with a dict yields a numeric column directly (no reliance on
# replace() silently downcasting an object column) and turns any label
# missing from the mapping into NaN, which is checked right away instead of
# leaving stray strings in the target.
drug_codes = {"DrugY": 0, "drugC": 1, "drugX": 2, "drugA": 3, "drugB": 4}
encoded_drug = data["Drug"].map(drug_codes)
if encoded_drug.isna().any():
    unmapped = sorted(set(data.loc[encoded_drug.isna(), "Drug"].astype(str)))
    raise ValueError(f"Drug labels without an integer code: {unmapped}")
data["Drug"] = encoded_drug.astype("int64")
data.head()

In [None]:
# Pairwise correlations of all columns, drawn as a lower-triangle heatmap.
corr = data.corr()
plt.figure(figsize=(12,7))
# Boolean mask covering the diagonal and upper triangle. Building it from
# ones (not from the correlation values themselves) guarantees that an
# upper-triangle cell whose correlation happens to be exactly 0 is hidden too.
matrix = np.triu(np.ones_like(corr, dtype=bool))
sns.heatmap(corr, annot= True, fmt=".1g", cmap= 'jet', linewidths=1, linecolor='black', mask=matrix)

In [None]:
# Remove one dummy column per encoded categorical feature.
data = data.drop(columns=["Sex_F", "Cholesterol_NORMAL", "BP_NORMAL"])

# **Splitting Data**

In [None]:
# Separate predictors from the target and hold out 20% of the rows for testing.
features = data.drop(["Drug"],axis=1)
target = data.Drug
X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.2, random_state=42)
# Keep the targets as 1-D arrays: scikit-learn classifiers expect y of shape
# (n_samples,), and a (n_samples, 1) column vector only works through an
# implicit conversion that emits a DataConversionWarning.
y_train = y_train.to_numpy()
y_test = y_test.to_numpy()

> **The data was split into an 80% train set and a 20% test set**

In [None]:
# Show the shape of each split, in the order: X_train, X_test, y_train, y_test.
for split in (X_train, X_test, y_train, y_test):
    print(split.shape)

# **Training Data**


In [None]:
#Models Libs

from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier

In [None]:
#Result Dict
# Score accumulators keyed by "<model> Train Score" / "<model> Test Score".
res_dfTrain, res_dfTest = {}, {}

# **KNN Model With GridSearch**

In [None]:
# Hyper-parameter grid explored for the k-nearest-neighbours classifier.
GS = {
    "n_neighbors": np.arange(1,20),
    'weights': ['distance', 'uniform'],
    'p':np.arange(1,5),
    "algorithm": ['ball_tree', 'kd_tree', 'auto']
}

knn = KNeighborsClassifier()
knn_GS = GridSearchCV(knn, GS, cv=5)
# np.ravel hands the estimator the 1-D target it expects, whether y_train is
# stored as a column vector or is already flat.
knn_GS.fit(X_train, np.ravel(y_train))

# Score the held-out set once and reuse the value for both the results dict
# and the printout.
knn_test_score = knn_GS.score(X_test, np.ravel(y_test))

# NOTE: best_score_ is the mean cross-validated score of the best candidate
# on the training split, not a plain training-set accuracy.
res_dfTrain["KNN Train Score"] = knn_GS.best_score_
res_dfTest["KNN Test Score"] = knn_test_score

print("HP: " + str(knn_GS.best_params_))
print("Train Score: " + str(knn_GS.best_score_))
print("Test Score: " + str(knn_test_score))

# **RF Model With GridSearch**

In [None]:
# Hyper-parameter grid explored for the random forest.
# n_estimators starts at 1: a forest with 0 trees is not a valid model, so
# those candidates could only ever fail to fit and waste search time.
GS = {
    "criterion": ["gini", "entropy"],
    "n_estimators": np.arange(1,50)
}

# A fixed seed makes the forest's randomness, and therefore the selected
# hyper-parameters and reported scores, repeatable between runs.
rf = RandomForestClassifier(random_state=42)
rf_GS = GridSearchCV(rf, GS, cv=5)
# np.ravel hands the estimator the 1-D target it expects, whether y_train is
# stored as a column vector or is already flat.
rf_GS.fit(X_train, np.ravel(y_train))

# Score the held-out set once and reuse the value for both the results dict
# and the printout.
rf_test_score = rf_GS.score(X_test, np.ravel(y_test))

# NOTE: best_score_ is the mean cross-validated score of the best candidate
# on the training split, not a plain training-set accuracy.
res_dfTrain["RF Train Score"] = rf_GS.best_score_
res_dfTest["RF Test Score"] = rf_test_score

print("HP: " + str(rf_GS.best_params_))
print("Train Score: " + str(rf_GS.best_score_))
print("Test Score: " + str(rf_test_score))

# **SVM Model With GridSearch**

In [None]:
# Hyper-parameter grid explored for the support vector classifier.
GS = {
    "C": [.001, .01, 0.1, 1, 10],
    "kernel": ["linear", "poly", "rbf", "sigmoid"],
    "degree": [1, 3, 5],
    "gamma": ["scale", "auto"]
}

svm = SVC()
svm_GS = GridSearchCV(svm, GS, cv=5)
# np.ravel hands the estimator the 1-D target it expects, whether y_train is
# stored as a column vector or is already flat.
svm_GS.fit(X_train, np.ravel(y_train))

# Score the held-out set once and reuse the value for both the results dict
# and the printout.
svm_test_score = svm_GS.score(X_test, np.ravel(y_test))

# NOTE: best_score_ is the mean cross-validated score of the best candidate
# on the training split, not a plain training-set accuracy.
res_dfTrain["SVM Train Score"] = svm_GS.best_score_
res_dfTest["SVM Test Score"] = svm_test_score

print("HP: " + str(svm_GS.best_params_))
print("Train Score: " + str(svm_GS.best_score_))
print("Test Score: " + str(svm_test_score))

# **Models Visualization**

In [None]:
# One row per model with its recorded "Train" score; displayed best-first.
df_res_train = pd.DataFrame.from_dict(
    res_dfTrain,
    orient="index",
    columns=["Score"],
)
df_res_train.sort_values("Score", ascending=False)

In [None]:
# Bar chart of the recorded train scores, one bar per model, each annotated with its value.
plt.figure(figsize=(12,7))
plt.title("Train Score Result", fontsize=20)
ax = sns.barplot(x = df_res_train.index, y = df_res_train.Score)
for bar in ax.patches:
    bar_value = bar.get_height()
    # Scores are fractions, so show a fixed 3 decimals; the thousands-separator
    # format used for count labels would print the full float repr here
    # (e.g. 0.9937499999999999).
    text = f'{bar_value:.3f}'
    # Anchor the label at the top-centre of the bar.
    text_x = bar.get_x() + bar.get_width() / 2
    text_y = bar.get_y() + bar_value
    bar_color = bar.get_facecolor()
    ax.text(text_x, text_y, text, ha='center', va='bottom', color=bar_color,size=12)

In [None]:
# One row per model with its recorded test score; displayed best-first.
df_res_test = pd.DataFrame.from_dict(
    res_dfTest,
    orient="index",
    columns=["Score"],
)
df_res_test.sort_values("Score", ascending=False)

In [None]:
# Bar chart of the recorded test scores, one bar per model, each annotated with its value.
plt.figure(figsize=(12,7))
plt.title("Test Score Result", fontsize=20)
ax = sns.barplot(x = df_res_test.index, y = df_res_test.Score)
for bar in ax.patches:
    bar_value = bar.get_height()
    # Scores are fractions, so show a fixed 3 decimals; the thousands-separator
    # format used for count labels would print the full float repr here
    # (e.g. 0.9937499999999999).
    text = f'{bar_value:.3f}'
    # Anchor the label at the top-centre of the bar.
    text_x = bar.get_x() + bar.get_width() / 2
    text_y = bar.get_y() + bar_value
    bar_color = bar.get_facecolor()
    ax.text(text_x, text_y, text, ha='center', va='bottom', color=bar_color,size=12)

**The Best Models for This problem are Random Forest (RF) & Support Vector Machine (SVM)**