# Drug Classification Model Based on Decision Trees

In [None]:
# Importing necessary libraries for data exploration
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

In [None]:
# Walk the Kaggle input directory and print the full path of every file found
import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        full_path = os.path.join(dirname, filename)
        print(full_path)

## Data Exploration

In [None]:
# Load the drug dataset CSV from the Kaggle input directory into a DataFrame
src = '/kaggle/input/drug-classification/drug200.csv'
data_drug = pd.read_csv(filepath_or_buffer=src)

In [None]:
# Preview the first five rows of the dataset
data_drug.head(n=5)

In [None]:
# Show the dtype pandas inferred for each column
data_drug.dtypes

In [None]:
# Print a summary of the DataFrame (columns, non-null counts, dtypes, memory usage)
data_drug.info()

## Data Preparation

In [None]:
# The 'Drug' column is the target; every other column is a feature
y = data_drug['Drug']
X = data_drug.drop(columns='Drug')

In [None]:
# Names of the numerical and the categorical columns in the feature set
num_columns = [
    'Age',
    'Na_to_K',
]
cat_columns = [
    'Sex',
    'BP',
    'Cholesterol',
]

In [None]:
# Importing the encoder, scaler and train/test split helper from the scikit-learn library
from sklearn.preprocessing import OrdinalEncoder, StandardScaler
from sklearn.model_selection import train_test_split

In [None]:
# Encode the categorical columns as ordinal codes (one number per category)
# NOTE(review): the encoder is fitted on the full feature set, i.e. before the
# train/test split performed later in the notebook.
encoder = OrdinalEncoder()
X_cat_encode = encoder.fit_transform(X[cat_columns])
# Reuse X's index for the encoded frame: pd.concat(axis=1) aligns rows on index
# labels, so a fresh RangeIndex would misalign rows (and introduce NaNs) whenever
# X does not carry the default 0..n-1 index.
X_cat_encode = pd.DataFrame(X_cat_encode, columns=cat_columns, index=X.index)
# Numerical columns first, followed by the encoded categorical columns
X_encoded = pd.concat((X[num_columns], X_cat_encode), axis=1)
X_encoded.head(10)

In [None]:
# Hold out 20% of the rows as a test set; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X_encoded,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Standardize the features: the scaler is fitted on the training split only,
# then the same fitted scaler is applied to both the training and the test split
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

## Model Building

In [None]:
# Importing DecisionTreeClassifier from the scikit-learn library
from sklearn.tree import DecisionTreeClassifier

In [None]:
# Fit a decision tree classifier, limited to a depth of 5, on the training split.
# random_state is pinned (same seed as the train/test split) because scikit-learn
# randomly permutes the features at each split, so an unseeded tree can differ
# from run to run.
model = DecisionTreeClassifier(max_depth=5, random_state=42)
model.fit(X_train, y_train)

## Prediction & Evaluation

In [None]:
# Use the fitted model to predict a label for every row of the test split
y_pred = model.predict(X=X_test)

In [None]:
#Importing metrics for evaluation
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, precision_score

In [None]:
# Confusion matrix of the true test labels against the predicted labels
confusion_matrix(y_true=y_test, y_pred=y_pred)

In [None]:
# Print the classification report for the test-set predictions
print(classification_report(y_true=y_test, y_pred=y_pred))