In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import seaborn as sns
import matplotlib. pyplot as plt
from sklearn import tree

In [None]:
# Load dataset

df = pd.read_csv('datasets/drug.csv')

# Preview only the first rows as the cell's rich output; printing the whole
# frame floods the notebook with plain text.
df.head()

In [None]:
# Display dataset information
# DataFrame.info() prints a concise summary of `df` (its index, columns,
# non-null counts and dtypes) and returns None, so the text is the cell's
# only output.

df.info()

In [None]:
# Rows for patients whose 'Age' is strictly greater than 50

df.loc[df['Age'] > 50]

In [None]:
# Show the 5 rows with the highest Na_to_K values

df.nlargest(n=5, columns='Na_to_K')

In [None]:
# Number of rows recorded for each distinct value in the 'Drug' column

df.loc[:, 'Drug'].value_counts()

In [None]:
# Find maximum and minimum age

# Compute both extremes explicitly. Descriptive names are used instead of
# `max`/`min` so the built-in functions are not shadowed, and so the
# "Maximum Age" line prints a number rather than the built-in `max` itself.
max_age = df['Age'].max()
min_age = df['Age'].min()

print("Maximum Age:", max_age)
print("Minimum Age:", min_age)

In [None]:
# Find max, min, avg of Na_to_K for patients taking 'DrugX'

# Filter once and reuse the resulting Series for all three statistics.
# NOTE(review): the comparison is case-sensitive; confirm the label is spelled
# exactly 'DrugX' in the data, otherwise the selection is empty and the
# statistics come back as NaN.
drugx_na_to_k = df.loc[df['Drug'] == 'DrugX', 'Na_to_K']

# Descriptive names keep the built-in `max` and `min` functions usable in the
# rest of the kernel session.
drugx_max = drugx_na_to_k.max()
drugx_min = drugx_na_to_k.min()
drugx_avg = drugx_na_to_k.mean()

print("For DrugX - Maximum Na_to_K:", drugx_max)
print("For DrugX - Minimum Na_to_K:", drugx_min)
print("For DrugX - Average Na_to_K:", drugx_avg)

In [None]:
# Box plot of 'Age', with one box per distinct 'Drug' value

df.boxplot(by='Drug', column='Age')
plt.show()

In [None]:
# Scatter plot of Age (x-axis) against Na_to_K (y-axis)

# Draw through the current Axes object instead of the pyplot wrappers.
ax = plt.gca()
ax.scatter(df['Age'], df['Na_to_K'], c='blue')
ax.set_title('Age vs Na_to_K')
ax.set_xlabel('Age')
ax.set_ylabel('Na_to_K')
ax.grid(linestyle='--', alpha=0.7)
plt.show()


In [None]:
# Train a baseline Decision Tree Classifier on the two numeric features

x = df[['Age', 'Na_to_K']]
y = df['Drug']

# Seed both the train/test split and the tree so the reported accuracy is the
# same on every run (30% of the rows are held out for testing).
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=42)
model = DecisionTreeClassifier(random_state=42)
model.fit(x_train, y_train)

# Score the model on the held-out rows only.
y_pred = model.predict(x_test)
print("Accuracy:", accuracy_score(y_test, y_pred))



In [None]:
# Enhanced Decision Tree Classifier with Categorical Features

# Replace each categorical column with integer codes so the tree can use it.
# Note: this overwrites the original string columns of `df` in place.
for col in ['Sex', 'BP', 'Cholesterol']:
    df[col] = LabelEncoder().fit_transform(df[col])

x = df[['Age', 'Sex', 'BP', 'Cholesterol', 'Na_to_K']]
y = df['Drug']

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=42)

# Seed the tree as well as the split so the fitted model is reproducible.
model = DecisionTreeClassifier(criterion='entropy', max_depth=4, random_state=42)
model.fit(x_train, y_train)

y_pred = model.predict(x_test)

print("Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))

# Plain lists are passed to plot_tree because some scikit-learn releases
# reject a pandas Index / NumPy array for these parameters.
feature_labels = list(x.columns)
class_labels = list(model.classes_)

plt.figure(figsize=(10,8))
tree.plot_tree(model, filled=True, feature_names=feature_labels, class_names=class_labels)
plt.title('Decision Tree')
plt.show()

# Pin the row/column order to the model's classes and show them on the axes,
# so each cell of the matrix can be read against a named drug.
cm = confusion_matrix(y_test, y_pred, labels=class_labels)
plt.figure(figsize=(10,6))
sns.heatmap(cm, annot=True, fmt='d', cmap='YlGnBu',
            xticklabels=class_labels, yticklabels=class_labels)
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.title('Confusion Matrix')
plt.show()