In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.tree import DecisionTreeClassifier

In [2]:
# Read the comma-separated drug dataset into a DataFrame and display it.
df = pd.read_csv("drug200.csv", sep=",")
df

Unnamed: 0,Age,Sex,BP,Cholesterol,Na_to_K,Drug
0,23,F,HIGH,HIGH,25.355,drugY
1,47,M,LOW,HIGH,13.093,drugC
2,47,M,LOW,HIGH,10.114,drugC
3,28,F,NORMAL,HIGH,7.798,drugX
4,61,F,LOW,HIGH,18.043,drugY
...,...,...,...,...,...,...
195,56,F,LOW,HIGH,11.567,drugC
196,16,M,LOW,HIGH,12.006,drugC
197,52,M,NORMAL,HIGH,9.894,drugX
198,23,M,NORMAL,NORMAL,14.020,drugX


In [3]:
# Dimensions of the loaded table as (rows, columns).
df.shape

(200, 6)

In [4]:
# Column labels of the loaded table (five features plus the Drug target).
df.columns

Index(['Age', 'Sex', 'BP', 'Cholesterol', 'Na_to_K', 'Drug'], dtype='object')

In [5]:
# Feature matrix: the five predictor columns as a NumPy array.
X = df.loc[:, ['Age', 'Sex', 'BP', 'Cholesterol', 'Na_to_K']].to_numpy()
# Target: the prescribed drug for each row.
y = df.loc[:, 'Drug']

In [6]:
# First five feature rows, before any encoding.
X[:5]

array([[23, 'F', 'HIGH', 'HIGH', 25.355],
       [47, 'M', 'LOW', 'HIGH', 13.093],
       [47, 'M', 'LOW', 'HIGH', 10.114],
       [28, 'F', 'NORMAL', 'HIGH', 7.798],
       [61, 'F', 'LOW', 'HIGH', 18.043]], dtype=object)

In [7]:
# First five target labels.
y[:5]

0    drugY
1    drugC
2    drugC
3    drugX
4    drugY
Name: Drug, dtype: object

In [8]:
from sklearn import preprocessing

In [9]:

# Encode the categorical feature columns (Sex, BP, Cholesterol) as integers.
#
# X is rebuilt from df first so this cell is safe to re-run: the encoders are
# fitted on the string labels only, so transforming a column that already
# holds integer codes would fail on labels the encoder has never seen.
X = df[['Age', 'Sex', 'BP', 'Cholesterol', 'Na_to_K']].values

# LabelEncoder assigns codes in sorted label order, not in the order given to
# fit(): Sex F=0, M=1; BP HIGH=0, LOW=1, NORMAL=2; Cholesterol HIGH=0, NORMAL=1.
le_sex = preprocessing.LabelEncoder()
le_sex.fit(['F', 'M'])
X[:, 1] = le_sex.transform(X[:, 1])

le_BP = preprocessing.LabelEncoder()
le_BP.fit(['LOW', 'NORMAL', 'HIGH'])
X[:, 2] = le_BP.transform(X[:, 2])

le_Chol = preprocessing.LabelEncoder()
le_Chol.fit(['NORMAL', 'HIGH'])
X[:, 3] = le_Chol.transform(X[:, 3])

# Show the first five rows to confirm the encoding.
X[0:5]

array([[23, 0, 0, 0, 25.355],
       [47, 1, 1, 0, 13.093],
       [47, 1, 1, 0, 10.114],
       [28, 0, 2, 0, 7.798],
       [61, 0, 1, 0, 18.043]], dtype=object)

In [10]:
# Preview the first rows of df (the source table, as opposed to the array X).
df.head()

Unnamed: 0,Age,Sex,BP,Cholesterol,Na_to_K,Drug
0,23,F,HIGH,HIGH,25.355,drugY
1,47,M,LOW,HIGH,13.093,drugC
2,47,M,LOW,HIGH,10.114,drugC
3,28,F,NORMAL,HIGH,7.798,drugX
4,61,F,LOW,HIGH,18.043,drugY


In [11]:
# Count missing values in each column.
df.isna().sum()

Age            0
Sex            0
BP             0
Cholesterol    0
Na_to_K        0
Drug           0
dtype: int64

In [12]:
# Sanity check: list the distinct values now stored in the BP column (index 2).
unique_values = np.unique(X[:, 2])
print(unique_values)

[0 1 2]


In [13]:
# First five feature rows after encoding.
X[:5]

array([[23, 0, 0, 0, 25.355],
       [47, 1, 1, 0, 13.093],
       [47, 1, 1, 0, 10.114],
       [28, 0, 2, 0, 7.798],
       [61, 0, 1, 0, 18.043]], dtype=object)

In [14]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=3,
)

In [15]:
# Entropy-based decision tree limited to depth 4.
# random_state is pinned (same seed as the train/test split) because the
# classifier permutes features at each split; without a seed, ties between
# equally good splits can resolve differently from run to run.
drugTree = DecisionTreeClassifier(criterion="entropy", max_depth=4, random_state=3)
drugTree

In [16]:
# Train the decision tree on the training split.
drugTree.fit(X_train,y_train)


In [17]:
# Predict drug labels for the held-out test rows.
predtree=drugTree.predict(X_test)

In [18]:
# Eyeball the first five true labels next to the first five predictions.
print("y test : ",y_test[:5])
print("\n y predict : ",predtree[:5])

y test :  40     drugY
51     drugX
139    drugX
197    drugX
170    drugX
Name: Drug, dtype: object

 y predict :  ['drugY' 'drugX' 'drugX' 'drugX' 'drugX']


In [19]:
from sklearn import metrics

# Fraction of test rows whose predicted drug matches the true label.
test_accuracy = metrics.accuracy_score(y_test, predtree)
print("DecisionTrees's Accuracy: ", test_accuracy)

DecisionTrees's Accuracy:  1.0


In [20]:
import graphviz

In [21]:
from  io import StringIO
import pydotplus
import matplotlib.image as mpimg
from sklearn import tree
%matplotlib inline from  io import StringIO
import pydotplus
import matplotlib.image as mpimg
from sklearn import tree
%matplotlib inline 

UsageError: unrecognized arguments: from io import StringIO


In [None]:
import graphviz
from sklearn.tree import export_graphviz
import pydotplus
from IPython.display import Image

# Output path for the rendered tree image.
filename = "drugtree.png"

# Feature names: the first five columns of df, as a plain list of strings.
featureNames = list(df.columns[0:5])

# Generate DOT source for the fitted decision tree.
# Class names come from the fitted model's classes_ so the labels follow the
# model's own class order rather than being recomputed from y_train.
dot_data = export_graphviz(drugTree,
                           feature_names=featureNames,
                           class_names=[str(label) for label in drugTree.classes_],
                           filled=True,
                           special_characters=True,
                           rotate=False)

# Convert the DOT source into a graph object.
graph = pydotplus.graph_from_dot_data(dot_data)

# Save the graph as a PNG file.
graph.write_png(filename)

# Display the image in the Jupyter notebook.
Image(filename=filename)

In [None]:
from sklearn.tree import DecisionTreeClassifier, plot_tree

In [None]:
# Draw the fitted tree with matplotlib.
# class_names is passed as a plain list rather than the classes_ array.
# NOTE(review): some scikit-learn releases appear to reject a NumPy array
# here during parameter validation — confirm against the installed version.
plt.figure(figsize=(12, 8))
plot_tree(
    drugTree,
    filled=True,
    feature_names=['Age', 'Sex', 'BP', 'Cholesterol', 'Na_to_K'],
    class_names=list(drugTree.classes_),
    rounded=True,
)
plt.title("Decision tree for drug prediction")
plt.show()