# Decision Tree Classification

In [7]:
import pandas as pd

In [8]:
# Read the training table from the CSV file that sits next to the notebook.
data = pd.read_csv(filepath_or_buffer="decision_tree_dataset.csv")

In [9]:
# Display the loaded DataFrame for a quick visual check of its columns and values.
data

Unnamed: 0,company,job,degree,salary_more_then_50000
0,Reliance,sales executive,bachelors,0
1,Reliance,sales executive,masters,0
2,Reliance,business manager,bachelors,1
3,Reliance,business manager,masters,1
4,Reliance,computer programmer,bachelors,0
5,Reliance,computer programmer,masters,1
6,Torrent,sales executive,masters,0
7,Torrent,computer programmer,bachelors,0
8,Torrent,business manager,bachelors,0
9,Torrent,business manager,masters,1


In [10]:
# Label column: 1 when the salary exceeds 50000, otherwise 0.
target = data.loc[:, 'salary_more_then_50000']

In [11]:
# Feature frame: everything except the label column.
inputs = data.drop(columns=['salary_more_then_50000'])

In [12]:
# Preview the first five feature rows.
inputs.head(n=5)

Unnamed: 0,company,job,degree
0,Reliance,sales executive,bachelors
1,Reliance,sales executive,masters
2,Reliance,business manager,bachelors
3,Reliance,business manager,masters
4,Reliance,computer programmer,bachelors


In [13]:
from sklearn.preprocessing import LabelEncoder

# One independent encoder per categorical column, so each column keeps its own
# category-to-integer mapping and can be decoded separately later.
le_company, le_job, le_degree = LabelEncoder(), LabelEncoder(), LabelEncoder()


In [14]:
# Fit each encoder on its text column and store the integer codes in a
# sibling column named '<column>_n'.
for column, encoder in (
    ('company', le_company),
    ('job', le_job),
    ('degree', le_degree),
):
    inputs[column + '_n'] = encoder.fit_transform(inputs[column])

# Decode

# Decoding demo only: map the integer codes back to their original labels.
# The result goes into a separate variable so the numeric 'degree_n' feature
# column is NOT overwritten with strings (the classifier below is trained on it).
degree_decoded = le_degree.inverse_transform(inputs['degree_n'])
degree_decoded[:5]

In [15]:
# Inspect `inputs` now that the `company_n`, `job_n` and `degree_n` columns have been added.
inputs

Unnamed: 0,company,job,degree,company_n,job_n,degree_n
0,Reliance,sales executive,bachelors,0,2,0
1,Reliance,sales executive,masters,0,2,1
2,Reliance,business manager,bachelors,0,0,0
3,Reliance,business manager,masters,0,0,1
4,Reliance,computer programmer,bachelors,0,1,0
5,Reliance,computer programmer,masters,0,1,1
6,Torrent,sales executive,masters,2,2,1
7,Torrent,computer programmer,bachelors,2,1,0
8,Torrent,business manager,bachelors,2,0,0
9,Torrent,business manager,masters,2,0,1


In [16]:
# Keep only the encoded columns: drop the original text columns they were derived from.
inputs_n = inputs.drop(columns=['company', 'job', 'degree'])

In [17]:
# Inspect the feature frame that remains after dropping the original text columns.
inputs_n

Unnamed: 0,company_n,job_n,degree_n
0,0,2,0
1,0,2,1
2,0,0,0
3,0,0,1
4,0,1,0
5,0,1,1
6,2,2,1
7,2,1,0
8,2,0,0
9,2,0,1


In [18]:
# Inspect the label Series (salary_more_then_50000) used as the training target.
target

0     0
1     0
2     1
3     1
4     0
5     1
6     0
7     0
8     0
9     1
10    1
11    1
12    1
13    1
14    1
15    1
Name: salary_more_then_50000, dtype: int64

In [19]:
from sklearn import tree

# Fix the seed: the tree learner permutes features at each split, so ties
# between equally good splits can otherwise be broken differently between
# runs, giving a different tree (and different predictions on unseen inputs).
model = tree.DecisionTreeClassifier(random_state=42)

In [20]:
# Train the tree on the encoded features and the salary label.
model.fit(X=inputs_n, y=target)

DecisionTreeClassifier()

In [21]:
# Mean accuracy on the very rows the tree was fitted on: this is training
# accuracy, not an estimate of performance on unseen data.
model.score(X=inputs_n, y=target)

1.0

# Prediction

In [22]:
# 0 - No: salary is not greater than 50000

In [23]:
# Feature order is company_n, job_n, degree_n. Per the encoded table above,
# [0, 2, 0] is Reliance / sales executive / bachelors.
model.predict(X=[[0, 2, 0]])



array([0])

In [24]:
# 1 - Yes: salary is greater than 50000

In [25]:
# Feature order is company_n, job_n, degree_n.
# NOTE(review): degree_n=2 does not appear among the encoded degree values
# visible in the tables above (0 = bachelors, 1 = masters) - confirm that this
# input is intended and not a typo for a valid code.
model.predict([[0,1,2]])



array([1])

In [26]:
# Predictions for every training row; used by the metrics cells below.
prediction_data = model.predict(X=inputs_n)

# Confusion Matrix

In [27]:
from sklearn import metrics 

In [28]:
# confusion matrix,accuracy,classification_report in sklearn
from sklearn.metrics import confusion_matrix 
from sklearn.metrics import accuracy_score 
from sklearn.metrics import classification_report 

In [29]:
# confusion matrix
matrix = confusion_matrix(target,prediction_data, labels=[1,0])
print('Confusion matrix : \n',matrix)

Confusion matrix : 
 [[10  0]
 [ 0  6]]


In [30]:
from sklearn.metrics import confusion_matrix

# Same matrix with no explicit label order given (the recorded output lists
# class 0 first, then class 1).
cm = confusion_matrix(y_true=target, y_pred=prediction_data)
cm

array([[ 6,  0],
       [ 0, 10]])

# Accuracy

In [31]:
# Fraction of rows whose predicted label equals the true label.
accuracy = metrics.accuracy_score(y_true=target, y_pred=prediction_data)

In [32]:
# Show the accuracy value computed in the previous cell.
accuracy

1.0

In [33]:
# Express the accuracy fraction as a percentage and display it.
accuracy_percentage = accuracy * 100
accuracy_percentage

100.0

# Classification Report

In [34]:
# Per-class precision / recall / F1 summary, returned as one formatted string.
report = classification_report(y_true=target, y_pred=prediction_data)

In [35]:
# The report is a pre-formatted multi-line string. Print it so the newlines
# render as a table instead of showing as literal '\n' escapes in the repr.
print(report)

'              precision    recall  f1-score   support\n\n           0       1.00      1.00      1.00         6\n           1       1.00      1.00      1.00        10\n\n    accuracy                           1.00        16\n   macro avg       1.00      1.00      1.00        16\nweighted avg       1.00      1.00      1.00        16\n'

In [36]:
import pickle

# Persist the fitted classifier to disk, then read it back to confirm the
# round trip works. Context managers guarantee both file handles are closed
# (the write handle must be flushed and closed before the file is re-read).
with open('model.pkl', 'wb') as model_file:
    pickle.dump(model, model_file)

# NOTE: pickle.load executes arbitrary code embedded in the file; only ever
# load pickles that this notebook (or another trusted source) produced.
with open('model.pkl', 'rb') as model_file:
    model = pickle.load(model_file)

In [40]:
from BentoML_model import Classifier

# Wrap the trained classifier in the project's Bento service and write the
# bundle to the local BentoML repository.
classifier_service = Classifier()

# Attach the fitted estimator to the service under the artifact name 'model'.
classifier_service.pack('model', model)

# save() returns the directory the bundle was written to; keep it so the
# bundle can be loaded again without hard-coding the path.
save_path = classifier_service.save()

save_path






[2022-01-16 08:49:39,055] INFO - BentoService bundle 'Classifier:20220116084935_FD6184' saved to: /home/keval2415/bentoml/repository/Classifier/20220116084935_FD6184
/home/keval2415/bentoml/repository/Classifier/20220116084935_FD6184


In [41]:
# Feature order is company_n, job_n, degree_n. Per the encoded table shown
# earlier, [2, 0, 1] is Torrent / business manager / masters.
model.predict([[2,0,1]])



array([1])

In [42]:
from bentoml import load

# Load the bundle from the `save_path` returned by classifier_service.save()
# in the save cell. A hard-coded absolute path would point at one specific,
# timestamped bundle on one user's machine and would go stale as soon as the
# notebook is re-run (each save writes a new version directory).
# NOTE(review): from here on `model` is the loaded Bento service, not the
# scikit-learn estimator - its predict() returned a text label in the
# recorded output. Consider a distinct name if later cells need both.
model = load(save_path)

model.predict([[1,2,0]])





'Salary more then 50k'