# Pickle 

In [9]:
# Source (following):  https://www.datacamp.com/tutorial/decision-tree-classification-python

# Load libraries
import pandas as pd
from sklearn.tree import DecisionTreeClassifier # Import Decision Tree Classifier
from sklearn.model_selection import train_test_split # Import train_test_split function
from sklearn import metrics #Import scikit-learn metrics module for accuracy calculation

# library for save and load scikit-learn models
import pickle

# Telecust1 customer-segmentation dataset
# https://www.kaggle.com/datasets/prathamtripathi/customersegmentation
# Column labels applied to the CSV; `columns` is a second name bound to the
# same list object.
col_names = columns = [
    'region', 'tenure', 'age', 'income', 'marital', 'address',
    'ed', 'employ', 'retire', 'gender', 'reside', 'custcat',
]

# Read the CSV, skipping its first line and labelling the columns with col_names.
telecust = pd.read_csv("Telecust1.csv", names=col_names, skiprows=[0])

# Predictor columns: everything in col_names except 'address' and the
# 'custcat' target.
feature_cols = [
    'region', 'tenure', 'age', 'income', 'marital',
    'ed', 'employ', 'retire', 'gender', 'reside',
]
X = telecust[feature_cols]  # Features
y = telecust["custcat"]     # Target variable

# Split dataset into training set and test set: 70% training and 30% test.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=1
)

# Create the Decision Tree classifier object.
# random_state is fixed because scikit-learn randomly permutes the candidate
# features at every split; without a seed, ties between equally good splits can
# be broken differently from run to run, so the fitted tree (and the accuracy
# reported below) would not be guaranteed reproducible.
clf = DecisionTreeClassifier(
    max_depth=3, min_samples_leaf=1, min_samples_split=2, random_state=1
)

# Train the Decision Tree classifier on the training portion only.
clf = clf.fit(X_train, y_train)

# File name for the serialized model; *.pickle is used as the file extension.
filename = "decision_tree.pickle"

# Save the fitted model. The context manager closes the file handle (flushing
# any buffered bytes to disk) as soon as the dump finishes, instead of leaving
# an anonymous open file to be cleaned up by garbage collection.
with open(filename, "wb") as model_file:
    pickle.dump(clf, model_file)

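# Predict the response for the test dataset -- intentionally left disabled here:
# the next cell makes the prediction with the model reloaded from disk.
#y_pred = clf.predict(X_test)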


In [10]:
# Load the model back from disk; the context manager closes the file handle
# once unpickling is done.
# NOTE: pickle.load can execute arbitrary code while deserializing -- only
# unpickle files you created yourself or otherwise trust.
with open(filename, "rb") as model_file:
    loaded_model = pickle.load(model_file)

# The reloaded model can be used to compute predictions on the test features.
y_pred = loaded_model.predict(X_test)

In [11]:
# Show the class labels the reloaded model predicted for the test set.
print(y_pred)

['A' 'A' 'A' 'C' 'C' 'A' 'C' 'C' 'A' 'A' 'D' 'D' 'A' 'D' 'A' 'D' 'A' 'C'
 'D' 'A' 'C' 'A' 'A' 'D' 'C' 'A' 'A' 'A' 'A' 'A' 'C' 'C' 'A' 'C' 'C' 'A'
 'C' 'D' 'B' 'C' 'A' 'C' 'D' 'C' 'A' 'A' 'C' 'A' 'C' 'A' 'B' 'C' 'C' 'D'
 'C' 'A' 'C' 'A' 'C' 'A' 'A' 'A' 'D' 'C' 'D' 'B' 'A' 'A' 'D' 'A' 'D' 'B'
 'C' 'A' 'A' 'A' 'A' 'C' 'C' 'C' 'C' 'C' 'C' 'D' 'C' 'A' 'A' 'A' 'A' 'A'
 'A' 'D' 'A' 'A' 'C' 'B' 'C' 'C' 'C' 'B' 'A' 'D' 'A' 'A' 'D' 'C' 'C' 'A'
 'D' 'C' 'C' 'A' 'A' 'D' 'A' 'A' 'A' 'A' 'C' 'A' 'C' 'B' 'A' 'A' 'B' 'C'
 'B' 'D' 'A' 'D' 'A' 'A' 'D' 'C' 'B' 'D' 'A' 'A' 'C' 'C' 'D' 'C' 'D' 'D'
 'D' 'C' 'C' 'B' 'C' 'B' 'D' 'C' 'D' 'C' 'A' 'A' 'D' 'A' 'D' 'A' 'A' 'A'
 'C' 'C' 'D' 'D' 'A' 'A' 'B' 'A' 'C' 'A' 'A' 'A' 'A' 'D' 'A' 'A' 'A' 'A'
 'A' 'A' 'B' 'C' 'A' 'A' 'A' 'C' 'C' 'A' 'A' 'A' 'D' 'C' 'C' 'C' 'A' 'C'
 'D' 'A' 'D' 'A' 'A' 'B' 'C' 'D' 'D' 'B' 'A' 'C' 'C' 'A' 'A' 'C' 'B' 'A'
 'C' 'B' 'D' 'C' 'C' 'C' 'A' 'D' 'A' 'A' 'A' 'C' 'B' 'D' 'C' 'D' 'A' 'C'
 'A' 'C' 'A' 'C' 'A' 'C' 'C' 'A' 'A' 'A' 'C' 'C' 'C

In [12]:
# Compare the predicted labels against the held-out true labels and report
# the resulting accuracy score.
accuracy = metrics.accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

Accuracy: 0.41333333333333333
