# Decision Tree with Mock Data

In [1]:
# import dependencies
import pandas as pd
from path import Path
from sklearn import tree
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report

In [9]:
# location of the mock dataset (relative to the notebook's working directory)
data = 'mock_data.csv'

# read the CSV and index the rows by the unique County_FIPS_Code value in one chain
mock_df = pd.read_csv(data).set_index("County_FIPS_Code")

# preview the first rows
mock_df.head()

Unnamed: 0_level_0,Hesitancy_Level,SVI_Level,CVAC_Concern_Level,Mask_Requirement_Status
County_FIPS_Code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,2,5,3,1
1,1,1,3,1
2,2,2,1,0
3,2,4,5,0
4,1,3,3,0


In [10]:
# feature matrix: an independent copy of the data with the target column removed
X = mock_df.copy().drop(columns="Mask_Requirement_Status")
X.head()

Unnamed: 0_level_0,Hesitancy_Level,SVI_Level,CVAC_Concern_Level
County_FIPS_Code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,2,5,3
1,1,1,3
2,2,2,1
3,2,4,5
4,1,3,3


In [11]:
# target labels pulled out of the frame as a NumPy array
y = mock_df["Mask_Requirement_Status"].to_numpy()
y[:5]

array([1, 1, 0, 0, 0], dtype=int64)

In [13]:
# partition features and labels into training and testing subsets
# (the fixed seed makes the same rows land in each subset on every run)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=78,
)

In [14]:
# no size argument was given above, so the split uses the 75/25 default;
# print the shape of each subset to confirm
for subset in (X_train, X_test, y_train, y_test):
    print(subset.shape)

(750, 3)
(250, 3)
(750,)
(250,)


In [25]:
# alternative split: request 80% of the rows for training via train_size
X_train2, X_test2, y_train2, y_test2 = train_test_split(
    X,
    y,
    train_size=0.80,
    random_state=78,
)

In [26]:
# print the shape of each subset produced by the 80/20 split
for subset in (X_train2, X_test2, y_train2, y_test2):
    print(subset.shape)

(800, 3)
(200, 3)
(800,)
(200,)


In [27]:
# create the StandardScaler instance that will standardize the feature columns
scaler = StandardScaler()

In [28]:
# fit StandardScaler on the training features only (the default 75/25 split),
# so the test features play no part in the scaling parameters
X_scaler = scaler.fit(X_train)

In [29]:
# apply the scaler fitted on the training data to both feature sets
X_train_scaled, X_test_scaled = (
    X_scaler.transform(features) for features in (X_train, X_test)
)


In [30]:
# create decision tree classifier instance
# random_state is fixed (same seed as the train/test split) because scikit-learn
# randomly permutes the features at each split; without a seed, re-running the
# notebook can grow a different tree and report different metrics
model = tree.DecisionTreeClassifier(random_state=78)

In [31]:
# train the classifier on the scaled training features and their labels;
# fit() returns the estimator itself, so no re-assignment is needed, and the
# trailing semicolon keeps the cell from echoing the estimator repr
model.fit(X_train_scaled, y_train);

In [34]:
# predict class labels for the scaled test features
predictions = model.predict(X_test_scaled)
# echo the predicted labels as the cell output
predictions

array([1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0,
       1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1,
       1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,
       0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0,
       1, 1, 0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
       1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1,
       0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1,
       0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0,
       1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1,
       0, 1, 1, 0, 0, 1, 0, 0], dtype=int64)

In [50]:
# Model Evaluation:

# share of test rows whose predicted label equals the true label
acc_score = accuracy_score(y_test, predictions)

# confusion matrix as a labelled table: rows are true classes, columns are predicted classes
cm = confusion_matrix(y_test, predictions)
row_labels = ["Actual 0", "Actual 1"]
col_labels = ["Predicted 0", "Predicted 1"]
cm_df = pd.DataFrame(cm, index=row_labels, columns=col_labels)

# display results, with a shared divider line between the sections
divider = "---------------------------------------"
print("\n*****Mock Decision Tree Model Evaluation*****\n")
print(f"Accuracy Score : {acc_score}")
print(divider)
print("Confusion Matrix")
display(cm_df)
print(divider)
print("Classification Report\n")
print(classification_report(y_test, predictions))



*****Mock Decision Tree Model Evaluation*****

Accuracy Score : 0.472
---------------------------------------
Confusion Matrix


Unnamed: 0,Predicted 0,Predicted 1
Actual 0,74,43
Actual 1,89,44


---------------------------------------
Classification Report

              precision    recall  f1-score   support

           0       0.45      0.63      0.53       117
           1       0.51      0.33      0.40       133

    accuracy                           0.47       250
   macro avg       0.48      0.48      0.46       250
weighted avg       0.48      0.47      0.46       250

