# Version 2.0 RecModel Decision Tree Classification  

Import the Libs needed for the script.

In [15]:
import pandas as pd
import numpy as np
import requests
import re
import seaborn
import matplotlib.pyplot as plt
import tensorflow as tf
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import cross_val_predict
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier

# Read Clean Dataset
Read the clean data after running the Cleaning_Data.py script to fill missing data and scalerize the features. This makes it easy to work with and make data discrete.

In [4]:
dataset = pd.read_csv('Cleaned_Dataset.csv')
print(dataset.shape)
dataset.head()

(223, 10)


Unnamed: 0,0,5,1,4,0.1,2,5.1,3,2.1,4.1
0,0,3,1,4,0,0,0,3,2,4
1,1,2,0,0,3,0,2,0,0,9
2,1,3,1,0,4,0,2,0,0,2
3,1,2,1,0,2,0,2,0,0,0
4,1,4,1,0,4,0,2,0,0,2


# Seperate Data Features vs. Targets
The seperate X_data represent the input values. 'PLACE' and 'target' represent the Credit card target. What should the model predict given X_data. Also this snipet of code converts Pandas => numpy/matrix data type to feed into model with the '.as_matrix()' function.

In [5]:
from sklearn.preprocessing import normalize
FEATURES = dataset.columns[0:8]
X_data = dataset[FEATURES].as_matrix()
X_data = normalize(X_data)
print(X_data.shape)
print(X_data)
PLACE = dataset.columns[9]
target = dataset[PLACE].as_matrix()
print(target.shape)


(223, 8)
[[0.         0.50709255 0.16903085 ... 0.         0.         0.50709255]
 [0.23570226 0.47140452 0.         ... 0.         0.47140452 0.        ]
 [0.1796053  0.53881591 0.1796053  ... 0.         0.3592106  0.        ]
 ...
 [0.2        0.6        0.2        ... 0.         0.4        0.        ]
 [0.21320072 0.85280287 0.21320072 ... 0.         0.         0.        ]
 [0.21821789 0.65465367 0.21821789 ... 0.         0.         0.        ]]
(223,)


  This is separate from the ipykernel package so we can avoid doing imports until
  


This snipet converts the single vector Y and turn it into a classification matrix with each class being the columns and the Rows being each Person/user.

In [6]:
from sklearn.preprocessing import normalize
num_classes = 14
num_of_samples = X_data.shape[0]
label = np.zeros((num_of_samples,num_classes),dtype='int64')
print(label)

[[0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]]


In [7]:
for i in range(0,223):
    if target[i]== 0:
        label[i,0]= 1
    elif target[i] == 1:
        label[i,1] = 1
    elif target[i] == 2:
        label[i,2] = 1
    elif target[i] == 3:
        label[i,3] = 1
    elif target[i] == 4:
        label[i,4] = 1
    elif target[i] == 5:
        label[i,5] = 1
    elif target[i] == 6:
        label[i,6] = 1
    elif target[i] == 7:
        label[i,7] = 1
    elif target[i] == 8:
        label[i,8] = 1
    elif target[i] == 9:
        label[i,9] = 1
    elif target[i] == 10:
        label[i,10] = 1
    elif target[i] == 11:
        label[i,11] = 1
    elif target[i] == 12:
        label[i,12] = 1
    else:
        label[i,13] = 1
Y = label
print(Y.shape)

(223, 14)


In [8]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X_data, Y, test_size=0.3, random_state=1)
print(y_test[20])

[0 1 0 0 0 0 0 0 0 0 0 0 0 0]


# The Model.
Use multiple decision trees to help make an accurate prediction for the type of credit cards that the customer should get. The first function is the code for a single decision tree and helps set the foundation for multiple decision trees. The second function is the application of multiple decision trees at once to create a RANDOM FORREST of trees. 

In [17]:
def dtc_predict_actual(X_train, y_train, X_test, y_test):

  dtc = DecisionTreeClassifier()
  dtc.fit(X_train, y_train)
  predictions = dtc.predict(X_test)

  return predictions, y_test

In [10]:
#Random Forest 
def rf_predict_actual(X_train, y_train, n_estimators):
  rfc = RandomForestClassifier(n_estimators=n_estimators)
  predicted = cross_val_predict(rfc, X_train, y_train, cv=10)

  return predicted, y_train

Test the accuracy 

In [11]:
def calculate_accuracy(predicted, actual):
    total = len(predicted)
    count = 0
    for i in range (0, len(predicted)):
        indicator = True
        flag = True
        for k in range(0,13):
            if predicted[i,k] != actual [i,k]:
                flag = False
            if flag == False:
                indicator = False
            flag = True 
        if indicator != False:
            count+=1
        indicator = True
    return count / total *100

# Test The Trees 

In [19]:
number_estimators = 1000 
predicted, actual = dtc_predict_actual(X_train, y_train, X_test, y_test)
print(calculate_accuracy(predicted, actual))
predicted1, actual1 = rf_predict_actual(X_train, y_train, number_estimators)
print(calculate_accuracy(predicted, actual))

80.59701492537313
80.59701492537313


In [21]:
test_subject = 3
test_value = predicted [test_subject-1:test_subject, :]
result = test_value
print(result)
num = np.argmax(actual)
real = np.argmax(actual[test_subject-1:test_subject, :])
print(actual[test_subject-1:test_subject, :])


[[0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]]
[[0 0 0 1 0 0 0 0 0 0 0 0 0 0]]


In [32]:
print("Model Prediction")
if num ==0:
    print("Predicted: American Express Cobalt")
elif num ==1:
    print("Predicted: American Express Essential")
elif num ==2:
    print("Predicted: American Express Marriot Bonvoy")
elif num ==3:
    print("Predicted: American Express Simply Cash")
elif num ==4:
    print("Predicted: American Express Simply Cash Preferred")
elif num ==5:
    print("Predicted: Meridian Visa Infinite")
elif num ==6:
    print("Predicted: PC Financial Mastercard")
elif num ==7:
    print("Predicted: Scotiabank Momentum No-Fee Visa")
elif num ==8:
    print("Predicted: Scotiabank Momentum Visa Infinite")
elif num ==9:
    print("Predicted: Scotiabank SCENE Visa")
elif num ==10:
    print("Predicted: True Line Gold Mastercard")
elif num ==11:
    print("Predicted: True Line Mastercard")
elif num ==12:
    print("Predicted: American Express Blue Sky")
elif num ==13:
    print("Predicted: Not Enough")

print()
    
print("Ground Truth: ")
num = real
if num ==0:
    print("Predicted: American Express Cobalt")
elif num ==1:
    print("Predicted: American Express Essential")
elif num ==2:
    print("Predicted: American Express Marriot Bonvoy")
elif num ==3:
    print("Predicted: American Express Simply Cash")
elif num ==4:
    print("Predicted: American Express Simply Cash Preferred")
elif num ==5:
    print("Predicted: Meridian Visa Infinite")
elif num ==6:
    print("Predicted: PC Financial Mastercard")
elif num ==7:
    print("Predicted: Scotiabank Momentum No-Fee Visa")
elif num ==8:
    print("Predicted: Scotiabank Momentum Visa Infinite")
elif num ==9:
    print("Predicted: Scotiabank SCENE Visa")
elif num ==10:
    print("Predicted: True Line Gold Mastercard")
elif num ==11:
    print("Predicted: True Line Mastercard")
elif num ==12:
    print("Predicted: American Express Blue Sky")
elif num ==13:
    print("Predicted: Not Enough")

Model Prediction
Predicted: American Express Cobalt

Ground Truth: 
Predicted: American Express Cobalt
