In [None]:
# Basic Libraries
import numpy as np
import pandas as pd
import seaborn as sb
import matplotlib.pyplot as plt # we only need pyplot
sb.set() # set the default Seaborn style for graphics

# Import the models, metrics, and helpers used below from Scikit-Learn and Keras
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix
from sklearn.tree import export_graphviz
from sklearn.svm import LinearSVC
from sklearn.model_selection import train_test_split
from tensorflow import keras

import json
import graphviz

In [None]:
# Read the training table from train.csv in the working directory and
# preview its first five rows.
animalData = pd.read_csv("train.csv")
animalData.head(n = 5)

In [None]:
# Attach the sentiment "magnitude" and "score" found in each pet's
# ./train_sentiment/<PetID>.json file to animalData as the columns
# "Magnitude" and "Score". Pets whose file is missing, unreadable, or
# malformed get 0.0 for both values.
petIDs = np.array(animalData["PetID"])
magnitude = []
score = []

for pet_id in petIDs:  # not `id`, which would shadow the builtin
    try:
        # Explicit UTF-8 so a platform-specific default encoding cannot turn
        # a valid file into a silent 0.0 fallback.
        with open("./train_sentiment/" + pet_id + '.json', encoding = "utf-8") as json_file:
            itemData = json.load(json_file)
        # Look up BOTH values before appending either one, so a failure on
        # the second lookup cannot leave the two lists with different lengths.
        sentiment = itemData["documentSentiment"]
        pet_magnitude, pet_score = sentiment["magnitude"], sentiment["score"]
    except (OSError, ValueError, KeyError, TypeError):
        # Best effort: OSError covers a missing/unreadable file, ValueError a
        # JSON or text decoding failure, KeyError/TypeError an unexpected
        # document layout or a non-string PetID. KeyboardInterrupt and other
        # non-data errors are deliberately NOT swallowed.
        pet_magnitude, pet_score = 0.0, 0.0
    magnitude.append(pet_magnitude)
    score.append(pet_score)

jsonData = {"Magnitude" : magnitude, "Score" : score}
# Reuse animalData's own index so rows line up positionally whatever the index is.
sentimentColumns = pd.DataFrame(data = jsonData, index = animalData.index)
# Drop any sentiment columns left by a previous run of this cell; without this
# a re-run fails because join() refuses overlapping column names.
animalData = animalData.drop(columns = list(jsonData), errors = "ignore").join(sentimentColumns)

In [None]:
# Columns excluded from the feature matrix; the target column
# "AdoptionSpeed" is among them so it cannot leak into the predictors.
excluded_columns = [
    "Name", "State", "RescuerID", "Description", "PetID", "AdoptionSpeed",
    "VideoAmt", "PhotoAmt", "Breed1", "Breed2", "Color1", "Color2", "Color3",
]
predictors = animalData.drop(columns = excluded_columns)

# The response is kept as a single-column DataFrame.
response = animalData["AdoptionSpeed"].to_frame()

In [None]:
# Hold out 25% of the rows for testing. The fixed random_state makes the
# split (and every score computed from it) identical on each run.
X_train, X_test, y_train, y_test = train_test_split(
    predictors, response, test_size = 0.25, random_state = 42
)

# Linear support-vector classifier; seeded so its fit is deterministic for a
# given training set.
svcModel = LinearSVC(random_state = 42)
# y_train is a single-column DataFrame; flatten it to 1-D, which is the shape
# the estimator expects (a column vector triggers a DataConversionWarning).
svcModel.fit(X_train, y_train.values.ravel())

# Predicted classes for both splits, used by the confusion-matrix cells.
y_train_pred = svcModel.predict(X_train)
y_test_pred = svcModel.predict(X_test)

In [None]:
# Report the linear SVC's classification accuracy, first on the training
# split and then on the test split, each followed by a blank line.
svc_reports = (
    ("Goodness of Fit of Model \tTrain Dataset", X_train, y_train),
    ("Goodness of Fit of Model \tTest Dataset", X_test, y_test),
)
for header, features, labels in svc_reports:
    print(header)
    print("Classification Accuracy \t:", svcModel.score(features, labels))
    print()

In [None]:
# Confusion matrix for the training split, built from the predictions
# currently held in y_train_pred (the LinearSVC ones when cells run in order).
matrix = confusion_matrix(y_true = y_train, y_pred = y_train_pred)
sb.heatmap(data = matrix, annot = True, annot_kws = {"size": 18}, fmt = ".0f")

In [None]:
# Confusion matrix for the test split, built from the predictions currently
# held in y_test_pred (the LinearSVC ones when cells run in order).
matrix = confusion_matrix(y_true = y_test, y_pred = y_test_pred)
sb.heatmap(data = matrix, annot = True, annot_kws = {"size": 18}, fmt = ".0f")

In [None]:
# Small fully-connected classifier with a 5-way softmax output.
# The input width is taken from X_train instead of being hard-coded, so the
# network keeps matching the feature matrix if the dropped-column list changes.
n_features = X_train.shape[1]

model = keras.Sequential([
    # NOTE(review): this first layer has no activation, i.e. it is linear —
    # confirm that is intended.
    keras.layers.Dense(13, input_shape = (n_features,)),
    keras.layers.Dense(8, activation = "elu"),
    keras.layers.Dense(8, activation = "selu"),
    keras.layers.Dense(5, activation = "softmax")
])

# sparse_categorical_crossentropy takes integer class ids as targets.
# Assumes AdoptionSpeed holds the ids 0-4 to match the 5 output units —
# TODO confirm against train.csv.
model.compile(optimizer = "adam", loss = "sparse_categorical_crossentropy", metrics = ["accuracy"])

In [None]:
# Train the network for 15 passes over the training split.
model.fit(x = X_train, y = y_train, epochs = 15)

# Evaluate on the held-out split; evaluate() returns the loss followed by the
# compiled metrics (accuracy here).
test_loss, test_acc = model.evaluate(x = X_test, y = y_test)
print("Test acc: ", test_acc)

In [None]:
# Turn the network's per-class outputs for the training rows into predicted
# class indices: for every row, the position of the largest output.
predictions = model.predict(X_train)
results = pd.DataFrame(np.argmax(predictions, axis = 1))

In [None]:
# Confusion matrix of the training labels against the class indices held in
# `results`.
matrix = confusion_matrix(y_true = y_train, y_pred = results)
sb.heatmap(data = matrix, annot = True, annot_kws = {"size": 18}, fmt = ".0f")

In [None]:
# Turn the network's per-class outputs for the test rows into predicted class
# indices: for every row, the position of the largest output.
predictions = model.predict(X_test)
results = pd.DataFrame(np.argmax(predictions, axis = 1))

In [None]:
# Confusion matrix of the test labels against the class indices held in
# `results`.
matrix = confusion_matrix(y_true = y_test, y_pred = results)
sb.heatmap(data = matrix, annot = True, annot_kws = {"size": 18}, fmt = ".0f")

In [None]:
# Decision Tree using Train Data
# max_depth caps the number of successive splits on any root-to-leaf path.
# A deeper tree can fit the training data more closely, but that does not by
# itself improve accuracy on unseen data.
# random_state fixes the tie-breaking between equally good splits, so the
# fitted tree is the same on every run for a given training set.
dectree = DecisionTreeClassifier(max_depth = 3, random_state = 42)  # create the decision tree object
dectree.fit(X_train, y_train)                                       # train the decision tree model

# Predict Response corresponding to Predictors
y_train_pred = dectree.predict(X_train)
y_test_pred = dectree.predict(X_test)

# Check the Goodness of Fit (on Train Data)
print("Goodness of Fit of Model \tTrain Dataset")
print("Classification Accuracy \t:", dectree.score(X_train, y_train))
print()

# Check the Goodness of Fit (on Test Data)
print("Goodness of Fit of Model \tTest Dataset")
print("Classification Accuracy \t:", dectree.score(X_test, y_test))
print()

# Plot the Confusion Matrix for Train (left) and Test (right)
f, axes = plt.subplots(1, 2, figsize=(12, 4))
sb.heatmap(confusion_matrix(y_train, y_train_pred),
           annot = True, fmt=".0f", annot_kws={"size": 18}, ax = axes[0])
sb.heatmap(confusion_matrix(y_test, y_test_pred),
           annot = True, fmt=".0f", annot_kws={"size": 18}, ax = axes[1])
# Label the panels so the figure can be read without the code next to it.
axes[0].set_title("Train")
axes[1].set_title("Test")

# Plot the Decision Tree
treedot = export_graphviz(dectree,                                      # the model
                          feature_names = list(X_train.columns),        # plain list: accepted by every scikit-learn release
                          out_file = None,                              # return the DOT source as a string
                          filled = True,                                # node colors
                          rounded = True,                               # rounded node boxes
                          special_characters = True)                    # keep special characters (not PostScript-safe)

# Last expression of the cell: renders the tree inline.
graphviz.Source(treedot)

# Random Forest

In [None]:
# Create the Random Forest object.
# random_state seeds the bootstrap sampling and per-split feature selection,
# so the forest is built the same way on every run for a given training set.
rforest = RandomForestClassifier(n_estimators = 100,  # n_estimators denote number of trees
                                 max_depth = 4,       # set the maximum depth of each tree
                                 random_state = 42)   # fixed seed for reproducible results

# Fit Random Forest on Train Data. y_train is a single-column DataFrame, so it
# is flattened to the 1-D label array the estimator expects.
rforest.fit(X_train, y_train.values.ravel())

In [None]:
# Predict the response for both splits with the fitted random forest
y_train_pred, y_test_pred = rforest.predict(X_train), rforest.predict(X_test)

# Report classification accuracy on the training split, then on the test
# split, each followed by a blank line.
forest_reports = (
    ("Goodness of Fit of Model \tTrain Dataset", X_train, y_train),
    ("Goodness of Fit of Model \tTest Dataset", X_test, y_test),
)
for header, features, labels in forest_reports:
    print(header)
    print("Classification Accuracy \t:", rforest.score(features, labels))
    print()

# Confusion matrices side by side: train on the left axis, test on the right
f, axes = plt.subplots(nrows = 1, ncols = 2, figsize = (12, 4))
heatmap_style = {"annot": True, "fmt": ".0f", "annot_kws": {"size": 18}}
sb.heatmap(confusion_matrix(y_train, y_train_pred), ax = axes[0], **heatmap_style)
sb.heatmap(confusion_matrix(y_test, y_test_pred), ax = axes[1], **heatmap_style)

# Esther