In [None]:
# Basic Libraries
import numpy as np
import pandas as pd
import seaborn as sb
import matplotlib.pyplot as plt # we only need pyplot
sb.set() # set the default Seaborn style for graphics

# Import Decision Tree Classifier model from Scikit-Learn
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import confusion_matrix
from sklearn.tree import export_graphviz
from sklearn.svm import LinearSVC
from sklearn.model_selection import train_test_split

import json
import graphviz

In [None]:
# Read the training table from train.csv (resolved against the current working
# directory) and preview its first rows as the cell output.
animalData = pd.read_csv("train.csv")
animalData.head()

In [None]:
# Attach per-pet sentiment values to animalData as "Magnitude" and "Score".
# For every PetID the file ./train_sentiment/<PetID>.json is read and the
# "magnitude" and "score" entries of its "documentSentiment" object are taken.
# Pets whose file is missing, unreadable or incomplete get 0.0 for both values.
petIDs = np.array(animalData["PetID"])
magnitude = []
score = []

for petID in petIDs:
  # Start from the neutral fallback and only replace it once BOTH values have
  # been read; this keeps the two lists in lockstep (exactly one entry per pet)
  # even when a file contains one key but not the other.
  petMagnitude, petScore = 0.0, 0.0
  try:
    # JSON is UTF-8 by specification; do not depend on the platform default.
    with open("./train_sentiment/" + petID + ".json", encoding="utf-8") as json_file:
      sentiment = json.load(json_file)["documentSentiment"]
    petMagnitude, petScore = sentiment["magnitude"], sentiment["score"]
  except (OSError, ValueError, KeyError, TypeError):
    # Best-effort by design: a missing/unreadable file (OSError), malformed or
    # mis-encoded JSON (ValueError), an absent key (KeyError) or an unexpected
    # value type such as a non-string PetID (TypeError) falls back to 0.0.
    # Anything else (e.g. KeyboardInterrupt) is allowed to propagate.
    pass
  magnitude.append(petMagnitude)
  score.append(petScore)

jsonData = {"Magnitude" : magnitude, "Score" : score}
# assign() places the lists row-by-row regardless of animalData's index labels
# and overwrites existing columns, so this cell can be re-run without the
# "columns overlap" error that DataFrame.join raises on a second execution.
animalData = animalData.assign(**jsonData)

In [None]:
# Split animalData into model inputs and target.
# The columns listed below are removed from the inputs; every remaining column
# is used as a predictor. The target is kept as a one-column DataFrame.
excludedColumns = [
  "Name",
  "State",
  "RescuerID",
  "Description",
  "PetID",
  "AdoptionSpeed",
  "VideoAmt",
  "PhotoAmt",
]
predictors = animalData.drop(columns = excludedColumns)
response = animalData["AdoptionSpeed"].to_frame()

In [None]:
# Hold out 25% of the rows as a test set, fit a linear SVM classifier on the
# remaining rows, and store its predictions for both splits.

# Fixed seed so the split and the fitted model (and therefore every score and
# confusion matrix derived from them) are the same on each Restart & Run All.
RANDOM_STATE = 42

X_train, X_test, y_train, y_test = train_test_split(
  predictors, response, test_size = 0.25, random_state = RANDOM_STATE)

svcModel = LinearSVC(random_state = RANDOM_STATE)

# scikit-learn classifiers expect a 1-D target; flattening here avoids the
# DataConversionWarning raised when y is passed as a single-column table.
# y_train itself is left unchanged for the cells that use it afterwards.
svcModel.fit(X_train, np.ravel(y_train))

y_train_pred = svcModel.predict(X_train)
y_test_pred = svcModel.predict(X_test)

In [None]:
# Print the classification accuracy of svcModel on the train split and then on
# the test split; each report is a heading line, the accuracy, and a blank line.
accuracyReports = (
  ("Goodness of Fit of Model \tTrain Dataset", X_train, y_train),
  ("Goodness of Fit of Model \tTest Dataset", X_test, y_test),
)

for heading, features, labels in accuracyReports:
  print(heading)
  print("Classification Accuracy \t:", svcModel.score(features, labels))
  print()

In [None]:
# Confusion matrix of the model's predictions on the train split.
# confusion_matrix puts the true classes on the rows and the predicted classes
# on the columns, so the heatmap axes are labelled accordingly.
matrix = confusion_matrix(y_train, y_train_pred)

# Draw on an explicitly created axes so this plot never lands on top of a
# figure left over from another cell, and so it can be titled and labelled.
fig, ax = plt.subplots()
sb.heatmap(matrix, annot = True, fmt=".0f", annot_kws={"size": 18}, ax = ax)
ax.set(title = "Confusion Matrix (Train Dataset)",
       xlabel = "Predicted label",
       ylabel = "True label")
plt.show()

In [None]:
# Confusion matrix of the model's predictions on the test split.
# confusion_matrix puts the true classes on the rows and the predicted classes
# on the columns, so the heatmap axes are labelled accordingly.
matrix = confusion_matrix(y_test, y_test_pred)

# Draw on an explicitly created axes so this plot never lands on top of a
# figure left over from another cell, and so it can be titled and labelled.
fig, ax = plt.subplots()
sb.heatmap(matrix, annot = True, fmt=".0f", annot_kws={"size": 18}, ax = ax)
ax.set(title = "Confusion Matrix (Test Dataset)",
       xlabel = "Predicted label",
       ylabel = "True label")
plt.show()