# About this notebook 

#### Objective: Petfinder Machine Learning


<div class="span5 alert alert-success">
<p> <I> Petfinder Machine Learning: </I> The objective for this report is to share machine learning results to build a model to predict the speed at which a pet is adopted.  The classification areas are...   
    <br>
0 - Pet was adopted on the same day it was listed  
1 - Pet was adopted between 1 and 7 days (1st week) after being listed  
2 - Pet was adopted between 8 and 30 days (1st month) after being listed  
3 - Pet was adopted between 31 and 90 days (2nd & 3rd month) after being listed  
4 – No adoption after 100 days of being listed. (There are no pets in this dataset that waited between 90 and 100 days   
 </p>
</div>

<div class="span5 alert alert-success">
<p> <I> Data fields: </I> For a list of the features available to predict the adoption rate visit the source data at: <br>   https://www.kaggle.com/c/petfinder-adoption-prediction/data 
 </p>
</div>

<div class="span5 alert alert-info">
<p> <B>  Imports and Data Loading: </B>  </p>
</div>

In [81]:
import warnings
warnings.filterwarnings('ignore')

%cd C:\Users\Ken\Documents\KenP\Applications-DataScience\SpringboardCourseWork\CapstoneProject2Repository\09 PetfindersData\TrainingData

C:\Users\Ken\Documents\KenP\Applications-DataScience\SpringboardCourseWork\CapstoneProject2Repository\09 PetfindersData\TrainingData


In [82]:
#Imports
import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression

from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.preprocessing import StandardScaler

from sklearn.neighbors import KNeighborsClassifier 

from sklearn.naive_bayes import GaussianNB

from sklearn.ensemble import RandomForestClassifier

from sklearn import svm

from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score

from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score

In [83]:
#Import the csv file
dfi = pd.read_csv('train.csv')

<div class="span5 alert alert-info">
<p> <B>  Machine Learning: </B> 

</p>
</div>

<div class="span5 alert alert-success">
<p> Prepare to run the algorithms
 </p>
</div>

In [84]:
#Drop the columns that are not needed
dfm = dfi.drop(['Name','RescuerID','Description','PetID'],axis=1)
dfm.head(1)

Unnamed: 0,Type,Age,Breed1,Breed2,Gender,Color1,Color2,Color3,MaturitySize,FurLength,Vaccinated,Dewormed,Sterilized,Health,Quantity,Fee,State,VideoAmt,PhotoAmt,AdoptionSpeed
0,1,2,0,26,2,2,0,0,2,1,1,1,2,1,1,0,41326,0,3,3


In [85]:
#Create the array
array = dfm.values
X = array[:,0:19]
Y = array[:,19]

In [86]:
#Create a training and test data set
test_size = 0.33
seed = 7
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=test_size,
random_state=seed)

<div class="span5 alert alert-success">
<p> Algorithm 1: Logistic Regression
 </p>
</div>

In [87]:
#Algorithm 1: Fit the model
model = LogisticRegression(multi_class='multinomial', solver='newton-cg')

model.fit(X_train,Y_train)

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='multinomial',
          n_jobs=1, penalty='l2', random_state=None, solver='newton-cg',
          tol=0.0001, verbose=0, warm_start=False)

In [88]:
#Algorithm 1: Predict the labels of the test set
Y_pred = model.predict(X_test)

In [89]:
#Algorithm 1: Score the model
print('CONFUSION MATRIX FOR LOGISTIC REGRESSION')
print(confusion_matrix(Y_test, Y_pred))

print()

print('CLASSIFICATION REPORT')
print(classification_report(Y_test, Y_pred))

print()

print('ACCURACY SCORE')
print(accuracy_score(Y_test,Y_pred))

CONFUSION MATRIX FOR LOGISTIC REGRESSION
[[  0  41  30   9  53]
 [  0 236 340  71 377]
 [  0 225 428 148 559]
 [  0 122 287 173 475]
 [  0 158 247  85 884]]

CLASSIFICATION REPORT
             precision    recall  f1-score   support

          0       0.00      0.00      0.00       133
          1       0.30      0.23      0.26      1024
          2       0.32      0.31      0.32      1360
          3       0.36      0.16      0.22      1057
          4       0.38      0.64      0.48      1374

avg / total       0.33      0.35      0.32      4948


ACCURACY SCORE
0.3478172999191593


<div class="span5 alert alert-success">
<p> Algorithm 2: K Nearest Neighbors (KNN)
 </p>
</div>

In [90]:
#Algorithm 2: Fit the model
model = KNeighborsClassifier(n_neighbors=3)

model.fit(X_train,Y_train)

KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=1, n_neighbors=3, p=2,
           weights='uniform')

In [91]:
#Algorithm 2: Predict the labels of the test set
Y_pred = model.predict(X_test)

In [92]:
#Algorithm 2: Score the model
print('CONFUSION MATRIX FOR KNN')
print(confusion_matrix(Y_test, Y_pred))

print()

print('CLASSIFICATION REPORT')
print(classification_report(Y_test, Y_pred))

print()

print('ACCURACY SCORE')
print(accuracy_score(Y_test,Y_pred))

CONFUSION MATRIX FOR KNN
[[ 17  53  30  10  23]
 [ 62 438 262 125 137]
 [ 88 478 406 205 183]
 [ 54 324 268 231 180]
 [ 54 367 320 168 465]]

CLASSIFICATION REPORT
             precision    recall  f1-score   support

          0       0.06      0.13      0.08       133
          1       0.26      0.43      0.33      1024
          2       0.32      0.30      0.31      1360
          3       0.31      0.22      0.26      1057
          4       0.47      0.34      0.39      1374

avg / total       0.34      0.31      0.32      4948


ACCURACY SCORE
0.31467259498787387


<div class="span5 alert alert-success">
<p> Algorithm 3: Naive Bayes
 </p>
</div>

In [93]:
#Algorithm 3: Fit the model
model = GaussianNB()

model.fit(X_train,Y_train)

GaussianNB(priors=None)

In [94]:
#Algorithm 3: Predict the labels of the test set
Y_pred = model.predict(X_test)

In [95]:
#Algorithm 3: Score the model
print('CONFUSION MATRIX FOR NAIVE-BAYES')
print(confusion_matrix(Y_test, Y_pred))

print()

print('CLASSIFICATION REPORT')
print(classification_report(Y_test, Y_pred))

print()

print('ACCURACY SCORE')
print(accuracy_score(Y_test,Y_pred))

CONFUSION MATRIX FOR NAIVE-BAYES
[[  5  80  12  15  21]
 [ 21 575 222  86 120]
 [ 24 625 340 138 233]
 [ 23 402 234 174 224]
 [ 27 452 239  90 566]]

CLASSIFICATION REPORT
             precision    recall  f1-score   support

          0       0.05      0.04      0.04       133
          1       0.27      0.56      0.36      1024
          2       0.32      0.25      0.28      1360
          3       0.35      0.16      0.22      1057
          4       0.49      0.41      0.45      1374

avg / total       0.36      0.34      0.33      4948


ACCURACY SCORE
0.3354890864995958


<div class="span5 alert alert-success">
<p> Algorithm 4: Random Forest Classifier
 </p>
</div>

In [96]:
#Algorithm 4: Fit the model
model = RandomForestClassifier()

model.fit(X_train,Y_train)

RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=1,
            oob_score=False, random_state=None, verbose=0,
            warm_start=False)

In [97]:
#Algorithm 4: Predict the labels of the test set
Y_pred = model.predict(X_test)

In [98]:
#Algorithm 4: Score the model
print('CONFUSION MATRIX FOR RANDOM FOREST CLASSIFIER')
print(confusion_matrix(Y_test, Y_pred))

print()

print('CLASSIFICATION REPORT')
print(classification_report(Y_test, Y_pred))

print()

print('ACCURACY SCORE')
print(accuracy_score(Y_test,Y_pred))

CONFUSION MATRIX FOR RANDOM FOREST CLASSIFIER
[[  2  47  39  24  21]
 [ 22 351 327 147 177]
 [ 13 336 498 245 268]
 [  6 217 326 260 248]
 [  7 207 296 201 663]]

CLASSIFICATION REPORT
             precision    recall  f1-score   support

          0       0.04      0.02      0.02       133
          1       0.30      0.34      0.32      1024
          2       0.34      0.37      0.35      1360
          3       0.30      0.25      0.27      1057
          4       0.48      0.48      0.48      1374

avg / total       0.35      0.36      0.35      4948


ACCURACY SCORE
0.35852869846402585


<div class="span5 alert alert-success">
<p> Algorithm 5: SVM (SVM)
 </p>
</div>

In [None]:
#Algorithm 5: Fit the model
model = svm.SVC(kernel='linear')

model.fit(X_train,Y_train)

In [None]:
#Algorithm 2: Predict the labels of the test set
Y_pred = model.predict(X_test)

In [None]:
#Algorithm 2: Score the model
print('CONFUSION MATRIX FOR KNN')
print(confusion_matrix(Y_test, Y_pred))

print()

print('CLASSIFICATION REPORT')
print(classification_report(Y_test, Y_pred))

print()

print('ACCURACY SCORE')
print(accuracy_score(Y_test,Y_pred))