# Machine Learning

### Will a dog get adopted?
* Predict the outcome type (for example, adoption or transfer) of a shelter dog from its intake attributes.

In [None]:
%matplotlib inline

import numpy as np
import pandas as pd

from sklearn.preprocessing import StandardScaler, Normalizer, MinMaxScaler
from sklearn.feature_extraction import DictVectorizer
from sklearn.pipeline import Pipeline

from sklearn.svm import LinearSVC
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import BaggingClassifier

from sklearn.model_selection import GridSearchCV, cross_val_score
from sklearn.metrics import precision_score, accuracy_score, f1_score, recall_score


import warnings
# Silence every warning category for the remainder of the notebook.
# NOTE(review): this is a blanket filter, so pandas/scikit-learn warnings raised
# by later cells are hidden as well -- consider narrowing it while debugging.
warnings.filterwarnings('ignore')

In [None]:
# Read the shelter records and key each row by its animal id.
unique_df = (
    pd.read_csv("../data/unique_austin_shelter.csv")
    .set_index("animal_id")
)

In [None]:
# Reduced feature set: sex, neuter status, length of stay and age at intake.
features1 = ['sex', 'fixed', 'time_in_shelter', 'age_in']

# Full feature set: the breed and intake descriptors followed by the reduced set.
features = ['breed', 'intake_condition', 'intake_type'] + features1

In [None]:
# Keep only the rows whose `in_shelter` flag is "No" (presumably dogs that have
# already left the shelter and therefore have a final outcome -- confirm against
# the data dictionary) and restrict the frame to the candidate features plus the
# `outcome_type` target.
#
# A single .loc lookup replaces the chained df[mask][cols] indexing, the column
# list is built from `features` instead of being typed out a second time, and
# .copy() makes the result an independent frame so that later in-place edits
# operate on this frame only.
new_unique_df = unique_df.loc[
    unique_df.in_shelter == "No",
    features + ['outcome_type'],
].copy()

In [None]:
# Discard every row that has a missing value in any of the selected columns.
new_unique_df = new_unique_df.dropna()

**Hyperparameter Tuning**
1. Features
2. Scaling
3. K (the `n_neighbors` value of `KNeighborsClassifier`)

## Defining Training Data

In [None]:
# The training set is the first 5000 rows of the cleaned frame (taken in file
# order, no shuffling).
y_train = new_unique_df['outcome_type'].iloc[:5000]

# DictVectorizer consumes one {column: value} dict per row.
x_train_dict = (
    new_unique_df[features1]
    .iloc[:5000]
    .to_dict(orient="records")
)

In [None]:
# Turn the row dicts into a dense numeric matrix (sparse=False): string-valued
# fields are one-hot encoded, numeric fields are passed through.
vec = DictVectorizer(sparse=False)
x_train = vec.fit_transform(x_train_dict)

# Rescale each column with MinMaxScaler (default output range [0, 1]).
scaler = MinMaxScaler()
x_train_sc = scaler.fit_transform(x_train)

In [None]:
# Fresh, unfitted preprocessing steps for the pipeline. Note that this rebinds
# `vec` and `scaler`, so from here on those names no longer refer to the fitted
# objects that produced `x_train` / `x_train_sc`.
vec = DictVectorizer(sparse=False)
scaler = StandardScaler()

# `model` has to exist before the pipeline is assembled; without this line the
# cell raises NameError on a fresh kernel because the estimator is only created
# in a later cell. That later cell rebinds `model` to another default
# KNeighborsClassifier, which is equivalent to the instance held here.
model = KNeighborsClassifier()

# vectorize -> scale -> classify, operating directly on the row dicts.
pipeline = Pipeline([
    ("vectorizer", vec),
    ("scaler", scaler),
    ("model", model),
])

## Model: KNeighborsClassifier

In [None]:
# Estimator under study plus the candidate values for hyperparameter search.
model = KNeighborsClassifier()
n_neighbors = np.arange(1, 11)  # K = 1 .. 10
scalers = [
    Normalizer(),
    StandardScaler(),
    MinMaxScaler(),
]

In [None]:
# Show the parameter names the pipeline exposes; these are the keys that a
# GridSearchCV param_grid may use (nested step parameters included).
pipeline.get_params(deep=True).keys()

In [None]:
# Search jointly over the scaling strategy and K. `n_neighbors` was defined as a
# candidate list but never handed to the search, so K silently stayed at the
# estimator default; "model__n_neighbors" routes it to the pipeline's "model"
# step.
param_grid = {
    "scaler": scalers,
    "model__n_neighbors": n_neighbors,
}

# 5-fold cross-validation over every (scaler, K) combination.
grid = GridSearchCV(pipeline, param_grid=param_grid, cv=5)
grid.fit(x_train_dict, y_train)

In [None]:
# Mean cross-validated score of the best parameter combination.
print(grid.best_score_)
# best_estimator_ is the whole Pipeline, which has no n_neighbors attribute of
# its own; K lives on the classifier registered as the "model" step.
print(grid.best_estimator_.named_steps["model"].n_neighbors)

In [None]:
# Tabulate the per-combination cross-validation results for inspection.
cv_results = pd.DataFrame(grid.cv_results_)
cv_results

## Model: RandomForestClassifier

In [None]:
# Switch the estimator under study to a random forest. random_state is pinned so
# that repeated runs build the same forest and report the same metrics.
model = RandomForestClassifier(random_state=42)

# Fit on the scaled training matrix. The following cells call model.predict,
# which raises NotFittedError on an estimator that has never been fitted.
model.fit(x_train_sc, y_train)

In [None]:
# Predict on the scaled training matrix itself, so every score computed from
# `y_train_pred` below is a training-set metric, not a held-out estimate.
y_train_pred = model.predict(x_train_sc)

**TRAINING**

In [None]:
# Fraction of training rows whose predicted outcome equals the true outcome.
train_accuracy = accuracy_score(y_train, y_train_pred)
print("Training Accuracy")
print(train_accuracy)

In [None]:
# Macro-averaged precision: the unweighted mean of the per-class precisions.
# This is the value the previous `average=None` + `.mean()` produced; the
# `pos_label="Adoption"` argument is dropped because scikit-learn ignores it
# whenever `average` is not 'binary', so it only suggested a per-class score
# that was never computed.
print("Training Precision")
print(precision_score(y_train, y_train_pred, average="macro"))

In [None]:
# Macro-averaged recall: the unweighted mean of the per-class recalls.
# This is the value the previous `average=None` + `.mean()` produced; the
# `pos_label="Adoption"` argument is dropped because scikit-learn ignores it
# whenever `average` is not 'binary', so it only suggested a per-class score
# that was never computed.
print("Training Recall")
print(recall_score(y_train, y_train_pred, average="macro"))

In [None]:
# Hand-computed precision and recall for the single class "Transfer".
predicted_transfer = (y_train_pred == "Transfer")
actual_transfer = (y_train == "Transfer")

# Rows that were predicted "Transfer" and really are "Transfer".
true_positives = (predicted_transfer & actual_transfer).sum()

# precision = TP / predicted positives, recall = TP / actual positives
precision = true_positives / predicted_transfer.sum()
recall = true_positives / actual_transfer.sum()

precision, recall

In [None]:
# Rebuild the pipeline around the current `model` so that vectorizing and
# scaling are re-fitted inside every cross-validation fold.
steps = [("vectorizer", vec), ("scaler", scaler), ("model", model)]
pipeline = Pipeline(steps)

# Mean accuracy over 10 folds of the training data.
fold_scores = cross_val_score(
    pipeline, x_train_dict, y_train, cv=10, scoring="accuracy"
)
print(fold_scores.mean())

In [None]:
# A single hypothetical dog, described with the same columns as the training
# features, used to try out a prediction.
new_dog = pd.DataFrame({
    'breed': ['German Shepherd Mix'],
    'intake_condition': ['Normal'],
    'intake_type': ['Owner Surrender'],
    'sex': ['Male'],
    'fixed': ['No'],
    'time_in_shelter': ['3 days 07:44:00.000000000'],
    'age_in': [4.0],
})

In [None]:
# Convert the one-row frame into the list-of-dicts form the vectorizer expects.
# A separate name is used so `new_dog` stays a DataFrame and the cell can be
# re-run.
new_dog_records = new_dog.to_dict(orient="records")

# By this point `vec` and `scaler` refer to unfitted instances (they were
# rebound when the pipeline was first built) and cross_val_score only fits
# clones, so calling their transform() directly fails. Fitting the pipeline
# trains the vectorizer, the scaler and the classifier together on the training
# data, which guarantees the new row is encoded and scaled exactly the way the
# classifier saw its training rows.
pipeline.fit(x_train_dict, y_train)

preds = pipeline.predict(new_dog_records)


In [None]:
# Display the predicted outcome label(s) for the hypothetical dog.
preds

In [None]:
# Peek at the first ten rows of the cleaned modelling frame.
new_unique_df.head(10)