In [1]:
# Import dependencies

import pandas as pd
import numpy as np
import pickle
from sqlalchemy import create_engine
from config import my_password
from sklearn.preprocessing import StandardScaler
import petpy as pt
import geopy as gp
import gmaps
from config import petfinder_api_key
from config import petfinder_secret
from config import gkey
from geopy.geocoders import Nominatim

In [2]:
# Prompt the user for dog input
age = str(input("What age dog are you interested in adopting (baby, young, adult, senior)?"))
gender = str(input("What gender dog are you intersted in adopting (male, female)?"))
location = str(input("What state are you located in (e.g. WI)?"))

What age dog are you interested in adopting (baby, young, adult, senior)?baby
What gender dog are you intersted in adopting (male, female)?male
What state are you located in (e.g. WI)?wi


In [3]:
# Initiate petpy
pf = pt.Petfinder(key=petfinder_api_key, secret=petfinder_secret)

In [4]:
# Make API call to obtain DataFrame of adoptable dogs in Wisconsin
adoptable_dogs = pf.animals(animal_type='dog', status='adoptable', age=age, gender=gender, location=location, results_per_page=100, pages=300, return_df=True)

pages parameter exceeded maximum number of available pages available from the Petfinder API. As a result, the maximum number of pages 1 was returned


In [5]:
# Rename columns
adoptable_dogs = adoptable_dogs.rename(columns={"breeds.primary":"breeds_primary", "breeds.secondary":"breeds_secondary", "breeds.mixed":"breeds_mixed", "breeds.unknown":"breeds_unknown", "colors.primary":"colors_primary", "colors.secondary":"colors_secondary", "colors.tertiary":"colors_tertiary", "attributes.spayed_neutered":"spayed_neutered", "attributes.house_trained":"house_trained", "attributes.declawed":"declawed", "attributes.special_needs":"special_needs", "attributes.shots_current":"shots_current", "environment.children":"environment_children", "environment.dogs":"environment_dogs", "environment.cats":"environment_cats",
                                                  "primary_photo_cropped.small":"photo_small", "primary_photo_cropped.medium":"photo_medium", "primary_photo_cropped.large":"photo_large", "primary_photo_cropped.full":"photo_full", "contact.email":"email", "contact.phone":"phone", "contact.address.address1": "address_1", "contact.address.address2":"address_2", "contact.address.city":"city", "contact.address.state":"state", "contact.address.postcode":"postcode", "contact.address.country":"country"})

In [6]:
testing_dogs=adoptable_dogs[['id', 'age', 'gender', 'size', 'breeds_primary', 'breeds_mixed', 'breeds_unknown', 'spayed_neutered', 'house_trained', 
                            'special_needs', 'shots_current', 'city', 'state']]

In [7]:
# Create location column with City, State
testing_dogs['location'] = testing_dogs['city'] + ', ' + testing_dogs['state']
testing_dogs = testing_dogs.drop(['city', 'state'], axis=1)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


In [8]:
# Read dog adoptions DataFrame from PostgreSQL
db_string = f"postgresql://postgres:{my_password}@127.0.0.1:5432/PetFindingDB"
engine = create_engine(db_string)
populations_df = pd.read_sql("select * from \"populations\"", con=engine)

In [9]:
testing_dogs=testing_dogs.merge(populations_df, left_on='location', right_on='city')
testing_dogs = testing_dogs.drop(['city', 'location'], axis=1)

In [10]:
# Drop any rows with NaN values and confirm they have been dropped
testing_dogs=testing_dogs.dropna(how='any')

In [11]:
# Make copy of adoptions_df and encode boolean values
encoded_df = testing_dogs.copy()
testing_dogs[['breeds_mixed', 'breeds_unknown', 'spayed_neutered','house_trained', 'special_needs', 'shots_current']] = encoded_df[['breeds_mixed', 'breeds_unknown', 'spayed_neutered','house_trained', 'special_needs', 'shots_current']].astype(int)
encoded_df.head()

Unnamed: 0,id,age,gender,size,breeds_primary,breeds_mixed,breeds_unknown,spayed_neutered,house_trained,special_needs,shots_current,population
0,55818461,Baby,Male,Medium,Husky,True,False,False,False,False,False,7092
1,55818460,Baby,Male,Medium,Husky,True,False,False,False,False,False,7092
2,55803415,Baby,Male,Medium,Husky,True,False,False,False,False,False,7092
3,55815124,Baby,Male,Medium,Mixed Breed,False,False,False,False,False,True,74098
4,55815119,Baby,Male,Medium,Mixed Breed,False,False,False,False,False,True,74098


In [12]:
# Make copy of adoptions_df and encode boolean values
encoded_df = testing_dogs.copy()
testing_dogs[['breeds_mixed', 'breeds_unknown', 'spayed_neutered','house_trained', 'special_needs', 'shots_current']] = encoded_df[['breeds_mixed', 'breeds_unknown', 'spayed_neutered','house_trained', 'special_needs', 'shots_current']].astype(int)

In [13]:
# Encode gender column
encoded_df['gender'] = encoded_df['gender'].replace(['Female', 'Male'], [0,1])

In [14]:
# Encode age and size columns
encoded_df = pd.get_dummies(encoded_df, columns=['age', 'size'])

In [15]:
encoded_df['breed_pitbull'] = np.where(encoded_df['breeds_primary'] == 'Pit Bull Terrier', 1, 0)
encoded_df = encoded_df.drop(columns=["breeds_primary"])

In [16]:
def bucketPopulation(row):
    if row['population'] > 0 and row['population'] <= 10000:
        return '0 to 10,000'
    elif row['population'] > 10000 and row['population'] <= 50000:
        return '10,000 to 50,000'
    elif row['population'] > 50000 and row['population'] <= 100000:
        return '50,000 to 100,000'
    return 'greater than 100,000'

In [17]:
encoded_df['bucketed_population'] = encoded_df.apply(lambda row: bucketPopulation(row), axis=1)
encoded_df.drop('population', axis=1, inplace=True)

In [18]:
# Encode age and size columns
encoded_df = pd.get_dummies(encoded_df, columns=['bucketed_population'])

In [19]:
# Read dog adoptions DataFrame from PostgreSQL
db_string = f"postgresql://postgres:{my_password}@127.0.0.1:5432/PetFindingDB"
engine = create_engine(db_string)
standard_format = pd.read_sql("select * from \"standard_format\"", con=engine)

In [20]:
# Merge current dogs for adoption with the test data to get the same number of columns for model
encoded_df = pd.DataFrame.merge(encoded_df, standard_format, how='outer')

In [21]:
# Drop the test data
encoded_df = encoded_df[encoded_df['id'].notna()]

In [22]:
encoded_df = encoded_df.fillna(0)
encoded_df = encoded_df.drop(['index', 'duration'], axis=1)

In [23]:
# Creating a StandardScaler instance.
scaler = StandardScaler()

In [24]:
# Create features
X_test = encoded_df.drop(columns='id')
dog_ids = encoded_df['id']
X_scaler = scaler.fit(X_test)
X_test_scaled = X_scaler.transform(X_test)

In [25]:
#Load model
loaded_model = pickle.load(open('finalized_model.sav', 'rb'))

In [26]:
# Making predictions using the testing data.
predictions = loaded_model.predict(X_test_scaled)
predictions

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 1, 0, 0, 0, 0, 0])

In [27]:
predictions_df = pd.DataFrame.from_dict(dog_ids)
predictions_df['duration'] = predictions

In [28]:
predictions_df['duration'].value_counts()

0    51
1     1
Name: duration, dtype: int64

In [29]:
# Merge current dogs for adoption with the test data to get the same number of columns for model
viz_df = pd.DataFrame.merge(adoptable_dogs, predictions_df, how='left')

In [30]:
cold_dogs=viz_df.loc[viz_df['duration'] == 1]
hot_dogs= viz_df.loc[viz_df['duration'] == 0]

In [31]:
geolocator = Nominatim(user_agent='http')

In [32]:
cold_dogs['location'] = viz_df['city'] + ', ' + viz_df['state']
# cold_dogs = cold_dogs.drop(['city', 'state'], axis=1)
hot_dogs['location'] = viz_df['city'] + ', ' + viz_df['state']
# hot_dogs = cold_dogs.drop(['city', 'state'], axis=1)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until


In [33]:
cold_dogs[['lat', 'lon']] = cold_dogs['location'].apply(geolocator.geocode).apply(lambda x: pd.Series([x.latitude, x.longitude], index=['lat', 'lon']))
hot_dogs[['lat', 'lon']] = hot_dogs['location'].apply(geolocator.geocode).apply(lambda x: pd.Series([x.latitude, x.longitude], index=['lat', 'lon']))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self[k1] = value[k2]


In [34]:
# Configure gmaps to use your Google API key.
gmaps.configure(api_key=gkey)

In [35]:
# 9. Using the template add city name, the country code, the weather description and maximum temperature for the city.
info_box_template = """
<dl>
<dt>Name</dt><dd>{name}</dd>
<dt>Age</dt><dd>{age}</dd>
<dt>URL</dt><dd>{url}</dd>
<dt>Location</dt><dd>{location}</dd>
</dl>
"""

# 10a. Get the data from each row and add it to the formatting template and store the data in a list.
cold_dog_info = [info_box_template.format(**row) for index, row in cold_dogs.iterrows()]
hot_dog_info = [info_box_template.format(**row) for index, row in hot_dogs.iterrows()]

# 10b. Get the latitude and longitude from each row and store in a new DataFrame.
cold_locations = cold_dogs[["lat", "lon"]]
hot_locations = hot_dogs[["lat", "lon"]]

In [36]:
# 11a. Add a marker layer for each city to the map. 
cold_fig = gmaps.figure(center=(30.0, 31.0), zoom_level=1.5)
marker_layer = gmaps.marker_layer(cold_locations, info_box_content=cold_dog_info)
cold_fig.add_layer(marker_layer)

# 11b. Display the figure
cold_fig

Figure(layout=FigureLayout(height='420px'))

In [37]:
# 11a. Add a marker layer for each city to the map. 
hot_fig = gmaps.figure(center=(30.0, 31.0), zoom_level=1.5)
marker_layer = gmaps.marker_layer(hot_locations, info_box_content=hot_dog_info)
hot_fig.add_layer(marker_layer)

# 11b. Display the figure
hot_fig

Figure(layout=FigureLayout(height='420px'))