In [1]:
import requests
from requests.auth import HTTPDigestAuth
import os
import json
import time
import pandas as pd
import time
from pprint import pprint
from config import pet_finder_api_key, pet_finder_secret_key, gmaps_api_key
import geocoder
# NOTE(review): error_count is only referenced by a commented-out line in geomapper,
# so nothing in the visible code ever changes it from 0.
error_count = 0
# Root of every Petfinder v2 URL built by the request helpers in this notebook
petfinder_base_url = "https://api.petfinder.com/v2/"
# dictionary containing the address/latlong key-value pairs for addresses already geocoded
# Example:  geomapped_addresses = { "123 Main Street": "34,-120", "555 Elm Street Escondido, CA" : "33.21,-110.123" }
geomapped_addresses = {}

In [2]:
# Adapted from https://stackoverflow.com/questions/16891340/remove-a-prefix-from-a-string
def remove_prefix(s, prefix):
    """Return `s` with one leading occurrence of `prefix` removed.

    When `s` does not start with `prefix`, `s` is returned unchanged.
    """
    if not s.startswith(prefix):
        return s
    return s[len(prefix):]

In [3]:
# Geocoding helper for single addresses.
# Uses the geocoder library ----  https://geocoder.readthedocs.io/

def geomapper(address):
    """Geocode one address into a "lat,lng" string, reusing cached results.

    A single leading "," is stripped from the address first. Results are
    stored in the module-level `geomapped_addresses` dictionary so the same
    address is only sent to the geocoding API once. Every time the cache size
    reaches a multiple of 100, it is also dumped to
    "Resources/geomapped_addresses_temp.json" as a checkpoint.

    Parameters:
        address: the address text to geocode.

    Returns:
        The "lat,lng" string, or None when the address is not a string or the
        lookup fails or yields no coordinates.
    """
    # Non-string values (e.g. a missing value coming from a DataFrame) cannot be geocoded.
    if not isinstance(address, str):
        return None

    # remove leading "," from the address
    address = remove_prefix(address, ',')
    # check if address has already been geocoded, if so return that value from the geomapped_addresses dictionary
    if address in geomapped_addresses:
        return geomapped_addresses[address]

    try:
        g = geocoder.google(address, key=gmaps_api_key)
        result = g.json
        # Defensive: treat a missing/empty payload or missing coordinates as "no result".
        if not result or 'lat' not in result or 'lng' not in result:
            return None
        latlong = f"{result['lat']},{result['lng']}"
    except Exception as exc:
        # Catch Exception (not a bare except) so Ctrl-C / kernel interrupts still stop the run.
        print(f"geocoding failed for {address!r}: {exc}")
        return None

    # store the lat/long in the geomapped_addresses dictionary so we don't run the API on it again
    geomapped_addresses[address] = latlong

    if len(geomapped_addresses) % 100 == 0:
        print(f"{len(geomapped_addresses)} addresses processed")
        file_name = "Resources/geomapped_addresses_temp.json"
        try:
            with open(file_name, 'w') as fp:
                json.dump(geomapped_addresses, fp, sort_keys=True, indent=4)
        except OSError as exc:
            # A failed checkpoint must not throw away a successful geocode.
            print(f"could not write {file_name}: {exc}")
    return latlong

In [4]:
#Create function called locateAddresses that adds latitude and longitude columns for geomapping
#Assumes df has a column "address"
def locateAddresses(df):
    """Return a copy of `df` with "Lat" and "Lng" columns added.

    Each value of the "address" column is geocoded with `geomapper`. Before
    geocoding, previously saved results are loaded from
    "Resources/geomapped_addresses.json" (if that file exists) into the
    module-level `geomapped_addresses` cache; afterwards the cache is written
    back to the same file.

    Parameters:
        df: DataFrame containing an "address" column. It is not modified.

    Returns:
        A new DataFrame with the original columns plus "Lat" and "Lng".
        Both hold the text pieces of the "lat,lng" string and are missing for
        rows whose address could not be geocoded.
    """
    # Load any already geocoded addresses into geomapped_addresses dictionary
    file_name = "Resources/geomapped_addresses.json"
    if os.path.isfile(file_name):
        print("reading geomapped_addresses.json")
        with open(file_name, 'r') as fp:
            geomapped_addresses.update(json.load(fp))
        print(f"loaded {len(geomapped_addresses)} addresses")

    # Work on a real copy: pd.DataFrame(df) only wraps the same data, so
    # adding columns through it could leak back into the caller's frame.
    df_temp = df.copy()
    geo = df_temp["address"].apply(geomapper)

    # Split "lat,lng" into two columns. reindex guarantees that both columns
    # exist even when nothing was geocoded (the split then yields one column).
    coords = geo.str.split(',', expand=True).reindex(columns=[0, 1])
    df_temp['Lat'] = coords[0]
    df_temp['Lng'] = coords[1]

    # Save the geomapped_addresses dictionary to a file so we don't ever have to geomap addresses already done
    print(f"saving geomapped_addresses.json")
    os.makedirs(os.path.dirname(file_name), exist_ok=True)
    with open(file_name, 'w') as fp:
        json.dump(geomapped_addresses, fp, sort_keys=True, indent=4)

    # return our local copy of the dataframe.  make sure you assign the return value of this function to a new df.
    return df_temp

In [5]:
def getAccessToken():
    """Request a client-credentials access token from the Petfinder OAuth2 endpoint.

    Sends `pet_finder_api_key` / `pet_finder_secret_key` as a form-encoded
    POST body to `{petfinder_base_url}oauth2/token`.

    Returns:
        The "access_token" value of the JSON response, or None when
        `response.ok` is false.
    """
    # Background on POST bodies and headers with requests:
    # https://www.geeksforgeeks.org/get-post-requests-using-python/
    # https://stackoverflow.com/questions/8685790/adding-header-to-python-requests-module
    token_url = f'{petfinder_base_url}oauth2/token'
    form_fields = dict(
        grant_type="client_credentials",
        client_id=pet_finder_api_key,
        client_secret=pet_finder_secret_key,
    )
    response = requests.post(
        token_url,
        data=form_fields,
        headers={'Content-Type': 'application/x-www-form-urlencoded'},
    )
    if not response.ok:
        return None
    return json.loads(response.content)["access_token"]

In [6]:
# Fetch the bearer token that the request helpers read from the global `token`.
# The token is a credential, so only report whether one was obtained instead of
# printing it into the saved notebook output.
token = getAccessToken()
print("access token acquired" if token else "failed to obtain access token")

<access token redacted>


In [7]:
#function to get total pages of results
def totalPages(animal_type,status,limit,location):
    """Return the number of result pages for an animal search.

    Issues one GET against the `animals` endpoint with the given type,
    status, limit and location, authenticated with the global `token`, and
    prints the request URL.

    Returns:
        The `pagination.total_pages` value of the JSON response, or None
        (after printing "Not found") when `response.ok` is false.
    """
    # Request format:
    #   https://api.petfinder.com/v2/animals?type=dog&page=2
    #   header  Authorization: Bearer eyJ0eXA...
    url = f'{petfinder_base_url}animals?type={animal_type}&status={status}&limit={limit}&location={location}'
    print(url)
    response = requests.get(url, headers={'Authorization': f'Bearer {token}'})
    if not response.ok:
        print("Not found")
        return None
    payload = json.loads(response.content)
    return payload['pagination']['total_pages']

In [8]:
def fetchPet(animal_type,limit,status,page, location):
    """Fetch one page of animal search results.

    Issues a GET against the `animals` endpoint for the given type, limit,
    status, page and location, authenticated with the global `token`, and
    prints the request URL.

    Returns:
        The decoded JSON response, or None (after printing "Not found") when
        `response.ok` is false.
    """
    # Request format:
    #   https://api.petfinder.com/v2/animals?type=dog&page=2
    #   header  Authorization: Bearer eyJ0eXA...
    url = f'{petfinder_base_url}animals?type={animal_type}&limit={limit}&status={status}&page={page}&location={location}'
    print(url)
    response = requests.get(url, headers={'Authorization': f'Bearer {token}'})
    if not response.ok:
        print("Not found")
        return None
    return json.loads(response.content)

In [9]:
def _petRecord(row):
    """Flatten one entry of a response's "animals" list into a flat dict."""
    breeds = row['breeds']
    record = {
        'pet_id': row['id'],
        'organization_id': row['organization_id'],
        'url': row['url'],
        'type': row['type'],
        'primary breed': breeds['primary'],
        'secondary breed': breeds['secondary'],
        'mixed breed': breeds['mixed'],
        'age': row['age'],
        'gender': row['gender'],
        'size': row['size'],
        'photo1': '',
        'photo2': '',
        'photo3': '',
        'address1': '',
        'address2': '',
        'city': '',
        'state': '',
        'postcode': '',
        'email': '',
        'phone': ''
    }

    # Up to three photos; zip stops at the shorter side, so missing photos stay ''.
    photos = row.get('photos') or []
    for slot, photo in zip(('photo1', 'photo2', 'photo3'), photos):
        record[slot] = photo['full']

    # `or {}` also covers a contact/address that is present but null.
    contact = row.get('contact') or {}
    for key in ('email', 'phone'):
        if key in contact:
            record[key] = contact[key]

    address = contact.get('address') or {}
    for key in ('address1', 'address2', 'city', 'state', 'postcode'):
        if key in address:
            record[key] = address[key]

    return record


#To get total records
def getData(animal_type, limit, status, location):
    """Download every result page of an animal search into one DataFrame.

    Parameters:
        animal_type, limit, status, location: passed unchanged to
            `totalPages` and `fetchPet`.

    Returns:
        A DataFrame with one row per animal and the columns listed in
        `columns` below (in that order). The frame is empty, but still has
        those columns, when the search has no result pages.

    Raises:
        RuntimeError: when the page count or any page cannot be fetched.
            Failing here keeps callers from caching incomplete data.
    """
    columns = ["pet_id", "type", "primary breed", "secondary breed", "mixed breed", "size", "gender", "age", "photo1", "photo2", "organization_id", "phone", "address1", "address2", "city", "state", "postcode", "email"]

    total_pages = totalPages(animal_type, status, limit, location)
    if total_pages is None:
        raise RuntimeError(f"could not determine the number of result pages for {animal_type!r} in {location!r}")

    pets = []
    # Petfinder pages are numbered from 1, so the last page is total_pages
    # itself: the range must be inclusive.
    for page in range(1, total_pages + 1):
        petData = fetchPet(animal_type, limit, status, page, location)
        if petData is None:
            raise RuntimeError(f"could not fetch page {page} of {total_pages} for {animal_type!r} in {location!r}")
        #append to a list from another list
        #https://stackabuse.com/append-vs-extend-in-python-lists/
        pets.extend(_petRecord(row) for row in petData["animals"])

    # Passing `columns` selects and orders the output columns and also yields
    # a well-formed empty frame when no pets were returned.
    return pd.DataFrame(pets, columns=columns)

In [10]:
# load csv files if exist otherwise create by calling function getData
location = "San Diego, CA"
cats_file = "Resources/cats_city.csv"
if os.path.isfile(cats_file):
    # Read postcode as text so it matches the freshly fetched data and is not
    # turned into a number (which would drop leading zeros or append ".0").
    cats_city_df = pd.read_csv(cats_file, dtype={"postcode": str})
else:
    cats_city_df = getData(animal_type= "cat",limit = 100, status = "adoptable", location= location)
    # Make sure the cache directory exists before saving the expensive download.
    os.makedirs(os.path.dirname(cats_file), exist_ok=True)
    cats_city_df.to_csv(cats_file,header=True, index=False)
# Blank out missing values on BOTH paths; otherwise a fresh download keeps
# None values that later turn into the text "None" when addresses are built.
cats_city_df = cats_city_df.fillna("")

In [11]:
# load the dogs csv file if it exists otherwise create it by calling function getData
location = "San Diego, CA"
dogs_file = "Resources/dogs_city.csv"
if os.path.isfile(dogs_file):
    # Read postcode as text so it matches the freshly fetched data and is not
    # turned into a number (which would drop leading zeros or append ".0").
    dogs_city_df = pd.read_csv(dogs_file, dtype={"postcode": str})
else:
    dogs_city_df = getData(animal_type= "dog",limit = 100, status = "adoptable", location= location)
    # Make sure the cache directory exists before saving the expensive download.
    os.makedirs(os.path.dirname(dogs_file), exist_ok=True)
    dogs_city_df.to_csv(dogs_file,header=True, index=False)
# Blank out missing values on BOTH paths; otherwise a fresh download keeps
# None values that later turn into the text "None" when addresses are built.
dogs_city_df = dogs_city_df.fillna("")

In [12]:
# Preview only the first rows instead of rendering the whole frame
dogs_city_df.head(10)

Unnamed: 0,pet_id,type,primary breed,secondary breed,mixed breed,size,gender,age,photo1,photo2,organization_id,phone,address1,address2,city,state,postcode,email
0,44338661,Dog,Boxer,German Shorthaired Pointer,True,Large,Male,Adult,https://dl5zpyw5k3jeb.cloudfront.net/photos/pe...,https://dl5zpyw5k3jeb.cloudfront.net/photos/pe...,CA1048,714-241-9765,,,Santa Ana,CA,92780,poochmatch@hotmail.com
1,44338300,Dog,Labrador Retriever,,True,Medium,Female,Young,https://dl5zpyw5k3jeb.cloudfront.net/photos/pe...,https://dl5zpyw5k3jeb.cloudfront.net/photos/pe...,CA1530,619-990-7455,2307 Fenton Parkway #107-160,,San Diego,CA,92108,labradorsandfriends@yahoo.com
2,44337830,Dog,Poodle,,False,Small,Female,Senior,,,CA567,951.413.3790,14041 Elsworth Street,,Moreno Valley,CA,92553,animalshelter@moval.org
3,44337831,Dog,Miniature Pinscher,,False,Small,Male,Adult,,,CA567,951.413.3790,14041 Elsworth Street,,Moreno Valley,CA,92553,animalshelter@moval.org
4,44337828,Dog,Brittany Spaniel,Mixed Breed,True,Small,Male,Young,,,CA567,951.413.3790,14041 Elsworth Street,,Moreno Valley,CA,92553,animalshelter@moval.org
5,44337359,Dog,Rat Terrier,Jack Russell Terrier,True,Small,Female,Baby,https://dl5zpyw5k3jeb.cloudfront.net/photos/pe...,https://dl5zpyw5k3jeb.cloudfront.net/photos/pe...,CA1048,714-241-9765,,,Santa Ana,CA,92780,poochmatch@hotmail.com
6,44337314,Dog,Boxer,,True,Large,Female,Young,,,CA1874,(858) 273-5386,P.O. Box 178196,,San Diego,CA,92177,doginfo@labsandmore.org
7,44337315,Dog,Boxer,,True,Large,Female,Baby,,,CA1874,(858) 273-5386,P.O. Box 178196,,San Diego,CA,92177,doginfo@labsandmore.org
8,44337316,Dog,Boxer,,True,Large,Female,Baby,,,CA1874,(858) 273-5386,P.O. Box 178196,,San Diego,CA,92177,doginfo@labsandmore.org
9,44337317,Dog,Boxer,,True,Large,Male,Baby,,,CA1874,(858) 273-5386,P.O. Box 178196,,San Diego,CA,92177,doginfo@labsandmore.org


In [13]:
#combine cats_df and dogs_df
# ignore_index gives the combined frame a fresh 0..n-1 index; without it the
# row labels of the cats frame and the dogs frame would both appear, so the
# same label could refer to two different rows.
cats_dogs_city_df = pd.concat([cats_city_df, dogs_city_df], ignore_index=True)
# Preview only the first rows instead of rendering the whole frame
cats_dogs_city_df.head(10)

Unnamed: 0,pet_id,type,primary breed,secondary breed,mixed breed,size,gender,age,photo1,photo2,organization_id,phone,address1,address2,city,state,postcode,email
0,44337850,Cat,Domestic Medium Hair,,True,Medium,Female,Baby,,,CA216,(951) 688-4340,6165 Industrial Avenue,,Riverside,CA,92504,adopt1@petsadoption.org
1,44337851,Cat,Domestic Medium Hair,,True,Medium,Male,Baby,,,CA216,(951) 688-4340,6165 Industrial Avenue,,Riverside,CA,92504,adopt1@petsadoption.org
2,44337852,Cat,Domestic Medium Hair,,True,Medium,Female,Baby,,,CA216,(951) 688-4340,6165 Industrial Avenue,,Riverside,CA,92504,adopt1@petsadoption.org
3,44337849,Cat,Domestic Short Hair,,True,Medium,Male,Young,,,CA216,(951) 688-4340,6165 Industrial Avenue,,Riverside,CA,92504,adopt1@petsadoption.org
4,44337829,Cat,Domestic Short Hair,,False,Small,Male,Young,,,CA567,951.413.3790,14041 Elsworth Street,,Moreno Valley,CA,92553,animalshelter@moval.org
5,44337320,Cat,Tortoiseshell,,True,Small,Female,Young,,,CA759,(562) 544-0335,,,Long Beach,CA,90802,fishmjo@yahoo.com
6,44337321,Cat,Tabby,Domestic Short Hair,True,Medium,Male,Adult,,,CA759,(562) 544-0335,,,Long Beach,CA,90802,fishmjo@yahoo.com
7,44337319,Cat,Siamese,Ragdoll,True,Large,Male,Young,,,CA759,(562) 544-0335,,,Long Beach,CA,90802,fishmjo@yahoo.com
8,44337057,Cat,Domestic Short Hair,,False,Large,Male,Adult,,,CA257,(714) 935-6848,1630 Victory Road,,Tustin,CA,92782,OCCRocpetinfo@occr.ocgov.com
9,44337058,Cat,Siamese,,False,Medium,Female,Adult,,,CA257,(714) 935-6848,1630 Victory Road,,Tustin,CA,92782,OCCRocpetinfo@occr.ocgov.com


In [14]:
# Add a combined "address" column of the form "<address1>,<city>,<state> <postcode>".
# Each part is converted with str() first so non-text values can be concatenated.
cats_dogs_city_df['address'] = (
    cats_dogs_city_df['address1'].map(str)
    + ','
    + cats_dogs_city_df['city'].map(str)
    + ','
    + cats_dogs_city_df['state'].map(str)
    + ' '
    + cats_dogs_city_df['postcode'].map(str)
)
cats_dogs_city_df.head()

Unnamed: 0,pet_id,type,primary breed,secondary breed,mixed breed,size,gender,age,photo1,photo2,organization_id,phone,address1,address2,city,state,postcode,email,address
0,44337850,Cat,Domestic Medium Hair,,True,Medium,Female,Baby,,,CA216,(951) 688-4340,6165 Industrial Avenue,,Riverside,CA,92504,adopt1@petsadoption.org,"6165 Industrial Avenue,Riverside,CA 92504"
1,44337851,Cat,Domestic Medium Hair,,True,Medium,Male,Baby,,,CA216,(951) 688-4340,6165 Industrial Avenue,,Riverside,CA,92504,adopt1@petsadoption.org,"6165 Industrial Avenue,Riverside,CA 92504"
2,44337852,Cat,Domestic Medium Hair,,True,Medium,Female,Baby,,,CA216,(951) 688-4340,6165 Industrial Avenue,,Riverside,CA,92504,adopt1@petsadoption.org,"6165 Industrial Avenue,Riverside,CA 92504"
3,44337849,Cat,Domestic Short Hair,,True,Medium,Male,Young,,,CA216,(951) 688-4340,6165 Industrial Avenue,,Riverside,CA,92504,adopt1@petsadoption.org,"6165 Industrial Avenue,Riverside,CA 92504"
4,44337829,Cat,Domestic Short Hair,,False,Small,Male,Young,,,CA567,951.413.3790,14041 Elsworth Street,,Moreno Valley,CA,92553,animalshelter@moval.org,"14041 Elsworth Street,Moreno Valley,CA 92553"


In [15]:
# Number of distinct address strings (dropna=False counts a missing value too, like len(unique()))
print(cats_dogs_city_df["address"].nunique(dropna=False))

295


In [16]:
# Add latitude and longitude columns for gmapping using the locateAddresses function.
# The geocoded frame is cached as a CSV; it is only rebuilt when that file is missing.
city_all_pets_file = "Resources/city_allpets.csv"
if not os.path.isfile(city_all_pets_file):
    citypets_geo_df = locateAddresses(cats_dogs_city_df)
    citypets_geo_df.to_csv(city_all_pets_file, header=True, index=False)
else:
    citypets_geo_df = pd.read_csv(city_all_pets_file)
citypets_geo_df.head()

Unnamed: 0,pet_id,type,primary breed,secondary breed,mixed breed,size,gender,age,photo1,photo2,...,phone,address1,address2,city,state,postcode,email,address,Lat,Lng
0,44337850,Cat,Domestic Medium Hair,,True,Medium,Female,Baby,,,...,(951) 688-4340,6165 Industrial Avenue,,Riverside,CA,92504,adopt1@petsadoption.org,"6165 Industrial Avenue,Riverside,CA 92504",33.966201,-117.437
1,44337851,Cat,Domestic Medium Hair,,True,Medium,Male,Baby,,,...,(951) 688-4340,6165 Industrial Avenue,,Riverside,CA,92504,adopt1@petsadoption.org,"6165 Industrial Avenue,Riverside,CA 92504",33.966201,-117.437
2,44337852,Cat,Domestic Medium Hair,,True,Medium,Female,Baby,,,...,(951) 688-4340,6165 Industrial Avenue,,Riverside,CA,92504,adopt1@petsadoption.org,"6165 Industrial Avenue,Riverside,CA 92504",33.966201,-117.437
3,44337849,Cat,Domestic Short Hair,,True,Medium,Male,Young,,,...,(951) 688-4340,6165 Industrial Avenue,,Riverside,CA,92504,adopt1@petsadoption.org,"6165 Industrial Avenue,Riverside,CA 92504",33.966201,-117.437
4,44337829,Cat,Domestic Short Hair,,False,Small,Male,Young,,,...,951.413.3790,14041 Elsworth Street,,Moreno Valley,CA,92553,animalshelter@moval.org,"14041 Elsworth Street,Moreno Valley,CA 92553",33.915658,-117.270964


In [17]:
#TEST:  return all results for a specific primary breed within one city
# NOTE(review): this overwrites citypets_geo_df with the filtered subset, so
# re-running the cell with a different breed/city needs the previous cell re-run first.
# NOTE(review): `state` is assigned but not used by the filter below.
breed = "Chihuahua"
state = "CA"
city = "San Diego"
citypets_geo_df = citypets_geo_df[
    citypets_geo_df['primary breed'].eq(breed) & citypets_geo_df['city'].eq(city)
]
citypets_geo_df

Unnamed: 0,pet_id,type,primary breed,secondary breed,mixed breed,size,gender,age,photo1,photo2,...,phone,address1,address2,city,state,postcode,email,address,Lat,Lng
2774,44332737,Dog,Chihuahua,,True,Medium,Male,Adult,https://dl5zpyw5k3jeb.cloudfront.net/photos/pe...,https://dl5zpyw5k3jeb.cloudfront.net/photos/pe...,...,(619) 299 -7012,5500 Gaines Street,,San Diego,CA,92110,info@sdhumane.org,"5500 Gaines Street,San Diego,CA 92110",32.765668,-117.194161
2778,44332736,Dog,Chihuahua,,True,Medium,Male,Adult,https://dl5zpyw5k3jeb.cloudfront.net/photos/pe...,,...,(619) 299 -7012,5500 Gaines Street,,San Diego,CA,92110,info@sdhumane.org,"5500 Gaines Street,San Diego,CA 92110",32.765668,-117.194161
2982,44318051,Dog,Chihuahua,Terrier,True,Medium,Male,Adult,https://dl5zpyw5k3jeb.cloudfront.net/photos/pe...,https://dl5zpyw5k3jeb.cloudfront.net/photos/pe...,...,(619) 299 -7012,5500 Gaines Street,,San Diego,CA,92110,info@sdhumane.org,"5500 Gaines Street,San Diego,CA 92110",32.765668,-117.194161
3055,44309924,Dog,Chihuahua,,True,Medium,Male,Adult,https://dl5zpyw5k3jeb.cloudfront.net/photos/pe...,,...,(619) 299 -7012,5500 Gaines Street,,San Diego,CA,92110,info@sdhumane.org,"5500 Gaines Street,San Diego,CA 92110",32.765668,-117.194161
3056,44309921,Dog,Chihuahua,,True,Medium,Female,Senior,https://dl5zpyw5k3jeb.cloudfront.net/photos/pe...,https://dl5zpyw5k3jeb.cloudfront.net/photos/pe...,...,(619) 299 -7012,5500 Gaines Street,,San Diego,CA,92110,info@sdhumane.org,"5500 Gaines Street,San Diego,CA 92110",32.765668,-117.194161
3057,44309922,Dog,Chihuahua,,True,Medium,Female,Senior,https://dl5zpyw5k3jeb.cloudfront.net/photos/pe...,https://dl5zpyw5k3jeb.cloudfront.net/photos/pe...,...,(619) 299 -7012,5500 Gaines Street,,San Diego,CA,92110,info@sdhumane.org,"5500 Gaines Street,San Diego,CA 92110",32.765668,-117.194161
3079,44306389,Dog,Chihuahua,,True,Small,Male,Adult,https://dl5zpyw5k3jeb.cloudfront.net/photos/pe...,,...,(858) 273-5386,P.O. Box 178196,,San Diego,CA,92177,doginfo@labsandmore.org,"P.O. Box 178196,San Diego,CA 92177",32.824177,-117.231555
3151,44299589,Dog,Chihuahua,,True,Medium,Female,Senior,https://dl5zpyw5k3jeb.cloudfront.net/photos/pe...,https://dl5zpyw5k3jeb.cloudfront.net/photos/pe...,...,(619) 299 -7012,5500 Gaines Street,,San Diego,CA,92110,info@sdhumane.org,"5500 Gaines Street,San Diego,CA 92110",32.765668,-117.194161
3152,44299588,Dog,Chihuahua,,True,Medium,Male,Adult,https://dl5zpyw5k3jeb.cloudfront.net/photos/pe...,https://dl5zpyw5k3jeb.cloudfront.net/photos/pe...,...,(619) 299 -7012,5500 Gaines Street,,San Diego,CA,92110,info@sdhumane.org,"5500 Gaines Street,San Diego,CA 92110",32.765668,-117.194161
3216,44290229,Dog,Chihuahua,Dachshund,True,Small,Female,Adult,https://dl5zpyw5k3jeb.cloudfront.net/photos/pe...,https://dl5zpyw5k3jeb.cloudfront.net/photos/pe...,...,(858) 442-6585,,,San Diego,CA,92117,info@nwtlb.org,",San Diego,CA 92117",32.825077,-117.202936
