In [1]:
# Dependencies
import json
import requests
import pandas as pd
from config import petfinder_api_key, petfinder_secret_key

In [2]:
# Request token for the Petfinder API
data = {
  'grant_type': 'client_credentials',
  'client_id': petfinder_api_key,
  'client_secret': petfinder_secret_key
}

token_response = requests.post('https://api.petfinder.com/v2/oauth2/token', data=data)
token_response_json = token_response.json()

print(json.dumps(token_response_json, indent = 4))

{
    "token_type": "Bearer",
    "expires_in": 3600,
    "access_token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJSUzI1NiIsImp0aSI6Ijk0MTcyNWU0YmJmYTA5YWNmZmJhN2ExNWM4NTI1YmUyZjM1MzE3ODE5OTc2YjJmYTExYTlkMWQ0MjNiYTA1YzQ4M2FhMWMxZGVkNGFkMjNkIn0.eyJhdWQiOiJIaGFpVjNWVkdZY3NJbjVwNXoxSWZvOVd3TWdIWXlERDBrYXdQYXFmYjhsd1k1dzhDZyIsImp0aSI6Ijk0MTcyNWU0YmJmYTA5YWNmZmJhN2ExNWM4NTI1YmUyZjM1MzE3ODE5OTc2YjJmYTExYTlkMWQ0MjNiYTA1YzQ4M2FhMWMxZGVkNGFkMjNkIiwiaWF0IjoxNTgyMzM0NTc0LCJuYmYiOjE1ODIzMzQ1NzQsImV4cCI6MTU4MjMzODE3NCwic3ViIjoiIiwic2NvcGVzIjpbXX0.qNKAZQwHwVgcNwAFgX5VkW4K-FqQmiCjQ9V7bs37ErJJJIWYXw2HqERkRZ52f18pYTdmExmMKYVGM7gDxyvRaQbWQZ4mMPnfvqyajHzcGRp9go7Zyt98RUk0rDcudjFIi8b6ipZxNyjS1RC40n4Bi_pcWu7jYpxCsCjdXM3bJaex0cU3AcQsFoeQO7eitSTaMrxncm6H1VcFhBX5-ITvUByWQ8FWTH3TkoHevllXmdUSHsYECt7_pwnsrBL7bSLgFtoe9lxWXTPNEKLkS_1Q-YfyxPQB8UCcMpDw4nOdpStrS35siRaJluAaiXe3WV3OblaDLmzaFpll8LrIhhwuew"
}


In [3]:
# Pull data from petfinder.com API 
headers = {'Authorization': f"Bearer {token_response_json['access_token']}"}
url = "https://api.petfinder.com/v2/animals?"
response = requests.get(url, headers=headers)
response_json = response.json()

print(json.dumps(response_json, indent = 4))

{
    "animals": [
        {
            "id": 47441916,
            "organization_id": "FL185",
            "url": "https://www.petfinder.com/cat/beau-47441916/fl/clearwater/humane-society-of-pinellas-fl185/?referrer_id=96f6bcc2-4d0b-4971-9351-30a7a4f94a88",
            "type": "Cat",
            "species": "Cat",
            "breeds": {
                "primary": "Domestic Medium Hair",
                "secondary": null,
                "mixed": true,
                "unknown": false
            },
            "colors": {
                "primary": "Orange & White",
                "secondary": null,
                "tertiary": null
            },
            "age": "Baby",
            "gender": "Male",
            "size": "Medium",
            "coat": null,
            "attributes": {
                "spayed_neutered": true,
                "house_trained": false,
                "declawed": false,
                "special_needs": false,
                "shots_current": true
       

In [None]:
# Extract out list of animal dictionaries (also known as records)
animal_records = response_json['animals']
animal_records

In [None]:
# Use Pandas to directly convert list of records to DataFrame
animals_df = pd.DataFrame(animal_records)
animals_df

In [11]:
# API has record limit for each pages and need to loop through multiple pages 
current_page_number = 600
num_pages_to_fetch = 1100
is_first_page = False
all_pet_records = []

print('--- Data Processing Started! ---')

while current_page_number < num_pages_to_fetch:
    # Increment count
    current_page_number += 1
    print(f"Processing batch #{current_page_number}")
    
    # Perform API call
    # Configure headers and search parameters
    headers = {'Authorization': f"Bearer {token_response_json['access_token']}"}
               
    params = {
        # Can add any more search parameters found at: https://www.petfinder.com/developers/v2/docs/#get-animals
        'limit': 100 # default is 20, increased to 100 items per page
    }
    
    # Pull data from petfinder.com API 
    url = f"https://api.petfinder.com/v2/animals?page={current_page_number}"
    response = requests.get(url, headers=headers, params=params)
    response_json = response.json()

    # Print out each API call's 'pagination' dictionary that describes if there is a next page, other info
    print(response_json['pagination'])
    
    # Pull out records and add them to our list (app_pet_records)
    animal_records = response_json['animals']
    all_pet_records += animal_records

print('--- Data Processing Completed! ---')

--- Data Processing Started! ---
Processing batch #601
{'count_per_page': 100, 'total_count': 8009951, 'current_page': 601, 'total_pages': 80100, '_links': {'previous': {'href': '/v2/animals?limit=100&page=600'}, 'next': {'href': '/v2/animals?limit=100&page=602'}}}
Processing batch #602
{'count_per_page': 100, 'total_count': 8009951, 'current_page': 602, 'total_pages': 80100, '_links': {'previous': {'href': '/v2/animals?limit=100&page=601'}, 'next': {'href': '/v2/animals?limit=100&page=603'}}}
Processing batch #603
{'count_per_page': 100, 'total_count': 8009951, 'current_page': 603, 'total_pages': 80100, '_links': {'previous': {'href': '/v2/animals?limit=100&page=602'}, 'next': {'href': '/v2/animals?limit=100&page=604'}}}
Processing batch #604
{'count_per_page': 100, 'total_count': 8009951, 'current_page': 604, 'total_pages': 80100, '_links': {'previous': {'href': '/v2/animals?limit=100&page=603'}, 'next': {'href': '/v2/animals?limit=100&page=605'}}}
Processing batch #605
{'count_per_p

{'count_per_page': 100, 'total_count': 8009959, 'current_page': 636, 'total_pages': 80100, '_links': {'previous': {'href': '/v2/animals?limit=100&page=635'}, 'next': {'href': '/v2/animals?limit=100&page=637'}}}
Processing batch #637
{'count_per_page': 100, 'total_count': 8009959, 'current_page': 637, 'total_pages': 80100, '_links': {'previous': {'href': '/v2/animals?limit=100&page=636'}, 'next': {'href': '/v2/animals?limit=100&page=638'}}}
Processing batch #638
{'count_per_page': 100, 'total_count': 8009959, 'current_page': 638, 'total_pages': 80100, '_links': {'previous': {'href': '/v2/animals?limit=100&page=637'}, 'next': {'href': '/v2/animals?limit=100&page=639'}}}
Processing batch #639
{'count_per_page': 100, 'total_count': 8009959, 'current_page': 639, 'total_pages': 80100, '_links': {'previous': {'href': '/v2/animals?limit=100&page=638'}, 'next': {'href': '/v2/animals?limit=100&page=640'}}}
Processing batch #640
{'count_per_page': 100, 'total_count': 8009959, 'current_page': 640,

{'count_per_page': 100, 'total_count': 8009961, 'current_page': 672, 'total_pages': 80100, '_links': {'previous': {'href': '/v2/animals?limit=100&page=671'}, 'next': {'href': '/v2/animals?limit=100&page=673'}}}
Processing batch #673
{'count_per_page': 100, 'total_count': 8009960, 'current_page': 673, 'total_pages': 80100, '_links': {'previous': {'href': '/v2/animals?limit=100&page=672'}, 'next': {'href': '/v2/animals?limit=100&page=674'}}}
Processing batch #674
{'count_per_page': 100, 'total_count': 8009960, 'current_page': 674, 'total_pages': 80100, '_links': {'previous': {'href': '/v2/animals?limit=100&page=673'}, 'next': {'href': '/v2/animals?limit=100&page=675'}}}
Processing batch #675
{'count_per_page': 100, 'total_count': 8009960, 'current_page': 675, 'total_pages': 80100, '_links': {'previous': {'href': '/v2/animals?limit=100&page=674'}, 'next': {'href': '/v2/animals?limit=100&page=676'}}}
Processing batch #676
{'count_per_page': 100, 'total_count': 8009960, 'current_page': 676,

{'count_per_page': 100, 'total_count': 8009961, 'current_page': 708, 'total_pages': 80100, '_links': {'previous': {'href': '/v2/animals?limit=100&page=707'}, 'next': {'href': '/v2/animals?limit=100&page=709'}}}
Processing batch #709
{'count_per_page': 100, 'total_count': 8009961, 'current_page': 709, 'total_pages': 80100, '_links': {'previous': {'href': '/v2/animals?limit=100&page=708'}, 'next': {'href': '/v2/animals?limit=100&page=710'}}}
Processing batch #710
{'count_per_page': 100, 'total_count': 8009962, 'current_page': 710, 'total_pages': 80100, '_links': {'previous': {'href': '/v2/animals?limit=100&page=709'}, 'next': {'href': '/v2/animals?limit=100&page=711'}}}
Processing batch #711
{'count_per_page': 100, 'total_count': 8009962, 'current_page': 711, 'total_pages': 80100, '_links': {'previous': {'href': '/v2/animals?limit=100&page=710'}, 'next': {'href': '/v2/animals?limit=100&page=712'}}}
Processing batch #712
{'count_per_page': 100, 'total_count': 8009962, 'current_page': 712,

{'count_per_page': 100, 'total_count': 8009963, 'current_page': 744, 'total_pages': 80100, '_links': {'previous': {'href': '/v2/animals?limit=100&page=743'}, 'next': {'href': '/v2/animals?limit=100&page=745'}}}
Processing batch #745
{'count_per_page': 100, 'total_count': 8009963, 'current_page': 745, 'total_pages': 80100, '_links': {'previous': {'href': '/v2/animals?limit=100&page=744'}, 'next': {'href': '/v2/animals?limit=100&page=746'}}}
Processing batch #746
{'count_per_page': 100, 'total_count': 8009963, 'current_page': 746, 'total_pages': 80100, '_links': {'previous': {'href': '/v2/animals?limit=100&page=745'}, 'next': {'href': '/v2/animals?limit=100&page=747'}}}
Processing batch #747
{'count_per_page': 100, 'total_count': 8009963, 'current_page': 747, 'total_pages': 80100, '_links': {'previous': {'href': '/v2/animals?limit=100&page=746'}, 'next': {'href': '/v2/animals?limit=100&page=748'}}}
Processing batch #748
{'count_per_page': 100, 'total_count': 8009963, 'current_page': 748,

{'count_per_page': 100, 'total_count': 8009965, 'current_page': 780, 'total_pages': 80100, '_links': {'previous': {'href': '/v2/animals?limit=100&page=779'}, 'next': {'href': '/v2/animals?limit=100&page=781'}}}
Processing batch #781
{'count_per_page': 100, 'total_count': 8009966, 'current_page': 781, 'total_pages': 80100, '_links': {'previous': {'href': '/v2/animals?limit=100&page=780'}, 'next': {'href': '/v2/animals?limit=100&page=782'}}}
Processing batch #782
{'count_per_page': 100, 'total_count': 8009967, 'current_page': 782, 'total_pages': 80100, '_links': {'previous': {'href': '/v2/animals?limit=100&page=781'}, 'next': {'href': '/v2/animals?limit=100&page=783'}}}
Processing batch #783
{'count_per_page': 100, 'total_count': 8009967, 'current_page': 783, 'total_pages': 80100, '_links': {'previous': {'href': '/v2/animals?limit=100&page=782'}, 'next': {'href': '/v2/animals?limit=100&page=784'}}}
Processing batch #784
{'count_per_page': 100, 'total_count': 8009967, 'current_page': 784,

{'count_per_page': 100, 'total_count': 8009969, 'current_page': 816, 'total_pages': 80100, '_links': {'previous': {'href': '/v2/animals?limit=100&page=815'}, 'next': {'href': '/v2/animals?limit=100&page=817'}}}
Processing batch #817
{'count_per_page': 100, 'total_count': 8009969, 'current_page': 817, 'total_pages': 80100, '_links': {'previous': {'href': '/v2/animals?limit=100&page=816'}, 'next': {'href': '/v2/animals?limit=100&page=818'}}}
Processing batch #818
{'count_per_page': 100, 'total_count': 8009969, 'current_page': 818, 'total_pages': 80100, '_links': {'previous': {'href': '/v2/animals?limit=100&page=817'}, 'next': {'href': '/v2/animals?limit=100&page=819'}}}
Processing batch #819
{'count_per_page': 100, 'total_count': 8009969, 'current_page': 819, 'total_pages': 80100, '_links': {'previous': {'href': '/v2/animals?limit=100&page=818'}, 'next': {'href': '/v2/animals?limit=100&page=820'}}}
Processing batch #820
{'count_per_page': 100, 'total_count': 8009969, 'current_page': 820,

{'count_per_page': 100, 'total_count': 8009969, 'current_page': 852, 'total_pages': 80100, '_links': {'previous': {'href': '/v2/animals?limit=100&page=851'}, 'next': {'href': '/v2/animals?limit=100&page=853'}}}
Processing batch #853
{'count_per_page': 100, 'total_count': 8009969, 'current_page': 853, 'total_pages': 80100, '_links': {'previous': {'href': '/v2/animals?limit=100&page=852'}, 'next': {'href': '/v2/animals?limit=100&page=854'}}}
Processing batch #854
{'count_per_page': 100, 'total_count': 8009969, 'current_page': 854, 'total_pages': 80100, '_links': {'previous': {'href': '/v2/animals?limit=100&page=853'}, 'next': {'href': '/v2/animals?limit=100&page=855'}}}
Processing batch #855
{'count_per_page': 100, 'total_count': 8009969, 'current_page': 855, 'total_pages': 80100, '_links': {'previous': {'href': '/v2/animals?limit=100&page=854'}, 'next': {'href': '/v2/animals?limit=100&page=856'}}}
Processing batch #856
{'count_per_page': 100, 'total_count': 8009969, 'current_page': 856,

{'count_per_page': 100, 'total_count': 8009966, 'current_page': 888, 'total_pages': 80100, '_links': {'previous': {'href': '/v2/animals?limit=100&page=887'}, 'next': {'href': '/v2/animals?limit=100&page=889'}}}
Processing batch #889
{'count_per_page': 100, 'total_count': 8009966, 'current_page': 889, 'total_pages': 80100, '_links': {'previous': {'href': '/v2/animals?limit=100&page=888'}, 'next': {'href': '/v2/animals?limit=100&page=890'}}}
Processing batch #890
{'count_per_page': 100, 'total_count': 8009967, 'current_page': 890, 'total_pages': 80100, '_links': {'previous': {'href': '/v2/animals?limit=100&page=889'}, 'next': {'href': '/v2/animals?limit=100&page=891'}}}
Processing batch #891
{'count_per_page': 100, 'total_count': 8009967, 'current_page': 891, 'total_pages': 80100, '_links': {'previous': {'href': '/v2/animals?limit=100&page=890'}, 'next': {'href': '/v2/animals?limit=100&page=892'}}}
Processing batch #892
{'count_per_page': 100, 'total_count': 8009967, 'current_page': 892,

{'count_per_page': 100, 'total_count': 8009965, 'current_page': 924, 'total_pages': 80100, '_links': {'previous': {'href': '/v2/animals?limit=100&page=923'}, 'next': {'href': '/v2/animals?limit=100&page=925'}}}
Processing batch #925
{'count_per_page': 100, 'total_count': 8009965, 'current_page': 925, 'total_pages': 80100, '_links': {'previous': {'href': '/v2/animals?limit=100&page=924'}, 'next': {'href': '/v2/animals?limit=100&page=926'}}}
Processing batch #926
{'count_per_page': 100, 'total_count': 8009965, 'current_page': 926, 'total_pages': 80100, '_links': {'previous': {'href': '/v2/animals?limit=100&page=925'}, 'next': {'href': '/v2/animals?limit=100&page=927'}}}
Processing batch #927
{'count_per_page': 100, 'total_count': 8009965, 'current_page': 927, 'total_pages': 80100, '_links': {'previous': {'href': '/v2/animals?limit=100&page=926'}, 'next': {'href': '/v2/animals?limit=100&page=928'}}}
Processing batch #928
{'count_per_page': 100, 'total_count': 8009965, 'current_page': 928,

{'count_per_page': 100, 'total_count': 8009957, 'current_page': 960, 'total_pages': 80100, '_links': {'previous': {'href': '/v2/animals?limit=100&page=959'}, 'next': {'href': '/v2/animals?limit=100&page=961'}}}
Processing batch #961
{'count_per_page': 100, 'total_count': 8009957, 'current_page': 961, 'total_pages': 80100, '_links': {'previous': {'href': '/v2/animals?limit=100&page=960'}, 'next': {'href': '/v2/animals?limit=100&page=962'}}}
Processing batch #962
{'count_per_page': 100, 'total_count': 8009957, 'current_page': 962, 'total_pages': 80100, '_links': {'previous': {'href': '/v2/animals?limit=100&page=961'}, 'next': {'href': '/v2/animals?limit=100&page=963'}}}
Processing batch #963
{'count_per_page': 100, 'total_count': 8009957, 'current_page': 963, 'total_pages': 80100, '_links': {'previous': {'href': '/v2/animals?limit=100&page=962'}, 'next': {'href': '/v2/animals?limit=100&page=964'}}}
Processing batch #964
{'count_per_page': 100, 'total_count': 8009957, 'current_page': 964,

{'count_per_page': 100, 'total_count': 8009963, 'current_page': 996, 'total_pages': 80100, '_links': {'previous': {'href': '/v2/animals?limit=100&page=995'}, 'next': {'href': '/v2/animals?limit=100&page=997'}}}
Processing batch #997
{'count_per_page': 100, 'total_count': 8009963, 'current_page': 997, 'total_pages': 80100, '_links': {'previous': {'href': '/v2/animals?limit=100&page=996'}, 'next': {'href': '/v2/animals?limit=100&page=998'}}}
Processing batch #998
{'count_per_page': 100, 'total_count': 8009963, 'current_page': 998, 'total_pages': 80100, '_links': {'previous': {'href': '/v2/animals?limit=100&page=997'}, 'next': {'href': '/v2/animals?limit=100&page=999'}}}
Processing batch #999
{'count_per_page': 100, 'total_count': 8009963, 'current_page': 999, 'total_pages': 80100, '_links': {'previous': {'href': '/v2/animals?limit=100&page=998'}, 'next': {'href': '/v2/animals?limit=100&page=1000'}}}
Processing batch #1000
{'count_per_page': 100, 'total_count': 8009963, 'current_page': 10

{'count_per_page': 100, 'total_count': 8009960, 'current_page': 1031, 'total_pages': 80100, '_links': {'previous': {'href': '/v2/animals?limit=100&page=1030'}, 'next': {'href': '/v2/animals?limit=100&page=1032'}}}
Processing batch #1032
{'count_per_page': 100, 'total_count': 8009960, 'current_page': 1032, 'total_pages': 80100, '_links': {'previous': {'href': '/v2/animals?limit=100&page=1031'}, 'next': {'href': '/v2/animals?limit=100&page=1033'}}}
Processing batch #1033
{'count_per_page': 100, 'total_count': 8009959, 'current_page': 1033, 'total_pages': 80100, '_links': {'previous': {'href': '/v2/animals?limit=100&page=1032'}, 'next': {'href': '/v2/animals?limit=100&page=1034'}}}
Processing batch #1034
{'count_per_page': 100, 'total_count': 8009960, 'current_page': 1034, 'total_pages': 80100, '_links': {'previous': {'href': '/v2/animals?limit=100&page=1033'}, 'next': {'href': '/v2/animals?limit=100&page=1035'}}}
Processing batch #1035
{'count_per_page': 100, 'total_count': 8009960, 'cur

{'count_per_page': 100, 'total_count': 8010073, 'current_page': 1066, 'total_pages': 80101, '_links': {'previous': {'href': '/v2/animals?limit=100&page=1065'}, 'next': {'href': '/v2/animals?limit=100&page=1067'}}}
Processing batch #1067
{'count_per_page': 100, 'total_count': 8010075, 'current_page': 1067, 'total_pages': 80101, '_links': {'previous': {'href': '/v2/animals?limit=100&page=1066'}, 'next': {'href': '/v2/animals?limit=100&page=1068'}}}
Processing batch #1068
{'count_per_page': 100, 'total_count': 8010074, 'current_page': 1068, 'total_pages': 80101, '_links': {'previous': {'href': '/v2/animals?limit=100&page=1067'}, 'next': {'href': '/v2/animals?limit=100&page=1069'}}}
Processing batch #1069
{'count_per_page': 100, 'total_count': 8010073, 'current_page': 1069, 'total_pages': 80101, '_links': {'previous': {'href': '/v2/animals?limit=100&page=1068'}, 'next': {'href': '/v2/animals?limit=100&page=1070'}}}
Processing batch #1070
{'count_per_page': 100, 'total_count': 8010073, 'cur

KeyError: 'pagination'

In [12]:
len(all_pet_records)

49900

In [13]:
large_animals_df = pd.DataFrame(all_pet_records)

# Show number of rows/columns
large_animals_df.shape

(49900, 23)

In [7]:
# Look at first 5 records
large_animals_df.head(5)

Unnamed: 0,id,organization_id,url,type,species,breeds,colors,age,gender,size,...,tags,name,description,photos,status,status_changed_at,published_at,distance,contact,_links
0,47429065,NY48,https://www.petfinder.com/dog/stevie-47429065/...,Dog,Dog,"{'primary': 'Boxer', 'secondary': 'Mastiff', '...","{'primary': None, 'secondary': None, 'tertiary...",Baby,Female,Small,...,[],STEVIE,Meet Stevie! This adorable purebred Boxer pupp...,[{'small': 'https://dl5zpyw5k3jeb.cloudfront.n...,adoptable,2020-02-20T19:15:03+0000,2020-02-20T19:15:03+0000,,"{'email': 'adoptions@sasfinc.org', 'phone': '(...","{'self': {'href': '/v2/animals/47429065'}, 'ty..."
1,47429067,NY48,https://www.petfinder.com/dog/luna-47429067/ny...,Dog,Dog,"{'primary': 'Pit Bull Terrier', 'secondary': N...","{'primary': None, 'secondary': None, 'tertiary...",Baby,Female,Small,...,[],LUNA,Luna is an adorable Pit Bull / Jack Russell Te...,[{'small': 'https://dl5zpyw5k3jeb.cloudfront.n...,adoptable,2020-02-20T19:15:03+0000,2020-02-20T19:15:03+0000,,"{'email': 'adoptions@sasfinc.org', 'phone': '(...","{'self': {'href': '/v2/animals/47429067'}, 'ty..."
2,47429066,NY48,https://www.petfinder.com/dog/ella-47429066/ny...,Dog,Dog,{'primary': 'Australian Cattle Dog / Blue Heel...,"{'primary': None, 'secondary': None, 'tertiary...",Baby,Female,Small,...,[],ELLA,Meet Ella! This sweet puppy came to SASF from ...,[{'small': 'https://dl5zpyw5k3jeb.cloudfront.n...,adoptable,2020-02-20T19:15:03+0000,2020-02-20T19:15:03+0000,,"{'email': 'adoptions@sasfinc.org', 'phone': '(...","{'self': {'href': '/v2/animals/47429066'}, 'ty..."
3,47429068,CA1044,https://www.petfinder.com/dog/mayo-47429068/ca...,Dog,Dog,"{'primary': 'Great Dane', 'secondary': 'Mixed ...","{'primary': None, 'secondary': None, 'tertiary...",Baby,Female,Small,...,[],MAYO,Mayo and Mustard are large breed rescue mutts ...,[{'small': 'https://dl5zpyw5k3jeb.cloudfront.n...,adoptable,2020-02-20T19:15:03+0000,2020-02-20T19:15:03+0000,,"{'email': 'infopa@petsinneed.org', 'phone': '(...","{'self': {'href': '/v2/animals/47429068'}, 'ty..."
4,47429070,CA1044,https://www.petfinder.com/dog/mustard-47429070...,Dog,Dog,"{'primary': 'Great Dane', 'secondary': 'Mixed ...","{'primary': None, 'secondary': None, 'tertiary...",Baby,Male,Small,...,[],MUSTARD,Mayo and Mustard are large breed rescue mutts ...,[{'small': 'https://dl5zpyw5k3jeb.cloudfront.n...,adoptable,2020-02-20T19:15:03+0000,2020-02-20T19:15:03+0000,,"{'email': 'infopa@petsinneed.org', 'phone': '(...","{'self': {'href': '/v2/animals/47429070'}, 'ty..."


In [None]:
list(large_animals_df.columns)

In [14]:
# Export data frame to csv file
large_animals_df.to_csv('petfinder_data_21Feb2020_2.csv', encoding='utf-8', index=False)

In [11]:
# Perform data cleaning by filtering and keeping necessary column only
petfinder_adoption_df = large_animals_df.loc[:, ["id",
                                             "organization_id",
                                             "url",
                                             "type",
                                             "breeds",
                                             "colors",
                                             "age",
                                             "gender",
                                             "name",
                                             "status",
                                             "contact",
                                             "_links"]]

petfinder_adoption_df.head()


Unnamed: 0,id,organization_id,url,type,breeds,colors,age,gender,name,status,contact,_links
0,47423986,PA753,https://www.petfinder.com/dog/james-47423986/p...,Dog,"{'primary': 'Labrador Retriever', 'secondary':...","{'primary': 'Yellow / Tan / Blond / Fawn', 'se...",Baby,Male,James.,adoptable,"{'email': 'hopeforhannahrescue@gmail.com', 'ph...","{'self': {'href': '/v2/animals/47423986'}, 'ty..."
1,47432926,MO669,https://www.petfinder.com/dog/mouser-47432926/...,Dog,"{'primary': 'Jack Russell Terrier', 'secondary...","{'primary': None, 'secondary': None, 'tertiary...",Young,Male,Mouser,adoptable,"{'email': 'kristen@allcrittercare.com', 'phone...","{'self': {'href': '/v2/animals/47432926'}, 'ty..."
2,47432970,IL244,https://www.petfinder.com/dog/louise-47432970/...,Dog,"{'primary': 'Pit Bull Terrier', 'secondary': N...","{'primary': 'Black', 'secondary': None, 'terti...",Baby,Female,Louise,adoptable,"{'email': 'info@RoverRescue.org', 'phone': '(6...","{'self': {'href': '/v2/animals/47432970'}, 'ty..."
3,47432973,NY1334,https://www.petfinder.com/dog/archie-47432973/...,Dog,"{'primary': 'Terrier', 'secondary': None, 'mix...","{'primary': 'Brindle', 'secondary': None, 'ter...",Baby,Male,Archie,adoptable,"{'email': 'happylifeanimalrescue@gmail.com', '...","{'self': {'href': '/v2/animals/47432973'}, 'ty..."
4,47432974,NY1334,https://www.petfinder.com/dog/veronica-4743297...,Dog,"{'primary': 'Terrier', 'secondary': None, 'mix...","{'primary': 'Bicolor', 'secondary': None, 'ter...",Baby,Female,Veronica,adoptable,"{'email': 'happylifeanimalrescue@gmail.com', '...","{'self': {'href': '/v2/animals/47432974'}, 'ty..."


In [14]:
petfinder_breeds = []

for i in range(len(all_pet_records)):
    petfinder_breeds.append(petfinder_adoption_df['breeds'][i]['primary'])
    
petfinder_breeds    
    

['Labrador Retriever',
 'Jack Russell Terrier',
 'Pit Bull Terrier',
 'Terrier',
 'Terrier',
 'Terrier',
 'Domestic Short Hair',
 'Labrador Retriever',
 'Terrier',
 'Dachshund',
 'Chihuahua',
 'Labrador Retriever',
 'Miniature Pinscher',
 'Chihuahua',
 'Guinea Pig',
 'Labrador Retriever',
 'Boxer',
 'Guinea Pig',
 'Guinea Pig',
 'Labrador Retriever',
 'Labrador Retriever',
 'Domestic Short Hair',
 'Domestic Short Hair',
 'Labrador Retriever',
 'Chihuahua',
 'Labrador Retriever',
 'Chihuahua',
 'Hound',
 'Domestic Short Hair',
 'Domestic Short Hair',
 'American Bobtail',
 'Tabby',
 'Tabby',
 'Domestic Long Hair',
 'Tabby',
 'Tabby',
 'Tabby',
 'Calico',
 'Spaniel',
 'Australian Shepherd',
 'Boxer',
 'Great Pyrenees',
 'Mastiff',
 'Schnauzer',
 'Domestic Short Hair',
 'Husky',
 'German Shepherd Dog',
 'Lhasa Apso',
 'Domestic Long Hair',
 'Terrier',
 'Labrador Retriever',
 'Whippet',
 'Labrador Retriever',
 'Labrador Retriever',
 'Corgi',
 'Labrador Retriever',
 'Labrador Retriever',
 'P

In [15]:
len(petfinder_breeds)

2000

In [25]:
# Export and save json data to json file
with open('petfinder_data_v2.json', 'w') as outfile:
    json.dump(response_json, outfile)

In [44]:
# Put the json file to dataframe
# json_file = "./petfinder_data_v2.json"
petfinder_data_df = pd.read_json("./petfinder_data_v2.json")
petfinder_data_df.head()

ValueError: Mixing dicts with non-Series may lead to ambiguous ordering.

## Web Scraping 

In [31]:
# Dependencies
from splinter import Browser
from bs4 import BeautifulSoup
import requests
import time 

In [37]:
# !which chromedriver

/usr/local/bin/chromedriver


In [38]:
# executable_path = {'executable_path': '/usr/local/bin/chromedriver'}
# browser = Browser('chrome', **executable_path, headless=False)

In [19]:
# URL of page to be scraped
# url = "http://dog.rescueme.org/California"
# browser.visit(url)

In [36]:
# response = requests.get(url)
# print(response.text)

In [27]:
# soup = BeautifulSoup(response.text, 'html.parser')

In [21]:
# print(soup.prettify())

{"type":"https://httpstatus.es/429", "status":429, "title":"Rate Limit Exceeded", "detail":"Limit Exceeded"}



In [53]:
name = []
link = []
breed = []
location = []

for page in range(1, 6):
    url = f"http://dog.rescueme.org/California#all{page}"
#     print(url)
    
    response = requests.get(url)
    soup = BeautifulSoup(response.text, 'html.parser')
    results = soup.find_all('div', class_="row")
    
    for result in results:
    # Error handling
        try:
            name.append(result.find('span', class_= '_cpn').text)
            link.append(result.a['href'])
            breed.append(result.find('span', class_= '_sbbr').text)
            location.append(result.find('span', class_= '_clo _clom').text)
            
            
            
#             name = result.find('span', class_= '_cpn').text
#             link = result.a['href']
#             breed = result.find('span', class_= '_sbbr').text
#             location = result.find('span', class_= '_clo _clom').text
            
            if (name and link and breed and location):
                print('page:', page, '------------------')
                print(name)
#                 print(link)
#                 print(breed)
#                 print(location)
    
        except Exception as e:
            print(e)
    

'NoneType' object has no attribute 'text'
'NoneType' object has no attribute 'text'
'NoneType' object has no attribute 'text'
'NoneType' object has no attribute 'text'
'NoneType' object has no attribute 'text'
page: 1 ------------------
['Nana']
page: 1 ------------------
['Nana', 'Dan']
page: 1 ------------------
['Nana', 'Dan', 'Jellybean']
page: 1 ------------------
['Nana', 'Dan', 'Jellybean', 'Rocky']
page: 1 ------------------
['Nana', 'Dan', 'Jellybean', 'Rocky', 'Kobe']
page: 1 ------------------
['Nana', 'Dan', 'Jellybean', 'Rocky', 'Kobe', 'Puchi']
page: 1 ------------------
['Nana', 'Dan', 'Jellybean', 'Rocky', 'Kobe', 'Puchi', 'Rosie']
page: 1 ------------------
['Nana', 'Dan', 'Jellybean', 'Rocky', 'Kobe', 'Puchi', 'Rosie', 'Elizabeth']
page: 1 ------------------
['Nana', 'Dan', 'Jellybean', 'Rocky', 'Kobe', 'Puchi', 'Rosie', 'Elizabeth', 'Beverly']
page: 1 ------------------
['Nana', 'Dan', 'Jellybean', 'Rocky', 'Kobe', 'Puchi', 'Rosie', 'Elizabeth', 'Beverly', 'Gus']
pag

'NoneType' object has no attribute 'text'
'NoneType' object has no attribute 'text'
'NoneType' object has no attribute 'text'
'NoneType' object has no attribute 'text'
'NoneType' object has no attribute 'text'
page: 2 ------------------
['Nana', 'Dan', 'Jellybean', 'Rocky', 'Kobe', 'Puchi', 'Rosie', 'Elizabeth', 'Beverly', 'Gus', 'Duncan', 'Dot', 'Layla', '', 'Skye', 'Enzo', 'Jake', 'Jake', 'Bryce/Raymond', 'Titus', 'POGO', 'Matilda', 'DIESEL OLIVAS', 'Bella', 'Daisy', 'Duncan', 'Stella', 'MIKEY', 'MIA', 'Charlie', 'Smokey', 'Sable', 'URGENT!!! BLACKY', 'URGENT!!! GINGER', 'Noki', 'Barney', 'Dino', 'BamBam', 'Precious', 'Nino', 'URGENT AFTER 2/26', 'URGENT ON 2/26', 'URGENT AFTER 2/26', 'URGENT AFTER 2/26', 'URGENT AFTER 2/26', 'URGENT AFTER 2/26', 'Bella', '', 'Theo', 'Fredo', 'Mia', 'Zuzu', '', '', 'Mojo', 'URGENT - SAMMY', 'Jerry', 'Milo', 'River', 'Garth', 'Jackson', 'Logan', 'Reggie', 'Buddy', 'Norbit', 'Cruise', 'Charlie', 'Minnie Mouse', 'Razzle Dazzle', 'Dose Doe', 'Eleven', 'U

IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)



In [51]:
name

['Nana',
 'Dan',
 'Jellybean',
 'Rocky',
 'Kobe',
 'Puchi',
 'Rosie',
 'Elizabeth',
 'Beverly',
 'Gus',
 'Duncan',
 'Dot',
 'Layla',
 '',
 'Skye',
 'Enzo',
 'Jake',
 'Jake',
 'Bryce/Raymond',
 'Titus',
 'POGO',
 'Matilda',
 'DIESEL OLIVAS',
 'Bella',
 'Daisy',
 'Duncan',
 'Stella',
 'MIKEY',
 'MIA',
 'Charlie',
 'Smokey',
 'Sable',
 'URGENT!!! BLACKY',
 'URGENT!!! GINGER',
 'Noki',
 'Barney',
 'Dino',
 'BamBam',
 'Precious',
 'Nino',
 'URGENT AFTER 2/26',
 'URGENT ON 2/26',
 'URGENT AFTER 2/26',
 'URGENT AFTER 2/26',
 'URGENT AFTER 2/26',
 'URGENT AFTER 2/26',
 'Bella',
 '',
 'Theo',
 'Fredo',
 'Mia',
 'Zuzu',
 '',
 '',
 'Mojo',
 'URGENT - SAMMY',
 'Jerry',
 'Milo',
 'River',
 'Garth',
 'Jackson',
 'Logan',
 'Reggie',
 'Buddy',
 'Norbit',
 'Cruise',
 'Charlie',
 'Minnie Mouse',
 'Razzle Dazzle',
 'Dose Doe',
 'Eleven',
 'URGENT AFTER 2/25',
 'URGENT AFTER 2/25',
 'URGENT AFTER 2/25',
 'URGENT AFTER 2/25',
 'URGENT - BETTY',
 'URGENT - BELLA',
 'URGENT - MAY',
 'Yugi',
 'URGENT - THIMES

In [23]:
for x in range(1, 25):
#     html = browser.html
#     soup = BeautifulSoup(html, 'html.parser')
    
    results = soup.find_all('div', class_="row")
    
    for result in results:
    # Error handling
        try:
            name = result.find('span', class_= '_cpn').text
            link = result.a['href']
            breed = result.find('span', class_= '_sbbr').text
            location = result.find('span', class_= '_clo _clom').text
        
            if (name and link and breed and location):
                print('page:', x, '------------------')
                print(name)
                print(link)
                print(breed)
                print(location)
    
        except Exception as e:
            print(e)
        
    browser.click_link_by_partial_text('NEXT')

'NoneType' object has no attribute 'text'
'NoneType' object has no attribute 'text'
'NoneType' object has no attribute 'text'
'NoneType' object has no attribute 'text'
'NoneType' object has no attribute 'text'
page: 1 ------------------
Nana
http://post.rescueme.org/20-02-21-00420
Chihuahua
Los Angeles County, Van Nuys, CA
page: 1 ------------------
Dan
http://post.rescueme.org/20-02-21-00413
Lhasa Apso
Los Angeles County, Van Nuys, CA
page: 1 ------------------
Jellybean
http://post.rescueme.org/20-02-21-00380
Dachshund
Los Angeles County, Woodland Hills, CA
page: 1 ------------------
Rocky
http://post.rescueme.org/20-02-21-00370
Pit Bull
Stanislaus County, Oakdale, CA
page: 1 ------------------
Kobe
http://post.rescueme.org/20-02-21-00369
American Bulldog
Los Angeles County, Woodland Hills, CA
page: 1 ------------------
Puchi
http://post.rescueme.org/20-02-21-00364
Chihuahua
Orange County, Menifee, CA
page: 1 ------------------
Rosie
http://post.rescueme.org/20-02-21-00321
German She

page: 1 ------------------
Twinkie
http://post.rescueme.org/20-02-16-00013
Pomeranian
Orange County, Yorba Linda, CA
page: 1 ------------------
URGENT - SPIKE
http://post.rescueme.org/20-02-16-00011
Chihuahua
Los Angeles County, Baldwin Park, CA
page: 1 ------------------
Oakley
http://post.rescueme.org/20-02-16-00010
Chihuahua
Orange County, Yorba Linda, CA
page: 1 ------------------
URGENT - ROCKY
http://post.rescueme.org/20-02-16-00009
Lab
Los Angeles County, Baldwin Park, CA
page: 1 ------------------
Pepper
http://post.rescueme.org/20-02-16-00005
German Shepherd
Orange County, Yorba Linda, CA
page: 1 ------------------
Smokey
http://post.rescueme.org/20-02-15-00296
Pit Bull
San Joaquin County, Stockton, CA
page: 1 ------------------
Dillon
http://post.rescueme.org/20-02-15-00228
Yorkie
Los Angeles County, Diamond Bar, CA
page: 1 ------------------
Chloe
http://post.rescueme.org/20-02-15-00223
Pit Bull
San Joaquin County, Manteca, CA
page: 1 ------------------
Hooey
http://post.res

ElementDoesNotExist: no elements could be found with link by partial text "NEXT"

In [31]:
results = soup.find_all('div', class_="row")
# print(results)
# names = soup.find_all('span', class_= '_cpn')

for result in results:
    # Error handling
    try:
        name = result.find('span', class_= '_cpn').text
        link = result.a['href']
        breed = result.find('span', class_= '_sbbr').text
        location = result.find('span', class_= '_clo _clom').text
        
        if (name and link and breed and location):
            print('------------------')
            print(name)
            print(link)
            print(breed)
            print(location)
    
    except Exception as e:
        print(e)

'NoneType' object has no attribute 'text'
'NoneType' object has no attribute 'text'
'NoneType' object has no attribute 'text'
'NoneType' object has no attribute 'text'
'NoneType' object has no attribute 'text'
------------------
Duncan
http://post.rescueme.org/20-02-20-00506
Poodle
Los Angeles County, Chatsworth, CA
------------------
Stella
http://post.rescueme.org/20-02-20-00504
Foxhound
Sacramento County, Sacramento, CA
------------------
MIKEY
http://post.rescueme.org/20-02-20-00490
Shih Tzu
Los Angeles County, Los Angeles, CA
------------------
MIA
http://post.rescueme.org/20-02-20-00487
Poodle
Los Angeles County, Los Angeles, CA
------------------
Smokey
http://post.rescueme.org/20-02-20-00446
Pit Bull
Shasta County, REdding, CA
------------------
Sable
http://post.rescueme.org/20-02-20-00434
Tibetan Mastiff
Santa Clara County, San Jose, CA
------------------
URGENT!!! BLACKY
http://post.rescueme.org/20-02-20-00431
German Shepherd
Riverside County, Norco, CA
------------------
UR

In [9]:
# URL of page to be scraped
url = "https://www.dogsblog.com/"
response = requests.get(url)
print(response.text)

<!DOCTYPE html>
<html class="no-js" lang="en-US" prefix="og: http://ogp.me/ns#">
<head>
	<meta charset="UTF-8">
	<!-- Always force latest IE rendering engine (even in intranet) & Chrome Frame -->
	<!--[if IE ]>
	<meta http-equiv="X-UA-Compatible" content="IE=edge,chrome=1">
	<![endif]-->
	<link rel="profile" href="http://gmpg.org/xfn/11" />
	
		
		
		
					<meta name="viewport" content="width=device-width, initial-scale=1">
			<meta name="apple-mobile-web-app-capable" content="yes">
			<meta name="apple-mobile-web-app-status-bar-style" content="black">
		
			<link rel="pingback" href="https://www.dogsblog.com/xmlrpc.php" />
	<script type="text/javascript">document.documentElement.className = document.documentElement.className.replace( /\bno-js\b/,'js' );</script>
<!-- Search Engine Optimization by Rank Math - https://s.rankmath.com/home -->
<title>Dogs for Adoption - dogsblog.com</title>
<meta name="description" content="dogsblog.com lists rescue dogs available for adoption all over th

In [39]:
# URL of page to be scraped
url = "https://bestfriends.org/adopt/adopt-our-sanctuary/dogs"
browser.visit(url)

In [42]:
for x in range(1, 10):
    browser.is_element_visible_by_css(".load-more", wait_time=3)
    browser.click_link_by_partial_text('MORE ANIMALS')
    

ElementDoesNotExist: no elements could be found with link by partial text "MORE ANIMALS"

In [None]:
for x in range(1, 25):
    
    soup = BeautifulSoup(html, 'html.parser')

In [28]:
# URL of page to be scraped
url = "https://bestfriends.org/adopt/adopt-our-sanctuary/dogs"
response = requests.get(url)
print(response.text)

<!DOCTYPE html>
<!--[if lt IE 7]>
<html class="ie6 ie" lang="en"
      dir="ltr"> <![endif]-->
<!--[if IE 7]>
<html class="ie7 ie" lang="en"
      dir="ltr"> <![endif]-->
<!--[if IE 8]>
<html class="ie8 ie" lang="en"
      dir="ltr"> <![endif]-->
<!--[if gt IE 8]> <!-->
<html class="
not-ie" lang="en"
      dir="ltr"> <!--<![endif]-->
<head>
  <!--[if IE]><![endif]-->
<link rel="dns-prefetch" href="//maxcdn.bootstrapcdn.com" />
<link rel="dns-prefetch" href="//s3fs.bestfriends.org" />
<link rel="dns-prefetch" href="//dm76icm4097f8.cloudfront.net" />
<link rel="dns-prefetch" href="//ajax.googleapis.com" />
<link rel="dns-prefetch" href="//google_tag" />
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" /><script type="text/javascript">(window.NREUM||(NREUM={})).loader_config={licenseKey:"c72fbe0d5a",applicationID:"206872624"};window.NREUM||(NREUM={}),__nr_require=function(e,n,t){function r(t){if(!n[t]){var i=n[t]={exports:{}};e[t][0].call(i.exports,function(n){var i=e[t

In [29]:
soup = BeautifulSoup(response.text, 'html.parser')

In [30]:
results = soup.find_all('div', class_="rg-animal")
# print(results)
# names = soup.find_all('span', class_= '_cpn')

for result in results:
    # Error handling
    try:
        name = result.find('span', class_= 'animalName').text
        link = result.a['href']
        breed = result.find('span', class_= 'animalBreed').text
        age = result.find('span', class_= 'animalAge').text
        
        if (name and link and breed and age):
            print('------------------')
            print(name)
            print(link)
            print(breed)
            print(age)
    
    except Exception as e:
        print(e)

------------------
Joy
/adopt/adopt-our-sanctuary/9384898/joy
American Pit Bull Terrier / Chinese Shar-Pei / Mixed (short coat)
Senior
------------------
Tarlton
/adopt/adopt-our-sanctuary/9384877/tarlton
Mastiff / Labrador Retriever / Mixed (short coat)
Senior
------------------
Avon
/adopt/adopt-our-sanctuary/9384875/avon
Cattle Dog (short coat)
Senior
------------------
Shocky
/adopt/adopt-our-sanctuary/9384823/shocky
American Pit Bull Terrier (short coat)
Senior
------------------
Kenya
/adopt/adopt-our-sanctuary/9384967/kenya
Labrador Retriever (short coat)
Senior
------------------
Bliss
/adopt/adopt-our-sanctuary/9385093/bliss
American Pit Bull Terrier / Mixed (short coat)
Senior
------------------
Moogan
/adopt/adopt-our-sanctuary/9385337/moogan
American Pit Bull Terrier (short coat)
Adult
------------------
Chaplin
/adopt/adopt-our-sanctuary/9384952/chaplin
American Pit Bull Terrier (short coat)
Senior
------------------
Fred
/adopt/adopt-our-sanctuary/9385310/fred
Red Heeler 

<head><title>Not Acceptable!</title></head><body><h1>Not Acceptable!</h1><p>An appropriate representation of the requested resource could not be found on this server. This error was generated by Mod_Security.</p></body></html>
