In [19]:
# Dependencies
import json
import requests
import pandas as pd
from config import petfinder_api_key, petfinder_secret_key

In [34]:
# Request token for the Petfinder API
data = {
  'grant_type': 'client_credentials',
  'client_id': petfinder_api_key,
  'client_secret': petfinder_secret_key
}

token_response = requests.post('https://api.petfinder.com/v2/oauth2/token', data=data)
token_response_json = token_response.json()

print(json.dumps(token_response_json, indent = 4))

{
    "token_type": "Bearer",
    "expires_in": 3600,
    "access_token": "eyJ0eXAiOiJKV1QiLCJhbGciOiJSUzI1NiIsImp0aSI6IjZmNGFiNThjNjM2MDQ5NDY2MmQyNWI1NzY3YjU4MzBjZTRiNzY3YTJlOWExZTVjNDg0NzY3ZTZhMGI2M2VlMmRmNjE5YjJmMTYwNzI2ZTg4In0.eyJhdWQiOiJIaGFpVjNWVkdZY3NJbjVwNXoxSWZvOVd3TWdIWXlERDBrYXdQYXFmYjhsd1k1dzhDZyIsImp0aSI6IjZmNGFiNThjNjM2MDQ5NDY2MmQyNWI1NzY3YjU4MzBjZTRiNzY3YTJlOWExZTVjNDg0NzY3ZTZhMGI2M2VlMmRmNjE5YjJmMTYwNzI2ZTg4IiwiaWF0IjoxNTgyMDgzMTkwLCJuYmYiOjE1ODIwODMxOTAsImV4cCI6MTU4MjA4Njc5MCwic3ViIjoiIiwic2NvcGVzIjpbXX0.a7OcEtaYCoIWb6Oh5e6t3GI2_x3cjrnQ2uj2HUCl2q6vJNTN36nNsuDtwRXF21xzhM5D1wMnlU33vhecsAm86hv5o5Z8uaVIOcSHxoR2_uRv8k6-1B__l5mvbdBhB9ekuAdybxNNmFPVxuwzk4b9TknMIhrmeGbwnUHMKKw4N4gmKMB-JTivcPsO03IOiW73WFW3VaTEOaN1rno72Kl6SWMD261PXArUW45BKulTxqIWb-KcrB2RGwJ9iSJRglTZ1DUhLQkuwOtWq5IrFW5pQbrpi-OLVGWbRtMdxQZDprb2QVBFkhHMjDPaTOhMaiqKevQ26ZDavBjAK411KoEvxA"
}


In [21]:
# Pull data from petfinder.com API 
headers = {'Authorization': f"Bearer {token_response_json['access_token']}"}
url = "https://api.petfinder.com/v2/animals?"
response = requests.get(url, headers=headers)
response_json = response.json()

print(json.dumps(response_json, indent = 4))

{
    "animals": [
        {
            "id": 47414548,
            "organization_id": "CA2420",
            "url": "https://www.petfinder.com/dog/grizz-47414548/ca/novato/bay-area-german-shepherd-rescue-ca2420/?referrer_id=96f6bcc2-4d0b-4971-9351-30a7a4f94a88",
            "type": "Dog",
            "species": "Dog",
            "breeds": {
                "primary": "German Shepherd Dog",
                "secondary": null,
                "mixed": false,
                "unknown": false
            },
            "colors": {
                "primary": "Yellow / Tan / Blond / Fawn",
                "secondary": "Black",
                "tertiary": null
            },
            "age": "Adult",
            "gender": "Male",
            "size": "Large",
            "coat": "Medium",
            "attributes": {
                "spayed_neutered": true,
                "house_trained": true,
                "declawed": null,
                "special_needs": true,
                "shots_c

In [None]:
# Extract out list of animal dictionaries (also known as records)
animal_records = response_json['animals']
animal_records

In [None]:
# Use Pandas to directly convert list of records to DataFrame
animals_df = pd.DataFrame(animal_records)
animals_df

In [None]:
# API has record limit for each pages and need to loop through multiple pages 
current_page_number = 0
num_pages_to_fetch = 10
is_first_page = False
all_pet_records = []

print('--- Data Processing Started! ---')

while current_page_number < num_pages_to_fetch:
    # Increment count
    current_page_number += 1
    print(f"Processing batch #{current_page_number}")
    
    # Perform API call
    # Configure search parameters
    params = {
        # Can add any more search parameters found at: https://www.petfinder.com/developers/v2/docs/#get-animals
        'limit': 100 # default is 20, increased to 100 items per page
    }
    
    # Pull data from petfinder.com API 
    url = f"https://api.petfinder.com/v2/animals?page={current_page_number}"
    response = requests.get(url, headers=headers, params=params)
    response_json = response.json()

    # Print out each API call's 'pagination' dictionary that describes if there is a next page, other info
    print(response_json['pagination'])
    
    # Pull out records and add them to our list (app_pet_records)
    animal_records = response_json['animals']
    all_pet_records += animal_records

print('--- Data Processing Completed! ---')

In [None]:
len(all_pet_records)

In [None]:
large_animals_df = pd.DataFrame(all_pet_records)

# Show number of rows/columns
large_animals_df.shape

In [None]:
# Look at first 5 records
large_animals_df.head(5)

In [None]:
list(large_animals_df.columns)

In [None]:
# Perform data cleaning by filtering and keeping necessary column only
petfinder_adoption = large_animals_df.loc[:, ["id",
                                             "organization_id",
                                             "url",
                                             "type",
                                             "breeds",
                                             "colors",
                                             "age",
                                             "gender",
                                             "name",
                                             "status",
                                             "contact",
                                             "_links"]]

petfinder_adoption.head()


In [None]:
# Export data frame to csv file
large_animals_df.to_csv(animal_data.csv, encoding='utf-8', index=False)

In [25]:
# Export and save json data to json file
with open('petfinder_data_v2.json', 'w') as outfile:
    json.dump(response_json, outfile)

In [44]:
# Put the json file to dataframe
# json_file = "./petfinder_data_v2.json"
petfinder_data_df = pd.read_json("./petfinder_data_v2.json")
petfinder_data_df.head()

ValueError: Mixing dicts with non-Series may lead to ambiguous ordering.

## Web Scraping 

In [35]:
# Dependencies
from bs4 import BeautifulSoup
import requests

In [40]:
# URL of page to be scraped
url = "http://dog.rescueme.org/California"

In [41]:
response = requests.get(url)
print(response.text)

<!doctype html><html class="no-js" lang="en"><head><meta charset="utf-8" /><meta http-equiv="x-ua-compatible" content="ie=edge"><meta name="viewport" content="width=device-width, initial-scale=1.0"><TITLE>- California Dog Rescue - ADOPTIONS - Rescue Me!</TITLE><meta name="description" content="&#34;Click here to view Dogs in California for adoption. Individuals & rescue groups can post animals free.&#34; - &#9829; RESCUE ME! &#9829; &#1772;" /><meta property="og:title" content="&#9658; Dog Rescue &#9658; California"><link rel="shortcut icon" href="http://www.rescueme.org/favicon-rescueme.ico"><meta property="og:image" content="http://images.rescueme.org/pn/dog-fb.png"><meta property="og:url" content="http://dog.rescueme.org/California"><meta property="og:description" content="&#34;Click here to view Dogs for adoption, or post one in need.&#34; - &#9829; RESCUE ME! &#9829; &#1772;"><meta property="og:type" content="website"><meta property="fb:admins" content="714916561"><meta property="

In [42]:
soup = BeautifulSoup(response.text, 'html.parser')

In [43]:
print(soup.prettify())

<!DOCTYPE doctype html>
<html class="no-js" lang="en">
 <head>
  <meta charset="utf-8"/>
  <meta content="ie=edge" http-equiv="x-ua-compatible"/>
  <meta content="width=device-width, initial-scale=1.0" name="viewport"/>
  <title>
   - California Dog Rescue - ADOPTIONS - Rescue Me!
  </title>
  <meta content='"Click here to view Dogs in California for adoption. Individuals &amp; rescue groups can post animals free." - ♥ RESCUE ME! ♥ ۬' name="description">
   <meta content="► Dog Rescue ► California" property="og:title"/>
   <link href="http://www.rescueme.org/favicon-rescueme.ico" rel="shortcut icon"/>
   <meta content="http://images.rescueme.org/pn/dog-fb.png" property="og:image"/>
   <meta content="http://dog.rescueme.org/California" property="og:url"/>
   <meta content='"Click here to view Dogs for adoption, or post one in need." - ♥ RESCUE ME! ♥ ۬' property="og:description"/>
   <meta content="website" property="og:type"/>
   <meta content="714916561" property="fb:admins"/>
   <meta

In [45]:
names = soup.find_all('span', class_= '_cpn')

for name in names:
    print(name.text)

MAX
Groot,Milo Adopt Toget
Bravo
Pancho
Autumn
Milo
Captain
Toby
Angel
Chocomax
Aurora
Macie
Max
Sable
Molly
Brianna
Oliver
Leta
Charley
URGENT AFTER 2/25
LUCKY-URGENT 3/3
Grover
Portia
Naomi
Zuzu
Puppy
Fredo
Cuddles
Alice
Oreo
Cali
Copper

Walker
Sheba
Max
Buddy
mack
Ned Pepper
Sweet tea
Tank
Midnight
Frankie
Rhiannon
Honey biscuit
Roger
Velvet / Violet
Whiskey a go go
Storm
Fox
Doe
Badger
Taz
Hawk
Coon
Bear
URGENT AFTER 2/25
URGENT AFTER 2/25
Sonny 4 lbs
Henri
Bella
Buck
Cowboy
Shadow
Max
Copper
George
Axel
Kuhio
Tank
Princess
Milo
Klaus
Bruno
Princ Harry
Pippa
Spencer
Frankie
Milo
Pancho
Elizabeth
Roxy
Ally
MADDIE
COOPER
COWBOY
Arturo
Poldi
Smalls
Mia
Bruno
Zeus
URGENT AFTER 2/22
Captain & Clifford
Henry is a shy Doxie
Rocky
Benji
Phoebe
Vladimir
URGENT AFTER 2/21
URGENT AFTER 2/21
URGENT AFTER 2/21
URGENT AFTER 2/21
URGENT AFTER 2/21
URGENT AFTER 2/21
URGENT AFTER 2/21
URGENT AFTER 2/21
Sasha
URGENT AFTER 2/21
URGENT AFTER 2/21
URGENT AFTER 2/21
URGENT AFTER 2/21
Chance
URGENT AFTE