In [32]:
from bs4 import BeautifulSoup
import requests
import json
import boto3

In [33]:
# Fetch the adoptable-dogs listing page and preview its parsed HTML.
# The timeout keeps the cell from hanging forever on an unresponsive server, and
# raise_for_status() surfaces HTTP errors instead of silently parsing an error page.
r = requests.get('https://humaneanimalrescue.org/adopt/?_type=dog', timeout=30)
r.raise_for_status()
soup = BeautifulSoup(r.text, 'html.parser')

print(soup.prettify())

<!DOCTYPE html>
<html class="avada-html-layout-wide avada-html-header-position-top" lang="en-US" prefix="og: http://ogp.me/ns# fb: http://ogp.me/ns/fb#">
 <head>
  <meta content="IE=edge" http-equiv="X-UA-Compatible"/>
  <meta content="text/html; charset=utf-8" http-equiv="Content-Type"/>
  <meta content="width=device-width, initial-scale=1" name="viewport"/>
  <meta content="index, follow, max-image-preview:large, max-snippet:-1, max-video-preview:-1" name="robots"/>
  <!-- This site is optimized with the Yoast SEO plugin v19.0 - https://yoast.com/wordpress/plugins/seo/ -->
  <title>
   Adopt - Humane Animal Rescue
  </title>
  <meta content="Humane Animal Rescue is one of the largest animal welfare organizations in Pennsylvania, providing services to tens of thousands of animals each year at our two shelters and wildlife rehabilitation center." name="description"/>
  <link href="https://humaneanimalrescue.org/adopt/" rel="canonical"/>
  <meta content="en_US" property="og:locale"/>
  

In [34]:
# Fetch a single dog's page and preview its parsed HTML.
# The timeout keeps the cell from hanging forever on an unresponsive server, and
# raise_for_status() surfaces HTTP errors (e.g. a removed dog page) right away.
dog = requests.get('https://humaneanimalrescue.org/animals/agatha-92133/', timeout=30)
dog.raise_for_status()
soup = BeautifulSoup(dog.text, 'html.parser')

print(soup.prettify())

<!DOCTYPE html>
<html class="avada-html-layout-wide avada-html-header-position-top" lang="en-US" prefix="og: http://ogp.me/ns# fb: http://ogp.me/ns/fb#">
 <head>
  <meta content="IE=edge" http-equiv="X-UA-Compatible"/>
  <meta content="text/html; charset=utf-8" http-equiv="Content-Type"/>
  <meta content="width=device-width, initial-scale=1" name="viewport"/>
  <meta content="index, follow, max-image-preview:large, max-snippet:-1, max-video-preview:-1" name="robots"/>
  <!-- This site is optimized with the Yoast SEO plugin v19.0 - https://yoast.com/wordpress/plugins/seo/ -->
  <title>
   Agatha - Humane Animal Rescue
  </title>
  <link href="https://humaneanimalrescue.org/animals/agatha-92133/" rel="canonical"/>
  <meta content="en_US" property="og:locale"/>
  <meta content="article" property="og:type"/>
  <meta content="Agatha - Humane Animal Rescue" property="og:title"/>
  <meta content="https://humaneanimalrescue.org/animals/agatha-92133/" property="og:url"/>
  <meta content="Humane

In [35]:
def list_dogs(soup: BeautifulSoup) -> list:
    """
    Get a list of dogs from the main webpage.

    Every ``<a class="wpgb-card-layer-link">`` element is treated as a link to a
    dog's page. The second-to-last ``/``-separated piece of the link (the last
    path segment when the URL ends with ``/``) is expected to look like
    ``<name>-<id>``, e.g. ``agatha-92133``.

    :param soup: BeautifulSoup object of the adoption listing page
    :return: list of dicts with the keys ``id``, ``name`` and ``url``
    :raises ValueError: if a link's name/id segment contains no ``-``
    """
    results = soup.find_all('a', attrs={'class': 'wpgb-card-layer-link'})
    dogs = []
    for result in results:
        dog_page = result.attrs.get('href')
        if not dog_page:
            # An anchor without a target has no dog page to record.
            continue
        slug = dog_page.split('/')[-2]
        # Split on the LAST hyphen only, so hyphenated names such as
        # "mary-jane-12345" keep their full name instead of failing to unpack.
        dog_name, dog_id = slug.rsplit('-', 1)
        dogs.append({
            'id': dog_id,
            'name': dog_name,
            'url': dog_page
        })
    return dogs
# Download the listing page and extract one entry per adoptable dog.
# The timeout prevents an indefinite hang; raise_for_status() stops the run on an
# HTTP error instead of producing an empty dog list from an error page.
url = 'https://humaneanimalrescue.org/adopt/?_type=dog'
r = requests.get(url, timeout=30)
r.raise_for_status()
soup = BeautifulSoup(r.text, 'html.parser')
current_dogs = list_dogs(soup)

In [36]:
# Save the scraped listing to dogs.json as indented JSON.
with open('dogs.json', 'w') as dogs_file:
    dogs_file.write(json.dumps(current_dogs, indent=4))

In [37]:
def get_dog_details(soup: BeautifulSoup) -> dict:
    """
    Get details of a dog from the dog's webpage.

    Text fields are read by position from the children of the
    ``<div class="animal-copy">`` element, and image URLs from the ``<img>`` tags
    inside ``<div class="animal-photos">``.

    :param soup: BeautifulSoup object of a single dog's page
    :return: dict with the keys ``headshot``, ``name``, ``breed``, ``sex``,
        ``age``, ``weight``, ``description`` and ``pictures`` (list of URLs)
    :raises ValueError: if the page lacks the expected containers, has fewer
        children than the positional layout needs, or has no images
    """
    copy_div = soup.find('div', attrs={'class': 'animal-copy'})
    photos_div = soup.find('div', attrs={'class': 'animal-photos'})
    if copy_div is None or photos_div is None:
        # E.g. an error page, or a dog that is no longer listed.
        raise ValueError(
            "Page does not look like a dog page: missing 'animal-copy' or "
            "'animal-photos' section"
        )

    details = copy_div.contents
    pics = photos_div.find_all('img')

    # Positions of the text fields among the children of the copy section.
    # NOTE(review): only odd indices are used, presumably because the even ones
    # are whitespace text nodes between elements - confirm against the markup.
    field_positions = (
        ('name', 1),
        ('breed', 3),
        ('sex', 5),
        ('age', 7),
        ('weight', 9),
    )
    description_position = 15

    if len(details) <= description_position:
        raise ValueError(
            "Unexpected 'animal-copy' layout: expected at least "
            f"{description_position + 1} child nodes, found {len(details)}"
        )
    if not pics:
        raise ValueError("No images found in the 'animal-photos' section")

    translator = str.maketrans({chr(10): '', chr(9): ''})  # Remove \n and \t

    dog_details = {'headshot': pics[0].attrs['src'].strip()}
    for field, position in field_positions:
        dog_details[field] = details[position].text
    dog_details['description'] = details[description_position].text.translate(translator)
    # Gallery pictures start at the third image; index 1 is skipped.
    # NOTE(review): presumably index 1 repeats the headshot - confirm.
    dog_details['pictures'] = [pic.attrs['src'].strip() for pic in pics[2:]]
    return dog_details

# Fetch and parse each sample dog page with one shared code path instead of a
# copy-pasted block per dog.
dog_urls = [
    'https://humaneanimalrescue.org/animals/agatha-92133/',
    'https://humaneanimalrescue.org/animals/augustus-92932/',
]
fetched_dogs = []
for url in dog_urls:
    # The timeout avoids an indefinite hang; raise_for_status() fails fast on an
    # HTTP error instead of handing an error page to the parser.
    r = requests.get(url, timeout=30)
    r.raise_for_status()
    soup = BeautifulSoup(r.text, 'html.parser')
    fetched_dogs.append(get_dog_details(soup))

# Keep the individual names available for interactive inspection.
dog1, dog2 = fetched_dogs
# Template data for the email: a single `dogs` key holding the list of details.
dogs = {'dogs': [dog1, dog2]}

In [38]:
# Amazon SES client used below to upload the email template and send the email.
# No region or credentials are passed here, so boto3's default configuration applies.
ses = boto3.client(service_name='ses')

In [39]:
# Plain-text body of the email template (used as 'TextPart' below).
# The {{#dogs}}...{{/dogs}} section is meant to repeat for each entry of the
# `dogs` list in the template data; the nested {{#pictures}} section emits each
# picture URL through {{this}}.
dog_text_part = """
{{#dogs}}
{{headshot}}
{{name}}
{{breed}}
{{sex}}
{{age}}
{{weight}}
{{description}}
{{#pictures}}
{{this}}
{{/pictures}}
{{/dogs}}
"""

# HTML body of the email template (used as 'HtmlPart' below).
# The <style> rule caps every image at 300x300 px. Each entry of `dogs` renders
# a headshot, the name as a heading, a list of breed/sex/age/weight, the
# description, then one <img> per picture URL, followed by a horizontal rule.
dog_html_part = """
<head>
    <style>
        img {
            max-height:300px;
            max-width:300px;
            height:auto;
            width:auto; 
        }
    </style>
</head>
{{#dogs}}
    <img src="{{headshot}}" alt="{{name}}'s headshot">
    <h1>{{name}}</h1>
    <ul>
        <li>{{breed}}</li>
        <li>{{sex}}</li>
        <li>{{age}}</li>
        <li>{{weight}}</li>
    </ul>
    <p>{{description}}</p>
    {{#pictures}}
        <img src="{{this}}" alt="A picture of {{name}}">
    {{/pictures}}
    <hr>
{{/dogs}}
"""

# Template definition in the shape expected by the SES template API calls.
dog_template = {
    'TemplateName': 'dog_template',
    'SubjectPart': 'There are new dogs for you to view!',
    'TextPart': dog_text_part,
    'HtmlPart': dog_html_part,
}
# Upsert the template: update_template fails when the template has never been
# created (e.g. a fresh AWS account or region), so fall back to creating it.
try:
    r = ses.update_template(Template=dog_template)
except ses.exceptions.TemplateDoesNotExistException:
    r = ses.create_template(Template=dog_template)

In [40]:
# Sender and recipient of the notification email.
source = 'Rob Mitchell <rob.mitchellzone@gmail.com>'
destination = 'rob.mitchellzone@gmail.com'

# Keyword arguments for the templated send; the scraped dog details are passed
# to the template as a JSON string.
template_send_args = dict(
    Source=source,
    Destination=dict(ToAddresses=[destination]),
    Template='dog_template',
    TemplateData=json.dumps(dogs),
    ConfigurationSetName='email_failures_to_rob',
)

# Left as the cell's final expression so the SES response is displayed.
ses.send_templated_email(**template_send_args)

{'MessageId': '010f01813045c966-7ca86097-6144-4d4f-803e-1e2be63a4884-000000',
 'ResponseMetadata': {'RequestId': '0b6d5d6f-e5dd-4864-b424-febf8423f875',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'date': 'Sat, 04 Jun 2022 19:51:28 GMT',
   'content-type': 'text/xml',
   'content-length': '362',
   'connection': 'keep-alive',
   'x-amzn-requestid': '0b6d5d6f-e5dd-4864-b424-febf8423f875'},
  'RetryAttempts': 0}}