In [1]:
from splinter import Browser
from bs4 import BeautifulSoup as bs
import time
import requests
import pymongo
import pandas as pd
from config import api_key

In [2]:
# Initialize PyMongo to work with MongoDBs
conn = 'mongodb://localhost:27017'
client = pymongo.MongoClient(conn)

In [3]:
# Define database and collection
db = client.EndangeredAnimalDB
collection = db.species

In [4]:
# Read animal common names and convert it to Pandas DataFrame
name_df = pd.read_csv('../../data/animal_names.csv', encoding='UTF-8')

# Preview "name_df"
name_df.head()

Unnamed: 0,Common_Name,Other_Name,Sci_Name
0,African Elephant,,Loxodonta africana
1,African Wild Dog,,Lycaon pictus
2,Albacore Tuna,,Thunnus alalunga
3,Amazon River Dolphin,,Inia geoffrensis
4,Amur Leopard,,Panthera pardus


In [5]:
endangered_animals = name_df.Common_Name.tolist()

In [6]:
# Going to each species web page and scraping for image, and other information pertaining to status, population,
# scientific name, description and image
base_url = "https://www.worldwildlife.org/species"
for animal in endangered_animals:
    # Error handling
    try:
# Getting to each species URL    
        animal_url = animal.replace(" ","-") 
        url = base_url +"/"+ animal_url
        response = requests.get(url)
# Scraping for information for each species        
        soup = bs(response.text, 'html')
        overview_species = soup.find('div', id='overview')
        all_overview = overview_species.find_all('div', class_='container')
        species_status = all_overview[0].text
        species_population = all_overview[1].text
        species_scientific_name = all_overview[2].text
        species_description = soup.find('div', class_='wysiwyg lead').text
        image_path = soup.find('img',class_='centered')
        # Run only if title, price, and link are available
        if (all_overview and species_description and image_path):
            print(image_path)
            species_image = image_path['src']
        
# Creating a dictionary and putting it all the scraped information together        
            species_post = {
                'Species Name' : animal,
                'Status' : species_status,
                'Population' : species_population,
                'Scientific Name' : species_scientific_name,
                'Species Description' : species_description,
                'Species Image URL' : species_image
            }
# Inserting the record in the database collection    
            collection.insert_one(species_post)
    except Exception as e:
        print(e)

<img alt="African elephant browsing on bush" class="centered" src="https://c402277.ssl.cf1.rackcdn.com/photos/882/images/hero_small/African_Elephant_7.27.2012_hero_and_circle_HI_53941.jpg?1345532748"/>
<img alt="African wild dogs in Zamibia" class="centered" src="https://c402277.ssl.cf1.rackcdn.com/photos/10341/images/hero_small/wild_dogs-web.jpg?1447180370"/>
<img alt="tuna 102948248" class="centered" src="https://c402277.ssl.cf1.rackcdn.com/photos/1466/images/carousel_small/tuna_102948248.jpg?1345545852"/>
<img alt="Amazon River Dolphin (Pink Dolphin)" class="centered" src="https://c402277.ssl.cf1.rackcdn.com/photos/787/images/hero_small/Amazon_River_Dolphin_7.25.2012_Circle_and_Hero_XL_257657.jpg?1470325710"/>
<img alt="Amur Leopard" class="centered" src="https://c402277.ssl.cf1.rackcdn.com/photos/878/images/hero_small/amur-leopard_99144569.jpg?1345532564"/>
<img alt="Arctic Fox" class="centered" src="https://c402277.ssl.cf1.rackcdn.com/photos/540/images/hero_small/shutterstock_1038

list index out of range
<img alt="Polar Bear" class="centered" src="https://c402277.ssl.cf1.rackcdn.com/photos/2330/images/hero_small/polar-bear-hero.jpg?1345901694"/>
<img alt="Two pronghorn (Antilocapra americana) waking. Custer State Park, North Dakota, United States." class="centered" src="https://c402277.ssl.cf1.rackcdn.com/photos/1764/images/hero_small/SCR_205311.jpg?1345552323"/>
<img alt="Red panda " class="centered" src="https://c402277.ssl.cf1.rackcdn.com/photos/806/images/hero_small/SCR_47384.jpg?1345530917"/>
<img alt="Saola" class="centered" src="https://c402277.ssl.cf1.rackcdn.com/photos/156/images/hero_small/LG-Saola_Hero_image_%28c%29_David_Hulse_WWF_Canon.jpg?1345515547"/>
<img alt="Sea lion" class="centered" src="https://c402277.ssl.cf1.rackcdn.com/photos/560/images/hero_small/sea-lion-07162012-hero-shutterstock_90483844.jpg?1345525771"/>
<img alt="Whale" class="centered" src="https://c402277.ssl.cf1.rackcdn.com/photos/340/images/hero_small/HI_257682.jpg?1434400464"/>

In [7]:
# Display items in MongoDB collection
specie_listings = db.species.find()

for specie in specie_listings:
    print(specie)


{'_id': ObjectId('5d1d16bed99536d94cd29527'), 'Species Name': 'African Elephant', 'Status': '\nVulnerable\n', 'Population': '\nApproximately 415,000 in the wild\n', 'Scientific Name': '\nLoxodonta africana\n', 'Species Description': '\nThe African elephant is the largest animal walking the Earth. Their herds wander through 37 countries in Africa. They are easily recognized by their trunk that is used for communication and handling objects. And their large ears allow them to radiate excess heat. Upper incisor teeth develop into tusks in African elephants and grow throughout their lifetime. There are two subspecies of African elephants—the Savanna (or bush) elephant and the Forest elephant. Savanna elephants are larger than forest elephants, and their tusks curve outwards. In addition to being smaller, forest elephants are darker and their tusks are straighter and point downward. There are also differences in the size and shape of the skull and skeleton between the two subspecies.\nFores

In [8]:
# Search for articles that mention animals
articlecollection = db.articles

nytimesurl = "https://api.nytimes.com/svc/search/v2/articlesearch.json?"
 
for animals in endangered_animals:
    query =  "'" + animals + "'"

    # Build query URL

    query_url = nytimesurl + 'q='+query+'&api-key='+api_key+'&sort=newest'

# Request articles
    
    articles = requests.get(query_url).json()

    # Reset "articles_list" and "Urls"
    articles_list = []
    Urls = [] 

# The "response" property in articles contains the actual articles

# list comprehension.
    try:
        articles_list = [article for article in articles["response"]["docs"]]
    except(KeyError): 
        pass
    for article in articles_list:    
        Urls.append(article["web_url"])  
 
# Dictionary to be inserted as a MongoDB document

    articles_dict = {

        'endagered_animal': animals,

        'url': [Urls]

    }

    articlecollection.insert_one(articles_dict)