In [1]:
# Dependencies
from splinter import Browser
from bs4 import BeautifulSoup
import requests
import pandas as pd

ModuleNotFoundError: No module named 'splinter'

In [2]:
# Launch a visible (non-headless) Chrome session through splinter,
# pointing it at the chromedriver binary one directory up
executable_path = {'executable_path': '../chromedriver.exe'}
browser = Browser('chrome', headless=False, **executable_path)

In [3]:
# Species directory page on www.worldwildlife.org
url = 'https://www.worldwildlife.org/species/directory'

# Open the directory page in the browser session
browser.visit(url)

In [4]:
# List to store [common name, scientific name] for each row of the table
cols = []

# Number of directory pages to scrape
page_count = 2

for page_number in range(1, page_count + 1):
    # Parse the HTML of the page currently shown in the browser
    soup = BeautifulSoup(browser.html, 'html.parser')

    # Find the table; fail with a clear message instead of an
    # AttributeError on None if the page layout is not what we expect
    table = soup.find('table', class_='lead')
    if table is None or table.tbody is None:
        raise RuntimeError(
            'Species table not found on page {}'.format(page_number))

    # Collect the first two cells (common and scientific name) of every row
    for row in table.tbody.find_all('tr'):
        cells = row.find_all('td')
        # Skip rows that do not carry both name cells
        if len(cells) < 2:
            continue
        cols.append([cell.text.strip() for cell in cells[:2]])

    # Nothing left to click once the last page has been scraped
    if page_number == page_count:
        break

    try:
        # Navigate to the next page by clicking its page-number link
        browser.click_link_by_partial_text(str(page_number + 1))
    except Exception as exc:
        # Stop here: re-parsing the same page would only duplicate rows
        print('Could not open page {}: {!r}'.format(page_number + 1, exc))
        break

print('Scraping Complete')

# Check the length of "cols"
len(cols)

Scraping Complete


100

In [5]:
# Build "name_df" from the scraped [common name, scientific name] pairs in "cols"
name_df = pd.DataFrame({
    'Common_Name': [pair[0] for pair in cols],
    'Sci_Name': [pair[1] for pair in cols],
})

# Preview "name_df"
name_df.head()

Unnamed: 0,Common_Name,Sci_Name
0,African Elephant,Loxodonta africana
1,African Wild Dog,Lycaon pictus
2,Albacore Tuna,Thunnus alalunga
3,Amazon River Dolphin,Scientific Name Inia geoffrensis
4,Amur Leopard,Panthera pardus orientalis


In [6]:
# Close the browser session now that scraping is finished;
# "browser" cannot be used by later cells after this call
browser.quit()

In [7]:
# Load the original animal species list from the project data folder
animal_csv_path = "../../data/animal_list_original.csv"
animal_df = pd.read_csv(animal_csv_path, encoding="UTF-8")

# Preview "animal_df"
animal_df.head()

Unnamed: 0,Common_Name,Other_Name,Full_url
0,African Elephant,,https://www.iucnredlist.org/species/12392/3339343
1,African Wild Dog,,https://www.iucnredlist.org/species/12436/1671...
2,Albacore Tuna,,https://www.iucnredlist.org/species/21856/9325450
3,Amazon River Dolphin,,https://www.iucnredlist.org/species/10831/5035...
4,Amur Leopard,,https://www.iucnredlist.org/species/15954/1024...


In [8]:
# Drop the "Full_url" column from "animal_df".
# drop(..., errors='ignore') returns a new frame and tolerates the column
# already being gone, so this cell can be re-run safely; the previous
# `del animal_df['Full_url']` raised KeyError on a second run.
animal_df = animal_df.drop(columns=['Full_url'], errors='ignore')

# Preview "animal_df"
animal_df.head()

Unnamed: 0,Common_Name,Other_Name
0,African Elephant,
1,African Wild Dog,
2,Albacore Tuna,
3,Amazon River Dolphin,
4,Amur Leopard,


In [9]:
# Left-join the scraped names onto the animal list by "Common_Name",
# keeping every row of "animal_df"; the result is "animal_name_df"
animal_name_df = pd.merge(animal_df, name_df, how='left', on='Common_Name')

# Check length of "animal_name_df"
len(animal_name_df)

69

In [10]:
# Allow up to 70 rows to be displayed so every scientific name can be checked by eye
pd.set_option('display.max_rows', 70)

# View "animal_name_df"
animal_name_df

Unnamed: 0,Common_Name,Other_Name,Sci_Name
0,African Elephant,,Loxodonta africana
1,African Wild Dog,,Lycaon pictus
2,Albacore Tuna,,Thunnus alalunga
3,Amazon River Dolphin,,Scientific Name Inia geoffrensis
4,Amur Leopard,,Panthera pardus orientalis
5,Arctic Fox,,Vulpes lagopus
6,Arctic Wolf,Grey Wolf,Canis lupus arctos
7,Asian Elephant,,Elephas maximus indicus
8,Beluga,Beluga Whale,Delphinapterus leucas
9,Bigeye Tuna,,Thunnus obesus


In [11]:
# Manual corrections for the "Sci_Name" column, keyed by row label in
# "animal_name_df" (e.g. row 3 was scraped with a stray "Scientific Name "
# label in front of the name).
sci_name_fixes = {
    3: 'Inia geoffrensis',
    4: 'Panthera pardus',
    6: 'Canis lupus',
    7: 'Elephas maximus',
    22: 'Gorilla beringei',
    24: 'Spheniscus mendiculus',
    25: 'Platanista gangetica',
    47: 'Bison bison',
    48: 'Dendrobates leucomelas',
    50: 'Antilocapra americana',
    67: 'Neophocaena asiaeorientalis',
}

# The fixes are tied to specific rows: refuse to run if any label is missing,
# since .loc would otherwise silently append a new row
missing_labels = sorted(set(sci_name_fixes) - set(animal_name_df.index))
if missing_labels:
    raise KeyError(
        'Row labels not found in animal_name_df: {}'.format(missing_labels))

# Write through .loc instead of chained indexing (df['Sci_Name'][i] = ...),
# which raises SettingWithCopyWarning and may not update the frame at all
for row_label, sci_name in sci_name_fixes.items():
    animal_name_df.loc[row_label, 'Sci_Name'] = sci_name

# View "animal_name_df"
animal_name_df

Unnamed: 0,Common_Name,Other_Name,Sci_Name
0,African Elephant,,Loxodonta africana
1,African Wild Dog,,Lycaon pictus
2,Albacore Tuna,,Thunnus alalunga
3,Amazon River Dolphin,,Inia geoffrensis
4,Amur Leopard,,Panthera pardus
5,Arctic Fox,,Vulpes lagopus
6,Arctic Wolf,Grey Wolf,Canis lupus
7,Asian Elephant,,Elephas maximus
8,Beluga,Beluga Whale,Delphinapterus leucas
9,Bigeye Tuna,,Thunnus obesus


In [12]:
# Write the cleaned names table to CSV with a header row and without the index column
animal_names_csv_path = "../../data/animal_names.csv"
animal_name_df.to_csv(animal_names_csv_path, header=True, index=False)