In [1]:
import pandas as pd #to set Mars facts into a db

from bs4 import BeautifulSoup as bs # "You didn't write that awful page. You're just trying to get some data out of it."
#--------------------------------------Used to navigate/pull data out of html and xml files

from splinter import Browser #abstraction layer on top of selenium

from selenium import webdriver #The selenium.webdriver module provides all the WebDriver implementations.
#-------------------------------Currently supported WebDriver implementations are Firefox, Chrome, IE and Remote.

import time #for browser delays

In [2]:
#instatiate the Browser class, should open up a testable browser window
executable_path = {'executable_path': 'chromedriver'}
browser = Browser('chrome', **executable_path, headless=False)

## scrape the NASA news site

In [3]:
url = 'https://mars.nasa.gov/news/?page=0&per_page=40&order=publish_date+desc%2Ccreated_at+desc&search=&category=19%2C165%2C184%2C204&blank_scope=Latest'
browser.visit(url)

In [4]:
#use the html attribute to get the html content of the visited page:
html = browser.html

#make the soup using the built-in python parser
soup = bs(html,'html.parser')

#assign the scraped data into variables
news_title = soup.find('div',class_='list_text').find('div', class_='content_title').find('a').text
print(news_title)

AI Is Helping Scientists Discover Fresh Craters on Mars


In [5]:
#assign the scraped data into variables
news_teaser = soup.find('div',class_='list_text').find('div', class_='article_teaser_body').text
print(news_teaser)

It's the first time machine learning has been used to find previously unknown craters on the Red Planet.


## Scrape the featured image

In [6]:
url = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'
browser.visit(url)

In [7]:
#Make sure to find the image url to the full size .jpg image
browser.find_by_id('full_image').click()

In [8]:
#Make sure to save a complete url string for this image.
browser.is_element_present_by_text('more info')

#pause to allow for the user to 'see' something happening
time.sleep(1)

browser.links.find_by_partial_text('more info').click()

In [9]:
#use the html attribute to get the html content of the visited page:
html = browser.html

#make the soup using the built-in python parser
soup = bs(html,'html.parser')

#assign the scraped url
featured_img_url = soup.select_one('figure.lede a img').get("src")
print(featured_img_url)

/spaceimages/images/largesize/PIA22893_hires.jpg


## Scrape Facts

In [10]:
url = 'https://space-facts.com/mars/'
browser.visit(url)

In [11]:
# pandas... pandas, pandas, pandas!
facts_df = pd.read_html(url)
facts_df

[                      0                              1
 0  Equatorial Diameter:                       6,792 km
 1       Polar Diameter:                       6,752 km
 2                 Mass:  6.39 × 10^23 kg (0.11 Earths)
 3                Moons:            2 (Phobos & Deimos)
 4       Orbit Distance:       227,943,824 km (1.38 AU)
 5         Orbit Period:           687 days (1.9 years)
 6  Surface Temperature:                   -87 to -5 °C
 7         First Record:              2nd millennium BC
 8          Recorded By:           Egyptian astronomers,
   Mars - Earth Comparison             Mars            Earth
 0               Diameter:         6,779 km        12,742 km
 1                   Mass:  6.39 × 10^23 kg  5.97 × 10^24 kg
 2                  Moons:                2                1
 3      Distance from Sun:   227,943,824 km   149,598,262 km
 4         Length of Year:   687 Earth days      365.24 days
 5            Temperature:     -87 to -5 °C      -88 to 58°C,
           

In [12]:
facts = facts_df[0]
facts.set_index(0, inplace=True)
facts

Unnamed: 0_level_0,1
0,Unnamed: 1_level_1
Equatorial Diameter:,"6,792 km"
Polar Diameter:,"6,752 km"
Mass:,6.39 × 10^23 kg (0.11 Earths)
Moons:,2 (Phobos & Deimos)
Orbit Distance:,"227,943,824 km (1.38 AU)"
Orbit Period:,687 days (1.9 years)
Surface Temperature:,-87 to -5 °C
First Record:,2nd millennium BC
Recorded By:,Egyptian astronomers


In [13]:
#rearrange
facts = facts.transpose()
facts

Unnamed: 0,Equatorial Diameter:,Polar Diameter:,Mass:,Moons:,Orbit Distance:,Orbit Period:,Surface Temperature:,First Record:,Recorded By:
1,"6,792 km","6,752 km",6.39 × 10^23 kg (0.11 Earths),2 (Phobos & Deimos),"227,943,824 km (1.38 AU)",687 days (1.9 years),-87 to -5 °C,2nd millennium BC,Egyptian astronomers


In [14]:
#render df to html
facts_table = facts.to_html()
facts_table

'<table border="1" class="dataframe">\n  <thead>\n    <tr style="text-align: right;">\n      <th></th>\n      <th>Equatorial Diameter:</th>\n      <th>Polar Diameter:</th>\n      <th>Mass:</th>\n      <th>Moons:</th>\n      <th>Orbit Distance:</th>\n      <th>Orbit Period:</th>\n      <th>Surface Temperature:</th>\n      <th>First Record:</th>\n      <th>Recorded By:</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>1</th>\n      <td>6,792 km</td>\n      <td>6,752 km</td>\n      <td>6.39 × 10^23 kg (0.11 Earths)</td>\n      <td>2 (Phobos &amp; Deimos)</td>\n      <td>227,943,824 km (1.38 AU)</td>\n      <td>687 days (1.9 years)</td>\n      <td>-87 to -5 °C</td>\n      <td>2nd millennium BC</td>\n      <td>Egyptian astronomers</td>\n    </tr>\n  </tbody>\n</table>'

## Scrape the hemispheres

In [15]:
url = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'
browser.visit(url)

In [16]:
#use the html attribute to get the html content of the visited page:
html = browser.html

#make the soup using the built-in python parser
soup = bs(html,'html.parser')


links = soup.find('div', class_='item')

#create an empty list to store the links to each hemisphere and each title
title = []
img_urls = []

#create a loop to select each link
for link in links:
    url = "https://astrogeology.usgs.gov" + links.find('a', class_='itemLink product-item')['href']
    browser.visit(url)
    #use the html attribute to get the html content of the visited page:
    html = browser.html
    #make the soup using the built-in python parser
    soup = bs(html,'html.parser')
    results = soup.find('div', class_='container')
    
    #append the 'title' list
    title.append(results.find('h2',class_ = 'title').text)
    #append the url
    img_urls.append("https://astrogeology.usgs.gov" + results.find('img', class_="wide-image")['src'])

In [17]:
hemisphere_urls = pd.DataFrame({
    "title":title,
    "url":img_urls
})

In [18]:
hemisphere_urls

Unnamed: 0,title,url
0,Cerberus Hemisphere Enhanced,https://astrogeology.usgs.gov/cache/images/f5e...
1,Cerberus Hemisphere Enhanced,https://astrogeology.usgs.gov/cache/images/f5e...
2,Cerberus Hemisphere Enhanced,https://astrogeology.usgs.gov/cache/images/f5e...
3,Cerberus Hemisphere Enhanced,https://astrogeology.usgs.gov/cache/images/f5e...
