In [1]:
# Import dependencies
from bs4 import BeautifulSoup as bs
from splinter import Browser
import pandas as pd
import requests
import time

In [2]:
# Import Splinter and set the chromedriver path
executable_path = {"executable_path": "/usr/local/bin/chromedriver"}
browser = Browser("chrome", **executable_path, headless=False)

## NASA Mars News

In [3]:
# To scrape the URL
url = "https://mars.nasa.gov/news/"

browser.visit(url)

# To scrape page create soup
html = browser.html
soup = bs(html, "html.parser")

# to avoid lag time
time.sleep(2)

In [4]:
url = "https://mars.nasa.gov/news/"
    
browser.visit(url)

time.sleep(2)
    
# Scrape page into Soup
html = browser.html
soup = bs(html, "html.parser")
    
# Get the latest news title and paragraph text
article = soup.find('div', class_='list_text')
news_title = article.find('a').text
news_p = article.find('div', class_='article_teaser_body').text

In [5]:
# To test if your code works print the first title
print(news_title)
print("--------------------------------------------------------------------")
print(news_p)

NASA's Perseverance Rover Is Midway to Mars 
--------------------------------------------------------------------
Sometimes half measures can be a good thing – especially on a journey this long. The agency's latest rover only has about 146 million miles left to reach its destination.


## JPL Mars Space Images - Featured Image

In [7]:
# To set up url to scrape image
jpl_url = "https://www.jpl.nasa.gov/spaceimages/?search=&category=Marsa"

# To grab image from url 
featured_image_url = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Marsa'

browser.visit(featured_image_url)

time.sleep(2)

html = browser.html

images_soup = bs(html, 'html.parser')

In [8]:
# To retrieve the featured image link
relative_image_path = images_soup.find_all('img')[3]["src"]
featured_image_url = jpl_url + relative_image_path
print(featured_image_url)

https://www.jpl.nasa.gov/spaceimages/?search=&category=Marsa/spaceimages/images/wallpaper/PIA24232-640x350.jpg


## Mars Facts

In [9]:
# To set up url to scrape mars facts
mars_url = "https://space-facts.com/mars/"

browser.visit(mars_url)

time.sleep(2)

# To read html using pandas
mars_html = browser.html
tables = pd.read_html(mars_html)

mars_soup = bs(mars_html, 'html.parser')
tables

[                      0                              1
 0  Equatorial Diameter:                       6,792 km
 1       Polar Diameter:                       6,752 km
 2                 Mass:  6.39 × 10^23 kg (0.11 Earths)
 3                Moons:            2 (Phobos & Deimos)
 4       Orbit Distance:       227,943,824 km (1.38 AU)
 5         Orbit Period:           687 days (1.9 years)
 6  Surface Temperature:                   -87 to -5 °C
 7         First Record:              2nd millennium BC
 8          Recorded By:           Egyptian astronomers,
   Mars - Earth Comparison             Mars            Earth
 0               Diameter:         6,779 km        12,742 km
 1                   Mass:  6.39 × 10^23 kg  5.97 × 10^24 kg
 2                  Moons:                2                1
 3      Distance from Sun:   227,943,824 km   149,598,262 km
 4         Length of Year:   687 Earth days      365.24 days
 5            Temperature:     -87 to -5 °C      -88 to 58°C,
           

In [10]:
# To show in dataframe table
df = pd.DataFrame(tables[2])

# To rename columns
df.columns = ["Description", "Value"]

df

Unnamed: 0,Description,Value
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.39 × 10^23 kg (0.11 Earths)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.38 AU)"
5,Orbit Period:,687 days (1.9 years)
6,Surface Temperature:,-87 to -5 °C
7,First Record:,2nd millennium BC
8,Recorded By:,Egyptian astronomers


In [11]:
# To reindex columns
df = df.set_index("Description")
df.head()

Unnamed: 0_level_0,Value
Description,Unnamed: 1_level_1
Equatorial Diameter:,"6,792 km"
Polar Diameter:,"6,752 km"
Mass:,6.39 × 10^23 kg (0.11 Earths)
Moons:,2 (Phobos & Deimos)
Orbit Distance:,"227,943,824 km (1.38 AU)"


In [12]:
# To scrape the table with the facts about mars to include: diameter, mass, etc.
table_html = df.to_html(index=False, border=1, header=False,
                       classes=["table","table-responsive","table-striped"],
                       justify='center')
table_html

'<table border="1" class="dataframe table table-responsive table-striped">\n  <tbody>\n    <tr>\n      <td>6,792 km</td>\n    </tr>\n    <tr>\n      <td>6,752 km</td>\n    </tr>\n    <tr>\n      <td>6.39 × 10^23 kg (0.11 Earths)</td>\n    </tr>\n    <tr>\n      <td>2 (Phobos &amp; Deimos)</td>\n    </tr>\n    <tr>\n      <td>227,943,824 km (1.38 AU)</td>\n    </tr>\n    <tr>\n      <td>687 days (1.9 years)</td>\n    </tr>\n    <tr>\n      <td>-87 to -5 °C</td>\n    </tr>\n    <tr>\n      <td>2nd millennium BC</td>\n    </tr>\n    <tr>\n      <td>Egyptian astronomers</td>\n    </tr>\n  </tbody>\n</table>'

In [None]:
# To clean data output by removing \n
table_html.replace('\n', '')

In [None]:
print(table_html)

## Mars Hemispheres 

In [None]:
# To set up url to scrape hemisphere name and image
hem_url = "https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars"
usgs_url = 'https://astrogeology.usgs.gov'
browser.visit(hem_url)

time.sleep(2)

hem_html = browser.html

hem_soup = bs(hem_html, 'html.parser')

In [None]:
# To find hemisphere data in page
total_data = hem_soup.find('div', class_='collapsible results')
mars_hemispheres = total_data.find_all('div', class_='item')

# To initialize a list
hemisphere_image_urls = []

# To find the titles and image urls for the Hemisphere iterate through each hemisphere's data
for i in mars_hemispheres:
    # To collect the titles
    hemisphere = i.find('div', class_="description")
    title = hemisphere.h3.text
    
    # To collect image links by browsing to hemisphere page
    hemisphere_link = hemisphere.a["href"]    
    browser.visit(usgs_url + hemisphere_link)
    
    image_html = browser.html
    image_soup = bs(image_html, 'html.parser')
    
    image_link = image_soup.find('div', class_='downloads')
    image_url = image_link.find('li').a['href']

    # To create dictionary to store title and url info
    image_dict = {}
    image_dict['title'] = title
    image_dict['img_url'] = image_url
    
    hemisphere_image_urls.append(image_dict)

print(hemisphere_image_urls)

In [None]:
# To put data into dictionary
mars_dict = {
        "news_title": news_title,
        "news_p": news_p,
        "featured_image_url": featured_image_url,
        "fact_table": str(table_html),
        "hemisphere_images": hemisphere_image_urls
    }
mars_dict