In [1]:
# Import dependencies
import pprint

import pandas as pd
import requests
import splinter
from bs4 import BeautifulSoup as bs
from selenium import webdriver
from splinter import Browser

In [2]:
# Start a Chrome session through Selenium and navigate to NASA's Mars news page.
# The session is intentionally left open: the next cell reads `driver.page_source`.
driver = webdriver.Chrome()
url = "https://mars.nasa.gov/news/"
driver.get(url)

In [3]:
# Parse the HTML currently rendered in the Selenium-driven Chrome session.
soup = bs(driver.page_source, "lxml")

# Collect every headline container and every teaser container on the page.
content_title = soup.find_all('div', class_='content_title')
paragraph = soup.find_all('div', class_='article_teaser_body')

# Fail with an actionable message instead of a bare IndexError when the
# expected elements are missing (for example, the cell ran before the page
# finished rendering, or the site's markup changed).
if len(content_title) < 2 or not paragraph:
    raise RuntimeError(
        "Could not locate the latest news item: found "
        f"{len(content_title)} 'content_title' and {len(paragraph)} "
        "'article_teaser_body' elements. Make sure the page has finished "
        "loading, then re-run this cell."
    )

# The second `content_title` match is paired with the first teaser;
# presumably the first `content_title` on the page is not part of the
# article list -- TODO confirm against the live page.
# Surrounding whitespace is stripped so the printed sentences read cleanly.
news_title = content_title[1].text.strip()
news_p = paragraph[0].text.strip()

print(f"The latest news title is: {news_title}.")
print(f"The latest news is: {news_p}")

The lastest news title is: NASA's Perseverance Rover Is Midway to Mars .
The lastest news is: Sometimes half measures can be a good thing – especially on a journey this long. The agency's latest rover only has about 146 million miles left to reach its destination.


In [4]:
# URL of the JPL space-images gallery filtered to the Mars category
image_url = "https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars"

# Launch a visible (non-headless) Chrome window through splinter.
executable_path = {'executable_path': 'chromedriver'}
browser = Browser('chrome', **executable_path, headless=False)
try:
    browser.visit(image_url)
    # Capture the rendered HTML while the browser is still open.
    html = browser.html
finally:
    # Close the Chrome window once the HTML is captured (or if the visit
    # fails); otherwise the process lingers after `browser` is rebound later.
    browser.quit()

# Parse the captured HTML with BeautifulSoup
soup = bs(html, 'html.parser')

In [5]:
# Gather every <div class="img"> wrapper in the parsed gallery page
images = soup.find_all('div', class_='img')

# The `src` of the first wrapper's <img> is appended to the site root
# to form the featured image URL.
first_image_src = images[0].img['src']
featured_image_url = "https://www.jpl.nasa.gov" + first_image_src

print(f"The first image link is: {featured_image_url}")

The first image link is: https://www.jpl.nasa.gov/spaceimages/images/wallpaper/PIA24189-640x350.jpg


In [6]:
# Source page for the Mars facts table
mar_facts_url = "https://space-facts.com/mars/"

# Let pandas parse the tables found on the page into DataFrames
tables = pd.read_html(mar_facts_url)

# Take the first parsed table and render it as an HTML string
first_table = tables[0]
html_result = first_table.to_html()

# Show the generated markup
print(html_result)

<table border="1" class="dataframe">
  <thead>
    <tr style="text-align: right;">
      <th></th>
      <th>0</th>
      <th>1</th>
    </tr>
  </thead>
  <tbody>
    <tr>
      <th>0</th>
      <td>Equatorial Diameter:</td>
      <td>6,792 km</td>
    </tr>
    <tr>
      <th>1</th>
      <td>Polar Diameter:</td>
      <td>6,752 km</td>
    </tr>
    <tr>
      <th>2</th>
      <td>Mass:</td>
      <td>6.39 × 10^23 kg (0.11 Earths)</td>
    </tr>
    <tr>
      <th>3</th>
      <td>Moons:</td>
      <td>2 (Phobos &amp; Deimos)</td>
    </tr>
    <tr>
      <th>4</th>
      <td>Orbit Distance:</td>
      <td>227,943,824 km (1.38 AU)</td>
    </tr>
    <tr>
      <th>5</th>
      <td>Orbit Period:</td>
      <td>687 days (1.9 years)</td>
    </tr>
    <tr>
      <th>6</th>
      <td>Surface Temperature:</td>
      <td>-87 to -5 °C</td>
    </tr>
    <tr>
      <th>7</th>
      <td>First Record:</td>
      <td>2nd millennium BC</td>
    </tr>
    <tr>
      <th>8</th>
      <td>Recorded

In [7]:
# Search-results page listing the enhanced Mars hemisphere images
mar_hemi_url = "https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars"

# Keyword arguments for splinter's Browser. No Browser is launched in this
# cell (the page is fetched with Selenium only), but the hemisphere-scraping
# cell further down passes these arguments to `Browser('chrome', ...)`.
executable_path = {'executable_path': 'chromedriver'}

# Fetch the results page with a Selenium-driven Chrome session and parse it.
driver = webdriver.Chrome()
try:
    driver.get(mar_hemi_url)
    # Build the soup while the session is still alive.
    soup = bs(driver.page_source, "lxml")
finally:
    # Close the Chrome window once the page source has been parsed (or if
    # loading fails) so the session is not left running.
    driver.quit()

In [11]:
# Accumulates one {'title': ..., 'url': ...} record per hemisphere
images_urls = []

# Every <h3> on the parsed results page supplies one link text to follow
hemisphere_titles = [heading.text for heading in soup.find_all('h3')]

with Browser('chrome', **executable_path) as browser:
    for image_title in hemisphere_titles:
        # Start from the results page on every pass
        browser.visit(mar_hemi_url)
        # Follow the link whose text contains this hemisphere's title
        browser.click_link_by_partial_text(image_title)
        # The element labelled "Sample" carries the image link in its href
        sample_link = browser.find_by_text('Sample')
        sample_href = sample_link['href']
        # Record the title/url pair and echo the link
        images_urls.append({'title': image_title, 'url': sample_href})
        print(sample_href)


https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg
https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg
https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg
https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg


In [16]:
# Pretty-print the collected hemisphere records (one dict per hemisphere).
# `pprint` is imported in the notebook's import cell rather than here, so all
# dependencies are declared in one place.
pp = pprint.PrettyPrinter(indent=1)
pp.pprint(images_urls)

[{'title': 'Cerberus Hemisphere Enhanced',
  'url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg'},
 {'title': 'Schiaparelli Hemisphere Enhanced',
  'url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg'},
 {'title': 'Syrtis Major Hemisphere Enhanced',
  'url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg'},
 {'title': 'Valles Marineris Hemisphere Enhanced',
  'url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg'}]
