In [1]:
# Dependencies
from bs4 import BeautifulSoup
from splinter import Browser
import requests
import pandas as pd

In [2]:
#path of webdriver
executable_path = {'executable_path': '../chromedriver'}
browser = Browser('chrome', **executable_path, headless=False)

### NASA Mars News

In [3]:
# News url for scraping
url = 'https://mars.nasa.gov/news/'
browser.visit(url)
html=browser.html
soup=BeautifulSoup(html,'html.parser')

In [4]:
# Retrieve the latest news title
news_title=soup.find_all('div', class_='content_title')[1].text
print(f"Latest news title: {news_title}")

Latest news title: NASA's Perseverance Rover Gives High-Definition Panoramic View of Landing Site


In [5]:
# Retrive the latest new paragraph
news_p=soup.find_all('div', class_='article_teaser_body')[0].text
print(f"Latest news: {news_p}")

Latest news: A 360-degree panorama taken by the rover’s Mastcam-Z instrument will be discussed during a public video chat this Thursday.


### JPL Mars Space Images - Featured Image

In [6]:
# Image url for scraping
jpl_url="https://data-class-jpl-space.s3.amazonaws.com/JPL_Space/index.html"
browser.visit(jpl_url)
html = browser.html
soup = BeautifulSoup(html, "html.parser")

In [7]:
# Retrieve the url of featured image
img_url=soup.find_all('img', class_='headerimage')[0]['src']
featured_image_url=jpl_url+img_url
featured_image_url

'https://data-class-jpl-space.s3.amazonaws.com/JPL_Space/index.htmlimage/featured/mars2.jpg'

### Mars Facts

In [8]:
# Mars facts table url for scraping
facts_url="https://space-facts.com/mars/"
browser.visit(facts_url)
html = browser.html
soup = BeautifulSoup(html, "html.parser")

In [9]:
# Retrieve the mars fact table
tb=pd.read_html('https://space-facts.com/mars/')
facts_tb=tb[0]
facts_tb.rename(columns={0:'Description',1:'Mars'},inplace=True)
facts_tb.set_index('Description',inplace=True)
facts_tb

Unnamed: 0_level_0,Mars
Description,Unnamed: 1_level_1
Equatorial Diameter:,"6,792 km"
Polar Diameter:,"6,752 km"
Mass:,6.39 × 10^23 kg (0.11 Earths)
Moons:,2 (Phobos & Deimos)
Orbit Distance:,"227,943,824 km (1.38 AU)"
Orbit Period:,687 days (1.9 years)
Surface Temperature:,-87 to -5 °C
First Record:,2nd millennium BC
Recorded By:,Egyptian astronomers


In [10]:
# Convert pandas table to html
facts_tb_html=facts_tb.to_html()
facts_tb_html.replace('\n','')
print(facts_tb_html)

<table border="1" class="dataframe">
  <thead>
    <tr style="text-align: right;">
      <th></th>
      <th>Mars</th>
    </tr>
    <tr>
      <th>Description</th>
      <th></th>
    </tr>
  </thead>
  <tbody>
    <tr>
      <th>Equatorial Diameter:</th>
      <td>6,792 km</td>
    </tr>
    <tr>
      <th>Polar Diameter:</th>
      <td>6,752 km</td>
    </tr>
    <tr>
      <th>Mass:</th>
      <td>6.39 × 10^23 kg (0.11 Earths)</td>
    </tr>
    <tr>
      <th>Moons:</th>
      <td>2 (Phobos &amp; Deimos)</td>
    </tr>
    <tr>
      <th>Orbit Distance:</th>
      <td>227,943,824 km (1.38 AU)</td>
    </tr>
    <tr>
      <th>Orbit Period:</th>
      <td>687 days (1.9 years)</td>
    </tr>
    <tr>
      <th>Surface Temperature:</th>
      <td>-87 to -5 °C</td>
    </tr>
    <tr>
      <th>First Record:</th>
      <td>2nd millennium BC</td>
    </tr>
    <tr>
      <th>Recorded By:</th>
      <td>Egyptian astronomers</td>
    </tr>
  </tbody>
</table>


### Mars Hemispheres

In [11]:
# Mars hemispheres url for scraping
base_url="https://astrogeology.usgs.gov"
search_url="/search/results?q=hemisphere+enhanced&k1=target&v1=Mars/"
browser.visit(base_url+search_url)
html = browser.html
soup = BeautifulSoup(html, "html.parser")

In [12]:
# Retrieve the urls of all hemispheres image link
search_list=[]
hemi_url=soup.find_all('div', class_='description')
search_list=[base_url+hemi.find('a')['href'] for hemi in hemi_url]
search_list

['https://astrogeology.usgs.gov/search/map/Mars/Viking/cerberus_enhanced',
 'https://astrogeology.usgs.gov/search/map/Mars/Viking/schiaparelli_enhanced',
 'https://astrogeology.usgs.gov/search/map/Mars/Viking/syrtis_major_enhanced',
 'https://astrogeology.usgs.gov/search/map/Mars/Viking/valles_marineris_enhanced']

In [13]:
# Retrieve the urls of all full resolution image for the hemispheres image link list
hemisphere_image_urls=[]
for s_url in search_list:
    print(f"Extracting info from {s_url}")
    browser.visit(s_url)
    html = browser.html
    soup = BeautifulSoup(html, "html.parser")
    hemi_img_dct={}
    title=soup.find('h2',class_='title').text
    #print(title)
    img_url=soup.find_all('li')[1].a['href']
    #print(img_url)
    hemi_img_dct["title"]=title
    hemi_img_dct["img_url"]=img_url
    hemisphere_image_urls.append(hemi_img_dct)

print(hemisphere_image_urls)

Extracting info from https://astrogeology.usgs.gov/search/map/Mars/Viking/cerberus_enhanced
Extracting info from https://astrogeology.usgs.gov/search/map/Mars/Viking/schiaparelli_enhanced
Extracting info from https://astrogeology.usgs.gov/search/map/Mars/Viking/syrtis_major_enhanced
Extracting info from https://astrogeology.usgs.gov/search/map/Mars/Viking/valles_marineris_enhanced
[{'title': 'Cerberus Hemisphere Enhanced', 'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif'}, {'title': 'Schiaparelli Hemisphere Enhanced', 'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif'}, {'title': 'Syrtis Major Hemisphere Enhanced', 'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif'}, {'title': 'Valles Marineris Hemisphere Enhanced', 'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif'}]


In [14]:
# Return a dictionary value
Mars_dict={
    "news_title":news_title,
    "news_p":news_p,
    "featured_image_url":featured_image_url,
    "facts_tb_html":facts_tb_html,
    "hemisphere_image_urls":hemisphere_image_urls
}

In [15]:
Mars_dict

{'news_title': "NASA's Perseverance Rover Gives High-Definition Panoramic View of Landing Site",
 'news_p': 'A 360-degree panorama taken by the rover’s Mastcam-Z instrument will be discussed during a public video chat this Thursday.',
 'featured_image_url': 'https://data-class-jpl-space.s3.amazonaws.com/JPL_Space/index.htmlimage/featured/mars2.jpg',
 'facts_tb_html': '<table border="1" class="dataframe">\n  <thead>\n    <tr style="text-align: right;">\n      <th></th>\n      <th>Mars</th>\n    </tr>\n    <tr>\n      <th>Description</th>\n      <th></th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>Equatorial Diameter:</th>\n      <td>6,792 km</td>\n    </tr>\n    <tr>\n      <th>Polar Diameter:</th>\n      <td>6,752 km</td>\n    </tr>\n    <tr>\n      <th>Mass:</th>\n      <td>6.39 × 10^23 kg (0.11 Earths)</td>\n    </tr>\n    <tr>\n      <th>Moons:</th>\n      <td>2 (Phobos &amp; Deimos)</td>\n    </tr>\n    <tr>\n      <th>Orbit Distance:</th>\n      <td>227,943,824 km (1.3