In [1]:
from bs4 import BeautifulSoup as bs
import requests
from splinter import Browser
import pandas as pd

# OPEN BROWSER

In [2]:
# Launch a visible (non-headless) Chrome window driven by Splinter.
# NOTE: point 'executable_path' at the chromedriver binary on your machine.
executable_path = dict(executable_path='chromedriver.exe')
browser = Browser('chrome', headless=False, **executable_path)

# NASA MARS NEWS

In [3]:
# Scrape the NASA Mars news listing into `list_mars_news`: a list of
# {"news_title", "news_p"} dicts, one per article slide found on the page.
url = "https://mars.nasa.gov/news/"
browser.visit(url)

# Try to expand the listing by clicking the "MORE" link, at most 5 times.
# Stop at the first failed click instead of retrying a link that is not there.
for attempt in range(5):
    try:
        browser.click_link_by_partial_text('MORE')
    except Exception:
        # `Exception` rather than a bare `except:` so that KeyboardInterrupt /
        # kernel interrupts are not swallowed.
        print("Scraping Complete")
        break

html = browser.html
soup = bs(html, "html.parser")
results = soup.find_all('li', class_='slide')
list_mars_news = []

for result in results:
    # When a slide lacks one of the two divs, `.find()` returns None and the
    # `.text` access raises AttributeError: report it and skip that slide.
    try:
        news_title = result.find('div', class_='content_title').text.strip()
        news_p = result.find('div', class_='article_teaser_body').text.strip()
    except AttributeError as e:
        print(e)
        continue

    dict_result = {
        "news_title": news_title,
        "news_p": news_p,
    }
    list_mars_news.append(dict_result)

# Display the last article collected. Reading it from the list (rather than
# from `dict_result`) avoids a NameError when nothing was scraped and cannot
# show a stale value left in the kernel by an earlier run.
list_mars_news[-1] if list_mars_news else None

Scraping Complete
Scraping Complete
Scraping Complete
Scraping Complete
Scraping Complete


{'news_title': 'Landing Day for InSight',
 'news_p': "NASA's InSight spacecraft is on target for Mars landing at around noon PST today."}

# JPL MARS SPACE IMAGES - FEATURED IMAGE

In [4]:
# Open the JPL space-images page filtered to the Mars category.
url = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'
browser.visit(url)

# Parse the rendered page and walk down to the featured image's anchor tag:
# carousel container -> <article> -> <footer> -> <a>.
html = browser.html
soup = bs(html, "html.parser")
carousel = soup.find('div', class_='carousel_items')
featured_anchor = carousel.article.footer.a

# The attribute value is appended to the site origin to form the full URL.
results = "https://www.jpl.nasa.gov" + featured_anchor["data-fancybox-href"]

# Kept as a one-element list; later cells read `list_mars_images`.
list_mars_images = [results]

list_mars_images

['https://www.jpl.nasa.gov/spaceimages/images/mediumsize/PIA16613_ip.jpg']

#  MARS WEATHER

In [5]:
# Build `mars_weather` from the text of the first tweet container on the
# Mars weather Twitter page.
url = "https://twitter.com/marswxreport?lang=en"

# Without a timeout `requests.get` can block indefinitely on a stalled
# connection; `raise_for_status` stops an HTTP error page from being parsed
# as if it were the feed.
response = requests.get(url, timeout=30)
response.raise_for_status()
soup = bs(response.text, "html.parser")

results = soup.find_all('div', class_='js-tweet-text-container')
if not results:
    # Same exception type the bare `results[0]` would raise, but with a
    # message that says what was actually missing.
    raise IndexError(
        "no 'js-tweet-text-container' elements found in the response from " + url
    )

# Drop the last whitespace-separated token and re-append the " hPa" unit.
# NOTE(review): presumably the final token is the unit fused with trailing
# non-report text (e.g. a media link) - confirm against the live markup.
tweet_words = results[0].p.text.split()
mars_weather = " ".join(tweet_words[:-1]) + " hPa"

mars_weather

'InSight sol 156 (2019-05-05) low -99.2ºC (-146.6ºF) high -18.1ºC (-0.6ºF) winds from the SW at 4.7 m/s (10.5 mph) gusting to 13.8 m/s (30.8 mph) pressure at 7.40 hPa'

# MARS FACTS

In [6]:
# Read every HTML table on the Mars facts page; only the first one is used.
url = "https://space-facts.com/mars/"
tables = pd.read_html(url)

# Turn each row of the first table into an {"element", "value"} record taken
# from its first and second columns. Despite the name, `html_table` is a
# plain list of dicts, not HTML markup.
html_table = [
    {"element": fact_row.iloc[0], "value": fact_row.iloc[1]}
    for _index, fact_row in tables[0].iterrows()
]

html_table

[{'element': 'Equatorial Diameter:', 'value': '6,792 km'},
 {'element': 'Polar Diameter:', 'value': '6,752 km'},
 {'element': 'Mass:', 'value': '6.42 x 10^23 kg (10.7% Earth)'},
 {'element': 'Moons:', 'value': '2 (Phobos & Deimos)'},
 {'element': 'Orbit Distance:', 'value': '227,943,824 km (1.52 AU)'},
 {'element': 'Orbit Period:', 'value': '687 days (1.9 years)'},
 {'element': 'Surface Temperature:', 'value': '-153 to 20 °C'},
 {'element': 'First Record:', 'value': '2nd millennium BC'},
 {'element': 'Recorded By:', 'value': 'Egyptian astronomers'}]

# MARS HEMISPHERES

In [7]:
# For every hemisphere listed on the USGS search-results page, open its detail
# page and record the page title together with the first download link.
url = "https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars"
browser.visit(url)

html = browser.html
soup = bs(html, "html.parser")
results = soup.find_all('div', class_='item')
hemisphere_image_urls = []
for result in results:
    # The link text comes from the already-parsed results page, so a malformed
    # item is skipped before the browser navigates anywhere.
    try:
        x = result.find('div', class_='description')
        hemisphere = x.a.text
    except AttributeError as e:
        print('Error:', e)
        continue

    browser.click_link_by_partial_text(hemisphere)
    try:
        html_x = browser.html
        soup_x = bs(html_x, "html.parser")
        link = soup_x.find('div', class_='downloads').find('li').a["href"]
        title = soup_x.find('h2', class_='title').text

        hemisphere_image_urls_dict = {
            "title": title,
            "img_url": link
        }
        hemisphere_image_urls.append(hemisphere_image_urls_dict)
    except (AttributeError, TypeError, KeyError) as e:
        # AttributeError: a `.find()` returned None before an attribute access.
        # TypeError: the <li> had no <a>, so None was subscripted.
        # KeyError: the <a> had no href attribute.
        print('Error:', e)
    finally:
        # Always return to the results page. If this were skipped after a parse
        # failure, the browser would stay on the detail page and the next
        # iteration's click would not find its link.
        browser.visit(url)

hemisphere_image_urls

[{'title': 'Cerberus Hemisphere Enhanced',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg'},
 {'title': 'Schiaparelli Hemisphere Enhanced',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg'},
 {'title': 'Syrtis Major Hemisphere Enhanced',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg'},
 {'title': 'Valles Marineris Hemisphere Enhanced',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg'}]

# CLOSE BROWSER

In [8]:
# Shut down the Splinter-driven browser session opened at the top of the
# notebook; none of the remaining cells shown here use `browser`.
browser.quit()

# CREATE A DICTIONARY WITH ALL THE DATA ACQUIRED

In [9]:
# Bundle every scraped result into a single dictionary, one key per section
# of the notebook, in the order the sections were scraped.
mars_dict = dict(
    mars_news=list_mars_news,
    mars_images=list_mars_images,
    mars_weather=mars_weather,
    html_table=html_table,
    hemisphere_image_urls=hemisphere_image_urls,
)

mars_dict

{'mars_news': [{'news_title': 'For InSight, Dust Cleanings Will Yield New Science',
   'news_p': 'Wind can be crucial to clearing dust from spacecraft solar panels on Mars. With InSight\'s meteorological sensors, scientists get their first measurements of wind and dust interacting "live" on the Martian surface.'},
  {'news_title': 'InSight Captures Sunrise and Sunset on Mars',
   'news_p': "InSight joins the rest of NASA's Red Planet surface missions, all of which have photographed either the start or end of a Martian day."},
  {'news_title': 'NASA Social Media and Websites Win Webby Awards',
   'news_p': 'NASA\'s social media presence, the InSight mission social media accounts, NASA.gov and SolarSystem.NASA.gov will be honored at the 2019 Webby Awards - "the Oscars of the Internet."'},
  {'news_title': "NASA's InSight Detects First Likely 'Quake' on Mars",
   'news_p': 'While their causes are still unknown, one of three shaking events looks a lot like the quakes detected on the Moon b