In [1]:
# Import dependencies
import shutil
import time

import pandas as pd
import requests
from bs4 import BeautifulSoup as bs
from splinter import Browser

## Setting up splinter

In [2]:
# For macOS users: print the location of the chromedriver executable found on
# the shell PATH. The path printed below is the one used by the Browser setup cell.
!which chromedriver

/usr/local/bin/chromedriver


In [3]:
# Resolve chromedriver from the PATH so the same cell works regardless of OS;
# fall back to the original macOS location when it is not found on the PATH.
executable_path = {
    'executable_path': shutil.which('chromedriver') or '/usr/local/bin/chromedriver'
}
# Start a Chrome session driven by splinter; headless is explicitly disabled.
browser = Browser('chrome', **executable_path, headless=False)

In [4]:
# For Windows users
# executable_path = {'executable_path': 'chromedriver.exe'}
# browser = Browser('chrome', **executable_path, headless = False)

## NASA Mars News

In [5]:
# Setting up a URL for scraping
NASA_url = 'https://mars.nasa.gov/news/'
browser.visit(NASA_url)
# Give the page up to 10 seconds to show an article teaser before snapshotting
# the HTML (presumably the article list is rendered client-side, so an immediate
# snapshot can miss it). The boolean result is deliberately ignored: if the
# element never appears we still parse whatever is there, as the cell did before.
browser.is_element_present_by_css('div.article_teaser_body', wait_time=10)
NASA_html = browser.html
NASA_soup = bs(NASA_html, 'html.parser')

In [6]:
# Pull the headline and the teaser paragraph of the first article in the parsed page
title = NASA_soup.find('div', attrs={'class': 'content_title'})
pp = NASA_soup.find('div', attrs={'class': 'article_teaser_body'})

# The headline text sits inside the link nested in the title container
news_title = title.a.text.strip()
news_p = pp.text.strip()

print(news_title, news_p, sep='\n')

Robotic Toolkit Added to NASA's Mars 2020 Rover
The bit carousel, which lies at the heart of the rover's Sample Caching System, is now aboard NASA's newest rover.


## JPL Mars Space Images - Featured Image

In [7]:
# Setting up a URL for scraping: the JPL space-images listing, filtered to the Mars category
JPL_url = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'
browser.visit(JPL_url)

In [8]:
# Drive the browser from the listing page towards the featured image:
# follow the 'FULL IMAGE' link, pause, then follow the 'more info' link.
browser.click_link_by_partial_text('FULL IMAGE')
# Fixed 3-second pause before the next click (presumably to let the page
# update after the first click -- TODO confirm it is always long enough).
time.sleep(3)
browser.click_link_by_partial_text('more info')

# Snapshot the HTML of the page the browser is on now and parse it
JPL_html = browser.html
JPL_soup = bs(JPL_html, 'html.parser')

In [9]:
# Build the absolute URL of the featured image from the link inside <figure class="lede">
base = 'https://www.jpl.nasa.gov'
figure = JPL_soup.find('figure', attrs={'class': 'lede'})
href = figure.a['href']  # site-relative path, so it is appended to the host
featured_image_url = base + href

print(featured_image_url)

https://www.jpl.nasa.gov/spaceimages/images/largesize/PIA19674_hires.jpg


## Mars Weather

In [10]:
# Using the request method as the splinter method keeps returning "None" instead..
tweet_url = 'https://mobile.twitter.com/marswxreport?lang=en'
# Always pass a timeout: without one, requests can wait indefinitely on a
# stalled connection and hang the whole notebook run.
response = requests.get(tweet_url, timeout=10)
tweet_soup = bs(response.text, 'lxml')

# Uncomment to inspect the raw markup when the selectors stop matching:
# print(tweet_soup.prettify())

In [11]:
# Latest weather report: text of the <div> nested in the first 'tweet-text' container
tweet = tweet_soup.find('div', attrs={'class': 'tweet-text'})
weather = tweet.div.text.strip()

print(weather)

InSight sol 258 (2019-08-18) low -100.0ºC (-148.1ºF) high -26.2ºC (-15.2ºF)
winds from the SSE at 5.3 m/s (11.9 mph) gusting to 16.8 m/s (37.6 mph)
pressure at 7.60 hPa pic.twitter.com/5nCVjcsmlZ


## Mars Facts

In [12]:
# Fetch every HTML table on the Mars facts page; read_html returns a list of DataFrames
facts_url = 'https://space-facts.com/mars/'
facts = pd.read_html(io=facts_url)
facts

[  Mars - Earth Comparison             Mars            Earth
 0               Diameter:         6,779 km        12,742 km
 1                   Mass:  6.39 × 10^23 kg  5.97 × 10^24 kg
 2                  Moons:                2                1
 3      Distance from Sun:   227,943,824 km   149,598,262 km
 4         Length of Year:   687 Earth days      365.24 days
 5            Temperature:    -153 to 20 °C      -88 to 58°C,
                       0                              1
 0  Equatorial Diameter:                       6,792 km
 1       Polar Diameter:                       6,752 km
 2                 Mass:  6.39 × 10^23 kg (0.11 Earths)
 3                Moons:            2 (Phobos & Deimos)
 4       Orbit Distance:       227,943,824 km (1.38 AU)
 5         Orbit Period:           687 days (1.9 years)
 6  Surface Temperature:                   -87 to -5 °C
 7         First Record:              2nd millennium BC
 8          Recorded By:           Egyptian astronomers]

In [13]:
# Using pandas for scraping: take the first table from the list returned by
# read_html (the Mars - Earth comparison, as the list output above shows)
df = pd.DataFrame(facts[0])
df

Unnamed: 0,Mars - Earth Comparison,Mars,Earth
0,Diameter:,"6,779 km","12,742 km"
1,Mass:,6.39 × 10^23 kg,5.97 × 10^24 kg
2,Moons:,2,1
3,Distance from Sun:,"227,943,824 km","149,598,262 km"
4,Length of Year:,687 Earth days,365.24 days
5,Temperature:,-153 to 20 °C,-88 to 58°C


In [14]:
# Cleaning the table for description.
# Rebuild from the scraped table (facts[0]) instead of deleting a column from
# `df` in place, so this cell can be re-run safely: the in-place
# `del df['Earth']` raised KeyError on a second run because the column was gone.
df = facts[0].drop(columns=['Earth'])
# Two columns remain: the fact label and its value for Mars
df.columns = ['Description', 'Value']
df = df.set_index('Description')
df

Unnamed: 0_level_0,Value
Description,Unnamed: 1_level_1
Diameter:,"6,779 km"
Mass:,6.39 × 10^23 kg
Moons:,2
Distance from Sun:,"227,943,824 km"
Length of Year:,687 Earth days
Temperature:,-153 to 20 °C


## Mars Hemispheres

In [15]:
# Load the USGS search results for the enhanced Mars hemisphere images and parse the page
USGS_url = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'
browser.visit(USGS_url)
USGS_html = browser.html
USGS_soup = bs(markup=USGS_html, features='html.parser')

In [16]:
# Storing titles of images for scraping: the text of every <h3> heading on the
# results page. A comprehension keeps its loop variable local, so the `title`
# variable set in the NASA news cell is not overwritten by this cell.
titles = USGS_soup.find_all('h3')
titles_list = [heading.text for heading in titles]

print(titles_list)

['Cerberus Hemisphere Enhanced', 'Schiaparelli Hemisphere Enhanced', 'Syrtis Major Hemisphere Enhanced', 'Valles Marineris Hemisphere Enhanced']


In [17]:
# Visit each hemisphere's page in turn and record the href of the link labelled 'Original'
hemisphere_image_urls = []
for title in titles_list:
    # Open the hemisphere page by clicking the link whose text contains its title
    browser.click_link_by_partial_text(title)
    images = {
        'title': title,
        'img_url': browser.find_by_text('Original')['href'],
    }
    hemisphere_image_urls.append(images)
    # Return to the results page and pause briefly before the next click
    browser.back()
    time.sleep(1)

hemisphere_image_urls

[{'title': 'Cerberus Hemisphere Enhanced',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif'},
 {'title': 'Schiaparelli Hemisphere Enhanced',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif'},
 {'title': 'Syrtis Major Hemisphere Enhanced',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif'},
 {'title': 'Valles Marineris Hemisphere Enhanced',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif'}]