In [1]:
from splinter import Browser
from bs4 import BeautifulSoup as bs
import pandas as pd
import requests
import time

In [2]:
# Keyword arguments unpacked into every splinter Browser(...) call below;
# points at a chromedriver binary looked up relative to the working directory.
executable_path = dict(executable_path='chromedriver.exe')

In [3]:
# Pages scraped by the cells below, one constant per data source.

# NASA Mars news listing (query string split up for readability only;
# the pieces concatenate to the exact original URL).
nasa_url = (
    "https://mars.nasa.gov/news/"
    "?page=0&per_page=40"
    "&order=publish_date+desc%2Ccreated_at+desc"
    "&search=&category=19%2C165%2C184%2C204"
    "&blank_scope=Latest"
)
# JPL space images, filtered to the Mars category.
jpl_url = "https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars"
# Twitter account used for the Mars weather report.
mars_weather_url = "https://twitter.com/marswxreport?lang=en"
# Page holding the Mars facts table.
mars_facts_url = "https://space-facts.com/mars/"
# USGS astrogeology search results for the enhanced Mars hemisphere images.
usgs_url = (
    "https://astrogeology.usgs.gov/search/results"
    "?q=hemisphere+enhanced&k1=target&v1=Mars"
)

### Scraping NASA Mars News

In [4]:
# Scrape the NASA Mars news listing for one headline and one teaser paragraph.
# Opens a visible (non-headless) Chrome window driven by chromedriver.
browser = Browser('chrome', **executable_path, headless=False)
try:
    browser.visit(nasa_url)
    # Fixed pause before reading the page source -- presumably to let the
    # listing finish rendering; TODO confirm 2 s is sufficient.
    time.sleep(2)
    html = browser.html
finally:
    # Always close the browser, even when the visit fails, so no Chrome /
    # chromedriver process is left running.
    browser.quit()

# Parsing happens after the browser is closed: it only needs the captured HTML.
soup = bs(html, 'lxml')
# Grab the second title (index 1) -- presumably the first "content_title"
# div is not an article headline; verify against the live page.
# Raises IndexError if fewer than two such divs are found.
news_title = soup.find_all('div', class_="content_title")[1].text
# Grab the article text from the first teaser div on the page.
# Raises AttributeError if no such div exists (find() returns None).
news_p = soup.find('div', class_='article_teaser_body').text

print(f"{news_title}\n{news_p}")

NASA's Mars Rover Drivers Need Your Help
Using an online tool to label Martian terrain types, you can train an artificial intelligence algorithm that could improve the way engineers guide the Curiosity rover.


### Scraping JPL for Featured Mars Image

In [6]:
# Scrape the JPL space-images page for the featured Mars image URL.
# Opens a visible (non-headless) Chrome window driven by chromedriver.
browser = Browser('chrome', **executable_path, headless=False)
try:
    browser.visit(jpl_url)
    # Click the element with id "full_image" so the page shows the image that
    # is looked up below. find_by_id(...).first.click() replaces
    # click_link_by_id(), which is deprecated in later splinter releases.
    browser.find_by_id('full_image').first.click()
    # Fixed pause before reading the page source -- presumably to let the
    # image overlay load; TODO confirm 2 s is sufficient.
    time.sleep(2)
    html = browser.html
finally:
    # Always close the browser, even when the visit or click fails, so no
    # Chrome / chromedriver process is left running.
    browser.quit()

soup = bs(html, 'lxml')
# The img tag carries a site-relative src (the recorded output shows a path
# starting with /spaceimages/), so the host is prepended to build a full URL.
# Raises AttributeError if the image tag is missing (find() returns None).
featured_image = soup.find('img', class_='fancybox-image').get('src')
featured_image_url = 'https://jpl.nasa.gov'+featured_image

print(featured_image_url)

https://jpl.nasa.gov/spaceimages/images/mediumsize/PIA18886_ip.jpg


### Scraping Twitter for Mars Weather

In [7]:
# Scrape the Mars weather Twitter account for a weather-report tweet.
# Opens a visible (non-headless) Chrome window driven by chromedriver.
browser = Browser('chrome', **executable_path, headless=False)
try:
    browser.visit(mars_weather_url)
    # Fixed pause before reading the page source -- presumably to let the
    # timeline render; TODO confirm 2 s is sufficient.
    time.sleep(2)
    html = browser.html
finally:
    # Always close the browser, even when the visit fails, so no Chrome /
    # chromedriver process is left running.
    browser.quit()

soup = bs(html, 'lxml')
# NOTE(review): these class strings look auto-generated and are likely to
# change whenever the site is redeployed -- verify before relying on them.
weather_tweets = soup.find_all('div', class_='css-901oao r-hkyrab r-1qd0xha r-a023e6 r-16dba41 r-ad9z0x r-bcqeeo r-bnwqim r-qvutc0')
# Index 1 picks the second matching div -- presumably the first match is not
# the weather report; verify against the live page.
# Raises IndexError / AttributeError if the expected elements are missing.
current_weather = weather_tweets[1].find('span', class_='css-901oao css-16my406 r-1qd0xha r-ad9z0x r-bcqeeo r-qvutc0').text

print(f"Latest Mars Weather: {current_weather}")

Latest Mars Weather: InSight sol 549 (2020-06-12) low -91.8ºC (-133.3ºF) high -1.5ºC (29.3ºF)
winds from the SW at 4.5 m/s (10.1 mph) gusting to 18.2 m/s (40.6 mph)
pressure at 7.40 hPa


### Mars Facts

In [8]:
# Parse every HTML table on the facts page into DataFrames.
mars_tables = pd.read_html(mars_facts_url)
# Keep only the first table and render it back to an HTML string, without the
# column header row and without the DataFrame index column.
first_table = mars_tables[0]
mars_facts = first_table.to_html(index=False, header=False)

print(mars_facts)

<table border="1" class="dataframe">
  <tbody>
    <tr>
      <td>Equatorial Diameter:</td>
      <td>6,792 km</td>
    </tr>
    <tr>
      <td>Polar Diameter:</td>
      <td>6,752 km</td>
    </tr>
    <tr>
      <td>Mass:</td>
      <td>6.39 × 10^23 kg (0.11 Earths)</td>
    </tr>
    <tr>
      <td>Moons:</td>
      <td>2 (Phobos &amp; Deimos)</td>
    </tr>
    <tr>
      <td>Orbit Distance:</td>
      <td>227,943,824 km (1.38 AU)</td>
    </tr>
    <tr>
      <td>Orbit Period:</td>
      <td>687 days (1.9 years)</td>
    </tr>
    <tr>
      <td>Surface Temperature:</td>
      <td>-87 to -5 °C</td>
    </tr>
    <tr>
      <td>First Record:</td>
      <td>2nd millennium BC</td>
    </tr>
    <tr>
      <td>Recorded By:</td>
      <td>Egyptian astronomers</td>
    </tr>
  </tbody>
</table>


### Mars Hemispheres

In [9]:
# Scrape the USGS search results for each Mars hemisphere's title and image
# URL. Opens a visible (non-headless) Chrome window driven by chromedriver.
browser = Browser('chrome', **executable_path, headless=False)

# One {'title': ..., 'img_url': ...} dict per hemisphere page visited.
hemisphere_image_urls = []

try:
    browser.visit(usgs_url)
    # Fixed pause before reading the page source -- presumably to let the
    # results render; TODO confirm 2 s is sufficient.
    time.sleep(2)
    html = browser.html
    soup = bs(html, 'lxml')

    # Visit main page and grab URLs for each hemisphere: each "description"
    # div holds a site-relative link, so the host is prepended.
    hemispheres = soup.find_all('div', class_='description')
    urls = [
        "https://astrogeology.usgs.gov" + hemisphere.a['href']
        for hemisphere in hemispheres
    ]

    # Visit pages for each hemisphere and grab title and full image URL
    for url in urls:
        browser.visit(url)
        time.sleep(2)
        html_ = browser.html
        soup_ = bs(html_, 'lxml')
        title = soup_.find('h2', class_='title').text
        # First link in the "downloads" list (a full.jpg URL in the recorded
        # output of this cell).
        img_url = soup_.find('div', class_='downloads').ul.li.a['href']
        hemisphere_image_urls.append({'title': title, 'img_url': img_url})
finally:
    # Always close the browser, even when a visit or a lookup fails part-way
    # through the loop, so no Chrome / chromedriver process is left running.
    browser.quit()

print(hemisphere_image_urls)

[{'title': 'Cerberus Hemisphere Enhanced', 'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg'}, {'title': 'Schiaparelli Hemisphere Enhanced', 'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg'}, {'title': 'Syrtis Major Hemisphere Enhanced', 'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg'}, {'title': 'Valles Marineris Hemisphere Enhanced', 'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg'}]
