## Scrape the NASA Mars News Site and collect the latest News Title and Paragraph Text.
### by Pam Zhao

In [66]:
import pandas as pd
import requests
from bs4 import BeautifulSoup
from splinter import Browser
from sqlalchemy import create_engine

### NASA Mars News

In [23]:
# Splinter to navigate the NASA Mars News sites
executable_path = {'executable_path': 'chromedriver.exe'}
browser = Browser('chrome', **executable_path, headless=False)

# NASA Mars url
url = 'https://mars.nasa.gov/news'
browser.visit(url)

# Parse HTML with Beautiful Soup
soup = BeautifulSoup(browser.html, 'html.parser')

# Get the first list which is the lastest news
item = soup.find('ul', class_="item_list")
current_slide = item.find('li')


In [24]:
# Save the latest news title and Paragraph text
news_title = current_slide.find('h3').text.strip()
news_p = current_slide.find('div', class_='article_teaser_body').text.strip()

print(news_title)
print(news_p)

NASA's MRO Completes 60,000 Trips Around Mars
The orbiting spacecraft is also about to set a record for data relayed from the Martian surface.


### JPL Mars Space Images - Featured Image

In [25]:
# Splinter to navigate the JPL Mars Space Images
executable_path = {'executable_path': 'chromedriver.exe'}
browser = Browser('chrome', **executable_path, headless=False)

# JPL Mars Space Images url
url = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'
browser.visit(url)

# Parse HTML with Beautiful Soup
soup = BeautifulSoup(browser.html, 'html.parser')

# Get the first image which is the current image of Mars
article = soup.find('ul', class_="articles")
current_slide = article.find('li')


In [26]:
# Get the current Mars image url
image = current_slide.find('div', class_='img')
featured_image_url = 'https://www.jpl.nasa.gov' + image.img['src']

print(featured_image_url )

https://www.jpl.nasa.gov/spaceimages/images/wallpaper/PIA23221-640x350.jpg


### Mars Weather

In [38]:
# Splinter to navigate the Mars Weather twitter account
executable_path = {'executable_path': 'chromedriver.exe'}
browser = Browser('chrome', **executable_path, headless=False)

# Mars Weather twitter account url
url = 'https://twitter.com/marswxreport?lang=en'
browser.visit(url)

# HTML object
html = browser.html
# Parse HTML with Beautiful Soup
soup = BeautifulSoup(html, 'html.parser')

# Get the first image which is the current image of Mars
stream = soup.find('ol', class_="stream-items js-navigable-stream")
current_item = stream.find('li')


In [47]:
text_container = current_item.find('div', class_="js-tweet-text-container")
mars_weather = text_container.find('p').text.strip()
link = text_container.find('p').find('a').text.strip()
mars_weather = mars_weather.replace(link, '')

print (mars_weather)

InSight sol 167 (2019-05-17) low -100.5ºC (-148.9ºF) high -20.4ºC (-4.6ºF)
winds from the SW at 4.7 m/s (10.6 mph) gusting to 13.5 m/s (30.3 mph)
pressure at 7.50 hPa


### Mars Facts

In [51]:
# Mars facts url 
url = "https://space-facts.com/mars/"

# Retrive page from the request module
response = requests.get(url)

# Create BeautifulSoup object; parse with 'html.parser'
soup = BeautifulSoup(response.text, 'html.parser')

# Get the html table 
facts_table = soup.find('table', class_="tablepress tablepress-id-mars")

tds = facts_table.find_all('td')
for 


<table class="tablepress tablepress-id-mars" id="tablepress-mars">
<tbody>
<tr class="row-1 odd">
<td class="column-1"><strong>Equatorial Diameter:</strong></td><td class="column-2">6,792 km<br/>
</td>
</tr>
<tr class="row-2 even">
<td class="column-1"><strong>Polar Diameter:</strong></td><td class="column-2">6,752 km<br/>
</td>
</tr>
<tr class="row-3 odd">
<td class="column-1"><strong>Mass:</strong></td><td class="column-2">6.42 x 10^23 kg (10.7% Earth)</td>
</tr>
<tr class="row-4 even">
<td class="column-1"><strong>Moons:</strong></td><td class="column-2">2 (<a href="https://space-facts.com/phobos/">Phobos</a> &amp; <a href="https://space-facts.com/deimos/">Deimos</a>)</td>
</tr>
<tr class="row-5 odd">
<td class="column-1"><strong>Orbit Distance:</strong></td><td class="column-2">227,943,824 km (1.52 AU)</td>
</tr>
<tr class="row-6 even">
<td class="column-1"><strong>Orbit Period:</strong></td><td class="column-2">687 days (1.9 years)<br/>
</td>
</tr>
<tr class="row-7 odd">
<td class

### Mars Hemispheres

In [52]:
# Splinter to navigate the Mars Weather twitter account
executable_path = {'executable_path': 'chromedriver.exe'}
browser = Browser('chrome', **executable_path, headless=False)

# url of Mars hemisphere
url = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'
browser.visit(url)

# HTML object
html = browser.html
# Parse HTML with Beautiful Soup
soup = BeautifulSoup(html, 'html.parser')

items = soup.find_all('div', class_="item")

In [64]:
hemisphere_image_urls = []

for item in items:
    title = item.find('h3').text.strip()
    link = 'https://astrogeology.usgs.gov' + item.find('a')['href']
    browser.visit(link)
    hemisphere = BeautifulSoup(browser.html, 'html.parser')
    img_url = hemisphere.find('ul').find('li').find('a')['href']
    img_dict = {"title": title, "img_url": img_url}
    hemisphere_image_urls.append(dict(img_dict))
    

In [65]:
print(hemisphere_image_urls)

[{'title': 'Cerberus Hemisphere Enhanced', 'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg'}, {'title': 'Schiaparelli Hemisphere Enhanced', 'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg'}, {'title': 'Syrtis Major Hemisphere Enhanced', 'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg'}, {'title': 'Valles Marineris Hemisphere Enhanced', 'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg'}]
