# Step 1 - Scraping

## NASA Mars News

In [1]:
# Dependencies
from urllib.parse import urljoin

from bs4 import BeautifulSoup
import cssutils
import pandas as pd
import requests
from splinter import Browser

# Scrape the newest headline and teaser paragraph from the NASA Mars News page.
# A real Chrome window is driven through splinter; the rendered HTML is then
# parsed with BeautifulSoup.
executable_path = {'executable_path': 'chromedriver.exe'}
browser = Browser('chrome', **executable_path, headless=False)
url = "https://mars.nasa.gov/news/"
try:
    browser.visit(url)
    # Give the page up to 10 s to show the article list before grabbing the
    # HTML, in case the list is filled in after the initial page load.
    browser.is_element_present_by_css("div.list_text", wait_time=10)
    html = browser.html
finally:
    # Always shut the browser down so no Chrome/chromedriver process is left
    # running once this cell finishes (or fails).
    browser.quit()

soup = BeautifulSoup(html, "html.parser")

# The first "list_text" block is the first article listed on the page
article = soup.find("div", class_='list_text')
news_title = article.find("div", class_="content_title").text
news_p = article.find("div", class_="article_teaser_body").text
print(news_title)
print(news_p)

NASA InSight Lander 'Hears' Martian Winds 
Vibrations picked up by two spacecraft instruments have provided the first sounds of Martian wind.


## JPL Mars Space Images - Featured Image

In [4]:
# JPL "space images" page, filtered to the Mars category
url = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'

# Download the page with requests and hand the markup to BeautifulSoup
response = requests.get(url)
soup = BeautifulSoup(response.text, 'html.parser')

# The carousel <article> carries the featured image as an inline CSS
# background-image declaration, so read its style attribute and let cssutils
# parse the declaration.
div_style = soup.find('article', class_="carousel_item")['style']
style = cssutils.parseStyle(div_style)

# Remove the "url(" prefix and ")" characters to leave the site-relative path,
# then prepend the JPL host to build the full image URL.
partial_image_url = (
    style['background-image']
    .replace('url(', '')
    .replace(')', '')
)
featured_image_url = ''.join(['https://www.jpl.nasa.gov', partial_image_url])
featured_image_url

'https://www.jpl.nasa.gov/spaceimages/images/wallpaper/PIA18433-1920x1200.jpg'

## Mars Weather

In [5]:
# Scrape the text of the first tweet found on the Mars Weather Twitter page.
url = 'https://twitter.com/marswxreport?lang=en'

# Retrieve the page with requests. The timeout keeps the notebook from hanging
# on an unresponsive server, and raise_for_status() surfaces HTTP errors
# instead of silently parsing an error page.
response = requests.get(url, timeout=30)
response.raise_for_status()

# Create BeautifulSoup object; parse with 'html.parser'
soup = BeautifulSoup(response.text, 'html.parser')

# Only the first tweet paragraph is needed, so use find() rather than
# collecting every match, and fail with a clear message when none is present.
latest_tweet = soup.find('p', class_='tweet-text')
if latest_tweet is None:
    raise ValueError("No <p class='tweet-text'> element found at " + url)
mars_weather = latest_tweet.text
mars_weather

'Sol 2251 (2018-12-05), high -15C/5F, low -74C/-101F, pressure at 8.47 hPa, daylight 06:34-18:48'

## Mars Facts

In [6]:
# Build an HTML facts table from the first table on the Space Facts Mars page.
url = 'http://space-facts.com/mars/'

# Use pandas to read every table at the URL, take the first one, name its two
# columns, and use 'description' as the index. A copy is used and set_index is
# called without inplace so the parsed table in `tables` is left untouched.
tables = pd.read_html(url)
facts = tables[0].copy()
facts.columns = ['description', 'value']
df = facts.set_index('description')

# Convert dataframe to a single-line HTML string (newlines removed)
mars_facts_table = df.to_html().replace('\n', '')
mars_facts_table

'<table border="1" class="dataframe">  <thead>    <tr style="text-align: right;">      <th></th>      <th>value</th>    </tr>    <tr>      <th>description</th>      <th></th>    </tr>  </thead>  <tbody>    <tr>      <th>Equatorial Diameter:</th>      <td>6,792 km</td>    </tr>    <tr>      <th>Polar Diameter:</th>      <td>6,752 km</td>    </tr>    <tr>      <th>Mass:</th>      <td>6.42 x 10^23 kg (10.7% Earth)</td>    </tr>    <tr>      <th>Moons:</th>      <td>2 (Phobos &amp; Deimos)</td>    </tr>    <tr>      <th>Orbit Distance:</th>      <td>227,943,824 km (1.52 AU)</td>    </tr>    <tr>      <th>Orbit Period:</th>      <td>687 days (1.9 years)</td>    </tr>    <tr>      <th>Surface Temperature:</th>      <td>-153 to 20 °C</td>    </tr>    <tr>      <th>First Record:</th>      <td>2nd millennium BC</td>    </tr>    <tr>      <th>Recorded By:</th>      <td>Egyptian astronomers</td>    </tr>  </tbody></table>'

## Mars Hemispheres

In [12]:
# Collect the title and image URL for each Mars hemisphere listed on the USGS
# Astrogeology search results page. Each result links to a detail page whose
# "downloads" block holds the image link, so the browser visits every detail
# page in turn.
executable_path = {'executable_path': 'chromedriver.exe'}
browser = Browser('chrome', **executable_path, headless=False)
base_url = "https://astrogeology.usgs.gov/"
# Search results for the enhanced hemisphere images (stray ")" removed from the
# end of the query string).
url = "https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars"

# List of {"title": ..., "img_url": ...} dictionaries, one per hemisphere
hemisphere_image_urls = []

try:
    browser.visit(url)
    soup = BeautifulSoup(browser.html, "html.parser")

    # Find the hemisphere result items on the search page
    results = soup.find("div", class_="result-list")
    hemispheres = results.find_all("div", class_="item")

    # Go through each hemisphere result to pull the title & link
    for hemisphere in hemispheres:
        title = hemisphere.find("h3").text.replace(" Enhanced", "")

        # urljoin resolves the href against the site root without producing a
        # doubled slash when the href already starts with "/".
        imagepage_url = urljoin(base_url, hemisphere.find("a")["href"])

        browser.visit(imagepage_url)
        # Separate name so the search-page soup is not overwritten mid-loop
        detail_soup = BeautifulSoup(browser.html, "html.parser")
        download_links = detail_soup.find("div", class_="downloads")
        # The first link inside the downloads block is taken as the image URL
        image_url = download_links.find("a")["href"]

        hemisphere_image_urls.append({"title": title, "img_url": image_url})
finally:
    # Always shut the browser down so no Chrome/chromedriver process is left
    # running once this cell finishes (or fails).
    browser.quit()

hemisphere_image_urls

[{'title': 'Cerberus Hemisphere',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg'},
 {'title': 'Schiaparelli Hemisphere',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg'},
 {'title': 'Syrtis Major Hemisphere',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg'},
 {'title': 'Valles Marineris Hemisphere',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg'}]