##### This Jupyter Notebook authored by James Khamthung

# Mission to Mars - Scraping

In [125]:
# Dependencies
from splinter import Browser
from bs4 import BeautifulSoup
import requests
import pandas as pd

In [126]:
# https://splinter.readthedocs.io/en/latest/drivers/chrome.html
!which chromedriver

/usr/local/bin/chromedriver


In [127]:
executable_path = {'executable_path': '/usr/local/bin/chromedriver'}
browser = Browser('chrome', **executable_path, headless=False)

## NASA Mars News
##### Scrape the NASA Mars News Site https://mars.nasa.gov/news and collect the latest News Title and Paragraph Text. Assign the text to variables that can reference later.

In [130]:
# Parse HTML with Beautiful Soup
url = 'https://mars.nasa.gov/news'
browser.visit(url)
html = browser.html
soup = BeautifulSoup(html, 'html.parser')

In [131]:
news_title = soup.find('div', class_='content_title').find('a').text
news_p = soup.find('div', class_='article_teaser_body').text
print(f"News Title: {news_title}\n")
print(f"Paragraph Text: {news_p}")

News Title: Catch NASA's JPL at the Clippers SciFest This Weekend

Paragraph Text: Students and families can meet a Mars rover, take a virtual tour through our solar system and explore alien worlds with NASA's Jet Propulsion Laboratory at the Clippers SciFest SoCal.


## JPL Mars Space Images - Featured Image
##### Visit the url for JPL Featured Space Image https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars. Use splinter to navigate the site and find the image url for the current Featured Mars Image and assign the url string to a variable called featured_image_url. Find the image url to the full size .jpg image. Save a complete url string for this image.

In [132]:
url = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'
browser.visit(url)
html = browser.html
soup = BeautifulSoup(html, 'html.parser')

In [133]:
imgurl = soup.find('article', class_='carousel_item').get('style')
imgurl

"background-image: url('/spaceimages/images/wallpaper/PIA17551-1920x1200.jpg');"

In [134]:
img_src = imgurl.split("'")[1]

In [135]:
featured_image_url=f"https://www.jpl.nasa.gov{img_src}"
print(featured_image_url)

https://www.jpl.nasa.gov/spaceimages/images/wallpaper/PIA17551-1920x1200.jpg


## Mars Weather
##### Visit the Mars Weather twitter account https://twitter.com/marswxreport?lang=en and scrape the latest Mars weather tweet from the page. Save the tweet text for the weather report as a variable called mars_weather.

In [136]:
url="https://twitter.com/marswxreport?lang=en"
response = requests.get(url)
soup = BeautifulSoup(response.text, 'lxml')
mars_weather =soup.find("p",class_="TweetTextSize TweetTextSize--normal js-tweet-text tweet-text").text.split("https")[0]

print(mars_weather)

InSight has detected microseisms for the first time on another planet 


## Mars Facts
##### Visit the Mars Facts webpage https://space-facts.com/mars/ and use Pandas to scrape the table containing facts about the planet including Diameter, Mass, etc. Use Pandas to convert the data to a HTML table string.

In [143]:
url="https://space-facts.com/mars/"
tables = pd.read_html(url)
df = tables[0]
df.columns = ['Description', 'Value']
df.set_index('Description',inplace=True)

df

Unnamed: 0_level_0,Value
Description,Unnamed: 1_level_1
Equatorial Diameter:,"6,792 km"
Polar Diameter:,"6,752 km"
Mass:,6.42 x 10^23 kg (10.7% Earth)
Moons:,2 (Phobos & Deimos)
Orbit Distance:,"227,943,824 km (1.52 AU)"
Orbit Period:,687 days (1.9 years)
Surface Temperature:,-153 to 20 °C
First Record:,2nd millennium BC
Recorded By:,Egyptian astronomers


In [117]:
mars_html_table = df.to_html().replace('\n', '')
mars_html_table

'<table border="1" class="dataframe">  <thead>    <tr style="text-align: right;">      <th></th>      <th>Value</th>    </tr>    <tr>      <th>Description</th>      <th></th>    </tr>  </thead>  <tbody>    <tr>      <th>Equatorial Diameter:</th>      <td>6,792 km</td>    </tr>    <tr>      <th>Polar Diameter:</th>      <td>6,752 km</td>    </tr>    <tr>      <th>Mass:</th>      <td>6.42 x 10^23 kg (10.7% Earth)</td>    </tr>    <tr>      <th>Moons:</th>      <td>2 (Phobos &amp; Deimos)</td>    </tr>    <tr>      <th>Orbit Distance:</th>      <td>227,943,824 km (1.52 AU)</td>    </tr>    <tr>      <th>Orbit Period:</th>      <td>687 days (1.9 years)</td>    </tr>    <tr>      <th>Surface Temperature:</th>      <td>-153 to 20 °C</td>    </tr>    <tr>      <th>First Record:</th>      <td>2nd millennium BC</td>    </tr>    <tr>      <th>Recorded By:</th>      <td>Egyptian astronomers</td>    </tr>  </tbody></table>'

In [118]:
df.to_html('mars_table.html')
!open mars_table.html

## Mars Hemispheres
##### Visit the USGS Astrogeology site https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars to obtain high resolution images for each of Mar's hemispheres. Click each of the links to the hemispheres in order to find the image url to the full resolution image. Save both the image url string for the full resolution hemisphere image, and the Hemisphere title containing the hemisphere name. Use a Python dictionary to store the data using the keys img_url and title. Append the dictionary with the image url string and the hemisphere title to a list. This list will contain one dictionary for each hemisphere.

In [123]:
hemisphere_image_urls =[]

url="https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars"
browser.visit(url)
html = browser.html
soup = BeautifulSoup(html, 'html.parser')
items=soup.find("div",class_="collapsible results").find_all('div',class_="item")

for item in items:
    title=item.find("img", class_="thumb").get("alt").split(" thumbnail")[0]
    browser.click_link_by_partial_text(title)
    html = browser.html
    soup = BeautifulSoup(html, 'html.parser')
    img_url = soup.find("div", class_="downloads").find("ul").find("li").a.get("href")
    hemisphere_image_urls.append({"title":title, 
                                  "img_url":img_url })
    browser.back()
    
hemisphere_image_urls

[{'title': 'Cerberus Hemisphere Enhanced',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg'},
 {'title': 'Schiaparelli Hemisphere Enhanced',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg'},
 {'title': 'Syrtis Major Hemisphere Enhanced',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg'},
 {'title': 'Valles Marineris Hemisphere Enhanced',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg'}]