# Mission to Mars

# WebScraping
-------
## NASA Mars News
-------
- Get the latest [NASA Mars News](https://mars.nasa.gov/news/) by scraping the website and collecting the latest news title and paragraph text.

In [2]:
# Dependencies
import time
from urllib.parse import urljoin

import pandas as pd
import requests
from bs4 import BeautifulSoup as bs
from splinter import Browser

In [3]:
# Launch a Chrome browser through splinter, driven by the chromedriver binary
# at the hard-coded path below (adjust this path for the local install).
# NOTE(review): headless=False presumably opens a visible browser window —
# confirm against the splinter documentation.
executable_path = {"executable_path": "/usr/local/bin/chromedriver"}
browser = Browser("chrome", **executable_path, headless=False)

In [4]:
# Point the shared browser session at the NASA Mars News page.
mars_news_url = "https://mars.nasa.gov/news/"
browser.visit(mars_news_url)

In [5]:
# Snapshot the HTML currently loaded in the browser and parse it with
# BeautifulSoup using the 'html.parser' backend.
html = browser.html
mars_news = bs(html, 'html.parser')

In [6]:
# Use prettify to analyze the HTML
#print(mars_news.prettify())

In [7]:
# The newest headline is taken from the first <div class="content_title">
# in the parsed page: locate that element, then read its text.

headline_div = mars_news.find('div', class_='content_title')
news_title = headline_div.text

In [8]:
# The teaser paragraph is taken from the first
# <div class="rollover_description_inner"> in the parsed page:
# locate that element, then read its text.

summary_div = mars_news.find('div', class_="rollover_description_inner")
news_p = summary_div.text

In [9]:
# Report the scraped headline and its summary, one labelled line each.
headline_label = "The latest Mars news is:"
summary_label = "The summary of this latest news is:"
print(headline_label, news_title)
print(summary_label, news_p)

The latest Mars news is: NASA's MRO Completes 60,000 Trips Around Mars
The summary of this latest news is: The orbiting spacecraft is also about to set a record for data relayed from the Martian surface.


## JPL Mars Space Images - Featured Image
------
- Visit the url for JPL's Featured Space [Image](https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars).
- Use splinter to navigate the site and find the full size jpg image url for the current Featured Mars Image.
- Save a complete url string for this image

In [10]:
# Reuse the already-open browser session to load JPL's Featured Space Image page.
jpl_url = "https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars"
browser.visit(jpl_url)

In [11]:
# Parse the page currently loaded in the browser, locate the anchor with
# id="full_image" that carries a data-fancybox-href attribute, and keep that
# attribute's value (a site-relative image path) in `image_url`.
html = browser.html
jpl_soup = bs(html, 'html.parser')
full_image_anchor = jpl_soup.find(
    'a',
    attrs={'id': 'full_image', 'data-fancybox-href': True},
)
image_url = full_image_anchor.get('data-fancybox-href')
image_url

'/spaceimages/images/mediumsize/PIA16567_ip.jpg'

In [12]:
# Collect every <div class="jpl_logo"> and print them so the anchor hrefs they
# contain can be inspected (looking for the site's base URL).
# Note: despite its name, jpl_logo_href holds the matching div elements,
# not an href string.
jpl_logo_href = jpl_soup.find_all('div', class_='jpl_logo')
print(jpl_logo_href)

[<div class="jpl_logo">
<a href="//www.jpl.nasa.gov/" id="jpl_logo" title="Jet Propulsion Laboratory">Jet Propulsion Laboratory</a>
</div>, <div class="jpl_logo">
<a class="" href="" id="jpl_logo" title="">Jet Propulsion Laboratory</a>
</div>]


In [13]:
# Create a second BeautifulSoup object from the current page, parsed with the
# 'lxml' backend (the earlier jpl_soup object was built with 'html.parser').
html_page = browser.html
JPL_soup = bs(html_page, "lxml")

In [14]:
# Gather the href attribute of every anchor on the page, in document order.
# Anchors without an href contribute None.
links = [anchor.get('href') for anchor in JPL_soup.find_all('a')]

print(links)

['http://www.nasa.gov', '//www.jpl.nasa.gov/', 'http://www.caltech.edu/', '#main', 'javascript:void(0);', 'http://www.nasa.gov', '', '', None, 'javascript:void(0);', '/about', '/about', '/about/exec.php', '/about/history.php', '/about/reports.php', '/contact_JPL.php', '/opportunities/', '/events', '/events', '/events/tours/views', '/events/lectures.php', '/events/speakers-bureau.php', '/events/team-competitions.php', '/events/special-events.php', '/edu/', '/edu/intern/', '/edu/learn/', '/edu/teach/', '/edu/news/', '/edu/events/', '/news', '/news', '/news/presskits.php', '/news/factsheets.php', '/news/mediainformation.php', 'http://blogs.jpl.nasa.gov', '/missions/', '/missions/?type=current', '/missions/?type=past', '/missions/?type=future', '/missions/?type=proposed', '/missions', '/spaceimages', '/spaceimages', '/videos', '/infographics', '/multimedia/audio.php', '/apps/', '/social', 'http://www.facebook.com/NASAJPL', '//twitter.com/NASAJPL', 'http://www.youtube.com/user/JPLnews?sub_c

In [15]:
# Take the second href collected above and strip its leading/trailing slashes,
# leaving a bare host name.
# NOTE(review): this relies on the site-root link being at index 1 of `links`;
# a change in the page's anchor order would silently select a different link.
jpl_link = links[1].strip('/')
print(jpl_link)

www.jpl.nasa.gov


In [16]:
# Build the absolute image URL by resolving the site-relative `image_url`
# against the page it was scraped from (`jpl_url`). This takes the scheme and
# host from a URL we control instead of from a positional index into the
# page's anchors, which would break if the site reordered its links.
featured_image_url = urljoin(jpl_url, image_url)
print(featured_image_url)

https://www.jpl.nasa.gov/spaceimages/images/mediumsize/PIA16567_ip.jpg


## Mars Weather 
------
- From the [Mars Weather twitter](https://twitter.com/marswxreport?lang=en) account scrape the latest Mars weather tweet from the page.
- Save the tweet text for the weather report.

In [17]:
# Reuse the open browser session to load the Mars Weather Twitter page.
twitter_url = 'https://twitter.com/marswxreport?lang=en'
browser.visit(twitter_url)

In [18]:
# Parse the Twitter page currently loaded in the browser.
html = browser.html
twitter_news = bs(html, 'html.parser')

In [19]:
# Find the first <p> whose class attribute matches this exact string of
# tweet-text classes.
# NOTE(review): the first match in document order is assumed to be the latest
# tweet — confirm (e.g. a pinned tweet would come first).
weather = twitter_news.find('p', class_='TweetTextSize TweetTextSize--normal js-tweet-text tweet-text')
weather

<p class="TweetTextSize TweetTextSize--normal js-tweet-text tweet-text" data-aria-label-part="0" lang="en">Yes, Mars has clouds. Beautiful images from <a class="twitter-atreply pretty-link js-nav" data-mentioned-user-id="15473958" dir="ltr" href="/MarsCuriosity"><s>@</s><b>MarsCuriosity</b></a> Sol 2410 <a class="twitter-timeline-link" data-expanded-url="https://marsmobile.jpl.nasa.gov/msl/multimedia/raw/?s=2410&amp;camera=NAV_RIGHT_" dir="ltr" href="https://t.co/1g0bggEeUh" rel="nofollow noopener" target="_blank" title="https://marsmobile.jpl.nasa.gov/msl/multimedia/raw/?s=2410&amp;camera=NAV_RIGHT_"><span class="tco-ellipsis"></span><span class="invisible">https://</span><span class="js-display-url">marsmobile.jpl.nasa.gov/msl/multimedia</span><span class="invisible">/raw/?s=2410&amp;camera=NAV_RIGHT_</span><span class="tco-ellipsis"><span class="invisible"> </span>…</span></a><a class="twitter-timeline-link u-hidden" data-pre-embedded="true" dir="ltr" href="https://t.co/3k0wBT6Qpv">

In [20]:
# Extract the unwanted a tag with the following class or similar class if a pic is attached to the tweet. 
#unwanted = weather.find('a', class_="twitter-timeline-link u-hidden")
#unwanted.extract()

In [21]:
# Flatten the tweet element to plain text and trim surrounding whitespace.
mars_weather = weather.get_text().strip()
mars_weather

'Yes, Mars has clouds. Beautiful images from @MarsCuriosity Sol 2410 https://marsmobile.jpl.nasa.gov/msl/multimedia/raw/?s=2410&camera=NAV_RIGHT_\xa0…pic.twitter.com/3k0wBT6Qpv'

## Mars Facts
---- 
- Visit the [Mars Facts webpage](https://space-facts.com/mars/) and use Pandas to scrape the table containing facts about the planet including Diameter, Mass, etc.
- Use Pandas to convert the data to a HTML table string.

In [22]:
# Reuse the open Chrome session (started by the executable-path cell above)
# to load the Mars Facts page.
mars_facts_url = "https://space-facts.com/mars/"
browser.visit(mars_facts_url)

In [23]:
# Parse the Mars Facts page currently loaded in the browser.
# NOTE(review): `mars_facts` is not referenced by the following cells, which
# read the table with pandas straight from the URL — confirm it is still needed.
html = browser.html
mars_facts = bs(html, 'html.parser')

In [24]:
# pd.read_html fetches the URL itself (independently of the browser session)
# and returns a list of DataFrames, one per HTML table it finds.
# The first table in that list is taken as the Mars facts table.
mars_df = pd.read_html(mars_facts_url)
mars_facts_df = pd.DataFrame(mars_df[0])

In [25]:
# Name the two columns, then index the table by the fact name.
# Note: assigning .columns renames mars_facts_df in place, whereas set_index
# returns a new frame (mars_df_table) and leaves mars_facts_df's index alone.
mars_facts_df.columns = ['Characteristic','Data']
mars_df_table = mars_facts_df.set_index("Characteristic")
mars_df_table

Unnamed: 0_level_0,Data
Characteristic,Unnamed: 1_level_1
Equatorial Diameter:,"6,792 km"
Polar Diameter:,"6,752 km"
Mass:,6.42 x 10^23 kg (10.7% Earth)
Moons:,2 (Phobos & Deimos)
Orbit Distance:,"227,943,824 km (1.52 AU)"
Orbit Period:,687 days (1.9 years)
Surface Temperature:,-153 to 20 °C
First Record:,2nd millennium BC
Recorded By:,Egyptian astronomers


In [26]:
# Render the facts table as HTML (adding the 'marsdata' CSS class), then
# collapse the markup onto a single line by turning each newline into a space.
mars_html_table = mars_df_table.to_html(classes='marsdata')
mars_table = ' '.join(mars_html_table.split('\n'))
mars_table

'<table border="1" class="dataframe marsdata">   <thead>     <tr style="text-align: right;">       <th></th>       <th>Data</th>     </tr>     <tr>       <th>Characteristic</th>       <th></th>     </tr>   </thead>   <tbody>     <tr>       <th>Equatorial Diameter:</th>       <td>6,792 km</td>     </tr>     <tr>       <th>Polar Diameter:</th>       <td>6,752 km</td>     </tr>     <tr>       <th>Mass:</th>       <td>6.42 x 10^23 kg (10.7% Earth)</td>     </tr>     <tr>       <th>Moons:</th>       <td>2 (Phobos &amp; Deimos)</td>     </tr>     <tr>       <th>Orbit Distance:</th>       <td>227,943,824 km (1.52 AU)</td>     </tr>     <tr>       <th>Orbit Period:</th>       <td>687 days (1.9 years)</td>     </tr>     <tr>       <th>Surface Temperature:</th>       <td>-153 to 20 °C</td>     </tr>     <tr>       <th>First Record:</th>       <td>2nd millennium BC</td>     </tr>     <tr>       <th>Recorded By:</th>       <td>Egyptian astronomers</td>     </tr>   </tbody> </table>'

## Mars Hemispheres
----
- Visit the [USGS Astrogeology site](https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars) to obtain high-resolution images for each of Mars's hemispheres.
- Get the image url to the full resolution image for each hemisphere.
- Save both the image URL string for the full-resolution hemisphere image and the hemisphere title (containing the hemisphere name) in a Python dictionary.
- Append the dictionary with the image url string and the hemisphere title to a list. This list will contain one dictionary for each hemisphere.

In [27]:
# Reuse the open Chrome session (started by the executable-path cell above)
# to load the USGS Astrogeology search results for Mars hemispheres.
hemispheres_url = "https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars"
browser.visit(hemispheres_url)

In [28]:
# Parse the USGS search-results page currently loaded in the browser.
html = browser.html
mars_hemispheres = bs(html, 'html.parser')

In [29]:
#print(mars_hemispheres.prettify())

In [30]:
# Get the first <div> whose class attribute is exactly "collapsible results";
# it holds the hemisphere result items that the next cell iterates over.
images = mars_hemispheres.find('div', class_='collapsible results')
#print(images.prettify())

In [31]:
# Visit each hemisphere's detail page in turn (by clicking its <h3> heading on
# the results page) and record the page title together with the href of the
# first link inside the "downloads" section.

hemispheres_image_urls = []

# One pass per <div class="item"> found on the results page.
item_count = len(images.find_all("div", class_="item"))

for i in range(item_count):
    # Fixed pause before interacting, giving the results page time to load
    # (including after browser.back() at the end of the previous pass).
    time.sleep(5)
    # Look the headings up again on every pass rather than reusing handles
    # obtained before navigating away — presumably those would no longer be valid.
    image = browser.find_by_tag('h3')
    image[i].click()
    html = browser.html
    soup = bs(html, 'html.parser')
    title = soup.find("h2", class_="title").text
    div = soup.find("div", class_="downloads")
    # A single lookup is enough: find('a') always returns the first anchor.
    # The previous inner loop repeated this same call once per child of `div`
    # and, if `div` had no children, left `link` holding the prior page's anchor.
    link = div.find('a')
    url = link.attrs['href']
    hemispheres = {
        'title': title,
        'img_url': url,
    }
    hemispheres_image_urls.append(hemispheres)
    # Return to the results page so the next heading can be clicked.
    browser.back()

In [32]:
hemispheres_image_urls

[{'title': 'Cerberus Hemisphere Enhanced',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg'},
 {'title': 'Schiaparelli Hemisphere Enhanced',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg'},
 {'title': 'Syrtis Major Hemisphere Enhanced',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg'},
 {'title': 'Valles Marineris Hemisphere Enhanced',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg'}]