# Mission to Mars

In this challenge, we are going to scrape the Mars News Site and collect the latest News Title and Paragraph text.

In [1]:
# dependencies first
from bs4 import BeautifulSoup as bs
from splinter import Browser
from webdriver_manager.chrome import ChromeDriverManager
import pandas as pd

In [2]:
# using splinter to visit the site
executable_path = {'executable_path': ChromeDriverManager().install()}
browser = Browser('chrome', **executable_path, headless=False)

In [3]:
# set url of page to scrape and visit
url = 'https://redplanetscience.com/'
browser.visit(url)

In [4]:
# set html with what we grab from the site's html
html = browser.html

In [5]:
# create beautifulSoup object
soup = bs(html, 'html.parser')

In [6]:
# we want the news article title and paragraph
# title = div class list_text > div class content_title
# paragraph = div class list_text > div class article_teaser_body
# create list of dictionaries at the same time

searchResults = []

results = soup.find_all('div', class_='list_text')
for result in results:
    title = result.find(class_='content_title').text
    paragraph = result.find(class_='article_teaser_body').text
    
    #create a dictionary to append to our searchResults list
    content={'title':title,'paragraph':paragraph}    
    searchResults.append(content)
    
    #print content to screen as well
    print('-------')
    print(title)
    print(paragraph)

-------
Media Get a Close-Up of NASA's Mars 2020 Rover
The clean room at NASA's Jet Propulsion Laboratory was open to the media to see NASA's next Mars explorer before it leaves for Florida in preparation for a summertime launch.
-------
NASA InSight's 'Mole' Is Out of Sight
Now that the heat probe is just below the Martian surface, InSight's arm will scoop some additional soil on top to help it keep digging so it can take Mars' temperature.
-------
Heat and Dust Help Launch Martian Water Into Space, Scientists Find
Scientists using an instrument aboard NASA’s Mars Atmosphere and Volatile EvolutioN, or MAVEN, spacecraft have discovered that water vapor near the surface of the Red Planet is lofted higher into the atmosphere than anyone expected was possible. 
-------
Scientists Explore Outback as Testbed for Mars 
Australia provides a great place for NASA's Mars 2020 and the ESA-Roscosmos ExoMars scientists to hone techniques in preparation for searching for signs ancient life on Mars.


# JPL Mars Space Images - Featured Image

In [7]:
url='https://spaceimages-mars.com/'
browser.visit(url)

In [8]:
# navigate to featured image using the FULL IMAGE button
browser.links.find_by_partial_text('FULL IMAGE').click()

In [9]:
# image is kept in img class headerimage fade-in and jpg is kept in src

image = browser.find_by_css('img[class="headerimage fade-in"]')
featured_image_url = image['src']
print(featured_image_url)

https://spaceimages-mars.com/image/featured/mars1.jpg


# Mars Facts

In [10]:
url='https://galaxyfacts-mars.com/'
browser.visit(url)

In [11]:
# from the instructions, we're interested in the table containing facts
# about the planet including Diameter, Mass, etc.
# I'm interpretting this as the table found in the diagram about halfway
# down the page, rather than what's in the sidebar
# in this case, the class is 'table', and not 'table table-striped'
marsFacts = pd.read_html(url, attrs = {'class':'table'})

In [12]:
marsFacts

[                         0                1                2
 0  Mars - Earth Comparison             Mars            Earth
 1                Diameter:         6,779 km        12,742 km
 2                    Mass:  6.39 × 10^23 kg  5.97 × 10^24 kg
 3                   Moons:                2                1
 4       Distance from Sun:   227,943,824 km   149,598,262 km
 5          Length of Year:   687 Earth days      365.24 days
 6             Temperature:     -87 to -5 °C      -88 to 58°C]

In [13]:
# transform list into df and clean it up
marsDF = pd.DataFrame(marsFacts[0])
marsClean = marsDF.transpose()
marsClean.rename(columns=marsClean.iloc[0], inplace=True)
marsClean.drop(marsClean.index[0], inplace=True)
marsClean.set_index('Mars - Earth Comparison', inplace=True)
marsClean

Unnamed: 0_level_0,Diameter:,Mass:,Moons:,Distance from Sun:,Length of Year:,Temperature:
Mars - Earth Comparison,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Mars,"6,779 km",6.39 × 10^23 kg,2,"227,943,824 km",687 Earth days,-87 to -5 °C
Earth,"12,742 km",5.97 × 10^24 kg,1,"149,598,262 km",365.24 days,-88 to 58°C


In [14]:
#send df to html and create html file
marsClean.to_html(open('marsClean.html','w'))

# Mars Hemispheres

In [15]:
url='https://marshemispheres.com/'
browser.visit(url)
html = browser.html
soup = bs(html, 'html.parser')

In [16]:
# goal is to get each full res image
# under div id=product-section > div class item > a href has html
# need to click on each link and then get the full res

hemisphere_image_urls = []

imageLinks = soup.find_all(class_='item')
for image in imageLinks:
    aElement = image.find('a')
    link=aElement['href']
    name = image.find('h3').text
           
    #next, we'll click on the link using the name we found
    browser.links.find_by_partial_text(name).click()
    
    #now we need new html + soup object
    imagehtml = browser.html
    imageSoup = bs(imagehtml, 'html.parser')
    
    #we can get the picture elements from downloads
    imageDownloads = imageSoup.find(class_='downloads')
    #original image will always be second in the list within the downloads class
    imageFull = imageDownloads.find_all('li')
    originalA = imageFull[1].find('a')
    imageLink = originalA['href']
    
    hemisphere_image_urls.append({'title':name,'img_url':imageLink})
    
    #save image
    browser.links.find_by_text('Original').click()
    
    browser.back()

In [17]:
hemisphere_image_urls

[{'title': 'Cerberus Hemisphere Enhanced',
  'img_url': 'images/cerberus_enhanced.tif'},
 {'title': 'Schiaparelli Hemisphere Enhanced',
  'img_url': 'images/schiaparelli_enhanced.tif'},
 {'title': 'Syrtis Major Hemisphere Enhanced',
  'img_url': 'images/syrtis_major_enhanced.tif'},
 {'title': 'Valles Marineris Hemisphere Enhanced',
  'img_url': 'images/valles_marineris_enhanced.tif'}]

In [18]:
browser.quit()