In [1]:
import pandas as pd
import pymongo
from bs4 import BeautifulSoup as bs
import requests
from splinter import Browser

# Result container: later cells fill it in, and the finished dictionary is what gets passed into mars_app.py
mars_dict = dict()

In [2]:
# function to get headlines, paragraphs, not images
def scraping_func(url):
    """Load ``url`` in a splinter-driven Chrome window and return the parsed page.

    Parameters
    ----------
    url : str
        Address of the page to open.

    Returns
    -------
    BeautifulSoup
        The HTML reported by the browser, parsed with ``html.parser``.

    Notes
    -----
    A visible (``headless=False``) Chrome session is started with the driver
    path ``chromedriver.exe``. The session is always shut down before this
    function returns or raises, so a failed page load does not leave a
    browser process behind.
    """
    # Path to chromedriver
    executable_path = {'executable_path': 'chromedriver.exe'}
    browser = Browser('chrome', **executable_path, headless=False)

    try:
        # Go to website
        browser.visit(url)

        # Wait up to one second for the `ul.item_list li.slide` element.
        # The boolean result is intentionally ignored: pages that never show
        # this element are still scraped once the wait is over.
        browser.is_element_present_by_css("ul.item_list li.slide", wait_time=1)

        # Grab the rendered markup while the session is still alive
        html = browser.html
    finally:
        # Close the browser no matter how the scraping went
        browser.quit()

    # Return the scraped page as a soup object
    return bs(html, 'html.parser')

In [3]:
# function to get Featured image
def get_featured_img_func(url):
    """Click through to the featured image's detail page and return an image link.

    The browser opens ``url``, clicks the element with id ``full_image``,
    follows the link whose text contains ``more info`` and then reads the
    ``href`` of the anchor at index 58 on the page it lands on.

    Parameters
    ----------
    url : str
        Address of the image listing page to start from.

    Returns
    -------
    str
        The ``href`` value of that anchor. In the run recorded in this
        notebook it was an absolute ``.jpg`` URL.

    Notes
    -----
    The Chrome session is always closed, including when a click or lookup
    fails part-way through.
    """
    # Path to chromedriver
    executable_path = {'executable_path': 'chromedriver.exe'}
    browser = Browser('chrome', **executable_path, headless=False)

    try:
        # Go to website
        browser.visit(url)

        # "Full Image" button: clicking it leads to the next view
        browser.find_by_id("full_image").click()

        # Wait up to one second for the "more info" link, then follow it
        browser.is_element_present_by_text('more info', wait_time=1)
        browser.find_link_by_partial_text('more info').click()

        # NOTE(review): position 58 among the page's <a> tags is tied to the
        # page layout at the time of writing - confirm it still points at the
        # image link if the site changes.
        anchors = browser.find_by_tag("a")
        image_url = anchors[58]["href"]
    finally:
        # Close the browser whether or not the navigation succeeded
        browser.quit()

    return image_url

In [4]:
# function to get Hemis images
def get_hemis_img(url):
    """Collect the title and full-size image URL of every hemisphere page linked from ``url``.

    The function opens ``url``, reads the ``href`` of the anchors at positions
    4 through 11 (skipping links already seen), visits each remaining link and
    takes the text of ``<h2 class="title">`` plus the ``src`` of
    ``<img class="wide-image">`` from that page.

    Parameters
    ----------
    url : str
        Address of the search-results page that links to the hemisphere pages.

    Returns
    -------
    list of dict
        One ``{"title": ..., "img_url": ...}`` dictionary per distinct link,
        in order of first appearance. ``img_url`` is the image ``src``
        prefixed with ``https://astrogeology.usgs.gov``.

    Notes
    -----
    A single Chrome session serves the listing page and every detail page,
    and it is always closed, even if a page fails to load or parse.
    Duplicate links are skipped silently.
    """
    # Path to chromedriver
    executable_path = {'executable_path': 'chromedriver.exe'}
    browser = Browser('chrome', **executable_path, headless=False)

    final_hemis_img_url_list = []
    try:
        # Go to website
        browser.visit(url)

        # NOTE(review): anchors 4..11 are assumed to be the hemisphere links
        # (apparently each page is linked more than once) - confirm against
        # the current page layout.
        anchors = browser.find_by_tag("a")

        # Links to the individual hemisphere pages, without repeats
        hemis_image_path_list = []
        for position in range(4, 12):
            href = anchors[position]["href"]
            if href not in hemis_image_path_list:
                hemis_image_path_list.append(href)

        for page_url in hemis_image_path_list:
            # Reuse the same browser for each hemisphere page
            browser.visit(page_url)
            soup = bs(browser.html, 'html.parser')

            # get image title
            result_title = soup.find('h2', class_='title').get_text()

            # get image URL and prepend the site root to get the complete link
            image_src = soup.find('img', class_='wide-image')["src"]
            final_url = 'https://astrogeology.usgs.gov' + image_src

            final_hemis_img_url_list.append({"title": result_title, "img_url": final_url})
    finally:
        # Close the browser after scraping, also when something went wrong
        browser.quit()

    return final_hemis_img_url_list

In [5]:
### NASA Mars News
# * Scrape the [NASA Mars News Site](https://mars.nasa.gov/news/) and collect the latest News Title and Paragraph Text. 
# Assign the text to variables that you can reference later.

# Page to scrape for the news section
url = "https://mars.nasa.gov/news/"

# Render the page in the browser and get it back as a soup object
scrape_soup = scraping_func(url)

# First <div class="content_title"> found on the page
news_title_find = scrape_soup.find("div", attrs={"class": "content_title"})

# Keep only the text of that element
news_title = news_title_find.get_text()
news_title

"Robotic Toolkit Added to NASA's Mars 2020 Rover"

In [6]:
# First <div class="article_teaser_body"> on the already-scraped news page,
# reduced to its text
news_p_find = scrape_soup.find("div", attrs={"class": "article_teaser_body"})
news_p = news_p_find.get_text()
news_p

"The bit carousel, which lies at the heart of the rover's Sample Caching System, is now aboard NASA's newest rover. "

In [7]:
### JPL Mars Space Images - Featured Image
#* Use splinter to navigate the site and find the image url for the current Featured Mars Image and 
# assign the url string to a variable called `featured_image_url`.

# Starting page for the featured-image lookup
url = "https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars/"

# Let the helper click through the site and hand back the link it finds
featured_image_url = get_featured_img_func(url)
featured_image_url

In [8]:
### Mars Weather
#* Visit the Mars Weather twitter account [here](https://twitter.com/marswxreport?lang=en) and 
# scrape the latest Mars weather tweet from the page. 
#Save the tweet text for the weather report as a variable called `mars_weather`

# Twitter timeline to scrape for the weather report
url = "https://twitter.com/marswxreport?lang=en"

# Render the page in the browser and get it back as a soup object
scrape_soup = scraping_func(url)

# First <p> whose class attribute is exactly the tweet-text class string
mars_weather_find = scrape_soup.find(
    "p",
    attrs={"class": "TweetTextSize TweetTextSize--normal js-tweet-text tweet-text"},
)

# Keep only the text of that element
mars_weather = mars_weather_find.get_text()
mars_weather

'InSight sol 258 (2019-08-18) low -100.0ºC (-148.1ºF) high -26.2ºC (-15.2ºF)\nwinds from the SSE at 5.3 m/s (11.9 mph) gusting to 16.8 m/s (37.6 mph)\npressure at 7.60 hPapic.twitter.com/5nCVjcsmlZ'

In [9]:
### Mars Facts
#* Visit the Mars Facts webpage [here](https://space-facts.com/mars/) and 
#use Pandas to scrape the table containing facts about the planet including Diameter, Mass, etc.
#* Use Pandas to convert the data to a HTML table string.

# Page whose HTML tables are read by pandas
url = "https://space-facts.com/mars/"

# read_html returns a list of DataFrames; show the first one
tables = pd.read_html(url)
comp_table = tables[0]
comp_table

Unnamed: 0,Mars - Earth Comparison,Mars,Earth
0,Diameter:,"6,779 km","12,742 km"
1,Mass:,6.39 × 10^23 kg,5.97 × 10^24 kg
2,Moons:,2,1
3,Distance from Sun:,"227,943,824 km","149,598,262 km"
4,Length of Year:,687 Earth days,365.24 days
5,Temperature:,-153 to 20 °C,-88 to 58°C


In [10]:
# Rename the headers on a copy: `tables[0]` is the same object as `comp_table`,
# so renaming it in place would silently change the frame displayed earlier.
mars_earth_comp_df = tables[0].copy()
mars_earth_comp_df.columns = ['Mars-Earth Comparison', 'Mars', 'Earth']
mars_earth_comp_df

Unnamed: 0,Mars-Earth Comparison,Mars,Earth
0,Diameter:,"6,779 km","12,742 km"
1,Mass:,6.39 × 10^23 kg,5.97 × 10^24 kg
2,Moons:,2,1
3,Distance from Sun:,"227,943,824 km","149,598,262 km"
4,Length of Year:,687 Earth days,365.24 days
5,Temperature:,-153 to 20 °C,-88 to 58°C


In [11]:
# One record per table row, built from the first three columns by position.
# Rows are iterated as plain tuples rather than read through chained
# `.iloc[i][j]`, whose integer lookup on a label-indexed row relies on a
# positional fallback that pandas deprecates.
mars_comparison = [
    {"description": row[0], "mars": row[1], "earth": row[2]}
    for row in mars_earth_comp_df.itertuples(index=False, name=None)
]
mars_comparison

[{'description': 'Diameter:', 'mars': '6,779 km', 'earth': '12,742 km'},
 {'description': 'Mass:',
  'mars': '6.39 × 10^23 kg',
  'earth': '5.97 × 10^24 kg'},
 {'description': 'Moons:', 'mars': '2', 'earth': '1'},
 {'description': 'Distance from Sun:',
  'mars': '227,943,824 km',
  'earth': '149,598,262 km'},
 {'description': 'Length of Year:',
  'mars': '687 Earth days',
  'earth': '365.24 days'},
 {'description': 'Temperature:',
  'mars': '-153 to 20 °C',
  'earth': '-88 to 58°C'}]

In [12]:
# Rename the headers on a copy so the raw scraped table in `tables` stays untouched
mars_planet_profile_df = tables[1].copy()
mars_planet_profile_df.columns = ['Mars Planet Profile', 'Measurement']
mars_planet_profile_df

Unnamed: 0,Mars Planet Profile,Measurement
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.39 × 10^23 kg (0.11 Earths)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.38 AU)"
5,Orbit Period:,687 days (1.9 years)
6,Surface Temperature:,-87 to -5 °C
7,First Record:,2nd millennium BC
8,Recorded By:,Egyptian astronomers


In [13]:
# One record per table row, built from the first two columns by position.
# Rows are iterated as plain tuples rather than read through chained
# `.iloc[i][j]`, whose integer lookup on a label-indexed row relies on a
# positional fallback that pandas deprecates.
mars_profile = [
    {"description": row[0], "value": row[1]}
    for row in mars_planet_profile_df.itertuples(index=False, name=None)
]
mars_profile

[{'description': 'Equatorial Diameter:', 'value': '6,792 km'},
 {'description': 'Polar Diameter:', 'value': '6,752 km'},
 {'description': 'Mass:', 'value': '6.39 × 10^23 kg (0.11 Earths)'},
 {'description': 'Moons:', 'value': '2 (Phobos & Deimos)'},
 {'description': 'Orbit Distance:', 'value': '227,943,824 km (1.38 AU)'},
 {'description': 'Orbit Period:', 'value': '687 days (1.9 years)'},
 {'description': 'Surface Temperature:', 'value': '-87 to -5 °C'},
 {'description': 'First Record:', 'value': '2nd millennium BC'},
 {'description': 'Recorded By:', 'value': 'Egyptian astronomers'}]

In [16]:
#* Visit the USGS Astrogeology site [here](https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars) 
#to obtain high-resolution images for each of Mars's hemispheres.
#* You will need to click each of the links to the hemispheres in order to find the image url to the full resolution image.
#* Save both the image url string for the full resolution hemisphere image, and the Hemisphere title containing 
#the hemisphere name. Use a Python dictionary to store the data using the keys `img_url` and `title`.
#* Append the dictionary with the image url string and the hemisphere title to a list. 
# This list will contain one dictionary for each hemisphere.

# Search-results page that links to the individual hemisphere pages
url = "https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars"

# Let the helper visit each linked page and return the title/img_url dictionaries
final_hemis_img_url_list = get_hemis_img(url)
final_hemis_img_url_list







[{'title': 'Cerberus Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/cfa62af2557222a02478f1fcd781d445_cerberus_enhanced.tif_full.jpg'},
 {'title': 'Schiaparelli Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/3cdd1cbf5e0813bba925c9030d13b62e_schiaparelli_enhanced.tif_full.jpg'},
 {'title': 'Syrtis Major Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/ae209b4e408bb6c3e67b6af38168cf28_syrtis_major_enhanced.tif_full.jpg'},
 {'title': 'Valles Marineris Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/7cf2da4bf549ed01c17f206327be4db7_valles_marineris_enhanced.tif_full.jpg'}]

In [17]:
# Store every scraped value in the result dictionary in one step
mars_dict.update(
    {
        "mars_title": news_title,
        "mars_news": news_p,
        "mars_image": featured_image_url,
        "mars_currentweather": mars_weather,
        "mars_comparison": mars_comparison,
        "mars_profile": mars_profile,
        "mars_image_urls": final_hemis_img_url_list,
    }
)

mars_dict

{'mars_title': "Robotic Toolkit Added to NASA's Mars 2020 Rover",
 'mars_news': "The bit carousel, which lies at the heart of the rover's Sample Caching System, is now aboard NASA's newest rover. ",
 'mars_image': 'https://www.jpl.nasa.gov/spaceimages/images/largesize/PIA18914_hires.jpg',
 'mars_currentweather': 'InSight sol 258 (2019-08-18) low -100.0ºC (-148.1ºF) high -26.2ºC (-15.2ºF)\nwinds from the SSE at 5.3 m/s (11.9 mph) gusting to 16.8 m/s (37.6 mph)\npressure at 7.60 hPapic.twitter.com/5nCVjcsmlZ',
 'mars_comparison': [{'description': 'Diameter:',
   'mars': '6,779 km',
   'earth': '12,742 km'},
  {'description': 'Mass:',
   'mars': '6.39 × 10^23 kg',
   'earth': '5.97 × 10^24 kg'},
  {'description': 'Moons:', 'mars': '2', 'earth': '1'},
  {'description': 'Distance from Sun:',
   'mars': '227,943,824 km',
   'earth': '149,598,262 km'},
  {'description': 'Length of Year:',
   'mars': '687 Earth days',
   'earth': '365.24 days'},
  {'description': 'Temperature:',
   'mars': '-1

In [18]:
# Show only the planet-profile records stored in the result dictionary
mars_dict["mars_profile"]

[{'description': 'Equatorial Diameter:', 'value': '6,792 km'},
 {'description': 'Polar Diameter:', 'value': '6,752 km'},
 {'description': 'Mass:', 'value': '6.39 × 10^23 kg (0.11 Earths)'},
 {'description': 'Moons:', 'value': '2 (Phobos & Deimos)'},
 {'description': 'Orbit Distance:', 'value': '227,943,824 km (1.38 AU)'},
 {'description': 'Orbit Period:', 'value': '687 days (1.9 years)'},
 {'description': 'Surface Temperature:', 'value': '-87 to -5 °C'},
 {'description': 'First Record:', 'value': '2nd millennium BC'},
 {'description': 'Recorded By:', 'value': 'Egyptian astronomers'}]