In [1]:
# Dependencies
import pandas as pd
from pprint import pprint 
import time
import requests as req
from bs4 import BeautifulSoup as bs
from splinter import Browser
from webdriver_manager.chrome import ChromeDriverManager 

In [2]:
executable_path = {'executable_path': ChromeDriverManager().install()}
browser = Browser('chrome', **executable_path, headless=False)

[WDM] - Current google-chrome version is 89.0.4389
[WDM] - Get LATEST driver version for 89.0.4389
[WDM] - Driver [C:\Users\ocean\.wdm\drivers\chromedriver\win32\89.0.4389.23\chromedriver.exe] found in cache






# Scraping

## NASA Mars News

- Scraped the [NASA Mars News Site](https://mars.nasa.gov/news/?page=0&per_page=40&order=publish_date+desc%2Ccreated_at+desc&search=&category=19%2C165%2C184%2C204&blank_scope=Latest) and collected the latest News Title and Paragraph Text.  
- Assign the text to variables that we can reference later.

In [3]:
# set up url for NASA news site, browser, and html
news_url = 'https://mars.nasa.gov/news/'
browser.visit(news_url)
news_html = browser.html

# parse with BeautifulSoup
news_soup = bs(news_html, 'html.parser')

# find the latest news article title and print it
article_title = news_soup.find_all('div', class_='content_title')[1].find('a').text
time.sleep(5) # sleep before next task 

# find the latest news article paragraph text and print it
article_p = news_soup.find_all('div', class_='article_teaser_body')[1].text
time.sleep(5) # sleep before next task 

# print the title and the paragraph text
print(article_title)
print(article_p)
time.sleep(5) # sleep before next task 

Another First: Perseverance Captures the Sounds of Driving on Mars
Members of the projects will lay out the steps necessary before the helicopter attempts its historic test flights. 


## JPL Mars Space Images - Featured Image

- Visit the url for JPL Featured Space Image [here](https://data-class-jpl-space.s3.amazonaws.com/JPL_Space/index.html).
- Use splinter to navigate the site and find the image url for the current Featured Mars Image and assign the url string to a variable called featured_image_url.
- Make sure to find the image url to the full size .jpg image.

In [4]:
# set up url for JPL Featured Mars Image, browser, and html
image_url = 'https://data-class-jpl-space.s3.amazonaws.com/JPL_Space/index.html'
browser.visit(image_url)
image_html = browser.html
time.sleep(5) # sleep before next task 

# go to 'FULL IMAGE', set the new browser link, and html
browser.links.find_by_partial_text('FULL IMAGE').first.click()
full_image_html = browser.html
time.sleep(5) # sleep before next task 

# parse with BeautifulSoup
image_soup = bs(full_image_html, 'html.parser')
time.sleep(5) # sleep before next task 

# scrape the URL
feature_url = image_soup.find('img', class_='fancybox-image')['src']
time.sleep(5) # sleep before next task 

# print the url for the full image version of the Featured Mars Image
base_url = 'https://data-class-jpl-space.s3.amazonaws.com/JPL_Space/'
featured_image_url = f'{base_url}{feature_url}'
print(featured_image_url)

https://data-class-jpl-space.s3.amazonaws.com/JPL_Space/image/featured/mars1.jpg


## Mars Facts
- Visit the Mars Facts webpage [here](https://space-facts.com/mars/) and use Pandas to scrape the table containing facts about the planet including Diameter, Mass, etc.
- Use Pandas to convert the data to an HTML table string.

In [5]:
# set up url for Mars Facts and browser
facts_url = "https://space-facts.com/mars/"
browser.visit(facts_url)
time.sleep(5) # sleep before next task 

# use Pandas to parse facts url to find all tables
facts_tables = pd.read_html(facts_url)

# select the correct table from the list of tables
facts_df = facts_tables[0]

# rename the columns with appropriate headings
facts_df.columns = ['Variable', 'Value']

# convert the data to an HTML string
facts_string = facts_df.to_html(index=False)
print(facts_string)

<table border="1" class="dataframe">
  <thead>
    <tr style="text-align: right;">
      <th>Variable</th>
      <th>Value</th>
    </tr>
  </thead>
  <tbody>
    <tr>
      <td>Equatorial Diameter:</td>
      <td>6,792 km</td>
    </tr>
    <tr>
      <td>Polar Diameter:</td>
      <td>6,752 km</td>
    </tr>
    <tr>
      <td>Mass:</td>
      <td>6.39 × 10^23 kg (0.11 Earths)</td>
    </tr>
    <tr>
      <td>Moons:</td>
      <td>2 (Phobos &amp; Deimos)</td>
    </tr>
    <tr>
      <td>Orbit Distance:</td>
      <td>227,943,824 km (1.38 AU)</td>
    </tr>
    <tr>
      <td>Orbit Period:</td>
      <td>687 days (1.9 years)</td>
    </tr>
    <tr>
      <td>Surface Temperature:</td>
      <td>-87 to -5 °C</td>
    </tr>
    <tr>
      <td>First Record:</td>
      <td>2nd millennium BC</td>
    </tr>
    <tr>
      <td>Recorded By:</td>
      <td>Egyptian astronomers</td>
    </tr>
  </tbody>
</table>


## Mars Hemispheres
- Visit the USGS Astrogeology site [here](https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars) to obtain high resolution images for each of Mar's hemispheres.
- Save both the image url string for the full resolution hemisphere image, and the Hemisphere title containing the hemisphere name. Use a Python dictionary to store the data using the keys img_url and title.
- Append the dictionary with the image url string and the hemisphere title to a list. This list contains one dictionary for each hemisphere.

In [6]:
# set up url and browser
hemispheres_url = "https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars"
browser.visit(hemispheres_url)
html_hemispheres = browser.html
time.sleep(5) # sleep before next task 

# parse with BeautifulSoup
hemispheres_soup = bs(html_hemispheres, 'html.parser')
time.sleep(5) # sleep before next task 

# # Retreive all items that contain mars hemispheres information
# items = soup.find_all('div', class_='item')

# # Create empty list for hemisphere urls 
# hemisphere_image_urls = []

# # Store the main_ul 
# hemispheres_main_url = 'https://astrogeology.usgs.gov'

# # Loop through the items previously stored
# for i in items: 
#     # Store title
#     title = i.find('h3').text
    
#     # Store link that leads to full image website
#     partial_img_url = i.find('a', class_='itemLink product-item')['href']
    
#     # Visit the link that contains the full image website 
#     browser.visit(hemispheres_main_url + partial_img_url)
    
#     # HTML Object of individual hemisphere information website 
#     partial_img_html = browser.html
    
#     # Parse HTML with Beautiful Soup for every individual hemisphere information website 
#     soup = bs( partial_img_html, 'html.parser')
    
#     # Retrieve full image source 
#     img_url = hemispheres_main_url + soup.find('img', class_='wide-image')['src']
    
#     # Append the retreived information into a list of dictionaries 
#     hemisphere_image_urls.append({"title" : title, "img_url" : img_url})
    

# # Display hemisphere_image_urls
# hemisphere_image_urls

# # close the browser
# browser.quit()

<html lang="en"><head>
<link href="//ajax.googleapis.com/ajax/libs/jqueryui/1.11.3/themes/smoothness/jquery-ui.css" rel="stylesheet" type="text/css"/>
<title>Astropedia Search Results | USGS Astrogeology Science Center</title>
<meta content="USGS Astrogeology Science Center Astropedia search results." name="description"/>
<meta content="USGS,Astrogeology Science Center,Cartography,Geology,Space,Geological Survey,Mapping" name="keywords"/>
<meta content="IE=edge" http-equiv="X-UA-Compatible"/>
<meta content="text/html; charset=utf-8" http-equiv="Content-Type"/>
<meta content="width=device-width, initial-scale=1, maximum-scale=1" name="viewport"/>
<meta content="x61hXXVj7wtfBSNOPnTftajMsZ5yB2W-qRoyr7GtOKM" name="google-site-verification"/>
<!--<link rel="stylesheet" href="http://fonts.googleapis.com/css?family=Open+Sans:400italic,400,bold"/>-->
<link href="/css/main.css" media="screen" rel="stylesheet"/>
<link href="/css/print.css" media="print" rel="stylesheet"/>
<!--[if lt IE 9]>
			<s