In [44]:
# Dependencies
from bs4 import BeautifulSoup
from splinter import Browser
import requests
import pandas as pd

In [45]:
# URL of page to be scraped
url = 'https://mars.nasa.gov/news/8770/nasas-perseverance-rover-will-peer-beneath-mars-surface/'

In [46]:
# Retrieve page with the requests module
response = requests.get(url)

In [47]:
response.text

'<!DOCTYPE html>\n<html lang=\'en\' xml:lang=\'en\' xmlns=\'http://www.w3.org/1999/xhtml\'>\n<head>\n<meta content=\'text/html; charset=utf-8\' http-equiv=\'Content-Type\'>\n<!-- Always force latest IE rendering engine or request Chrome Frame -->\n<meta content=\'IE=edge,chrome=1\' http-equiv=\'X-UA-Compatible\'>\n<!-- Responsiveness -->\n<meta content=\'width=device-width, initial-scale=1.0\' name=\'viewport\'>\n<!-- Favicon -->\n<link href=\'/apple-touch-icon.png\' rel=\'apple-touch-icon\' sizes=\'180x180\'>\n<link href=\'/favicon-32x32.png\' rel=\'icon\' sizes=\'32x32\' type=\'image/png\'>\n<link href=\'/favicon-16x16.png\' rel=\'icon\' sizes=\'16x16\' type=\'image/png\'>\n<link href=\'/manifest.json\' rel=\'manifest\'>\n<link color=\'#e48b55\' href=\'/safari-pinned-tab.svg\' rel=\'mask-icon\'>\n<meta content=\'#000000\' name=\'theme-color\'>\n\n<meta name="csrf-param" content="authenticity_token" />\n<meta name="csrf-token" content="deGnIJya5vDdSk1xFdcMKBiNSOy0HCnEfkeht1VFLhIvs4/+9

In [48]:
# Create BeautifulSoup object; parse with 'html.parser' Consider html5.parser
soup = BeautifulSoup(response.text, 'html.parser')

In [49]:
# Examine the results, then determine element that contains sought info
print(soup.prettify())

<!DOCTYPE html>
<html lang="en" xml:lang="en" xmlns="http://www.w3.org/1999/xhtml">
 <head>
  <meta content="text/html; charset=utf-8" http-equiv="Content-Type"/>
  <!-- Always force latest IE rendering engine or request Chrome Frame -->
  <meta content="IE=edge,chrome=1" http-equiv="X-UA-Compatible"/>
  <!-- Responsiveness -->
  <meta content="width=device-width, initial-scale=1.0" name="viewport"/>
  <!-- Favicon -->
  <link href="/apple-touch-icon.png" rel="apple-touch-icon" sizes="180x180"/>
  <link href="/favicon-32x32.png" rel="icon" sizes="32x32" type="image/png"/>
  <link href="/favicon-16x16.png" rel="icon" sizes="16x16" type="image/png"/>
  <link href="/manifest.json" rel="manifest"/>
  <link color="#e48b55" href="/safari-pinned-tab.svg" rel="mask-icon"/>
  <meta content="#000000" name="theme-color"/>
  <meta content="authenticity_token" name="csrf-param">
   <meta content="deGnIJya5vDdSk1xFdcMKBiNSOy0HCnEfkeht1VFLhIvs4/+9Kl+69bN0J0efnZWz35hule7cHxGvW4lV3+UxQ==" name="csrf-to

In [50]:
# Scrape the NASA Mars News Site and collect the latest News Title and Paragraph Text.
# Assign the text to variables that you can reference later.
news_title = soup.h1.text.strip()
news_title

"NASA's Perseverance Rover Will Peer Beneath Mars' Surface"

In [51]:
news_p = soup.body.find_all('p')[1].text
news_p

"The agency's newest rover will use the first ground-penetrating radar instrument on the Martian surface to help search for signs of past microbial life."

In [68]:
from webdriver_manager.chrome import ChromeDriverManager
executable_path = {'executable_path': ChromeDriverManager().install()}
browser = Browser('chrome', **executable_path, headless=False)

[WDM] - Current google-chrome version is 86.0.4240
[WDM] - Get LATEST driver version for 86.0.4240
[WDM] - Driver [C:\Users\sdles\.wdm\drivers\chromedriver\win32\86.0.4240.22\chromedriver.exe] found in cache


 


In [104]:
url = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'
browser.visit(url)

In [105]:
full_image = browser.find_by_id('full_image')
full_image.click()

In [106]:
browser.is_element_present_by_text('more info',wait_time = 1)
more_info = browser.links.find_by_partial_text('more info')
more_info.click()

In [107]:
html = browser.html
soup = BeautifulSoup(html, 'html.parser')

In [108]:
# .lede is a shortcut for class = 'lede'
image_source = soup.select_one('figure.lede a img').get('src')
image_source

'/spaceimages/images/largesize/PIA18292_hires.jpg'

In [74]:
featured_image_url = 'https://www.jpl.nasa.gov' + image_source
featured_image_url

'https://www.jpl.nasa.gov/spaceimages/images/largesize/PIA01320_hires.jpg'

In [75]:
# grab the first table we see on the page and bring in as dataframe (default function in pd)
tables = pd.read_html('https://space-facts.com/mars/')[0]
tables

Unnamed: 0,0,1
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.39 × 10^23 kg (0.11 Earths)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.38 AU)"
5,Orbit Period:,687 days (1.9 years)
6,Surface Temperature:,-87 to -5 °C
7,First Record:,2nd millennium BC
8,Recorded By:,Egyptian astronomers


In [76]:
tables.columns=['Parameter', 'Value']
tables.set_index('Parameter',inplace = True)
tables

Unnamed: 0_level_0,Value
Parameter,Unnamed: 1_level_1
Equatorial Diameter:,"6,792 km"
Polar Diameter:,"6,752 km"
Mass:,6.39 × 10^23 kg (0.11 Earths)
Moons:,2 (Phobos & Deimos)
Orbit Distance:,"227,943,824 km (1.38 AU)"
Orbit Period:,687 days (1.9 years)
Surface Temperature:,-87 to -5 °C
First Record:,2nd millennium BC
Recorded By:,Egyptian astronomers


In [77]:
tables.to_html()

'<table border="1" class="dataframe">\n  <thead>\n    <tr style="text-align: right;">\n      <th></th>\n      <th>Value</th>\n    </tr>\n    <tr>\n      <th>Parameter</th>\n      <th></th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>Equatorial Diameter:</th>\n      <td>6,792 km</td>\n    </tr>\n    <tr>\n      <th>Polar Diameter:</th>\n      <td>6,752 km</td>\n    </tr>\n    <tr>\n      <th>Mass:</th>\n      <td>6.39 × 10^23 kg (0.11 Earths)</td>\n    </tr>\n    <tr>\n      <th>Moons:</th>\n      <td>2 (Phobos &amp; Deimos)</td>\n    </tr>\n    <tr>\n      <th>Orbit Distance:</th>\n      <td>227,943,824 km (1.38 AU)</td>\n    </tr>\n    <tr>\n      <th>Orbit Period:</th>\n      <td>687 days (1.9 years)</td>\n    </tr>\n    <tr>\n      <th>Surface Temperature:</th>\n      <td>-87 to -5 °C</td>\n    </tr>\n    <tr>\n      <th>First Record:</th>\n      <td>2nd millennium BC</td>\n    </tr>\n    <tr>\n      <th>Recorded By:</th>\n      <td>Egyptian astronomers</td>\n    </tr>\n 

In [109]:
# Mars Hemisphere 0
url = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'
browser.visit(url)
browser.find_by_css('div.item a.product-item h3')[0].click()

In [110]:
# Mars Hemisphere 1
url = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'
browser.visit(url)
browser.find_by_css('div.item a.product-item h3')[1].click()

In [112]:
#image_source = soup.select_one('figure.lede a img').get('src')
html = browser.html
soup = BeautifulSoup(html, 'html.parser')
image_source = soup.select_one('div.downloads a a').get('src')

AttributeError: 'NoneType' object has no attribute 'get'

In [97]:
html = browser.html
soup = BeautifulSoup(html, 'html.parser')
results = soup.find_all('div', class_='downloads')
image = results.find('li')
#image_text = results.li.text
image

AttributeError: ResultSet object has no attribute 'find'. You're probably treating a list of elements like a single element. Did you call find_all() when you meant to call find()?

In [65]:
# url and the picture
# <a target="_blank" href="https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg">Sample</a>

In [None]:
# title
<h2 class="title">Cerberus Hemisphere Enhanced</h2>
for loop of splinter naviogating to the images and storing what we want into list of dictionaries. 