In [128]:
# Import Splinter and BeautifulSoup
from splinter import Browser
from bs4 import BeautifulSoup

# Import pandas 
import pandas as pd 
import numpy as np


In [24]:
# Set the executable path and initialize the chrome browser in splinter.
# `browser` is the single session object that the scraping cells below drive.
executable_path = {'executable_path': 'chromedriver'}
browser = Browser('chrome', **executable_path)

In [6]:
# Visit the mars nasa news site
url = 'https://mars.nasa.gov/news/'
browser.visit(url)
# Optional delay for loading the page: check (wait_time=1) whether the first
# news slide is present. The boolean result is this cell's displayed output.
browser.is_element_present_by_css("ul.item_list li.slide", wait_time=1)

True

In [7]:
# Snapshot the page the browser is currently showing and parse it
html = browser.html
news_soup = BeautifulSoup(html, 'html.parser')
# Narrow down to the first slide in the news list, then display its title <div>
slide_elem = news_soup.select_one('ul.item_list li.slide')
slide_elem.select_one('div.content_title')

<div class="content_title"><a href="/news/8699/how-nasas-mars-helicopter-will-reach-the-red-planets-surface/" target="_self">How NASA's Mars Helicopter Will Reach the Red Planet's Surface</a></div>

In [8]:
# Take the text of the slide's `content_title` <div> and save it as `news_title`
title_div = slide_elem.find("div", class_='content_title')
news_title = title_div.get_text()
news_title

"How NASA's Mars Helicopter Will Reach the Red Planet's Surface"

In [9]:
# Take the text of the slide's `article_teaser_body` <div> (the summary paragraph)
teaser_div = slide_elem.find('div', class_="article_teaser_body")
news_p = teaser_div.get_text()
news_p

'The small craft will seek to prove that powered, controlled flight is possible on another planet. But just getting it onto the surface of Mars will take a whole lot of ingenuity.'

### Image Scraping Begins


In [10]:
# Visit the JPL space-images page, filtered to the Mars category by the query string
url = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'
browser.visit(url)

In [11]:
# Find the full image button (the element with id `full_image`) and click it
full_image_elem = browser.find_by_id('full_image')
full_image_elem.click()

In [12]:
# Find the more info button and click that.
# First check (wait_time=1) for the 'more info' text; the boolean result is not used.
browser.is_element_present_by_text('more info', wait_time=1)
# NOTE(review): `find_link_by_partial_text` is, as far as I know, deprecated in newer
# Splinter releases in favour of `browser.links.find_by_partial_text` — confirm against
# the installed Splinter version before upgrading.
more_info_elem = browser.find_link_by_partial_text('more info')
more_info_elem.click()



In [13]:
# Parse the resulting html with soup: read the browser's current HTML and
# build `img_soup` from it using the built-in 'html.parser' backend
html = browser.html
img_soup = BeautifulSoup(html, 'html.parser')


In [14]:
# Locate the <img> inside the lede figure's link and read its `src` attribute,
# which is a site-relative path
lede_img = img_soup.select_one('figure.lede a img')
img_url_rel = lede_img.get('src')
img_url_rel


'/spaceimages/images/largesize/PIA23436_hires.jpg'

In [15]:
# Prefix the JPL site root to the relative path to get an absolute image URL
img_url = 'https://www.jpl.nasa.gov{}'.format(img_url_rel)
img_url

'https://www.jpl.nasa.gov/spaceimages/images/largesize/PIA23436_hires.jpg'

In [125]:
# Read the tables found on the Mars facts page and keep the first one
mars_tables = pd.read_html('http://space-facts.com/mars/')
df = mars_tables[0]
# Name the two columns, then display the frame
df.columns = ['description', 'value']
df

Unnamed: 0,description,value
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.39 × 10^23 kg (0.11 Earths)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.38 AU)"
5,Orbit Period:,687 days (1.9 years)
6,Surface Temperature:,-87 to -5 °C
7,First Record:,2nd millennium BC
8,Recorded By:,Egyptian astronomers


In [126]:
# Fact labels (the `description` column) as a plain Python list
description_col = df['description']
mars_list_2 = description_col.tolist()
mars_list_2

['Equatorial Diameter:',
 'Polar Diameter:',
 'Mass:',
 'Moons:',
 'Orbit Distance:',
 'Orbit Period:',
 'Surface Temperature:',
 'First Record:',
 'Recorded By:']

In [118]:
# Fact values (the `value` column) as a plain Python list.
# Despite its name, `mars_dict` holds a list, not a dict.
value_col = df['value']
mars_dict = value_col.tolist()
mars_dict

['6,792 km',
 '6,752 km',
 '6.39 × 10^23 kg (0.11 Earths)',
 '2 (Phobos & Deimos)',
 '227,943,824 km (1.38 AU)',
 '687 days (1.9 years)',
 '-87 to -5 °C',
 '2nd millennium BC',
 'Egyptian astronomers']

In [129]:
# Pair each label with the value at the same position, one record dict per fact
mars_list = [
    {'description': mars_list_2[idx], 'value': fact_value}
    for idx, fact_value in enumerate(mars_dict)
]

In [130]:
# Display the assembled list of {'description', 'value'} records
mars_list

[{'description': 'Equatorial Diameter:', 'value': '6,792 km'},
 {'description': 'Polar Diameter:', 'value': '6,752 km'},
 {'description': 'Mass:', 'value': '6.39 × 10^23 kg (0.11 Earths)'},
 {'description': 'Moons:', 'value': '2 (Phobos & Deimos)'},
 {'description': 'Orbit Distance:', 'value': '227,943,824 km (1.38 AU)'},
 {'description': 'Orbit Period:', 'value': '687 days (1.9 years)'},
 {'description': 'Surface Temperature:', 'value': '-87 to -5 °C'},
 {'description': 'First Record:', 'value': '2nd millennium BC'},
 {'description': 'Recorded By:', 'value': 'Egyptian astronomers'}]

In [17]:
# Render the facts table as an HTML string.
# NOTE(review): the saved output below shows `description` as the index, but the
# cell that builds `df` does not set an index — the output looks stale (older
# execution count). Re-run, or confirm whether a set_index step was dropped.
df.to_html()


'<table border="1" class="dataframe">\n  <thead>\n    <tr style="text-align: right;">\n      <th></th>\n      <th>value</th>\n    </tr>\n    <tr>\n      <th>description</th>\n      <th></th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>Equatorial Diameter:</th>\n      <td>6,792 km</td>\n    </tr>\n    <tr>\n      <th>Polar Diameter:</th>\n      <td>6,752 km</td>\n    </tr>\n    <tr>\n      <th>Mass:</th>\n      <td>6.39 × 10^23 kg (0.11 Earths)</td>\n    </tr>\n    <tr>\n      <th>Moons:</th>\n      <td>2 (Phobos &amp; Deimos)</td>\n    </tr>\n    <tr>\n      <th>Orbit Distance:</th>\n      <td>227,943,824 km (1.38 AU)</td>\n    </tr>\n    <tr>\n      <th>Orbit Period:</th>\n      <td>687 days (1.9 years)</td>\n    </tr>\n    <tr>\n      <th>Surface Temperature:</th>\n      <td>-87 to -5 °C</td>\n    </tr>\n    <tr>\n      <th>First Record:</th>\n      <td>2nd millennium BC</td>\n    </tr>\n    <tr>\n      <th>Recorded By:</th>\n      <td>Egyptian astronomers</td>\n    </tr>\

### Scrape high-resolution hemisphere images

In [78]:
# Empty container for the per-hemisphere dictionaries
img_list = list()

In [79]:
# Open the browser to the hosted copy of the USGS Astrogeology search-results page
url = 'https://2u-data-curriculum-team.s3.amazonaws.com/dataviz-online-content/module_10/Astropedia+Search+Results+_+USGS+Astrogeology+Science+Center.htm'
browser.visit(url)

In [80]:
# Hemisphere 1 
tag_1 = "Cerberus Hemisphere Enhanced"
full_image_elem = browser.find_by_text(f'{tag_1}')
full_image_elem.click()
full_image_elem = browser.find_by_text("Original")
url_1 = full_image_elem['href']
image_1_dict = {'title': tag_1, 'img_url': url_1}
img_list.append(image_1_dict)

In [81]:
# Hemisphere 2
browser.back()
tag_2 = "Schiaparelli Hemisphere Enhanced"
full_image_elem = browser.find_by_text(f'{tag_2}')
full_image_elem.click()
full_image_elem = browser.find_by_text("Original")
url_2 = full_image_elem['href']
image_2_dict = {'title': tag_2, 'img_url': url_2}
img_list.append(image_2_dict)

In [82]:
# Hemisphere 3
browser.back()
tag_3 = "Syrtis Major Hemisphere Enhanced"
full_image_elem = browser.find_by_text(f'{tag_3}')
full_image_elem.click()
full_image_elem = browser.find_by_text("Original")
url_3 = full_image_elem['href']
image_3_dict = {'title': tag_3, 'img_url': url_3}
img_list.append(image_3_dict)

In [83]:
# Hemisphere 4
browser.back()
tag_4 = "Valles Marineris Hemisphere Enhanced"
full_image_elem = browser.find_by_text(f'{tag_4}')
full_image_elem.click()
full_image_elem = browser.find_by_text("Original")
url_4 = full_image_elem['href']
image_4_dict = {'title': tag_4, 'img_url': url_4}
img_list.append(image_4_dict)

In [None]:
# Close the Splinter browser session once scraping is finished
browser.quit()