In [38]:
from bs4 import BeautifulSoup
from splinter import Browser
import pandas as pd
import time

In [15]:
##
## Step 1: Scraping
## Initial Steps
##

# Start a visible (non-headless) Chrome session driven by splinter.
# NOTE(review): the bare "chromedriver" name presumably relies on the driver
# binary being discoverable on PATH -- confirm for your environment.
executable_path = dict(executable_path="chromedriver")
browser = Browser("chrome", headless=False, **executable_path)

In [16]:
# Open the NASA Mars News listing page in the splinter-driven browser.
url = "https://mars.nasa.gov/news/"
browser.visit(url)

# Wait (up to 10 s) for the first article container to exist before any later
# cell snapshots browser.html. `div.list_text` is the element the news-scraping
# cell looks up; if the HTML is captured before it is present, soup.find(...)
# returns None and the scrape fails with an AttributeError.
# The result is bound to a name so the cell does not echo a bare True/False.
news_ready = browser.is_element_present_by_css("div.list_text", wait_time=10)

In [17]:
# Snapshot the DOM as the browser currently holds it, then parse that markup
# with Python's built-in "html.parser" backend.
html = browser.html
soup = BeautifulSoup(markup=html, features="html.parser")

In [20]:
##
## NASA Mars News
##

# Collect the latest news title and teaser paragraph from the NASA Mars News
# site (https://mars.nasa.gov/news/) and keep them in `news_title` and
# `news_paragraph` for later use.

# The first `div.list_text` on the page is treated as the latest article.
news = soup.find("div", class_="list_text")
if news is None:
    # find() returns None when nothing matches; fail with an actionable message
    # instead of "'NoneType' object has no attribute 'find'".
    raise RuntimeError(
        "No <div class='list_text'> found in the parsed page; the news list "
        "may not have finished loading when browser.html was captured. "
        "Re-run the visit/parse cells and try again."
    )

news_title = news.find("div", class_="content_title").text
news_paragraph = news.find("div", class_="article_teaser_body").text

print(news_title)
print(news_paragraph)

NASA's Perseverance Rover Bringing 3D-Printed Metal Parts to Mars
For hobbyists and makers, 3D printing expands creative possibilities; for specialized engineers, it's also key to next-generation spacecraft design.


In [25]:
##
## JPL Mars Space Images - Featured Image
##

# Visit the JPL Featured Space Image page, filtered to the Mars category:
#   https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars
# Splinter navigates the shared browser there so that the following cells can
# read the rendered HTML and build `featured_image_url` from it.

jpl_url = "https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars"
browser.visit(jpl_url)

In [26]:
# Re-capture the browser's current HTML (now the JPL page) and re-parse it;
# `html` and `soup` are deliberately rebound to the new page.
html = browser.html
soup = BeautifulSoup(markup=html, features="html.parser")

In [27]:
# Take the `src` of the first <img class="thumb"> on the parsed JPL page.
# NOTE(review): the "thumb" class suggests this is a thumbnail rather than the
# full-size image -- confirm this is the asset you want.
thumb_tag = soup.find("img", class_="thumb")
image = thumb_tag["src"]

# Prefix the site root to `src` to form the final URL.
# assumes `src` is a site-relative path starting with "/" -- TODO confirm
featured_image_url = image_url = "https://jpl.nasa.gov" + image

In [28]:
##
## Mars Facts
##

# Visit the Mars Facts page (https://space-facts.com/mars/); the table of
# planet facts (Diameter, Mass, etc.) is scraped from it with pandas and then
# converted to an HTML table string.
#
# Note: the scraping cell passes `facts_url` straight to pd.read_html, which
# downloads the page itself; this browser visit only shows the page in Chrome.

facts_url = "https://space-facts.com/mars/"
browser.visit(facts_url)


In [30]:
# Scrape the Mars Facts webpage for the table containing facts about the planet.
# pd.read_html returns a list with one DataFrame per <table> it finds; the
# first entry is the one used here.
facts_pd = pd.read_html(io=facts_url)
first_table = facts_pd[0]
facts_df = pd.DataFrame(data=first_table)
facts_df

Unnamed: 0,0,1
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.39 × 10^23 kg (0.11 Earths)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.38 AU)"
5,Orbit Period:,687 days (1.9 years)
6,Surface Temperature:,-87 to -5 °C
7,First Record:,2nd millennium BC
8,Recorded By:,Egyptian astronomers


In [32]:
# Replace the default integer column labels (0 and 1 in the table displayed
# above) with descriptive names. This relabels `facts_df` in place; the next
# cell relies on the "Attribute" label when it sets the index.
facts_df.columns=['Attribute','Data']
facts_df

Unnamed: 0,Attribute,Data
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.39 × 10^23 kg (0.11 Earths)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.38 AU)"
5,Orbit Period:,687 days (1.9 years)
6,Surface Temperature:,-87 to -5 °C
7,First Record:,2nd millennium BC
8,Recorded By:,Egyptian astronomers


In [33]:
# Render the facts as an HTML <table> string: index the rows by attribute name,
# tag the table with the "mars_facts" CSS class, then swap every newline in the
# generated markup for a single space so the result is one line.
facts_table = facts_df.set_index("Attribute")
facts_markup = facts_table.to_html(classes="mars_facts")
mars_facts = " ".join(facts_markup.split("\n"))
mars_facts

'<table border="1" class="dataframe mars_facts">   <thead>     <tr style="text-align: right;">       <th></th>       <th>Data</th>     </tr>     <tr>       <th>Attribute</th>       <th></th>     </tr>   </thead>   <tbody>     <tr>       <th>Equatorial Diameter:</th>       <td>6,792 km</td>     </tr>     <tr>       <th>Polar Diameter:</th>       <td>6,752 km</td>     </tr>     <tr>       <th>Mass:</th>       <td>6.39 × 10^23 kg (0.11 Earths)</td>     </tr>     <tr>       <th>Moons:</th>       <td>2 (Phobos &amp; Deimos)</td>     </tr>     <tr>       <th>Orbit Distance:</th>       <td>227,943,824 km (1.38 AU)</td>     </tr>     <tr>       <th>Orbit Period:</th>       <td>687 days (1.9 years)</td>     </tr>     <tr>       <th>Surface Temperature:</th>       <td>-87 to -5 °C</td>     </tr>     <tr>       <th>First Record:</th>       <td>2nd millennium BC</td>     </tr>     <tr>       <th>Recorded By:</th>       <td>Egyptian astronomers</td>     </tr>   </tbody> </table>'

In [35]:
##
## Mars Hemispheres
##

# Visit the USGS Astrogeology search results
# (https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars)
# to obtain high-resolution images for each of Mars's hemispheres.
#
# For every hemisphere, a dictionary holding the image URL string and the
# hemisphere title is appended to a list, so the list ends up with one
# dictionary per hemisphere.


hemi_url = "https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars"
browser.visit(hemi_url)

In [36]:
# Capture and parse the hemisphere search-results page, and start with an empty
# accumulator for the per-hemisphere {"title", "img url"} records.
html = browser.html
soup = BeautifulSoup(markup=html, features="html.parser")
hemispheres = list()

In [42]:
# Visit each hemisphere's detail page from the search results and record its
# title plus the URL of its wide image.

HEMISPHERE_COUNT = 4   # number of result links to follow
WAIT_SECONDS = 5       # upper bound for each explicit wait below

# Reset the accumulator here so that re-running this cell on its own rebuilds
# the list instead of appending a second copy of every hemisphere.
hemispheres = []

for link_index in range(HEMISPHERE_COUNT):
    # Element handles do not survive navigation, so the <h3> links are looked
    # up again on every pass. Wait first: after browser.back() the results
    # page may still be loading.
    browser.is_element_present_by_tag("h3", wait_time=WAIT_SECONDS)
    image_array = browser.find_by_tag("h3")
    image_array[link_index].click()

    # Wait for the detail page's main image before snapshotting the HTML
    # (explicit wait instead of a fixed sleep).
    browser.is_element_present_by_css("img.wide-image", wait_time=WAIT_SECONDS)
    html = browser.html
    soup = BeautifulSoup(html, "html.parser")

    wide_image = soup.find("img", class_="wide-image")
    title_tag = soup.find("h2", class_="title")
    if wide_image is None or title_tag is None:
        # find() returns None when nothing matches; stop with a clear message
        # rather than an opaque TypeError/AttributeError on None.
        raise RuntimeError(
            f"Hemisphere page #{link_index + 1} is missing the expected "
            "<img class='wide-image'> or <h2 class='title'> element; "
            "the page may not have finished loading."
        )

    # `src` is prefixed with the site root to form the image URL.
    # assumes `src` is a site-relative path starting with "/" -- TODO confirm
    hemispheres.append(
        {
            "title": title_tag.text,
            "img url": "https://astrogeology.usgs.gov" + wide_image["src"],
        }
    )

    # Return to the search results for the next hemisphere.
    browser.back()

print(hemispheres)

[{'title': 'Cerberus Hemisphere Enhanced', 'img url': 'https://astrogeology.usgs.gov/cache/images/f5e372a36edfa389625da6d0cc25d905_cerberus_enhanced.tif_full.jpg'}, {'title': 'Schiaparelli Hemisphere Enhanced', 'img url': 'https://astrogeology.usgs.gov/cache/images/3778f7b43bbbc89d6e3cfabb3613ba93_schiaparelli_enhanced.tif_full.jpg'}, {'title': 'Syrtis Major Hemisphere Enhanced', 'img url': 'https://astrogeology.usgs.gov/cache/images/555e6403a6ddd7ba16ddb0e471cadcf7_syrtis_major_enhanced.tif_full.jpg'}, {'title': 'Valles Marineris Hemisphere Enhanced', 'img url': 'https://astrogeology.usgs.gov/cache/images/b3c7c6c9138f57b4756be9b9c43e3a48_valles_marineris_enhanced.tif_full.jpg'}]
