In [23]:
# Load Dependencies 
from bs4 import BeautifulSoup as bs
import pandas as pd
import requests
from splinter import Browser
import os
from urllib.parse import urlsplit

In [24]:
# ChromeDriver path
executable_path = {"executable_path":"/Users/deanna/Desktop/Data_Sci_HW/web-scraping-challenge/chromedriver"}
browser = Browser("chrome", **executable_path, headless = False)

In [25]:
# NASA News Site
url = "https://mars.nasa.gov/news/"
browser.visit(url)

In [26]:
# Beautiful Soup parses HTML results 
html = browser.html
soup = bs(html,"html.parser")

# NASA Mars News

In [27]:
# Print Title and Paragraphs 
title = soup.find("div",class_="content_title").text
para = soup.find("div", class_="article_teaser_body").text
print(f"Title: {title}")
print(f"Para: {para}")

Title: Nine Finalists Chosen in NASA's Mars 2020 Rover Naming Contest
Para: Nine finalists have been chosen in the essay contest for K-12 students across U.S. to name NASA's next Mars rover. Now you can help by voting for your favorite. 


# JPL Mars Space Images - Featured Image

In [28]:
# URL to navigate to Featured Mars image 
featured_image_url = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'
browser.visit(featured_image_url)

In [29]:
base_url = "{0.scheme}://{0.netloc}/".format(urlsplit(featured_image_url))
print(base_url)

https://www.jpl.nasa.gov/


In [30]:
xpath = "//*[@id=\"page\"]/section[3]/div/ul/li[1]/a/div/div[2]/img"

In [31]:
mars_feat_image = browser.find_by_xpath(xpath)

In [32]:
img = mars_feat_image[0]

In [33]:
img.click()

In [34]:
html_image = browser.html

In [35]:
soup = bs(html_image, "html.parser")

In [36]:
img_url = soup.find("img", class_="fancybox-image")["src"]

In [37]:
feat_img_url = base_url + img_url

In [38]:
print(feat_img_url)

https://www.jpl.nasa.gov//spaceimages/images/largesize/PIA23655_hires.jpg


# Mars Weather

In [39]:
url_weather = "https://twitter.com/marswxreport?lang=en"
browser.visit(url_weather)

In [40]:
html_weather = browser.html
soup = bs(html_weather, "html.parser")

In [42]:
# To find weather with BS use soup.find('div', attrs=('class': 'tweet', 'data-name': 'Mars Weather'))
mars_weather = soup.find("p", class_="TweetTextSize TweetTextSize--normal js-tweet-text tweet-text").text
print(mars_weather)

AttributeError: 'NoneType' object has no attribute 'text'

# Mars Facts

In [43]:
facts_url = 'https://space-facts.com/mars/'

In [44]:
facts_table = pd.read_html(facts_url)
facts_table[0]

Unnamed: 0,0,1
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.39 × 10^23 kg (0.11 Earths)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.38 AU)"
5,Orbit Period:,687 days (1.9 years)
6,Surface Temperature:,-87 to -5 °C
7,First Record:,2nd millennium BC
8,Recorded By:,Egyptian astronomers


In [45]:
facts_df = facts_table[0]
# change column names
facts_df.columns = ['Measurement', 'Value']
facts_df.set_index('Measurement')

Unnamed: 0_level_0,Value
Measurement,Unnamed: 1_level_1
Equatorial Diameter:,"6,792 km"
Polar Diameter:,"6,752 km"
Mass:,6.39 × 10^23 kg (0.11 Earths)
Moons:,2 (Phobos & Deimos)
Orbit Distance:,"227,943,824 km (1.38 AU)"
Orbit Period:,687 days (1.9 years)
Surface Temperature:,-87 to -5 °C
First Record:,2nd millennium BC
Recorded By:,Egyptian astronomers


In [46]:
# Convert Pandas df to HTML String 
facts_html_string = facts_df.to_html()
facts_html_string

'<table border="1" class="dataframe">\n  <thead>\n    <tr style="text-align: right;">\n      <th></th>\n      <th>Measurement</th>\n      <th>Value</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>Equatorial Diameter:</td>\n      <td>6,792 km</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>Polar Diameter:</td>\n      <td>6,752 km</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>Mass:</td>\n      <td>6.39 × 10^23 kg (0.11 Earths)</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>Moons:</td>\n      <td>2 (Phobos &amp; Deimos)</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>Orbit Distance:</td>\n      <td>227,943,824 km (1.38 AU)</td>\n    </tr>\n    <tr>\n      <th>5</th>\n      <td>Orbit Period:</td>\n      <td>687 days (1.9 years)</td>\n    </tr>\n    <tr>\n      <th>6</th>\n      <td>Surface Temperature:</td>\n      <td>-87 to -5 °C</td>\n    </tr>\n    <tr>\n      <th>7</th>\n      <td>First Record:</td>\n      <td>2nd millennium 

# Mars Hemispheres




In [51]:
hemispheres_url = "https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars"

In [52]:
browser.visit(hemispheres_url)

In [53]:
base_hemi_url = "{0.scheme}://{0.netloc}/".format(urlsplit(hemispheres_url))
print(base_hemi_url)

https://astrogeology.usgs.gov/


In [54]:
#Create empty list for image urls 
hemi_image_urls = []

# Get a List of All the Hemispheres
links = browser.find_by_css("a.product-item h3")

for item in range(len(links)):
    hemisphere = {}
    
    # Find Element on Each Loop to Avoid a Stale Element Exception
    browser.find_by_css("a.product-item h3")[item].click()
    
    # Title
    hemisphere["title"] = browser.find_by_css("h2.title").text
    
    # Find Sample Image Anchor Tag & Extract <href>
    sample_element = browser.find_link_by_text("Sample").first
    hemisphere["img_url"] = sample_element["href"]
    

    # Append Hemisphere Object to List
    hemi_image_urls.append(hemisphere)
    
    # Navigate Backwards
    browser.back()

NoSuchWindowException: Message: no such window: target window already closed
from unknown error: web view not found
  (Session info: chrome=79.0.3945.130)


In [None]:
hemi_image_urls