# Step 1 - Scraping

In [4]:
# Dependencies
from bs4 import BeautifulSoup
from splinter import Browser
import pandas as pd
import requests

In [5]:
# URL of page to be scraped
url = 'https://mars.nasa.gov/news/?page=0&per_page=40&order=publish_date+desc%2Ccreated_at+desc&search=&category=19%2C165%2C184%2C204&blank_scope=Latest'

## NASA Mars News

In [6]:
# Retrieve page with the requests module
response = requests.get(url)
response


<Response [200]>

In [7]:
# Create BeautifulSoup object; parse with 'html.parser'
soup = BeautifulSoup(response.text, 'html.parser')


In [8]:
# results are returned as an iterable list
results = soup.find_all('div', class_="content_title")


In [9]:
# Loop through returned results
for result in results:
    # Error handling
    try:
        # Identify and return title of listing
        title = result.a.text

        # Print results only if title, price, and link are available
        if (title):
            print('-------------')
            print(title)
    except AttributeError as e:
        print(e)

-------------

NASA to Broadcast Mars 2020 Perseverance Launch, Prelaunch Activities

-------------

The Launch Is Approaching for NASA's Next Mars Rover, Perseverance

-------------

NASA to Hold Mars 2020 Perseverance Rover Launch Briefing

-------------

Alabama High School Student Names NASA's Mars Helicopter

-------------

Mars Helicopter Attached to NASA's Perseverance Rover

-------------

NASA's Perseverance Mars Rover Gets Its Wheels and Air Brakes



In [10]:
# Create path to chromedriver and initialize browser
executable_path = {"executable_path": "chromedriver.exe"}
browser = Browser("chrome", **executable_path, headless=False)
new_url = "https://mars.nasa.gov/news/"
browser.visit(new_url)

In [14]:
#Create variables
html = browser.html
soup = BeautifulSoup(html,"html.parser")
new_url = "https://mars.nasa.gov/news/"

In [15]:
# Save variables with title and news NASA webpage
news_title = soup.find("div",class_="content_title").text
news_p = soup.find("div", class_="article_teaser_body").text
print(f"Title: {news_title}")
print(f"New: {news_p}")

Title: Mars Now
New: For hobbyists and makers, 3D printing expands creative possibilities; for specialized engineers, it's also key to next-generation spacecraft design.


## JPL Mars Space Images - Featured Image

In [16]:
# Visit the NASA JPL (Jet Propulsion Laboratory) Site
executable_path = {"executable_path": "chromedriver"}
browser = Browser("chrome", **executable_path)
url = "https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars"
browser.visit(url)

In [17]:
#Use splinter to navigate the site and find the image url
full_image_button = browser.find_by_id("full_image")
full_image_button

<splinter.element_list.ElementList at 0x14503db9c70>

In [18]:
# Find "More Info" Button and Click It
browser.is_element_present_by_text("more info", wait_time=1)
more_info_element = browser.find_link_by_partial_text("more info")
more_info_element

<splinter.element_list.ElementList at 0x1450724ba90>

In [19]:
# Create variables for HTML with BeautifulSoup
html = browser.html
image_soup = BeautifulSoup(html, "html.parser")

In [21]:
#Make sure to find the image url to the full size .jpg image

img_url = image_soup.select_one("figure.lede a img")
img_url

# Use Base URL to Create Absolute URL
img_url = f"https://www.jpl.nasa.gov{img_url}"
print(img_url)

https://www.jpl.nasa.govNone


In [22]:
#Make sure to save a complete url string for this image
featured_image_url = "https://www.jpl.nasa.gov/spaceimages/images/largesize/PIA19334_hires.jpg"

## Mars Weather

In [23]:
#Visit the Mars Weather twitter account here and scrape the latest Mars weather tweet
executable_path = {"executable_path": "chromedriver.exe"}
browser = Browser("chrome", **executable_path, headless=False)
url_weather = "https://twitter.com/marswxreport?lang=en"
browser.visit(url_weather)

In [24]:
# Create variables for HTML with BeautifulSoup
html_weather = browser.html
twitter_soup = BeautifulSoup(html_weather, "html.parser")

In [32]:
# Find a Tweet with the data-name `Mars Weather`
mars_weather = soup.find("div", class_="css-901oao r-hkyrab r-1qd0xha r-a023e6 r-16dba41 r-ad9z0x r-bcqeeo r-bnwqim r-qvutc0")
print(mars_weather)

None


In [34]:
#Save the tweet text for the weather report as a variable called mars_weather
mars_weather = "sol 674 (2020-10-19) low -96.0ºC (-140.8ºF) high -7.4ºC (18.6ºF) winds from the SW at 5.9 m/s (13.1 mph) gusting to 18.5 m/s (41.3 mph) pressure at 7.50 hPa"

## Mars Facts

In [84]:
# scrape the table containing facts about the planet including Diameter, Mass, etc.
url_facts = "https://space-facts.com/mars/"
table = pd.read_html(url_facts)
table[0]

Unnamed: 0,0,1
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.39 × 10^23 kg (0.11 Earths)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.38 AU)"
5,Orbit Period:,687 days (1.9 years)
6,Surface Temperature:,-87 to -5 °C
7,First Record:,2nd millennium BC
8,Recorded By:,Egyptian astronomers


In [98]:
# Use Pandas to convert the data to a HTML table string.
df_mars_facts = table[0]
df_mars_facts.columns = ["Parameter", "Values"]
df_mars_facts.set_index(["Parameter"])
#df_mars_facts.to_csv(r'C:\Users\rober\GitHub\web-scraping-challenge\Output\mars_facts.csv')

Unnamed: 0_level_0,Values
Parameter,Unnamed: 1_level_1
Equatorial Diameter:,"6,792 km"
Polar Diameter:,"6,752 km"
Mass:,6.39 × 10^23 kg (0.11 Earths)
Moons:,2 (Phobos & Deimos)
Orbit Distance:,"227,943,824 km (1.38 AU)"
Orbit Period:,687 days (1.9 years)
Surface Temperature:,-87 to -5 °C
First Record:,2nd millennium BC
Recorded By:,Egyptian astronomers


## Mars Hemisphere

In [3]:
# Visit the USGS Astrogeology site 
executable_path = {"executable_path": "chromedriver.exe"}
browser = Browser("chrome", **executable_path, headless=False)
url_hemisphere = "https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars"
browser.visit(url_hemisphere)

In [91]:
#Save both the image url string for the full resolution hemisphere image, and the Hemisphere title containing the hemisphere name. 
hemisphere_base_url = "{0.scheme}://{0.netloc}/".format
print(hemisphere_base_url)

<built-in method format of str object at 0x000001BD092448A0>


In [4]:
# Use a Python dictionary to store the data using the keys img_url and title.
hemisphere_image_urls = []

# Get a List of All the Hemispheres
links = browser.find_by_css("a.product-item h3")
for item in range(len(links)):
    hemisphere = {}
    
    # Find Element on Each Loop to Avoid a Stale Element Exception
    browser.find_by_css("a.product-item h3")[item].click()
    
    # Find Sample Image Anchor Tag & Extract <href>
    sample_element = browser.links.find_by_text("Sample").first
    hemisphere["img_url"] = sample_element["href"]
    
    # Get Hemisphere Title
    hemisphere["title"] = browser.find_by_css("h2.title").text
    
    # Append Hemisphere Object to List
    hemisphere_image_urls.append(hemisphere)
    
    # Navigate Backwards
    browser.back()

In [93]:
# Append the dictionary with the image url string and the hemisphere title to a list. This list will contain one dictionary for each hemisphere. 
hemisphere_image_urls

[{'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg',
  'title': 'Cerberus Hemisphere Enhanced'},
 {'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg',
  'title': 'Schiaparelli Hemisphere Enhanced'},
 {'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg',
  'title': 'Syrtis Major Hemisphere Enhanced'},
 {'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg',
  'title': 'Valles Marineris Hemisphere Enhanced'}]