## Mission to Mars - Web Scraping Challenge
### Matheus Gratz - matheusgratz@gmail.com - https://github.com/matheusgratz/

In [1]:
# Dependencies
import os
import re
import requests
import pandas as pd
from bs4 import BeautifulSoup
from splinter import Browser

## 1. NASA Mars News

In [2]:
# Landing page of the NASA Mars news site; the latest headline is scraped from it.
url = "https://mars.nasa.gov/news/"

In [3]:
# Retrieve the page with the requests module.
# The timeout keeps the cell from hanging forever on a stalled connection, and
# raise_for_status() stops here on an HTTP error instead of letting the next
# cells parse an error page.
response = requests.get(url, timeout=30)
response.raise_for_status()

In [4]:
# Parse the downloaded HTML with Python's built-in 'html.parser' backend.
soup = BeautifulSoup(markup=response.text, features="html.parser")

In [5]:
# Scrape the NASA Mars News Site and collect the latest News Title and Paragraph Text.
# Assign the text to variables that you can reference later.
news_title = soup.find_all('div', class_='content_title')
news_paragraph = soup.find_all('div', class_='rollover_description_inner')

# Fail with a descriptive message instead of a bare IndexError when the
# expected elements are missing from the parsed page.
if not news_title or not news_paragraph:
    raise ValueError(
        "No news title/paragraph elements found in the parsed page; "
        "the page layout may have changed or the content was not in the response."
    )

# The first match is the most recent article; drop the newlines around the text.
title = news_title[0].text.replace('\n', '')
paragraph = news_paragraph[0].text.replace('\n', '')

print('------ Latest News ----------')
print(f'News Title : {title}')
print(f'News Paragraph : {paragraph}')
print(' ')

------ Latest News ----------
News Title : NASA to Broadcast Mars 2020 Perseverance Launch, Prelaunch Activities
News Paragraph : Starting July 27, news activities will cover everything from mission engineering and science to returning samples from Mars to, of course, the launch itself.
 


## 2. JPL Mars Space Images

In [6]:
# Open a visible (non-headless) Chrome window and load the JPL Mars image gallery.
url = "https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars"
browser = Browser("chrome", headless=False)
browser.visit(url)

In [7]:
# Use splinter to navigate the site and find the image url.
# Look up the element whose id is "full_image" and click it in the live browser session.

full_image_button = browser.find_by_id("full_image")
full_image_button.click()

In [8]:
# Make sure to find the image url to the full size .jpg image.
# Make sure to save a complete url string for this image.
# The presence check's return value is discarded; with wait_time=1 it only
# gives the page up to a second to show the text before the lookup below.
# NOTE(review): newer splinter releases expose link lookups as
# browser.links.find_by_partial_text -- confirm against the installed version.
browser.is_element_present_by_text("more info", wait_time=1)
more_info_element = browser.find_link_by_partial_text("more info")
more_info_element.click()

In [9]:
# Snapshot the HTML of the page the browser is currently on and parse it.
html = browser.html
image_soup = BeautifulSoup(markup=html, features="html.parser")

In [10]:
# Locate the <img> inside the lede figure's link; its src is a site-relative path.
img_tag = image_soup.select_one("figure.lede a img")
if img_tag is None or not img_tag.get("src"):
    raise ValueError(
        "Featured image not found: no 'figure.lede a img' element with a src "
        "attribute on the parsed page."
    )
img_url = img_tag.get("src")

# Keep the complete URL in a variable (not just printed) so it can be reused later.
featured_image_url = f'https://www.jpl.nasa.gov{img_url}'
print(featured_image_url)

https://www.jpl.nasa.gov/spaceimages/images/largesize/PIA19036_hires.jpg


In [11]:
# Shut down the browser session used for the JPL image scrape.
browser.quit()

## 3. Mars Weather

In [12]:
# Open a visible (non-headless) Chrome window on the Mars Weather Twitter account.
url = "https://twitter.com/marswxreport?lang=en"
browser = Browser("chrome", headless=False)
browser.visit(url)

In [13]:
# Snapshot the HTML the browser currently holds and parse it with 'html.parser'.
html = browser.html
weather_soup = BeautifulSoup(markup=html, features="html.parser")

In [13]:
# Find a Tweet with the data-name `Mars Weather`
mars_weather_tweet = weather_soup.find("div", attrs={"class": "tweet", "data-name": "Mars Weather"})

# Preferred source: the tweet-text paragraph inside the matched tweet container.
weather_tag = None
if mars_weather_tweet is not None:
    weather_tag = mars_weather_tweet.find("p", "tweet-text")

# Fallback when that markup is absent: the first <span> whose text contains 'sol'.
if weather_tag is None:
    pattern = re.compile(r'sol')
    weather_tag = weather_soup.find('span', string=pattern)

# Fail with a clear message rather than an AttributeError on None.
if weather_tag is None:
    raise ValueError(
        "No Mars weather tweet found in the parsed page; the page may not have "
        "finished rendering or its markup has changed."
    )

mars_weather = weather_tag.get_text()

print(mars_weather)

InSight sol 674 (2020-10-19) low -96.0ºC (-140.8ºF) high -7.4ºC (18.6ºF)
winds from the SW at 5.9 m/s (13.1 mph) gusting to 18.5 m/s (41.3 mph)
pressure at 7.50 hPa


In [14]:
# Shut down the browser session used for the Mars weather scrape.
browser.quit()

## 4. Mars Facts

In [15]:
# Have pandas parse the tables on the Mars facts page and keep the first one.
mars_tables = pd.read_html("https://space-facts.com/mars/")
mars_df = mars_tables[0]


In [16]:
# Give the two scraped columns readable names.
mars_df.columns = ["Description", "Value"]


In [17]:
# Use the "Description" column as the index.
# The guard makes the cell safe to re-run: once the column has become the index,
# a second set_index("Description") would raise KeyError.
if mars_df.index.name != "Description":
    mars_df = mars_df.set_index("Description")
mars_df

Unnamed: 0_level_0,Value
Description,Unnamed: 1_level_1
Equatorial Diameter:,"6,792 km"
Polar Diameter:,"6,752 km"
Mass:,6.39 × 10^23 kg (0.11 Earths)
Moons:,2 (Phobos & Deimos)
Orbit Distance:,"227,943,824 km (1.38 AU)"
Orbit Period:,687 days (1.9 years)
Surface Temperature:,-87 to -5 °C
First Record:,2nd millennium BC
Recorded By:,Egyptian astronomers


## 5. Mars Hemispheres

In [18]:
# Open a visible (non-headless) Chrome window on the USGS Mars hemisphere search results.
url = "https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars"
browser = Browser("chrome", headless=False)
browser.visit(url)

In [19]:
# Collect one {"img_url", "title"} record per hemisphere listed on the results page.
hemisphere_urls = []

# Only the number of result links is taken up front; each link is looked up
# again inside the loop because the browser leaves and returns to this page.
links = browser.find_by_css("a.product-item h3")
link_count = len(links)

for position in range(link_count):
    # Open the detail page of the hemisphere at this position.
    browser.find_by_css("a.product-item h3")[position].click()

    # The "Sample" link on the detail page points at the image file.
    sample_link = browser.find_link_by_text("Sample").first

    hemisphere_urls.append(
        {
            "img_url": sample_link["href"],
            "title": browser.find_by_css("h2.title").text,
        }
    )

    # Go back to the results list for the next hemisphere.
    browser.back()

In [20]:
# Display the collected list of hemisphere image URLs and titles.
hemisphere_urls

[{'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg',
  'title': 'Cerberus Hemisphere Enhanced'},
 {'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg',
  'title': 'Schiaparelli Hemisphere Enhanced'},
 {'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg',
  'title': 'Syrtis Major Hemisphere Enhanced'},
 {'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg',
  'title': 'Valles Marineris Hemisphere Enhanced'}]

In [21]:
# Shut down the browser session used for the hemisphere scrape.
browser.quit()