# Web Scraping Homework - Mission to Mars

## Scraping 

### NASA Mars News

In [1]:
# Dependencies
import re
import shutil
import time

import pandas as pd
import requests
from bs4 import BeautifulSoup as bs
from splinter import Browser


In [2]:
# Shell escape: print the path of the chromedriver executable found on PATH
!which chromedriver

/usr/local/bin/chromedriver


In [3]:
# Resolve chromedriver from PATH so the notebook is not tied to one machine's
# layout; fall back to the original hard-coded location when it is not found.
chromedriver_path = shutil.which('chromedriver') or '/usr/local/bin/chromedriver'
executable_path = {'executable_path': chromedriver_path}

# headless=False: run Chrome with a visible window
browser = Browser('chrome', **executable_path, headless=False)

In [4]:
# NASA Mars news page: load it in the browser session
url = "https://mars.nasa.gov/news/"
browser.visit(url)

In [5]:
# Fetch the same URL with the requests module and display the HTTP status.
# This response is only a reachability check: the HTML parsed in the next
# cell is taken from the browser session, not from here.
# The timeout keeps the cell from hanging forever if the server never answers.
response = requests.get(url, timeout=10)
response

<Response [200]>

In [6]:
# Wait (up to 5 s) for the article list to be present before snapshotting the
# DOM, so that a slow render does not leave 'list_text' out of the parsed HTML.
browser.is_element_present_by_css('div.list_text', wait_time=5)

# Create BeautifulSoup object from the browser's current page; parse with 'lxml'
html = browser.html
soup = bs(html, "lxml")

In [7]:
# Retrieve the parent div of the first article on the page
result = soup.find('div', class_='list_text')

# find() returns None when nothing matches; stop with a clear message instead
# of an AttributeError ('NoneType' has no attribute 'find') on the next lookup.
if result is None:
    raise ValueError("No <div class='list_text'> found - the news page may not have finished loading")

# scrape the article header
news_title = result.find('div', class_='content_title').text

# scrape the article subheader
news_p = result.find('div', class_='article_teaser_body').text

print(news_title)
print(news_p)

How NASA's Mars Helicopter Will Reach the Red Planet's Surface
The small craft will seek to prove that powered, controlled flight is possible on another planet. But just getting it onto the surface of Mars will take a whole lot of ingenuity.


### JPL Mars Space Images - Featured Image

In [8]:
# URL where the featured image is located
url_img = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'
browser.visit(url_img)

# Click FULL IMAGE button, then pause briefly before looking for the next link
browser.click_link_by_id("full_image")
time.sleep(2)

# Click More Info button
# NOTE(review): the click_link_by_* helpers are reported as deprecated in newer
# splinter releases - confirm against the installed version before upgrading.
browser.click_link_by_partial_text("more info")

# Wait (up to 5 s) for the main image to be present before snapshotting the DOM,
# so the HTML is not captured before the page reached by the click has loaded.
browser.is_element_present_by_css("img.main_image", wait_time=5)

# Create BeautifulSoup object; parse with 'lxml'
html_img = browser.html
soup = bs(html_img, "lxml")

# Scrape for featured image; find() returns None when the tag is absent, so
# fail with a clear message rather than a TypeError on the ['src'] lookup.
main_image = soup.find('img', class_='main_image')
if main_image is None:
    raise ValueError("No <img class='main_image'> found on the featured image page")

# The scraped src is joined onto the site root to form the full URL
featured_url = main_image['src']
featured_image_url = "https://www.jpl.nasa.gov" + featured_url
featured_image_url



'https://www.jpl.nasa.gov/spaceimages/images/largesize/PIA17832_hires.jpg'

### Mars Weather

In [9]:
# Twitter account that posts the Mars weather reports
tw_url = "https://twitter.com/marswxreport?lang=en"
browser.visit(tw_url)

# Wait (up to 5 s) until the text scraped below is actually on the page.
# Waiting on the content itself is independent of the page's exact div nesting.
browser.is_text_present("InSight", wait_time=5)

soup = bs(browser.html, "html.parser")

# First text node containing "InSight"; None when no such text is found
mars_weather = soup.find(text = re.compile("InSight"))

print(mars_weather)


InSight sol 563 (2020-06-27) low -89.5ºC (-129.1ºF) high -3.9ºC (25.0ºF)
winds from the SW at 5.5 m/s (12.3 mph) gusting to 18.5 m/s (41.3 mph)
pressure at 7.60 hPa


### Mars Facts

In [10]:
# Page holding the Mars facts table
facts_url = "https://space-facts.com/mars/"

# pandas.read_html fetches the page and returns a list of DataFrames,
# one for each HTML table it can parse.
tables = pd.read_html(facts_url)

In [11]:
# Take the first parsed table and label its two columns
myTable = tables[0]
myTable.columns = ["Parameter", "Value"]
myTable

Unnamed: 0,Parameter,Value
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.39 × 10^23 kg (0.11 Earths)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.38 AU)"
5,Orbit Period:,687 days (1.9 years)
6,Surface Temperature:,-87 to -5 °C
7,First Record:,2nd millennium BC
8,Recorded By:,Egyptian astronomers


In [12]:
# Write the facts table to disk as an HTML table
myTable.to_html(buf="table_mars.html")

### Mars Hemispheres

In [13]:
# USGS Astrogeology search results for the enhanced Mars hemisphere images
usgs_url = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'
browser.visit(usgs_url)

# Parse the search-results page
html_hem = browser.html
soup = bs(html_hem, 'html.parser')

# Retrieve the parent divs for all images
results = soup.find_all("div", class_="item")

# Site root, prepended to the href/src values scraped below
main_url = 'https://astrogeology.usgs.gov'

# Collects one {"title": ..., "img_url": ...} dict per result
hemisphere_image_urls = []

# The loop uses its own names (item, detail_soup) rather than `result` and
# `soup`, so it does not overwrite objects that other cells assign and read.
for item in results:
    # scrape the title
    title = item.find('h3').text

    # scrape the link to the page that holds the full image
    half_full_img = item.find('a', class_='itemLink product-item')['href']

    # go to the link with the full image
    browser.visit(main_url + half_full_img)

    # parse the individual image page
    html_individual = browser.html
    detail_soup = bs(html_individual, 'html.parser')

    # Retrieve full image source
    img_url = main_url + detail_soup.find('img', class_='wide-image')['src']

    # Append to the list of dictionaries
    hemisphere_image_urls.append({"title": title, "img_url": img_url})

hemisphere_image_urls


[{'title': 'Cerberus Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/f5e372a36edfa389625da6d0cc25d905_cerberus_enhanced.tif_full.jpg'},
 {'title': 'Schiaparelli Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/3778f7b43bbbc89d6e3cfabb3613ba93_schiaparelli_enhanced.tif_full.jpg'},
 {'title': 'Syrtis Major Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/555e6403a6ddd7ba16ddb0e471cadcf7_syrtis_major_enhanced.tif_full.jpg'},
 {'title': 'Valles Marineris Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/b3c7c6c9138f57b4756be9b9c43e3a48_valles_marineris_enhanced.tif_full.jpg'}]