In [1]:
# Step 1 HW week 13 Webscraping
# Dependencies

from splinter import Browser
from bs4 import BeautifulSoup as bs
import pymongo
import time
import pandas as pd

In [2]:
# The default port used by MongoDB is 27017
# https://docs.mongodb.com/manual/reference/default-mongodb-port/
conn = 'mongodb://localhost:27017'
client = pymongo.MongoClient(conn)

In [3]:
# Define database and collection
db = client.mars_db
collection = db.titles

## NASA Mars News

In [4]:
# Scrape the [Nasa Mars News Site] (https://mars.nasa.gov/news/) and collect the latest News Title and Paragraph Text
# Assign the text to variables that you can reference later
# Set up as Windows User!
executable_path = {'executable_path': 'chromedriver.exe'}
browser = Browser("chrome", **executable_path, headless = False)

# URL Path for mars news then configure initial BeautifulSoup and browser names
url = "https://mars.nasa.gov/news/"
browser.visit(url)
html = browser.html
soup = bs(html,"html.parser")

In [5]:
# Collect latest News Title and Paragraph Text with assigned variables for later
news_title = soup.find('div', class_='content_title').text
news_para = soup.find('div', class_='article_teaser_body').text

In [6]:
print(news_title)
print(news_para)

NASA Seeking Partner in Contest to Name Next Mars Rover
NASA has a class assignment for corporations, nonprofits and educational organizations involved in science and space exploration: partner with the agency to inspire future engineers and scientists by sponsoring a contest to name the next rover to venture to the Red Planet.


## JPL Mars Space Images

In [9]:
# Visit the url for JPL featured space image https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars
image_url = "https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars"
browser.visit(image_url)

In [10]:
browser.click_link_by_partial_text('FULL IMAGE')
time.sleep(1)
browser.click_link_by_partial_text('more info')

In [11]:
# Scrape the browser into soup and use soup to find the image of mars
# Save the image url to a variable called `img_url`
image_html = browser.html
soup = bs(image_html, "html.parser")

In [12]:
image_url = soup.find('img', class_="main_image")['src']

In [13]:
print(image_url)

/spaceimages/images/largesize/PIA17936_hires.jpg


In [14]:
main_url = 'https://www.jpl.nasa.gov/'

image_url_combined = main_url + image_url

In [15]:
print(image_url_combined)

https://www.jpl.nasa.gov//spaceimages/images/largesize/PIA17936_hires.jpg


In [16]:
browser.visit(image_url_combined)

## Mars Weather

In [17]:
# Mars twitter account  https://twitter.com/marswxreport?lang=en
# Scrape the latest Mars weather tweet from the page. 
# Save the tweet text for the weather report as a variable called "mars_weather"

In [18]:
url = 'https://twitter.com/marswxreport?lang=en'
browser.visit(url)

In [19]:
twitter_html = browser.html
soup = bs(twitter_html, "html.parser")

In [20]:
mars_weather = soup.find('p', class_="TweetTextSize TweetTextSize--normal js-tweet-text tweet-text").text

In [21]:
print(mars_weather)

Jose Morales captured Mars from Chicago last night. 15000 frames for this Mars tonight.  The South Pole, Syrtis Major Planum, and Hellas Planitia are visible.pic.twitter.com/cFkgmdoHDV


## Mars Facts

In [22]:
# Mars Facts webpage http://space-facts.com/mars/
# Use Panda to scrape the table containing facts about the planet including Diameter, Mass, etc
# Use Pandas to convert the data to a HTML table string

In [23]:
url = "https://space-facts.com/mars/"
browser.visit(url)

In [24]:
facts_html = browser.html
soup = bs(facts_html, "html.parser")

In [25]:
mars_dict = {}

In [26]:
results = soup.find('tbody').find_all('tr')

In [27]:
for result in results:
    column_description = result.find('td', class_="column-1").text
    column_fact = result.find('td', class_="column-2").text
    mars_dict[column_description] = column_fact

In [28]:
df = pd.DataFrame(list(mars_dict.items()), columns=['Facts', 'Data'])

In [29]:
df

Unnamed: 0,Facts,Data
0,Equatorial Diameter:,"6,792 km\n"
1,Polar Diameter:,"6,752 km\n"
2,Mass:,6.42 x 10^23 kg (10.7% Earth)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.52 AU)"
5,Orbit Period:,687 days (1.9 years)\n
6,Surface Temperature:,-153 to 20 °C
7,First Record:,2nd millennium BC
8,Recorded By:,Egyptian astronomers


## Mars Hemispheres

In [30]:
# USBS Astrology site https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars
# Obtain high resolution images for each of Mar's hemispheres

In [31]:
executable_path = {'executable_path': 'chromedriver.exe'}
browser = Browser("chrome", **executable_path, headless = False)
url = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'

In [32]:
hemispheres = ['Cerberus Hemisphere Enhanced', 
               'Schiaparelli Hemisphere Enhanced', 
               'Syrtis Major Hemisphere Enhanced', 
               'Valles Marineris Hemisphere Enhanced']
links = []
for hemisphere in hemispheres:
    browser.visit(url)
    browser.click_link_by_partial_text(hemisphere)
    highresMars_html = browser.html
    soup = bs(highresMars_html, "html.parser")
    image_url_hemisphere = soup.find('div', class_='downloads').a['href']
    links.append(image_url_hemisphere)
    
hemisphere_links = dict(zip(hemispheres, links))

In [33]:
print(hemisphere_links)

{'Cerberus Hemisphere Enhanced': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg', 'Schiaparelli Hemisphere Enhanced': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg', 'Syrtis Major Hemisphere Enhanced': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg', 'Valles Marineris Hemisphere Enhanced': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg'}
