In [89]:
# Dependencies
from bs4 import BeautifulSoup
import requests
import pymongo
from splinter import Browser
import pandas as pd

In [2]:
# Point splinter at the local chromedriver binary and launch Chrome with a
# visible window (headless disabled).
executable_path = dict(executable_path="chromedriver.exe")
browser = Browser("chrome", headless=False, **executable_path)

In [None]:
# Connection string for the MongoDB server on localhost, port 27017
conn = 'mongodb://localhost:27017'
client = pymongo.MongoClient(host=conn)

In [None]:
# Select the `mars_db` database and, inside it, the `articles` collection
db = client["mars_db"]
collection = db["articles"]

### Mars News

In [None]:
# URL of page to be scraped
mars_news_url = 'https://mars.nasa.gov/news/'

# Retrieve page with the requests module.
# requests applies no timeout by default, so pass one explicitly to keep the
# cell from hanging forever on an unresponsive server, and fail fast on an HTTP
# error instead of parsing an error page as if it were the news listing.
response = requests.get(mars_news_url, timeout=30)
response.raise_for_status()

# Create BeautifulSoup object; parse with 'lxml'
soup = BeautifulSoup(response.text, 'lxml')

In [None]:
# Collect the parent <div> of every article: all divs carrying the "slide" class
results = soup.find_all('div', attrs={'class': 'slide'})

In [None]:
# Maps each article title to its teaser paragraph.
post = {}

# Loop through results to retrieve the title and teaser paragraph of each article
for result in results:
    title_div = result.find('div', class_='content_title')
    title_link = title_div.find('a') if title_div is not None else None
    teaser_div = result.find('div', class_='rollover_description_inner')

    # A slide missing any of these nodes would otherwise abort the whole loop
    # with an AttributeError on None; skip it and keep the remaining articles.
    if title_link is None or teaser_div is None:
        continue

    news_t = title_link.text
    news_p = teaser_div.text

    post[news_t] = news_p

In [None]:
# Show the collected {title: teaser paragraph} mapping.
print(post)

### JPL Mars Space Images - Featured Image

In [24]:
# Open the JPL space-images page, filtered to the Mars category, in the splinter browser.
# jpl_base_url is kept separately because the scraped image path is later appended to it.
jpl_base_url = 'https://www.jpl.nasa.gov'
mars_images_url = "https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars"
browser.visit(mars_images_url)

In [25]:
# XPath matching the <a id="full_image"> link located anywhere inside a <footer> element
xpath = '//footer//a[@id="full_image"]'

In [26]:
# Use splinter to locate the featured-image link on the page and click it,
# which is the step that leads to the current Featured Mars Image.
results = browser.find_by_xpath(xpath)
mars_feat_image = results.first
mars_feat_image.click()

In [27]:
# Scrape the browser into soup and use soup to find the full resolution image of mars;
# assign the url string to a variable called `featured_image_url`.

# When this cell runs immediately after the click (e.g. Restart & Run All), the
# overlay <img> may not be in the DOM yet. Wait for it, and fail with a clear
# message instead of a TypeError from subscripting None.
FANCYBOX_WAIT_SECONDS = 10
if not browser.is_element_present_by_css("img.fancybox-image", wait_time=FANCYBOX_WAIT_SECONDS):
    raise RuntimeError("Featured image (img.fancybox-image) did not appear on the page")

html = browser.html
jpl_soup = BeautifulSoup(html, 'html.parser')
featured_image_url = jpl_soup.find("img", class_="fancybox-image")["src"]

# The scraped `src` is a site-relative path, so prefix the JPL base URL.
featured_image_url = jpl_base_url + featured_image_url

In [28]:
# Display the assembled featured-image URL (JPL base URL + scraped `src`).
featured_image_url

'https://www.jpl.nasa.gov/spaceimages/images/mediumsize/PIA19382_ip.jpg'

### Mars Weather

In [85]:
# Mars weather Twitter feed
mars_weather_twitter_url = 'https://twitter.com/marswxreport?lang=en'

# Retrieve page with the requests module.
# requests applies no timeout by default, so pass one explicitly to keep the
# cell from hanging forever, and fail fast on an HTTP error instead of parsing
# an error page as if it were the timeline.
mars_weather_response = requests.get(mars_weather_twitter_url, timeout=30)
mars_weather_response.raise_for_status()

# Create BeautifulSoup object
mars_weather_html = BeautifulSoup(mars_weather_response.text, 'html.parser')

In [86]:
# Every <li class="stream-item"> under the element with id "timeline"
timeline = mars_weather_html.select('#timeline li.stream-item')

# Take the first timeline entry (presumably the most recent tweet - confirm,
# a pinned tweet could come first) and read the text of its first
# <p class="tweet-text">.
first_entry = timeline[0]
tweet_paragraphs = first_entry.select('p.tweet-text')
mars_weather = tweet_paragraphs[0].get_text()

#mars_weather

'"Dusty" the Earthbound twin (triplet?) of @MarsRovers  Opportunity & Spirit is heading for a new home at the National @airandspace Museum.https://twitter.com/AstroStaab/status/1162103864608169984\xa0…'

### Mars Facts

In [90]:
# Address of the Mars Facts webpage that is handed to pandas for table scraping
mars_facts_url = 'https://space-facts.com/mars/'


In [118]:
# Use pandas to scrape the tables on the Mars Facts page (Diameter, Mass, etc.).
# read_html returns a list of DataFrames, one per HTML table it finds.
tables = pd.read_html(mars_facts_url)
#tables

[  Mars - Earth Comparison             Mars            Earth
 0               Diameter:         6,779 km        12,742 km
 1                   Mass:  6.39 × 10^23 kg  5.97 × 10^24 kg
 2                  Moons:                2                1
 3      Distance from Sun:   227,943,824 km   149,598,262 km
 4         Length of Year:   687 Earth days      365.24 days
 5            Temperature:    -153 to 20 °C      -88 to 58°C,
                       0                              1
 0  Equatorial Diameter:                       6,792 km
 1       Polar Diameter:                       6,752 km
 2                 Mass:  6.39 × 10^23 kg (0.11 Earths)
 3                Moons:            2 (Phobos & Deimos)
 4       Orbit Distance:       227,943,824 km (1.38 AU)
 5         Orbit Period:           687 days (1.9 years)
 6  Surface Temperature:                   -87 to -5 °C
 7         First Record:              2nd millennium BC
 8          Recorded By:           Egyptian astronomers]

In [142]:
# Take the second scraped table and transpose it so that each fact label
# becomes a column instead of a row.
facts_transposed = tables[1].T

# The first row of the transposed table holds the fact labels: use it as the
# column names, then drop that row from the data.
label_row = facts_transposed.iloc[0]
label_row_index = facts_transposed.index[0]
mars_facts_df = facts_transposed.rename(columns=label_row).drop(label_row_index)

# Normalise the column names: trim whitespace, lower-case, spaces -> underscores,
# and remove colons.
normalised_columns = mars_facts_df.columns.str.strip().str.lower()
normalised_columns = normalised_columns.str.replace(' ', '_').str.replace(':', '')
mars_facts_df.columns = normalised_columns

#mars_facts_df

Unnamed: 0,equatorial_diameter,polar_diameter,mass,moons,orbit_distance,orbit_period,surface_temperature,first_record,recorded_by
1,"6,792 km","6,752 km",6.39 × 10^23 kg (0.11 Earths),2 (Phobos & Deimos),"227,943,824 km (1.38 AU)",687 days (1.9 years),-87 to -5 °C,2nd millennium BC,Egyptian astronomers


### Mars Hemispheres

#Visit the USGS Astrogeology site [here](https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars) to obtain high-resolution images for each of Mars's hemispheres.

#You will need to click each of the links to the hemispheres in order to find the image url to the full resolution image.

#Save both the image url string for the full resolution hemisphere image, and the Hemisphere title containing the hemisphere name. Use a Python dictionary to store the data using the keys `img_url` and `title`.

#Append the dictionary with the image url string and the hemisphere title to a list. This list will contain one dictionary for each hemisphere.