# Web Scraping Homework - Mission to Mars

In [None]:
from selenium import webdriver
from webdriver_manager.chrome import ChromeDriverManager

In [None]:
#import dependencies
from splinter import Browser
from bs4 import BeautifulSoup as bs
import pandas as pd
import requests
import pymongo
from flask import Flask, render_template, redirect
from flask_pymongo import PyMongo
import time

In [None]:
# Install/locate the Chrome driver and start a visible (non-headless)
# Chrome session that Splinter will control
executable_path = {'executable_path': ChromeDriverManager().install()}
browser = Browser('chrome', headless=False, **executable_path)

<strong> Hint:</strong> Use Splinter to navigate the sites when needed and BeautifulSoup to help find and parse out the necessary data.

In [None]:
# Empty dictionary that accumulates the scraped results as the notebook runs
scraped_data = dict()

## NASA Mars News

Scrape the [NASA Mars News Site](https://mars.nasa.gov/news/) and collect the latest News Title and Paragraph Text. Assign the text to variables that you can reference later.

In [None]:
# Visit the Mars news site through Splinter
nasa_url = "https://redplanetscience.com"
browser.visit(nasa_url)
# Wait up to 5 seconds for the first headline element (the one the parsing
# cell below reads) to appear, rather than always sleeping for the full
# 5 seconds regardless of how quickly the page is ready.
browser.is_element_present_by_css('div.content_title', wait_time=5)

In [None]:
# Snapshot the page as currently rendered in the browser and parse it
# with BeautifulSoup's built-in 'html.parser'
html = browser.html
soup = bs(html, features='html.parser')

In [None]:
# Take the first headline div and the first teaser div on the page and
# keep their text content
title_div = soup.find('div', attrs={'class': 'content_title'})
teaser_div = soup.find('div', attrs={'class': 'article_teaser_body'})
news_title = title_div.get_text()
news_p = teaser_div.get_text()
print(news_title, news_p, sep="\n")

In [None]:
# Bundle the headline and teaser text into one record and display it
scrape_nasa_news = dict(Title=news_title, Paragraph=news_p)
scrape_nasa_news

In [None]:
# Record the headline and teaser text in the shared results dictionary
scraped_data.update({"Title": news_title, "Paragraph": news_p})

## JPL Mars Space Images - Featured Image

- Visit the url for JPL Featured Space Image [here](https://spaceimages-mars.com).
- Use splinter to navigate the site and find the image url for the current Featured Mars Image and assign the url string to a variable called featured_image_url.
- Find the image url to the full size .jpg image. Make sure to save a complete url string for this image.

In [None]:
# Open the featured-image site and capture the rendered page
mars_url = "https://spaceimages-mars.com"
browser.visit(mars_url)
image_html = browser.html

# Parse the captured page with BeautifulSoup's built-in 'html.parser'
soup = bs(image_html, features="html.parser")

In [None]:
# The featured image is the first <img> carrying the "headerimage fade-in"
# classes; its src is joined to the site root to form a complete URL
header_images = soup.find_all("img", class_="headerimage fade-in")
featured_image = header_images[0]["src"]
featured_image_url = f"{mars_url}/{featured_image}"
print(featured_image_url)

In [None]:
# Wrap the featured image URL in its own record and display it
jpl = dict(img_url=featured_image_url)
jpl

In [None]:
# Record the featured image URL in the shared results dictionary
scraped_data.update(img_url=featured_image_url)

In [None]:
# Close the Splinter-controlled browser session started earlier in the notebook
browser.quit()

## Mars Facts

Visit the Mars Facts webpage [here](https://galaxyfacts-mars.com/) and use Pandas to scrape the table containing facts about the planet.
Use Pandas to convert the data to an HTML table string.

In [None]:
# Let pandas parse every <table> on the facts page and keep the first one
facts_url = "https://galaxyfacts-mars.com/"
tables = pd.read_html(facts_url)
facts_data = tables[0]
facts_data

In [None]:
# Name the three columns, then use the description column as the row index
facts_data.columns = ["Description", "Mars", "Earth"]
facts_data = facts_data.set_index("Description")
facts_data

In [None]:
# Render the facts as an HTML table string. The "Description" labels were
# moved into the index, so the index must be part of the output; passing
# index=False would drop them and leave only unlabeled values.
facts_table = facts_data.to_html()
facts_table

In [None]:
# Strip newlines from the HTML string and check the result. str.replace
# returns a new string rather than modifying in place, so the result has to
# be assigned back for the change to take effect.
facts_table = facts_table.replace("\n", "")
print(facts_table)

In [None]:
# Store the facts table under "htmlTable" as its HTML string (facts_table),
# not as the DataFrame it was rendered from, so the value matches its key
mars_facts = {"htmlTable": facts_table}

## Mars Hemispheres:

Visit the USGS Astrogeology site [here](https://marshemispheres.com/) to obtain high-resolution images for each of Mars's hemispheres.

You will need to click each of the links to the hemispheres in order to find the image url to the full resolution image.

Save both the image url string for the full resolution hemisphere image, and the Hemisphere title containing the hemisphere name. Use a Python dictionary to store the data using the keys img_url and title.

Append the dictionary with the image url string and the hemisphere title to a list. This list will contain one dictionary for each hemisphere.

<strong>Example:</strong> hemisphere_image_urls = [ {"title": "Valles Marineris Hemisphere", "img_url": "..."}, {"title": "Cerberus Hemisphere", "img_url": "..."}, {"title": "Schiaparelli Hemisphere", "img_url": "..."}, {"title": "Syrtis Major Hemisphere", "img_url": "..."}, ]

In [None]:
# Mars Hemispheres
# Start a fresh Chrome session through Splinter (the previous one was quit)
# and open the hemispheres site
hem_url = 'https://marshemispheres.com/'

executable_path = {'executable_path': ChromeDriverManager().install()}
browser = Browser('chrome', headless=False, **executable_path)
browser.visit(hem_url)

In [None]:
# Snapshot the hemispheres landing page and parse it with BeautifulSoup's
# built-in 'html.parser'
html = browser.html
soup = bs(html, features='html.parser')

In [None]:
# Every <div class="item"> on the landing page; the following cells read a
# link and a title out of each one
items = soup.find_all('div', attrs={'class': 'item'})

In [None]:
# Build two parallel lists from the hemisphere entries: the absolute URL of
# each entry's first link, and the entry's heading text with surrounding
# whitespace removed.
hemi_urls = [hem_url + entry.find('a')['href'] for entry in items]
hemi_title = [entry.find('h3').text.strip() for entry in items]

print(hemi_urls)
hemi_title

In [None]:
# Visit each hemisphere page in turn and collect the absolute URL of its
# 'wide-image' <img>
hemi_img_urls = []

for url in hemi_urls:
    browser.visit(url)

    # Re-parse the page the browser is now showing
    html = browser.html
    soup = bs(html, 'html.parser')

    wide_image = soup.find('img', class_='wide-image')
    hemi_img_urls.append(hem_url + wide_image['src'])

hemi_img_urls

In [None]:
# Pair each hemisphere title with the image URL at the same position,
# producing one {'title', 'img_url'} dictionary per hemisphere
usgs = [
    {'title': title, 'img_url': hemi_img_urls[position]}
    for position, title in enumerate(hemi_title)
]

usgs

In [None]:
# Close the browser session that was opened for the hemispheres pages
browser.quit()

In [None]:
# Collect every scraped result into one dictionary. "hem_url" only holds the
# hemispheres site's base URL, so the scraped hemisphere titles and image
# URLs (the `usgs` list) are stored under their own key as well.
mars_dict = {
    "news_title": news_title,
    "news_p": news_p,
    "featured_image_url": featured_image_url,
    "facts_table": facts_table,
    "hem_url": hem_url,
    "hemisphere_image_urls": usgs,
}
mars_dict

## MongoDB and Flask Application

- Use MongoDB with Flask templating to create a new HTML page that displays all of the information that was scraped from the URLs above.
- Start by converting your Jupyter notebook into a Python script called scrape_mars.py with a function called scrape that will execute all of your scraping code from above and return one Python dictionary containing all of the scraped data.
- Next, create a route called /scrape that will import your scrape_mars.py script and call your scrape function.
- Store the return value in Mongo as a Python dictionary.
- Create a root route / that will query your Mongo database and pass the mars data into an HTML template to display the data.
- Create a template HTML file called index.html that will take the mars data dictionary and display all of the data in the appropriate HTML elements. Use the following as a guide for what the final product should look like, but feel free to create your own design.

In [None]:
# Use pymongo to set up mongo connection
# conn =  "mongodb://localhost:27017/mars_mission_scraping"
# client =  pymongo.MongoClient(conn)

In [None]:
# Get collection and drop existing data for this application
# db = client.mars_mission_scraping
# db.mars_data.drop()

In [None]:
# db.mars_data.insert_many([scraped_data])