In [19]:
#BeautifulSoup, Pandas, and Requests/Splinter.
from bs4 import BeautifulSoup as bs
from splinter import Browser
import requests
import pandas as pd
import time
from flask import Flask, jsonify, render_template, redirect
import pymongo

executable_path = {"executable_path": "/usr/local/bin/chromedriver"}
browser = Browser("chrome", **executable_path, headless=False)

In [20]:
#* Scrape the [NASA Mars News Site](https://mars.nasa.gov/news/) and collect the latest News Title and Paragraph Text.
#Loading page

url = 'https://mars.nasa.gov/news/'
browser.visit(url)

# Not too fast...
time.sleep(2)

# Put it all in a soup
html = browser.html  
soup = bs(html, 'html.parser')

# Collect news and title
news_title = soup.select('.grid_gallery.list_view li.slide .content_title a', limit=1)[0].contents[0]
news_p = soup.select('.grid_gallery.list_view li.slide .article_teaser_body', limit=1)[0].contents[0]

# Create a dictionary and add to return value dictionary
mars_portal_info={}
mars_portal_info["news_title"] = news_title
mars_portal_info["news_description"] = news_p
print(news_title)

print(news_p)

IndexError: list index out of range

In [None]:
#* Visit the url for JPL Featured Space Image [here](https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars).
# Use splinter to navigate the site and find the image url for the current Featured Mars Image and assign the 
#url string to a variable called `featured_image_url`.
#Make sure to find the image url to the full size `.jpg` image.
# Make sure to save a complete url string for this image.

#
#<article alt="Bright Penelope" class="carousel_item" style="background-image: 
#url('/spaceimages/images/wallpaper/PIA11591-1920x1200.jpg');">
#
#becomes:
# https://www.jpl.nasa.gov/spaceimages/images/largesize/PIA11591_hires.jpg


#Loading the website
jpl_img_string="https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars"
browser.visit(jpl_img_string)
# Wait for page to load
time.sleep(2)

# Sending it to soup
html = browser.html  
soup = bs(html, 'html.parser')

# Finding the image
featim_rel = soup.select('div.carousel_container .floating_text_area footer a')[0]["data-fancybox-href"]
featured_image_url = f"https://www.jpl.nasa.gov{featim_rel}"

# Add to return value dictionary
mars_portal_info["featured_image_url"] = featured_image_url

print(featured_image_url)


In [None]:
### Mars Weather
#* Visit the Mars Weather twitter account [here](https://twitter.com/marswxreport?lang=en) and scrape the latest Mars 
#weather tweet from the page. Save the tweet text for the weather report as a variable called `mars_weather`.
#
#Get tweet page

tweets_url = "https://twitter.com/marswxreport?lang=en"
browser.visit(tweets_url)

#Load it slow
time.sleep(2)

#Soup to read it
html = browser.html  
soup = bs(html, 'html.parser')

# Find all tweets on the page
all_tweets = soup.select('.stream-items .js-stream-item .tweet .content p')

# Loop through tweets to get the first actual weather report
mars_weather = ""
for result in all_tweets:
    tweet_text = result.contents[0]
    tweet_first_three = tweet_text[0:3]
    if tweet_first_three == 'Sol':
        mars_weather = tweet_text
    break
# Add to return value dictionary
mars_portal_info["mars_weather"] = mars_weather
print(mars_weather)


In [21]:
#* Visit the Mars Facts webpage [here](https://space-facts.com/mars/) and use Pandas to scrape the table containing 
#facts about the planet including Diameter, Mass, etc.
#* Use Pandas to convert the data to a HTML table string.
#
#Getting table from site

mars_facts_url="https://space-facts.com/mars/"
tables = pd.read_html(mars_facts_url)
df = tables[0]

# Moving it to a table
mars_facts_table = df.to_html(buf=None, columns=None, col_space=None, header=False, index=False, \
na_rep='NaN', index_names=False, justify='right', bold_rows=True, classes=None, \
escape=True, max_rows=None, max_cols=None, show_dimensions=False, \
notebook=False, decimal='.', border=1)
mars_portal_info["mars_facts_table"] = mars_facts_table


In [22]:
### Mars Hemispheres
#* Visit the USGS Astrogeology site [here]
#(https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars)
#to obtain high resolution images for each of Mar's hemispheres.
#* You will need to click each of the links to the hemispheres in order to find the image url to the full resolution 
#image.
#
#https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars
#
#* Save both the image url string for the full resolution hemisphere image, and the Hemisphere title containing the 
#hemisphere name. Use a Python dictionary to store the data using the keys `img_url` and `title`.
#* Append the dictionary with the image url string and the hemisphere title to a list. This list will contain one 
#dictionary for each hemisphere.

# Example:
hemisphere_image_urls = [
    {"title": "Valles Marineris Hemisphere", "http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif": "..."},
    {"title": "Cerberus Hemisphere", "http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif": "..."},
    {"title": "Schiaparelli Hemisphere", "http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif": "..."},
    {"title": "Syrtis Major Hemisphere", "http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif": "..."},
]

In [24]:
## Step 2 - MongoDB and Flask Application
#
#Use MongoDB with Flask templating to create a new HTML page that displays all of the information that was scraped from
#the URLs above.
#
#* Start by converting your Jupyter notebook into a Python script called `scrape_mars.py` with a function called 
#`scrape` that will execute all of your 
#scraping code from above and return one Python dictionary containing all of the scraped data.
#
#* Next, create a route called `/scrape` that will import your `scrape_mars.py` script and call your `scrape` function.
#
#  * Store the return value in Mongo as a Python dictionary.
#
#* Create a root route `/` that will query your Mongo database and pass the mars data into an HTML template to display
#the data.
#
#* Create a template HTML file called `index.html` that will take the mars data dictionary and display all of the data
#in the appropriate HTML elements. Use the following as a guide for what the final product should look like, but feel 
#free to create your own design.
#
#![final_app_part1.png](Images/final_app_part1.png)
#![final_app_part2.png](Images/final_app_part2.png)

# Starting stuff
app = Flask(__name__)

# Routes
@app.route("/")
def index():
    mars_index_info = mongo.db.mars_db.find_one()
    return render_template("index.html", portal_info=mars_index_info)

# Putting the data into mars_db and the routes
@app.route("/scrape")
def scrape():
    # Grab dictionary of scraped values from function
    marsportal_scraped_values = scrape_mars.scrape()   # THE REAL THING

    # Declare the database
    mars_db = mongo.db.mars_db

    # Insert (or update) the document containing our dictionary into the database
    mars_db.update(
        {},
        marsportal_scraped_values,
        upsert=True
    )
    # Redirect to the home page to pass MongoDB values into our HTML template
    return redirect("http://localhost:5000/", code=302)

if __name__ == "__main__":
    app.run(debug=True)

 * Serving Flask app "__main__" (lazy loading)
 * Environment: production
   Use a production WSGI server instead.
 * Debug mode: on


OSError: [Errno 48] Address already in use