In [None]:
# Import dependencies
from flask import Flask, Response
from splinter import Browser
from bs4 import BeautifulSoup as bs
import datetime as dt
import pandas as pd
import time
import requests

In [None]:
#initiate scrape function
def scrape_all():
    """Run every Mars scraper and gather the results in one dictionary.

    A headless Chrome browser is started through splinter, passed to each
    scraper that needs one, and shut down again before this function exits.

    Returns:
        dict: keys "news_title", "news_paragraph", "img", "facts" and
        "hemispheres", each holding whatever the matching scraper returned.
    """
    browser = Browser("chrome", executable_path="chromedriver", headless=True)

    # try/finally guarantees the browser is shut down even when one of the
    # scrapers raises, so a failed run does not leave the browser open.
    try:
        mars_title, mars_text_div = mars_news(browser)

        # stores dictionary containing scraped data
        data_dict = {
            "news_title": mars_title,
            "news_paragraph": mars_text_div,
            "img": featured_img(browser),
            "facts": mars_facts(),
            "hemispheres": hemispheres(browser)
        }
    finally:
        browser.quit()

    return data_dict

In [None]:
def mars_news(browser):
    """Scrape the first headline and teaser from the NASA Mars news page.

    Args:
        browser: splinter browser used to load the page.

    Returns:
        tuple: (title, teaser) text of the first matching news slide, or
        (None, None) when an expected element is missing from the page.
    """
    browser.visit("https://mars.nasa.gov/news")
    # wait up to one second for the article list; the result is not checked
    browser.is_element_present_by_css("ul.item_list li.slide", wait_time=1)

    page_soup = bs(browser.html, "html.parser")

    try:
        first_slide = page_soup.select_one("ul.item_list li.slide")
        headline = first_slide.find("div", class_="content_title").get_text()
        teaser = first_slide.find("div", class_="article_teaser_body").get_text()
    except AttributeError:
        # a lookup returned None, so the chained call above failed
        return None, None
    else:
        return headline, teaser

In [None]:
def featured_img(browser):
    """Navigate to the JPL featured Mars image and return its full URL.

    The function visits the JPL space-images page, clicks the element with
    id "full_image", follows the "more info" link and reads the `src` of the
    image found under `figure.lede a img`.

    Args:
        browser: splinter browser used for navigation.

    Returns:
        str or None: 'https://www.jpl.nasa.gov' followed by the image's `src`
        value, or None when the image tag or its `src` attribute is missing.
    """
    # goes to url and pauses to prevent crashing
    img_url = "https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars"
    browser.visit(img_url)
    time.sleep(1)

    # selecting and clicking on full image id
    browser.find_by_id("full_image").click()

    # finding more info button and clicking
    # NOTE(review): find_link_by_partial_text appears to be deprecated in newer
    # splinter releases (browser.links.find_by_partial_text) - confirm against
    # the installed version before changing it.
    browser.is_element_present_by_text("more info", wait_time=1)
    browser.find_link_by_partial_text("more info").click()

    # parsing through html page and pulling image 'src'
    img_soup = bs(browser.html, "html.parser")
    img = img_soup.select_one("figure.lede a img")

    # select_one returns None when nothing matches, and Tag.get returns None
    # when the attribute is absent; both cases must stop here, otherwise the
    # concatenation below would raise TypeError.
    rel_img_path = img.get('src') if img is not None else None
    if not rel_img_path:
        return None

    # creating url path to mars images
    return 'https://www.jpl.nasa.gov' + rel_img_path

In [None]:
def mars_facts():
    """Read the Mars facts table from space-facts.com and render it as HTML.

    Returns:
        str or None: HTML for the first table on the page, with columns
        'Description' (used as index) and 'Value' and the CSS classes
        "table table-striped"; None when the table could not be read.
    """
    # reading url and creating df
    facts_url = "https://space-facts.com/mars/"
    try:
        facts_table = pd.read_html(facts_url)[0]
    # maintain program integrity when the page cannot be read:
    # ValueError -> read_html found no tables, OSError -> network/URL failure
    # (urllib's URLError derives from OSError), IndexError -> empty result.
    # AttributeError is kept so anything previously swallowed still is.
    except (AttributeError, ValueError, IndexError, OSError):
        return None

    # creating columns and indexing under 'Description'
    facts_table.columns = ['Description', 'Value']
    facts_table = facts_table.set_index('Description')

    # returning the table rendered as an HTML string
    return facts_table.to_html(classes="table table-striped")

In [None]:
def hemispheres(browser, limit=4):
    """Visit each hemisphere result page and collect its scraped data.

    The USGS search page is loaded, then each result link is clicked in turn,
    the resulting page is handed to `scrape_hemisphere`, and the browser goes
    back to the result list.

    Args:
        browser: splinter browser used for navigation.
        limit: maximum number of result links to follow (default 4, the
            number that was previously hard-coded).

    Returns:
        list: one entry per visited link, each being whatever
        `scrape_hemisphere` returned for that page. Fewer than `limit`
        entries are returned when the page lists fewer links.
    """
    # going to url
    hemisphere_url = "https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars"
    browser.visit(hemisphere_url)

    # creating empty list to store hemisphere links
    hemisphere_links = []

    # looping through and returning links to the pictures
    for position in range(limit):
        # The links are looked up afresh on every pass, as before, because the
        # browser leaves the result list and comes back between clicks.
        result_links = browser.find_by_css("a.product-item h3")

        # Stop instead of indexing past the end when fewer links are present.
        if position >= len(result_links):
            break

        result_links[position].click()
        hemisphere_links.append(scrape_hemisphere(browser.html))
        browser.back()

    # returning hemispheres
    return hemisphere_links

In [None]:
def scrape_hemisphere(html_text):
    """Extract the title and "Sample" link target from a hemisphere page.

    Args:
        html_text: HTML source of a single hemisphere page.

    Returns:
        dict: {"title": <text of the h2.title element>,
        "img_url": <href of the anchor whose text is "Sample">} on success.
        When either element is missing, the tuple (None, None) is returned
        instead of a dict.
    """
    page = bs(html_text, "html.parser")

    try:
        heading = page.find("h2", class_="title")
        sample_anchor = page.find("a", text="Sample")
        title_text = heading.get_text()
        sample_href = sample_anchor.get("href")
    except AttributeError:
        # one of the lookups returned None
        return None, None

    # dictionary with the title and image url
    return dict(title=title_text, img_url=sample_href)

# When this code runs as the main module (rather than being imported),
# perform a full scrape and print the resulting dictionary.
if __name__ == "__main__":
    print(scrape_all())