In [6]:
from splinter import Browser
from bs4 import BeautifulSoup
from webdriver_manager.chrome import ChromeDriverManager
import requests
import pandas as pd
from cachecontrol import CacheControl

### NASA Mars Article

In [7]:
# Listing page for NASA Mars news, ordered by publish date descending
url = 'https://mars.nasa.gov/news/?page=0&per_page=40&order=publish_date+desc%2Ccreated_at+desc&search=&category=19%2C165%2C184%2C204&blank_scope=Latest'

# Plain requests session, then that same session wrapped by CacheControl
sess = requests.Session()
cached_sess = CacheControl(sess)

In [8]:
# Retrieve the news listing through the cached session; the request headers
# ask for an uncached copy of the page.
response = cached_sess.get(
    url,
    headers={"Cache-Control": "no-cache", "Pragma": "no-cache"},
    timeout=30,  # requests waits indefinitely unless a timeout is given
)
# Stop here on a 4xx/5xx reply instead of parsing an error page below
response.raise_for_status()

# Create BeautifulSoup object; parse with 'html.parser'
soup = BeautifulSoup(response.text, 'html.parser')

In [9]:
# Take the first <div class="content_title"> on the page and keep its text,
# stripped of surrounding whitespace, as the news title
title_div = soup.find('div', class_="content_title")
news_title = title_div.text.strip()
news_title

"NASA's Perseverance Drives on Mars' Terrain for First Time"

In [10]:
# Parse the page for the "rollover_description_inner" element (the teaser text)
# and strip surrounding whitespace, the same way news_title is cleaned, so the
# stored value does not carry leading/trailing newlines.
news_paragraph = soup.find('div', class_="rollover_description_inner").text.strip()
print(news_paragraph)


The first trek of the agency’s largest, most advanced rover yet on the Red Planet marks a major milestone before science operations get under way.



### JPL Mars Space Images

In [None]:
# Setup splinter: ChromeDriverManager().install() supplies the driver path,
# and Chrome is opened with a visible (non-headless) window
executable_path = dict(executable_path=ChromeDriverManager().install())
browser = Browser('chrome', headless=False, **executable_path)

# Page the featured JPL space image is scraped from
imageurl = "https://data-class-jpl-space.s3.amazonaws.com/JPL_Space/index.html"

browser.visit(imageurl)

In [None]:
# Snapshot the HTML currently loaded in the browser and parse it
html = browser.html
imgsoup = BeautifulSoup(html, features='html.parser')

In [None]:
# Locate the <img> tag whose class attribute is "headerimage fade-in"
short_url = imgsoup.find(name='img', class_="headerimage fade-in")
# Its src value is appended to the JPL_Space base URL to form the full link
short_url_clean = short_url['src']
featured_image_url = f'https://data-class-jpl-space.s3.amazonaws.com/JPL_Space/{short_url_clean}'
print(featured_image_url)

### Mars Facts

In [None]:
# Page whose HTML tables are read for the Mars facts
factsurl = "https://space-facts.com/mars/"

In [None]:
# Read every HTML table on the facts page; only the first one is used
factstable = pd.read_html(factsurl)

# Label the two columns, then store the table back with "Description" as its index
facts_df = factstable[0]
facts_df.columns = ['Description', 'Mars']
factstable[0] = facts_df.set_index("Description")
factstable[0]

In [None]:
# Render the first facts table as an HTML string
facts_frame = factstable[0]
factstable_html = facts_frame.to_html()
factstable_html

In [None]:
# str.replace returns a new string (strings are immutable), so the result must
# be assigned back for the newlines to actually be removed.
factstable_html = factstable_html.replace('\n', '')
print(factstable_html)

### Mars Hemispheres

In [None]:
# USGS Astrogeology search for "hemisphere enhanced" with target Mars
hemi_url = "https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars"

# Load the results page in the browser, then parse the HTML it holds
browser.visit(hemi_url)
html = browser.html
hemisoup = BeautifulSoup(html, features='html.parser')

In [None]:
# Narrow to the results container, then collect every "item" div inside it
hemi_results = hemisoup.find(name="div", class_="collapsible results")
hemi_items = hemi_results.find_all(name="div", class_="item")
# Show the first entry to inspect its markup
hemi_items[0]

In [None]:
hemisphere_image_urls = []

# Loop over the search results to collect each entry's title and image URL
for item in hemi_items:
    # Scrape the image title from the entry's description block
    description = item.find('div', class_='description')
    title = description.find('h3').text

    # Link path from the entry's first anchor; appended to the site host below
    hemi_page_url_short = item.a['href']

    # Visit the linked page and parse it. Dedicated names are used so the
    # notebook-level `soup` (news page) and `html` are not overwritten, which
    # would break re-running the news cells after this loop.
    browser.visit(f'https://astrogeology.usgs.gov{hemi_page_url_short}')
    hemi_page_html = browser.html
    hemi_page_soup = BeautifulSoup(hemi_page_html, 'html.parser')

    # The first list-item link inside the "downloads" div is taken as the image URL
    downloads = hemi_page_soup.find("div", class_="downloads")
    img_url = downloads.find('li').a['href']

    # Create dictionary element
    hemi_dict = {
        'title': title,
        'img_url': img_url,
    }

    # Append to list
    hemisphere_image_urls.append(hemi_dict)

In [None]:
# Display the collected list of {'title', 'img_url'} dictionaries
hemisphere_image_urls

In [None]:
# Bundle every scraped result into one dictionary.
final_dict = {
    "news_title": news_title,
    # The teaser text lives in `news_paragraph`; no `news_p` variable is ever
    # defined, so referencing it raised NameError on a fresh kernel.
    "news_p": news_paragraph,
    "featured_image_url": featured_image_url,
    "factstable_html": factstable_html,
    "hemisphere_images": hemisphere_image_urls,
}

In [None]:
# Display the assembled results dictionary
final_dict