In [16]:
#Step 1: Scraping Dependencies
from bs4 import BeautifulSoup
from splinter import Browser
from selenium import webdriver
from selenium.webdriver.common.by import By

#Step 2: Database Dependencies
import pymongo

#Python Dependencies
import pandas as pd


# STEP 1: Web Scraping

1. Use Splinter to create a browser object connected to NASA Mars website
2. Use BeautifulSoup to parse HTML and retrieve information

In [17]:
#define path to chrome driver
executable_path = {'executable_path': '/chromedriver'}

#Set default behaviors
options = webdriver.ChromeOptions()
options.add_argument("--start-maximized")
options.add_argument("--disable-notifications")

#Create browser object
browser = Browser('chrome', **executable_path, headless=False)

### NASA Mars News

In [18]:
#Connect to NASA Mars webpage
browser.visit("https://mars.nasa.gov/news")

In [39]:
#Create a soup object 
html = browser.html
soup = BeautifulSoup(html, 'lxml')

In [44]:
#Access the titles and article teasers for each news article
articles = soup.findAll("li", class_="slide")
titles = []
teasers = []

for article in articles:
    title = article.find('div', class_="content_title").text
    titles.append(title)
    teaser = article.find('div', class_="article_teaser_body").text
    teasers.append(teaser)


['In honor of Hispanic Heritage Month, Christina Hernandez, an instrument engineer on the Mars 2020 mission, talks about her childhood and journey to NASA.',
 "Did you know NASA's next Mars rover has a nearly identical sibling on Earth for testing? Even better, it's about to roll for the first time through a replica Martian landscape.",
 'An electronics issue is suspected to be preventing the sensors from sharing their data about Mars weather with the spacecraft.',
 "A crisply rendered web application can show you where the agency's Mars 2020 mission is right now as it makes its way to the Red Planet for a Feb. 18, 2021, landing.",
 'The board will assist with analysis of current plans and goals for one of the most difficult missions humanity has ever undertaken.',
 'Headed to the Red Planet with the Perseverance rover, the pioneering helicopter is powered up for the first time in interplanetary space as part of a systems check.',
 'Marking its 15th anniversary since launch, one of the

### JPL Mars Space Images

In [6]:
#Connect to JPL NASA web page
browser.visit("https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars")

In [7]:
#Create a soup object 
html = browser.html
soup = BeautifulSoup(html, 'html.parser')

In [8]:
#Use Splinter to Click on full JPG image for featured image
browser.find_by_id('full_image').first.click()

In [9]:
pic = browser.links.find_by_partial_text("more info")
pic.click()

ElementDoesNotExist: no elements could be found with link by partial text "more info"

In [10]:
html = browser.html
soup = BeautifulSoup(html, 'html.parser')

img_url = soup.select_one("figure.lede a img").get("src")
feat_img_url = "https://www.jpl.nasa.gov" + img_url
print(feat_img_url)

AttributeError: 'NoneType' object has no attribute 'get'

### Mars Facts
1. Use Pandas to scrape the table containing facts about the planet including Diameter, Mass, etc.
2. Use Pandas to convert the data to a HTML table string.

In [11]:
url = 'https://space-facts.com/mars'

In [12]:
tables = pd.read_html(url)

KeyboardInterrupt: 

In [10]:
#Rename Columns
df = tables[0]
df.columns = ['Mars Planet Profile', 'Value']

In [13]:
#Convert table to an html table
html_table = df.to_html()

#Clean up table
html_table.replace('\n', '')

#Save directly to file
df.to_html('table.html')

In [14]:
!open table.html

### Mars Hemispheres

In [33]:
# Mars hemisphere name and image to be scraped
main_url = 'https://astrogeology.usgs.gov'
hemis_url = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'

In [34]:
#Create Soup Object
browser.visit(hemis_url)
hemis_html = browser.html
hemis_soup = BeautifulSoup(hemis_html, 'html.parser')

In [35]:
# Mars Hemispheres Data Page Links
all_hemis = hemis_soup.find('div', class_='collapsible results')
mars_hemis = all_hemis.find_all('div', class_='item')

hemi_img_urls = []

# Loop through the main hemisphere data page
for hemi in mars_hemis:
    # Collect Title
    hemisphere = hemi.find('div', class_="description")
    title = hemisphere.h3.text
    
    # Scrape image links on hemisphere page
    hemi_link = hemi.a["href"]    
    browser.visit(main_url + hemi_link)
    
    image_html = browser.html
    image_soup = BeautifulSoup(image_html, 'html.parser')
    
    image_link = image_soup.find('div', class_='downloads')
    image_url = image_link.find('li').a['href']

    # Create Dictionary to with key: title and value: url info
    image_dict = {}
    image_dict['title'] = title
    image_dict['img_url'] = image_url
    
    hemi_img_urls.append(image_dict)

print(hemi_img_urls)

[{'title': 'Cerberus Hemisphere Enhanced', 'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg'}, {'title': 'Schiaparelli Hemisphere Enhanced', 'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg'}, {'title': 'Syrtis Major Hemisphere Enhanced', 'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg'}, {'title': 'Valles Marineris Hemisphere Enhanced', 'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg'}]


# Step2: MongoDB and Flask Application

In [None]:

conn = 'mongodb://localhost:27017'
client = pymongo.MongoClient(conn)