In [1]:
import pandas as pd
import numpy as np

from splinter import Browser
from bs4 import BeautifulSoup
from webdriver_manager.chrome import ChromeDriverManager
from flask import Flask, render_template, redirect
from flask_pymongo import PyMongo

In [2]:
# Setup splinter
# ChromeDriverManager().install() returns the path of a chromedriver binary;
# that path is handed to splinter, which opens a visible (non-headless) Chrome
# window.
executable_path = dict(executable_path=ChromeDriverManager().install())
browser = Browser('chrome', headless=False, **executable_path)



Current google-chrome version is 92.0.4515
Get LATEST driver version for 92.0.4515
Driver [C:\Users\Sophia\.wdm\drivers\chromedriver\win32\92.0.4515.107\chromedriver.exe] found in cache


In [3]:
# NASA Mars News

# Navigate the splinter-controlled browser to the news site; the page HTML is
# read back from `browser.html` afterwards.
url = 'https://redplanetscience.com/'
browser.visit(url)

In [4]:
# Take the HTML of the page the browser currently has open and parse it with
# BeautifulSoup using the 'html.parser' backend.
html = browser.html
soup = BeautifulSoup(html, 'html.parser')
# Debug aid: uncomment to dump the parsed <body> while working out selectors.
# print(soup.body.prettify())

In [5]:
# Get article titles and teasers
news_titles = soup.find_all(class_='content_title')
news_teasers = soup.find_all(class_='article_teaser_body')

titles = []
teasers = []

# Walk the two result sets in lockstep so that row i holds the i-th title
# together with the i-th teaser. (Nesting the loops would instead emit every
# title once per teaser, i.e. a cartesian product of mismatched pairs.)
# zip() stops at the shorter result set, which keeps both lists the same length.
for news_title, news_teaser in zip(news_titles, news_teasers):
    titles.append(news_title.text)
    teasers.append(news_teaser.text)

In [6]:
# Collect the scraped titles and teasers into a two-column data frame
nasa_news = dict(Title=titles, Teaser=teasers)
df = pd.DataFrame(nasa_news)

# The first row is the article kept for later use: read it once, then pull
# both fields from it.
first_article = df.iloc[0]
nasa_news_title = first_article['Title']
nasa_news_teaser = first_article['Teaser']
print(nasa_news_title)

All About the Laser (and Microphone) Atop Mars 2020, NASA's Next Rover


In [7]:
# JPL Mars Space Images - Featured Image
# Navigate the splinter-controlled browser to the space images site
url2 = 'https://spaceimages-mars.com/'
browser.visit(url2)

# Re-read the browser's current HTML and parse it; this rebinds `html` and
# `soup`, replacing the values parsed from the previously visited page.
html = browser.html
soup = BeautifulSoup(html, 'html.parser')
# Debug aid: uncomment to dump the parsed <body> while working out selectors.
# print(soup.body.prettify())

In [8]:
# Find featured image data: every element carrying the 'headerimage' class
featured_img = soup.find_all(class_='headerimage')

# Fail loudly when nothing matched. Without this check `featured_img_url`
# would be left undefined on a fresh kernel (or silently keep a stale value
# from an earlier run) and the problem would only surface further down.
if not featured_img:
    raise ValueError("No element with class 'headerimage' found at " + url2)

# Build the complete url from the site root plus the image's relative `src`.
# The last match is used, which is the element the previous loop-based
# version ended up with.
featured_img_url = url2 + featured_img[-1]['src']
featured_img_url

'https://spaceimages-mars.com/image/featured/mars1.jpg'

In [9]:
# Mars facts

url3 = 'https://galaxyfacts-mars.com/'
# pandas fetches the page itself and returns its HTML tables as data frames
# (the splinter browser is not involved here)
tables = pd.read_html(url3)

# First table: Mars/Earth comparison.
# Name the columns, discard the first row (it only repeats the column names)
# and index by the unnamed description column.
mars_earth_comparison = tables[0].copy()
mars_earth_comparison.columns = ['', 'Mars', 'Earth']
mars_earth_comparison = mars_earth_comparison.iloc[1:].set_index('')

# Second table: Mars planet profile, indexed by the property name.
mars_profile_df = tables[1].copy()
mars_profile_df.columns = ['Mars Planet Profile', '']
mars_profile_df = mars_profile_df.set_index('Mars Planet Profile')

# Render the profile as an HTML string carrying the Bootstrap table classes
mars_profile = mars_profile_df.to_html(classes="table table-striped")
mars_profile

'<table border="1" class="dataframe table table-striped">\n  <thead>\n    <tr style="text-align: right;">\n      <th></th>\n      <th></th>\n    </tr>\n    <tr>\n      <th>Mars Planet Profile</th>\n      <th></th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>Equatorial Diameter:</th>\n      <td>6,792 km</td>\n    </tr>\n    <tr>\n      <th>Polar Diameter:</th>\n      <td>6,752 km</td>\n    </tr>\n    <tr>\n      <th>Mass:</th>\n      <td>6.39 × 10^23 kg (0.11 Earths)</td>\n    </tr>\n    <tr>\n      <th>Moons:</th>\n      <td>2 ( Phobos &amp; Deimos )</td>\n    </tr>\n    <tr>\n      <th>Orbit Distance:</th>\n      <td>227,943,824 km (1.38 AU)</td>\n    </tr>\n    <tr>\n      <th>Orbit Period:</th>\n      <td>687 days (1.9 years)</td>\n    </tr>\n    <tr>\n      <th>Surface Temperature:</th>\n      <td>-87 to -5 °C</td>\n    </tr>\n    <tr>\n      <th>First Record:</th>\n      <td>2nd millennium BC</td>\n    </tr>\n    <tr>\n      <th>Recorded By:</th>\n      <td>Egyptian ast

In [10]:
# Mars Hemispheres

url4 = 'https://marshemispheres.com/'
# Navigate the splinter-controlled browser to the hemispheres index page
browser.visit(url4)

# Re-read the browser's current HTML and parse it; this rebinds `html` and
# `soup`, replacing the values parsed from the previously visited page.
html = browser.html
soup = BeautifulSoup(html, 'html.parser')
# Debug aid: uncomment to dump the parsed <body> while working out selectors.
# print(soup.body.prettify())

In [11]:
# Mars Hemispheres

# One {"title": ..., "img_url": ...} dictionary per hemisphere page
hemisphere_image_urls = []

# The <h3> texts on the index page drive the navigation: the first four are
# clicked to open each hemisphere page, the fifth is clicked to return.
html_info = soup.find_all('h3')
links = [heading.text for heading in html_info[:4]]
back = html_info[4].text

for link in links:
    browser.find_by_text(link).click()

    # Parse the detail page under its own names so the index-page `soup`
    # (and the `titles` list built in the news section) are not overwritten;
    # that keeps this cell re-runnable on its own.
    page_html = browser.html
    page_soup = BeautifulSoup(page_html, 'html.parser')
    wide_image = page_soup.find(class_='wide-image')
    page_title = page_soup.find(class_='title')

    # Append a fresh dictionary per page. Reusing one dictionary inside nested
    # title/image loops would append the same object repeatedly whenever a
    # page has more than one match. A page missing either element is skipped,
    # as before.
    if wide_image is not None and page_title is not None:
        hemisphere_image_urls.append({
            "title": page_title.text,
            "img_url": url4 + wide_image['src'],
        })

    browser.find_by_text(back).click()

hemisphere_image_urls

[{'title': 'Cerberus Hemisphere Enhanced',
  'img_url': 'https://marshemispheres.com/images/f5e372a36edfa389625da6d0cc25d905_cerberus_enhanced.tif_full.jpg'},
 {'title': 'Schiaparelli Hemisphere Enhanced',
  'img_url': 'https://marshemispheres.com/images/3778f7b43bbbc89d6e3cfabb3613ba93_schiaparelli_enhanced.tif_full.jpg'},
 {'title': 'Syrtis Major Hemisphere Enhanced',
  'img_url': 'https://marshemispheres.com/images/555e6403a6ddd7ba16ddb0e471cadcf7_syrtis_major_enhanced.tif_full.jpg'},
 {'title': 'Valles Marineris Hemisphere Enhanced',
  'img_url': 'https://marshemispheres.com/images/b3c7c6c9138f57b4756be9b9c43e3a48_valles_marineris_enhanced.tif_full.jpg'}]

In [12]:
# Gather every scraped result into one dictionary, section by section
mars_data = {}
mars_data['mars news'] = [nasa_news_title, nasa_news_teaser]
mars_data['mars featured image'] = featured_img_url
mars_data['mars facts'] = mars_profile
mars_data['mars hemispheres'] = hemisphere_image_urls
mars_data

{'mars news': ["All About the Laser (and Microphone) Atop Mars 2020, NASA's Next Rover",
  'SuperCam is a rock-vaporizing instrument that will help scientists hunt for Mars fossils.'],
 'mars featured image': 'https://spaceimages-mars.com/image/featured/mars1.jpg',
 'mars facts': '<table border="1" class="dataframe table table-striped">\n  <thead>\n    <tr style="text-align: right;">\n      <th></th>\n      <th></th>\n    </tr>\n    <tr>\n      <th>Mars Planet Profile</th>\n      <th></th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>Equatorial Diameter:</th>\n      <td>6,792 km</td>\n    </tr>\n    <tr>\n      <th>Polar Diameter:</th>\n      <td>6,752 km</td>\n    </tr>\n    <tr>\n      <th>Mass:</th>\n      <td>6.39 × 10^23 kg (0.11 Earths)</td>\n    </tr>\n    <tr>\n      <th>Moons:</th>\n      <td>2 ( Phobos &amp; Deimos )</td>\n    </tr>\n    <tr>\n      <th>Orbit Distance:</th>\n      <td>227,943,824 km (1.38 AU)</td>\n    </tr>\n    <tr>\n      <th>Orbit Period:</th>\n

In [13]:
# Scraping is finished: shut down the splinter browser session
browser.quit()