## Dependencies

In [1]:
# Dependencies
import pandas as pd
import requests
from splinter import Browser
from bs4 import BeautifulSoup
from flask import Flask, render_template, redirect, url_for
from flask_pymongo import PyMongo
import pymongo
import datetime
from webdriver_manager.chrome import ChromeDriverManager

In [2]:
# Set up splinter: obtain a ChromeDriver binary path via webdriver_manager, then
# launch a visible (non-headless) Chrome session driven by it.
executable_path = dict(executable_path=ChromeDriverManager().install())
browser = Browser('chrome', headless=False, **executable_path)



Current google-chrome version is 91.0.4472
Get LATEST driver version for 91.0.4472
Driver [C:\Users\lcswi\.wdm\drivers\chromedriver\win32\91.0.4472.101\chromedriver.exe] found in cache


## NASA Mars News - Scrape the Mars News Site and collect the latest News Title and Paragraph Text

In [3]:
# Mars news site: point the automated browser at its landing page.
url = "https://redplanetscience.com/"
browser.visit(url)

In [4]:
# The following step looks up 'div.content_title'. Give the page a bounded
# amount of time to contain that element before snapshotting the HTML, in case
# the site fills it in asynchronously; otherwise a Restart & Run All can parse
# the page before the article list exists.
browser.is_element_present_by_css('div.content_title', wait_time=5)

# Snapshot the rendered page and parse it with Python's built-in 'html.parser'.
html = browser.html
soup = BeautifulSoup(html, 'html.parser')

In [5]:
# Take the first matching headline and teaser from the parsed news page.
# find() returns the first match (or None), so there is no need to build the
# full result list just to index element 0.
title_tag = soup.find('div', class_='content_title')
teaser_tag = soup.find('div', class_='article_teaser_body')

# Fail with an explicit message instead of an opaque IndexError/AttributeError
# when the expected elements are not in the captured HTML.
if title_tag is None or teaser_tag is None:
    raise ValueError(
        "No 'content_title' / 'article_teaser_body' element found in the "
        "captured page; the HTML may have been read before the page finished loading."
    )

news_title = title_tag.text
news_p = teaser_tag.text

print(news_title)
print('---------------------------------------------------------------------------')
print(news_p)

HiRISE Views NASA's InSight and Curiosity on Mars
---------------------------------------------------------------------------
New images taken from space offer the clearest orbital glimpse yet of InSight as well as a view of Curiosity rolling along.


## JPL Mars Space Images - Featured Image 

In [6]:
# Mars space-images site: load its landing page in the automated browser.
url = "https://spaceimages-mars.com/"
browser.visit(url)

In [7]:
# Snapshot the page currently loaded in the browser and parse it with
# Python's built-in 'html.parser'.
html = browser.html
soup_JPL = BeautifulSoup(markup=html, features="html.parser")

In [8]:
# The second <img> tag on the page (index 1) is taken as the featured image.
# Its 'src' is a site-relative path, so it is prefixed with the site address.
# NOTE: this relies on `url` still holding the space-images address assigned
# just before the page was visited.
image_path = soup_JPL.find_all('img')[1]['src']
featured_image_url = f"{url}{image_path}"
print(featured_image_url)

https://spaceimages-mars.com/image/featured/mars1.jpg


## Mars Facts - Table of Mars facts including diameter, mass, etc.

In [9]:
# pandas.read_html returns one DataFrame per <table> found at the URL; the
# second one (index 1) is used here as the Mars profile table.
url = "https://galaxyfacts-mars.com"
tables = pd.read_html(url)
table = tables[1]
table.columns = ['Mars Planet Profile', "Dimentions"]

# NOTE(review): drop(index=0) discards the table's first row -- confirm that
# row is a header/label row and not a real fact before relying on this output.
# The profile column then becomes the index, without mutating in place.
Mars_Table = table.drop(index=0).set_index('Mars Planet Profile')
Mars_Table

Unnamed: 0_level_0,Dimentions
Mars Planet Profile,Unnamed: 1_level_1
Polar Diameter:,"6,752 km"
Mass:,6.39 × 10^23 kg (0.11 Earths)
Moons:,2 ( Phobos & Deimos )
Orbit Distance:,"227,943,824 km (1.38 AU)"
Orbit Period:,687 days (1.9 years)
Surface Temperature:,-87 to -5 °C
First Record:,2nd millennium BC
Recorded By:,Egyptian astronomers


In [10]:
# Render the Mars facts DataFrame as an HTML table, then strip every "\n"
# (newline) so the markup becomes a single line.
html_table = Mars_Table.to_html()
clean_html_table = "".join(html_table.split("\n"))
print(clean_html_table)

<table border="1" class="dataframe">  <thead>    <tr style="text-align: right;">      <th></th>      <th>Dimentions</th>    </tr>    <tr>      <th>Mars Planet Profile</th>      <th></th>    </tr>  </thead>  <tbody>    <tr>      <th>Polar Diameter:</th>      <td>6,752 km</td>    </tr>    <tr>      <th>Mass:</th>      <td>6.39 × 10^23 kg (0.11 Earths)</td>    </tr>    <tr>      <th>Moons:</th>      <td>2 ( Phobos &amp; Deimos )</td>    </tr>    <tr>      <th>Orbit Distance:</th>      <td>227,943,824 km (1.38 AU)</td>    </tr>    <tr>      <th>Orbit Period:</th>      <td>687 days (1.9 years)</td>    </tr>    <tr>      <th>Surface Temperature:</th>      <td>-87 to -5 °C</td>    </tr>    <tr>      <th>First Record:</th>      <td>2nd millennium BC</td>    </tr>    <tr>      <th>Recorded By:</th>      <td>Egyptian astronomers</td>    </tr>  </tbody></table>


In [11]:
# Write the Mars facts table to the file "table.html".
# When DataFrame.to_html() is given a path it writes the file and returns None,
# so the result is deliberately not bound to a variable (previously this left
# Mars_Table_HTML set to None rather than to the HTML string).
Mars_Table.to_html('table.html')

In [12]:
# Re-render the table as an HTML string (buf=None means "return the markup"
# rather than write it to a file) and print it. This does not read table.html
# back from disk.
Mars_Table_HTML = Mars_Table.to_html(buf=None)
print(Mars_Table_HTML)

<table border="1" class="dataframe">
  <thead>
    <tr style="text-align: right;">
      <th></th>
      <th>Dimentions</th>
    </tr>
    <tr>
      <th>Mars Planet Profile</th>
      <th></th>
    </tr>
  </thead>
  <tbody>
    <tr>
      <th>Polar Diameter:</th>
      <td>6,752 km</td>
    </tr>
    <tr>
      <th>Mass:</th>
      <td>6.39 × 10^23 kg (0.11 Earths)</td>
    </tr>
    <tr>
      <th>Moons:</th>
      <td>2 ( Phobos &amp; Deimos )</td>
    </tr>
    <tr>
      <th>Orbit Distance:</th>
      <td>227,943,824 km (1.38 AU)</td>
    </tr>
    <tr>
      <th>Orbit Period:</th>
      <td>687 days (1.9 years)</td>
    </tr>
    <tr>
      <th>Surface Temperature:</th>
      <td>-87 to -5 °C</td>
    </tr>
    <tr>
      <th>First Record:</th>
      <td>2nd millennium BC</td>
    </tr>
    <tr>
      <th>Recorded By:</th>
      <td>Egyptian astronomers</td>
    </tr>
  </tbody>
</table>


## MARS Hemispheres

In [13]:
# Load the Mars hemispheres index page and parse the HTML the browser holds.
url_hemisphere = "https://marshemispheres.com"
browser.visit(url_hemisphere)
html_hemisphere = browser.html
soup = BeautifulSoup(markup=html_hemisphere, features="html.parser")

In [14]:
# Each <div class="item"> on the index page describes one hemisphere.
hemispheres = soup.find_all("div", class_="item")
hemispheres_info = []
hemispheres_url = "https://marshemispheres.com/"

for item in hemispheres:
    # Hemisphere name and the relative link to its detail page.
    title = item.find("h3").text
    detail_href = item.find("a", class_="itemLink product-item")["href"]

    # Open the detail page, which holds the full-size image, and parse it.
    browser.visit(hemispheres_url + detail_href)
    detail_soup = BeautifulSoup(browser.html, "html.parser")

    # The <img class="wide-image"> src is relative; prefix it with the site URL.
    wide_image_src = detail_soup.find("img", class_="wide-image")["src"]
    img_url = hemispheres_url + wide_image_src

    hemispheres_info.append(dict(title=title, img_url=img_url))

In [15]:
# Display the collected list of {"title", "img_url"} records, one per hemisphere item.
hemispheres_info

[{'title': 'Cerberus Hemisphere Enhanced',
  'img_url': 'https://marshemispheres.com/images/f5e372a36edfa389625da6d0cc25d905_cerberus_enhanced.tif_full.jpg'},
 {'title': 'Schiaparelli Hemisphere Enhanced',
  'img_url': 'https://marshemispheres.com/images/3778f7b43bbbc89d6e3cfabb3613ba93_schiaparelli_enhanced.tif_full.jpg'},
 {'title': 'Syrtis Major Hemisphere Enhanced',
  'img_url': 'https://marshemispheres.com/images/555e6403a6ddd7ba16ddb0e471cadcf7_syrtis_major_enhanced.tif_full.jpg'},
 {'title': 'Valles Marineris Hemisphere Enhanced',
  'img_url': 'https://marshemispheres.com/images/b3c7c6c9138f57b4756be9b9c43e3a48_valles_marineris_enhanced.tif_full.jpg'}]

In [16]:
# Shut down the automated browser now that all scraping steps have run.
browser.quit()