In [1]:
# Dependencies and Setup
from bs4 import BeautifulSoup
import requests
from splinter import Browser
import pandas as pd
import pymongo
import datetime as dt
from webdriver_manager.chrome import ChromeDriverManager

In [2]:
# "C:\driver"
# use chrome's driver manager
executable_path = {'executable_path':'C:\\driver\\chromedriver.exe'}
browser = Browser('chrome', **executable_path, headless=False)

In [3]:
# call url here
nasa_url = "https://redplanetscience.com/"
browser.visit(nasa_url)

html = browser.html
nasa_soup = BeautifulSoup(html, "html.parser")

In [4]:
# NASA News
#Scrape the Mars News Site https://redplanetscience.com/
#Assign the text to variables that you can reference later.

#collect the latest News Title 
newest_title = nasa_soup.find("div", class_="content_title").text
print(newest_title)
#collect the Paragraph Text. 
newest_par = nasa_soup.find("div", class_="article_teaser_body").text
print(newest_par)

InSight's 'Mole' Team Peers into the Pit
Efforts to save the heat probe continue.


In [5]:
# Images
image_url = "https://spaceimages-mars.com/"
browser.visit(image_url)
html = browser.html
image_soup = BeautifulSoup(html, "html.parser")

In [6]:
# Use splinter to navigate the site and find the image url for the current Featured Mars Image 
#assign the url string to a variable called featured_image_url.
# combine the full url
image_query = image_soup.find("div", class_="header")
next_query = image_query.find_all("a")
url_list= []
for i in next_query:
    url_list.append(i.get("href"))
url_list

['#', '#', 'image/featured/mars1.jpg']

In [7]:
feature_image_url = image_url + url_list[2]
feature_image_url

'https://spaceimages-mars.com/image/featured/mars1.jpg'

In [8]:
# Facts with pandas
#use Pandas to scrape the table containing facts about the planet including Diameter, Mass, etc.
facts_df = pd.read_html("https://galaxyfacts-mars.com/")[0]
facts_df.columns=facts_df.iloc[0]
facts_df.set_index("Mars - Earth Comparison", inplace=True)
final_df = facts_df.drop("Mars - Earth Comparison")
final_df

Unnamed: 0_level_0,Mars,Earth
Mars - Earth Comparison,Unnamed: 1_level_1,Unnamed: 2_level_1
Diameter:,"6,779 km","12,742 km"
Mass:,6.39 × 10^23 kg,5.97 × 10^24 kg
Moons:,2,1
Distance from Sun:,"227,943,824 km","149,598,262 km"
Length of Year:,687 Earth days,365.24 days
Temperature:,-87 to -5 °C,-88 to 58°C


In [9]:
facts_html = final_df.to_html(classes="table table-striped")

In [10]:
#Use Pandas to convert the data to a HTML table string.
#
print(facts_html)

<table border="1" class="dataframe table table-striped">
  <thead>
    <tr style="text-align: right;">
      <th></th>
      <th>Mars</th>
      <th>Earth</th>
    </tr>
    <tr>
      <th>Mars - Earth Comparison</th>
      <th></th>
      <th></th>
    </tr>
  </thead>
  <tbody>
    <tr>
      <th>Diameter:</th>
      <td>6,779 km</td>
      <td>12,742 km</td>
    </tr>
    <tr>
      <th>Mass:</th>
      <td>6.39 × 10^23 kg</td>
      <td>5.97 × 10^24 kg</td>
    </tr>
    <tr>
      <th>Moons:</th>
      <td>2</td>
      <td>1</td>
    </tr>
    <tr>
      <th>Distance from Sun:</th>
      <td>227,943,824 km</td>
      <td>149,598,262 km</td>
    </tr>
    <tr>
      <th>Length of Year:</th>
      <td>687 Earth days</td>
      <td>365.24 days</td>
    </tr>
    <tr>
      <th>Temperature:</th>
      <td>-87 to -5 °C</td>
      <td>-88 to 58°C</td>
    </tr>
  </tbody>
</table>


In [11]:
# Hemispheres
# https://marshemispheres.com/
#Visit the astrogeology site here to obtain high resolution images for each of Mar's hemispheres.

# hard coded; see activity 10 for doing this dynamically in the future
# list of all hemispheres
hemis=['Cerberus Hemisphere Enhanced',
       'Schiaparelli Hemisphere Enhanced',
       'Syrtis Major Hemisphere Enhanced',
       'Valles Marineris Hemisphere Enhanced']
# list to be filled
hemis_url=[]

#Save both the image url string for the full resolution hemisphere image, 
#and the Hemisphere title containing the hemisphere name. 
#Use a Python dictionary to store the data using the keys img_url and title.
for h in hemis: 
    
    hemi_url="https://marshemispheres.com/"
    browser.visit(hemi_url)
    browser.is_element_present_by_text(h, wait_time=1)
    link = browser.links.find_by_partial_text(h)
    
    #click into new page
    link.click()
    
    # get full image
    full_image = browser.find_by_id('wide-image-toggle')
    #select
    full_image.click()
    
    hemi_soup=BeautifulSoup(browser.html, 'html.parser')
    image = hemi_soup.body.find('img', class_='wide-image')
    image_link = image['src']
    image_url=f"{hemi_url}{image_link}"
    
    #append to list
    hemis_url.append(image_url)

In [12]:
#This list will contain one dictionary for each hemisphere
# fill in using the now completed list asw as the given names
hemisphere_image_urls = [
    {"title": "Cerberus Hemisphere", "img_url": hemis_url[0]},
    {"title": "Schiaparelli Hemisphere", "img_url":hemis_url[1]},
    {"title": "Syrtis Major Hemisphere", "img_url": hemis_url[2]},
    {"title": "Valles Marineris Hemisphere", "img_url": hemis_url[3]},]

hemisphere_image_urls

[{'title': 'Cerberus Hemisphere',
  'img_url': 'https://marshemispheres.com/images/f5e372a36edfa389625da6d0cc25d905_cerberus_enhanced.tif_full.jpg'},
 {'title': 'Schiaparelli Hemisphere',
  'img_url': 'https://marshemispheres.com/images/3778f7b43bbbc89d6e3cfabb3613ba93_schiaparelli_enhanced.tif_full.jpg'},
 {'title': 'Syrtis Major Hemisphere',
  'img_url': 'https://marshemispheres.com/images/555e6403a6ddd7ba16ddb0e471cadcf7_syrtis_major_enhanced.tif_full.jpg'},
 {'title': 'Valles Marineris Hemisphere',
  'img_url': 'https://marshemispheres.com/images/b3c7c6c9138f57b4756be9b9c43e3a48_valles_marineris_enhanced.tif_full.jpg'}]

In [13]:
mars_info = {"newest_title": newest_title,
            "newest_paragraph": newest_par,
            "feature_image": feature_image_url,
            "facts": facts_html,
            "hemispheres": hemisphere_image_urls,
            "last_modified": dt.datetime.now()}
mars_info

{'newest_title': "InSight's 'Mole' Team Peers into the Pit",
 'newest_paragraph': 'Efforts to save the heat probe continue.',
 'feature_image': 'https://spaceimages-mars.com/image/featured/mars1.jpg',
 'facts': '<table border="1" class="dataframe table table-striped">\n  <thead>\n    <tr style="text-align: right;">\n      <th></th>\n      <th>Mars</th>\n      <th>Earth</th>\n    </tr>\n    <tr>\n      <th>Mars - Earth Comparison</th>\n      <th></th>\n      <th></th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>Diameter:</th>\n      <td>6,779 km</td>\n      <td>12,742 km</td>\n    </tr>\n    <tr>\n      <th>Mass:</th>\n      <td>6.39 × 10^23 kg</td>\n      <td>5.97 × 10^24 kg</td>\n    </tr>\n    <tr>\n      <th>Moons:</th>\n      <td>2</td>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>Distance from Sun:</th>\n      <td>227,943,824 km</td>\n      <td>149,598,262 km</td>\n    </tr>\n    <tr>\n      <th>Length of Year:</th>\n      <td>687 Earth days</td>\n      <td>365.24 

In [27]:
#export to pymongo
conn = "mongodb://localhost:27017"
client = pymongo.MongoClient(conn)

db = client.mars_db
#allows for repetition if testing multiple times
db.mars.drop()

In [28]:
db.mars.insert_one(mars_info)

<pymongo.results.InsertOneResult at 0x2a1cc7fe888>