In [1]:
## Import Dependencies
from bs4 import BeautifulSoup as bs
import requests
import pymongo
from splinter import Browser
from webdriver_manager.chrome import ChromeDriverManager
from flask import Flask, render_template, redirect
from flask_pymongo import PyMongo
import pandas as pd

## NASA Mars News

In [2]:
# Setup splinter
executable_path = {'executable_path': ChromeDriverManager().install()}
browser = Browser('chrome', **executable_path, headless=False)



Current google-chrome version is 97.0.4692
Get LATEST chromedriver version for 97.0.4692 google-chrome
Driver [C:\Users\shrey\.wdm\drivers\chromedriver\win32\97.0.4692.71\chromedriver.exe] found in cache


In [3]:
# Scrape news title and news paragraph -> https://redplanetscience.com/
url = "https://redplanetscience.com/"
browser.visit(url)
html = browser.html
soup = bs(html,'html.parser')

In [4]:
# Retrieve the latest news title
news_title = soup.find('div', class_ = 'content_title').text
# Retrieve the latest news paragraph
news_para = soup.find('div', class_ = 'article_teaser_body').text
# Print news title and paragraph
print(f"News Title: {news_title} \n\nNews Paragraph: {news_para}")

News Title: NASA Wins 4 Webbys, 4 People's Voice Awards 

News Paragraph: Winners include the JPL-managed "Send Your Name to Mars" campaign, NASA's Global Climate Change website and Solar System Interactive.


In [5]:
# Scrape Mars Image -> https://spaceimages-mars.com/
jpl_image_url = "https://spaceimages-mars.com/"
browser.visit(jpl_image_url)
html = browser.html
soup = bs(html,"html.parser")

In [6]:
# Retrieve image
image_url = soup.find_all("img", class_="headerimage fade-in")
# Retrieve featured image URL
featured_image_url=f"{jpl_image_url}{image_url[0]['src']}"
# Print image URL
print(f"Link to the Featured Mars Image: {featured_image_url}")

Link to the Featured Mars Image: https://spaceimages-mars.com/image/featured/mars1.jpg


In [7]:
# Scrape Mars facts -> https://galaxyfacts-mars.com/
url = 'https://galaxyfacts-mars.com/'
tables = pd.read_html(url)
tables

[                         0                1                2
 0  Mars - Earth Comparison             Mars            Earth
 1                Diameter:         6,779 km        12,742 km
 2                    Mass:  6.39 × 10^23 kg  5.97 × 10^24 kg
 3                   Moons:                2                1
 4       Distance from Sun:   227,943,824 km   149,598,262 km
 5          Length of Year:   687 Earth days      365.24 days
 6             Temperature:     -87 to -5 °C      -88 to 58°C,
                       0                              1
 0  Equatorial Diameter:                       6,792 km
 1       Polar Diameter:                       6,752 km
 2                 Mass:  6.39 × 10^23 kg (0.11 Earths)
 3                Moons:          2 ( Phobos & Deimos )
 4       Orbit Distance:       227,943,824 km (1.38 AU)
 5         Orbit Period:           687 days (1.9 years)
 6  Surface Temperature:                   -87 to -5 °C
 7         First Record:              2nd millennium BC

In [8]:
mars_fact = tables [1]

In [9]:
mars_fact = mars_fact.rename(columns={0:"Profile",1:"Value"}, errors="raise")
mars_fact.set_index("Profile", inplace=True)
mars_fact

Unnamed: 0_level_0,Value
Profile,Unnamed: 1_level_1
Equatorial Diameter:,"6,792 km"
Polar Diameter:,"6,752 km"
Mass:,6.39 × 10^23 kg (0.11 Earths)
Moons:,2 ( Phobos & Deimos )
Orbit Distance:,"227,943,824 km (1.38 AU)"
Orbit Period:,687 days (1.9 years)
Surface Temperature:,-87 to -5 °C
First Record:,2nd millennium BC
Recorded By:,Egyptian astronomers


In [10]:
fact_table = mars_fact.to_html()
fact_table

'<table border="1" class="dataframe">\n  <thead>\n    <tr style="text-align: right;">\n      <th></th>\n      <th>Value</th>\n    </tr>\n    <tr>\n      <th>Profile</th>\n      <th></th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>Equatorial Diameter:</th>\n      <td>6,792 km</td>\n    </tr>\n    <tr>\n      <th>Polar Diameter:</th>\n      <td>6,752 km</td>\n    </tr>\n    <tr>\n      <th>Mass:</th>\n      <td>6.39 × 10^23 kg (0.11 Earths)</td>\n    </tr>\n    <tr>\n      <th>Moons:</th>\n      <td>2 ( Phobos &amp; Deimos )</td>\n    </tr>\n    <tr>\n      <th>Orbit Distance:</th>\n      <td>227,943,824 km (1.38 AU)</td>\n    </tr>\n    <tr>\n      <th>Orbit Period:</th>\n      <td>687 days (1.9 years)</td>\n    </tr>\n    <tr>\n      <th>Surface Temperature:</th>\n      <td>-87 to -5 °C</td>\n    </tr>\n    <tr>\n      <th>First Record:</th>\n      <td>2nd millennium BC</td>\n    </tr>\n    <tr>\n      <th>Recorded By:</th>\n      <td>Egyptian astronomers</td>\n    </tr>\n 

In [11]:
fact_table.replace('\n','')
print(fact_table)

<table border="1" class="dataframe">
  <thead>
    <tr style="text-align: right;">
      <th></th>
      <th>Value</th>
    </tr>
    <tr>
      <th>Profile</th>
      <th></th>
    </tr>
  </thead>
  <tbody>
    <tr>
      <th>Equatorial Diameter:</th>
      <td>6,792 km</td>
    </tr>
    <tr>
      <th>Polar Diameter:</th>
      <td>6,752 km</td>
    </tr>
    <tr>
      <th>Mass:</th>
      <td>6.39 × 10^23 kg (0.11 Earths)</td>
    </tr>
    <tr>
      <th>Moons:</th>
      <td>2 ( Phobos &amp; Deimos )</td>
    </tr>
    <tr>
      <th>Orbit Distance:</th>
      <td>227,943,824 km (1.38 AU)</td>
    </tr>
    <tr>
      <th>Orbit Period:</th>
      <td>687 days (1.9 years)</td>
    </tr>
    <tr>
      <th>Surface Temperature:</th>
      <td>-87 to -5 °C</td>
    </tr>
    <tr>
      <th>First Record:</th>
      <td>2nd millennium BC</td>
    </tr>
    <tr>
      <th>Recorded By:</th>
      <td>Egyptian astronomers</td>
    </tr>
  </tbody>
</table>


## Mars Hemispheres

In [12]:
# Scrape Mars Hemispheres URL -> https://marshemispheres.com/
hemisphere_url = "https://marshemispheres.com/"
browser.visit(hemisphere_url)
html = browser.html
soup = bs(html,'html.parser')

In [13]:
def scrape_img(search_url):
    # Retrieve page with the requests module
    response = requests.get(search_url)
    soup = bs(response.text, "html5lib")
    # Store high resolution image URL to create a final hemisphere image URL
    hem_img_url = soup.find('img', class_='wide-image')['src']
    final_img_url = hemisphere_url + hem_img_url
    # Store the image url information into a dictionary
    title_img_dicts['image_url'] = final_img_url
    
    return (title_img_dicts['image_url'])          
            
def dict_to_list(title_img_dicts):
    new_dict = {}
    
    copy_dict = title_img_dicts.copy()
    new_dict.update(copy_dict)
    
    return (new_dict)

In [14]:
# Use a Python dictionary to store the data using the keys image URL and title

img_containers = soup.find_all('div', class_='item')

# Empty list to import image reference links
img_url = []

# Empty dictionary to holds title information and URL of image
title_img_dicts = {}

# Empty list to hold each dictionary of title and full img url
hemisphere_img_urls = []

# Extract the title and full image URL
for img in img_containers:
    title_img_dicts['title'] = img.find('h3').text
    img_link = img.find('a', class_='itemLink product-item')['href']
    img_url.append(img_link)
    img_url_list = [hemisphere_url + url for url in img_url]
    
    for search_url in img_url_list:
        scrape_img(search_url)
    
# Append the empty dictionary with the image URL string and the hemisphere title

    hemisphere_img_urls.append(dict_to_list(title_img_dicts))

In [15]:
# Display the dictionary
hemisphere_img_urls

[{'title': 'Cerberus Hemisphere Enhanced',
  'image_url': 'https://marshemispheres.com/images/f5e372a36edfa389625da6d0cc25d905_cerberus_enhanced.tif_full.jpg'},
 {'title': 'Schiaparelli Hemisphere Enhanced',
  'image_url': 'https://marshemispheres.com/images/3778f7b43bbbc89d6e3cfabb3613ba93_schiaparelli_enhanced.tif_full.jpg'},
 {'title': 'Syrtis Major Hemisphere Enhanced',
  'image_url': 'https://marshemispheres.com/images/555e6403a6ddd7ba16ddb0e471cadcf7_syrtis_major_enhanced.tif_full.jpg'},
 {'title': 'Valles Marineris Hemisphere Enhanced',
  'image_url': 'https://marshemispheres.com/images/b3c7c6c9138f57b4756be9b9c43e3a48_valles_marineris_enhanced.tif_full.jpg'}]

In [16]:
# Create dictionary for all info scraped from sources above
mars_dict={
    "news_title":news_title,
    "news_para":news_para,
    "featured_image_url":featured_image_url,
    "fact_table":fact_table,
    "hemisphere_images":hemisphere_img_urls
}

In [17]:
# Display the dictionary
mars_dict

{'news_title': "NASA Wins 4 Webbys, 4 People's Voice Awards",
 'news_para': 'Winners include the JPL-managed "Send Your Name to Mars" campaign, NASA\'s Global Climate Change website and Solar System Interactive.',
 'featured_image_url': 'https://spaceimages-mars.com/image/featured/mars1.jpg',
 'fact_table': '<table border="1" class="dataframe">\n  <thead>\n    <tr style="text-align: right;">\n      <th></th>\n      <th>Value</th>\n    </tr>\n    <tr>\n      <th>Profile</th>\n      <th></th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>Equatorial Diameter:</th>\n      <td>6,792 km</td>\n    </tr>\n    <tr>\n      <th>Polar Diameter:</th>\n      <td>6,752 km</td>\n    </tr>\n    <tr>\n      <th>Mass:</th>\n      <td>6.39 × 10^23 kg (0.11 Earths)</td>\n    </tr>\n    <tr>\n      <th>Moons:</th>\n      <td>2 ( Phobos &amp; Deimos )</td>\n    </tr>\n    <tr>\n      <th>Orbit Distance:</th>\n      <td>227,943,824 km (1.38 AU)</td>\n    </tr>\n    <tr>\n      <th>Orbit Period:</th>\