## Mars Mission Web Scrape

In [2]:
# Dependencies
import pandas as pd
from bs4 import BeautifulSoup as bs
import requests

from splinter import Browser
from webdriver_manager.chrome import ChromeDriverManager

import pymongo

#### NASA Mars News

In [18]:
# Launch a visible (non-headless) Chrome session driven by splinter.
executable_path = {'executable_path': ChromeDriverManager().install()}
browser = Browser('chrome', **executable_path, headless=False)

# try/finally guarantees the browser is shut down even when the visit or the
# parsing below raises; without it a failed run leaves Chrome/chromedriver open.
try:
    # Visit webpage and parse the rendered page source
    url = "https://redplanetscience.com/"
    browser.visit(url)
    html = browser.html
    soup = bs(html, "html.parser")

    # The headlines live inside <div id="news">; fail with a clear message
    # rather than an opaque AttributeError on None if the container is missing.
    news = soup.find('div', id='news')
    if news is None:
        raise ValueError(f"No <div id='news'> found on {url}")

    # Take the first title/teaser pair found inside the news container.
    news_title = news.find('div', class_='content_title').text
    news_teaser = news.find('div', class_='article_teaser_body').text
finally:
    # Close the browser
    browser.quit()

print(news_title)
print(news_teaser)




Current google-chrome version is 91.0.4472
Get LATEST driver version for 91.0.4472
Driver [C:\Users\skim3\.wdm\drivers\chromedriver\win32\91.0.4472.101\chromedriver.exe] found in cache


Alabama High School Student Names NASA's Mars Helicopter
Vaneeza Rupani's essay was chosen as the name for the small spacecraft, which will mark NASA's first attempt at powered flight on another planet.


#### Space Image

In [29]:
# Launch a visible (non-headless) Chrome session driven by splinter.
executable_path = {'executable_path': ChromeDriverManager().install()}
browser = Browser('chrome', **executable_path, headless=False)

# try/finally guarantees the browser is shut down even when the visit or the
# parsing below raises; without it a failed run leaves Chrome/chromedriver open.
try:
    # Visit webpage and parse the rendered page source
    url = "https://spaceimages-mars.com/"
    browser.visit(url)
    html = browser.html
    soup = bs(html, "html.parser")

    # The featured image is linked from the <a class="showimg"> anchor.
    space_img = soup.find('a', class_='showimg')

    # The href is appended to the site root (url already ends with "/").
    featured_image_url = f"{url}{space_img['href']}"
finally:
    # Close the browser
    browser.quit()

print(featured_image_url)



Current google-chrome version is 91.0.4472
Get LATEST driver version for 91.0.4472
Driver [C:\Users\skim3\.wdm\drivers\chromedriver\win32\91.0.4472.101\chromedriver.exe] found in cache


https://spaceimages-mars.com/image/featured/mars3.jpg


#### Mars Facts Tabulation

In [31]:
# Read the tables on the Mars facts page; read_html returns a list of DataFrames
url = "https://galaxyfacts-mars.com/"
tables = pd.read_html(io=url)
tables

[                         0                1                2
 0  Mars - Earth Comparison             Mars            Earth
 1                Diameter:         6,779 km        12,742 km
 2                    Mass:  6.39 × 10^23 kg  5.97 × 10^24 kg
 3                   Moons:                2                1
 4       Distance from Sun:   227,943,824 km   149,598,262 km
 5          Length of Year:   687 Earth days      365.24 days
 6             Temperature:     -87 to -5 °C      -88 to 58°C,
                       0                              1
 0  Equatorial Diameter:                       6,792 km
 1       Polar Diameter:                       6,752 km
 2                 Mass:  6.39 × 10^23 kg (0.11 Earths)
 3                Moons:          2 ( Phobos & Deimos )
 4       Orbit Distance:       227,943,824 km (1.38 AU)
 5         Orbit Period:           687 days (1.9 years)
 6  Surface Temperature:                   -87 to -5 °C
 7         First Record:              2nd millennium BC

In [35]:
# Take the first scraped table as the working dataframe and preview it
mars_df = tables[0]
mars_df.head(n=10)

Unnamed: 0,0,1,2
0,Mars - Earth Comparison,Mars,Earth
1,Diameter:,"6,779 km","12,742 km"
2,Mass:,6.39 × 10^23 kg,5.97 × 10^24 kg
3,Moons:,2,1
4,Distance from Sun:,"227,943,824 km","149,598,262 km"
5,Length of Year:,687 Earth days,365.24 days
6,Temperature:,-87 to -5 °C,-88 to 58°C


In [42]:
# Reformat dataframe in one chain: name the positional columns, drop row 0
# (it repeats the header text as data), and index by the comparison label.
clean_mars_df = (
    mars_df
    .rename(columns={0: 'Mars-Earth Comparisons', 1: 'Mars', 2: 'Earth'})
    .drop(index=0)
    .set_index('Mars-Earth Comparisons')
)
clean_mars_df.head()

Unnamed: 0_level_0,Mars,Earth
Mars-Earth Comparisons,Unnamed: 1_level_1,Unnamed: 2_level_1
Diameter:,"6,779 km","12,742 km"
Mass:,6.39 × 10^23 kg,5.97 × 10^24 kg
Moons:,2,1
Distance from Sun:,"227,943,824 km","149,598,262 km"
Length of Year:,687 Earth days,365.24 days


In [43]:
# Render the cleaned dataframe as an HTML <table> string
# (with buf=None, to_html returns the markup instead of writing it out)
mars_html_table = clean_mars_df.to_html(buf=None)
mars_html_table

'<table border="1" class="dataframe">\n  <thead>\n    <tr style="text-align: right;">\n      <th></th>\n      <th>Mars</th>\n      <th>Earth</th>\n    </tr>\n    <tr>\n      <th>Mars-Earth Comparisons</th>\n      <th></th>\n      <th></th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>Diameter:</th>\n      <td>6,779 km</td>\n      <td>12,742 km</td>\n    </tr>\n    <tr>\n      <th>Mass:</th>\n      <td>6.39 × 10^23 kg</td>\n      <td>5.97 × 10^24 kg</td>\n    </tr>\n    <tr>\n      <th>Moons:</th>\n      <td>2</td>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>Distance from Sun:</th>\n      <td>227,943,824 km</td>\n      <td>149,598,262 km</td>\n    </tr>\n    <tr>\n      <th>Length of Year:</th>\n      <td>687 Earth days</td>\n      <td>365.24 days</td>\n    </tr>\n    <tr>\n      <th>Temperature:</th>\n      <td>-87 to -5 °C</td>\n      <td>-88 to 58°C</td>\n    </tr>\n  </tbody>\n</table>'

In [44]:
# Write the same HTML table to mars_table.html in the working directory
clean_mars_df.to_html(buf='mars_table.html')

#### Hemisphere Images

In [47]:
# Site root plus the per-hemisphere page names that get appended to it
base_url = 'https://marshemispheres.com/'
hemi_urls = ['cerberus.html', 'schiaparelli.html', 'syrtis.html', 'valles.html']

In [48]:
# Loop through hemisphere list and scrape images and titles.
# The two lists are filled in lockstep: img_urls[i] belongs to hemi_titles[i].
img_urls = []
hemi_titles = []

# Launch a visible (non-headless) Chrome session driven by splinter.
executable_path = {'executable_path': ChromeDriverManager().install()}
browser = Browser('chrome', **executable_path, headless=False)

# try/finally guarantees the browser is shut down even if one of the pages
# fails to load or parse; without it a failed run leaves Chrome/chromedriver open.
try:
    for url_extension in hemi_urls:

        # Visit webpage and parse the rendered page source
        browser.visit(base_url + url_extension)
        html = browser.html
        soup = bs(html, "html.parser")

        # Get the image: first link inside the <div class="downloads"> block
        downloads = soup.find('div', class_='downloads')
        img_url = downloads.find('a')['href']

        # The href is appended to the site root (base_url ends with "/").
        full_img_url = f"{base_url}{img_url}"

        img_urls.append(full_img_url)

        # Get the title from the cover heading, without the " Enhanced" suffix
        cover = soup.find('div', class_='cover')
        title = cover.h2.text.replace(' Enhanced', '')
        hemi_titles.append(title)

        print(f"{full_img_url} and {title} added.")
finally:
    # Close the browser
    browser.quit()



Current google-chrome version is 91.0.4472
Get LATEST driver version for 91.0.4472
Driver [C:\Users\skim3\.wdm\drivers\chromedriver\win32\91.0.4472.101\chromedriver.exe] found in cache


https://marshemispheres.com/images/full.jpg and Cerberus Hemisphere added.
https://marshemispheres.com/images/schiaparelli_enhanced-full.jpg and Schiaparelli Hemisphere added.
https://marshemispheres.com/images/syrtis_major_enhanced-full.jpg and Syrtis Major Hemisphere added.
https://marshemispheres.com/images/valles_marineris_enhanced-full.jpg and Valles Marineris Hemisphere added.


In [51]:
# Pair each scraped title with its image url, one dictionary per hemisphere
hemi_img_urls = [
    {'title': hemi_title, 'img_url': hemi_img}
    for hemi_title, hemi_img in zip(hemi_titles, img_urls)
]

hemi_img_urls

[{'title': 'Cerberus Hemisphere',
  'img_url': 'https://marshemispheres.com/images/full.jpg'},
 {'title': 'Schiaparelli Hemisphere',
  'img_url': 'https://marshemispheres.com/images/schiaparelli_enhanced-full.jpg'},
 {'title': 'Syrtis Major Hemisphere',
  'img_url': 'https://marshemispheres.com/images/syrtis_major_enhanced-full.jpg'},
 {'title': 'Valles Marineris Hemisphere',
  'img_url': 'https://marshemispheres.com/images/valles_marineris_enhanced-full.jpg'}]