In [1]:
# Matthew Lett
from urllib.parse import urljoin

import pandas as pd
from bs4 import BeautifulSoup
from splinter import Browser
from webdriver_manager.chrome import ChromeDriverManager


In [2]:
# Set up splinter: resolve the ChromeDriver binary via webdriver_manager,
# then open a visible (non-headless) Chrome window driven by splinter.
driver_path = ChromeDriverManager().install()
executable_path = {'executable_path': driver_path}
browser = Browser('chrome', headless=False, **executable_path)



Current google-chrome version is 93.0.4577
Get LATEST driver version for 93.0.4577
Driver [C:\Users\matt\.wdm\drivers\chromedriver\win32\93.0.4577.63\chromedriver.exe] found in cache


## NASA Mars News

In [3]:
# Point the automated browser at the Mars news page.
# Note: `url` is reassigned by later sections of this notebook.
url = 'https://redplanetscience.com/'
browser.visit(url)

In [4]:
# Wait (up to 5 seconds) for at least one headline element to exist before
# reading the page source. Without this, a Restart & Run All can snapshot the
# page before the article list is present, leaving `titles` empty below.
browser.is_element_present_by_css('div.content_title', wait_time=5)
html = browser.html

# Parse HTML with Beautiful Soup
soup = BeautifulSoup(html, 'html.parser')

# Retrieve all elements that contain headline information
titles = soup.find_all('div', class_='content_title')

# Keep the text of the first headline found on the page
news_title = titles[0].get_text()
news_title

"NASA InSight's 'Mole' Is Out of Sight"

In [5]:
# Retrieve all elements that contain article teaser text
# (reuses the `soup` parsed from the news page in the previous cell)
paragraphs = soup.find_all('div', class_='article_teaser_body')

news_p = paragraphs[0].get_text()  # text of the first teaser on the page

print(news_p)

Now that the heat probe is just below the Martian surface, InSight's arm will scoop some additional soil on top to help it keep digging so it can take Mars' temperature.


## JPL Mars Space Images - Featured Image

In [6]:
# Point the automated browser at the featured-image site.
# `url` is reused below as the base address when building the image URL.
url = 'https://spaceimages-mars.com/'
browser.visit(url)

In [7]:
# Snapshot the landing page source as it is before any button is clicked
html = browser.html
# Parse HTML with Beautiful Soup
# NOTE(review): the next cell builds its own `img_soup` after clicking and does
# not read this `soup` -- confirm whether this parse is still needed.
soup = BeautifulSoup(html, 'html.parser')

In [8]:
# Reveal the full-size featured image and build its absolute URL.

# Click the "FULL IMAGE" button
browser.links.find_by_partial_text("FULL IMAGE").click()

# Wait (up to 5 seconds) for the enlarged image element to exist; reading the
# page too early would make the `find` below return None.
browser.is_element_present_by_css('img.fancybox-image', wait_time=5)

# Read the new html
html = browser.html
img_soup = BeautifulSoup(html, 'html.parser')

# Look for the image path in the enlarged image's `src` attribute
space_image = img_soup.find('img', class_='fancybox-image').get('src')

# Resolve the image path against the site address; urljoin gives a valid URL
# whether or not the base ends in "/" or the path starts with "/".
featured_img_url = urljoin(url, space_image)
print(featured_img_url)

https://spaceimages-mars.com/image/featured/mars3.jpg


## Mars Facts

In [9]:
# Address of the Mars facts page; pandas reads its tables in the next cell.
# (pandas itself is imported with the other dependencies in the first cell.)
url = 'https://galaxyfacts-mars.com/'

In [10]:
# pd.read_html returns a list of DataFrames, one per table found at `url`;
# keep the second one (index 1). Despite the plural name, `tables` is a
# single DataFrame from here on.
tables = pd.read_html(url)[1]
tables

Unnamed: 0,0,1
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.39 × 10^23 kg (0.11 Earths)
3,Moons:,2 ( Phobos & Deimos )
4,Orbit Distance:,"227,943,824 km (1.38 AU)"
5,Orbit Period:,687 days (1.9 years)
6,Surface Temperature:,-87 to -5 °C
7,First Record:,2nd millennium BC
8,Recorded By:,Egyptian astronomers


In [11]:
# Name the two scraped columns, then use the descriptions as the row index.
# As before, this cell is meant to run once per fresh read of the table.
tables.columns = ['description', 'data']
tables = tables.set_index('description')
tables

Unnamed: 0_level_0,data
description,Unnamed: 1_level_1
Equatorial Diameter:,"6,792 km"
Polar Diameter:,"6,752 km"
Mass:,6.39 × 10^23 kg (0.11 Earths)
Moons:,2 ( Phobos & Deimos )
Orbit Distance:,"227,943,824 km (1.38 AU)"
Orbit Period:,687 days (1.9 years)
Surface Temperature:,-87 to -5 °C
First Record:,2nd millennium BC
Recorded By:,Egyptian astronomers


In [12]:
# Render the facts DataFrame as an HTML <table> string
html_table = tables.to_html()
html_table

'<table border="1" class="dataframe">\n  <thead>\n    <tr style="text-align: right;">\n      <th></th>\n      <th>data</th>\n    </tr>\n    <tr>\n      <th>description</th>\n      <th></th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>Equatorial Diameter:</th>\n      <td>6,792 km</td>\n    </tr>\n    <tr>\n      <th>Polar Diameter:</th>\n      <td>6,752 km</td>\n    </tr>\n    <tr>\n      <th>Mass:</th>\n      <td>6.39 × 10^23 kg (0.11 Earths)</td>\n    </tr>\n    <tr>\n      <th>Moons:</th>\n      <td>2 ( Phobos &amp; Deimos )</td>\n    </tr>\n    <tr>\n      <th>Orbit Distance:</th>\n      <td>227,943,824 km (1.38 AU)</td>\n    </tr>\n    <tr>\n      <th>Orbit Period:</th>\n      <td>687 days (1.9 years)</td>\n    </tr>\n    <tr>\n      <th>Surface Temperature:</th>\n      <td>-87 to -5 °C</td>\n    </tr>\n    <tr>\n      <th>First Record:</th>\n      <td>2nd millennium BC</td>\n    </tr>\n    <tr>\n      <th>Recorded By:</th>\n      <td>Egyptian astronomers</td>\n    </tr>

## Mars Hemispheres

In [13]:
# Point the automated browser at the hemispheres index page.
# `url` is reused below as the base address when building each image URL.
url = 'https://marshemispheres.com/'
browser.visit(url)

In [14]:
# Snapshot the hemispheres index page source
html = browser.html
# Parse HTML with Beautiful Soup
# NOTE(review): the loop in the next cell re-parses the page before it reads
# `soup` -- confirm whether this parse is still needed.
soup = BeautifulSoup(html, 'html.parser')

In [15]:
# Collect the title and image URL of every "Hemisphere Enhanced" page.

# get the links for the hemispheres (only their count is used here; the links
# are looked up again inside the loop after each navigation)
links = browser.links.find_by_partial_text('Hemisphere Enhanced')

# setup an empty list
hemisphere_image_urls = []

# loop over the link positions to move to the different pages
for link_index in range(len(links)):
    # find the links on the current page again and click the one at this position
    browser.links.find_by_partial_text('Hemisphere Enhanced')[link_index].click()

    # wait (up to 5 seconds) for the image element so the parse below does
    # not run against a page that has not finished loading
    browser.is_element_present_by_css('img.wide-image', wait_time=5)

    # Read the new html
    html = browser.html
    soup = BeautifulSoup(html, 'html.parser')

    # scrape title and image url
    title_text = soup.find('h2', class_='title').text
    img_url = soup.find('img', class_='wide-image').get('src')

    # add a dictionary of the information to the list; urljoin resolves the
    # image path against the site address regardless of leading/trailing slashes
    hemisphere_image_urls.append({
        'title': title_text,
        'img_url': urljoin(url, img_url),
    })

    # return to the previous page so we can continue to the next link
    browser.back()

hemisphere_image_urls

[{'title': 'Cerberus Hemisphere Enhanced',
  'img_url': 'https://marshemispheres.com/images/f5e372a36edfa389625da6d0cc25d905_cerberus_enhanced.tif_full.jpg'},
 {'title': 'Schiaparelli Hemisphere Enhanced',
  'img_url': 'https://marshemispheres.com/images/3778f7b43bbbc89d6e3cfabb3613ba93_schiaparelli_enhanced.tif_full.jpg'},
 {'title': 'Syrtis Major Hemisphere Enhanced',
  'img_url': 'https://marshemispheres.com/images/555e6403a6ddd7ba16ddb0e471cadcf7_syrtis_major_enhanced.tif_full.jpg'},
 {'title': 'Valles Marineris Hemisphere Enhanced',
  'img_url': 'https://marshemispheres.com/images/b3c7c6c9138f57b4756be9b9c43e3a48_valles_marineris_enhanced.tif_full.jpg'}]

In [17]:
# Close the automated browser now that the scraping above has finished
browser.quit()