In [1]:
from urllib.parse import urljoin

import pandas as pd
from bs4 import BeautifulSoup
from splinter import Browser
from webdriver_manager.chrome import ChromeDriverManager

In [2]:
# Resolve a chromedriver binary via webdriver_manager, then hand its path to Splinter.
driver_path = ChromeDriverManager().install()
executable_path = {'executable_path': driver_path}

# headless=False keeps the Chrome window visible while the notebook drives it.
browser = Browser('chrome', headless=False, **executable_path)



Current google-chrome version is 91.0.4472
Get LATEST driver version for 91.0.4472
Driver [C:\Users\tpatel\.wdm\drivers\chromedriver\win32\91.0.4472.19\chromedriver.exe] found in cache


# NASA Mars News
Scrape the Mars News Site and collect the latest News Title and Paragraph Text to be referenced later.

In [3]:
# Mars news site to scrape; load it in the Splinter-controlled browser
url = 'https://redplanetscience.com/'
browser.visit(url)

In [4]:
# Grab the HTML of the page currently loaded in the browser and parse it
html = browser.html
soup = BeautifulSoup(html, 'html.parser')


In [5]:
# Every headline sits in a <div class="content_title">; keep just the text of each
titles = soup.find_all('div', class_='content_title')
news_title = [title.text for title in titles]

news_title

["From JPL's Mailroom to Mars and Beyond",
 "NASA's Mars 2020 Comes Full Circle",
 'NASA Establishes Board to Initially Review Mars Sample Return Plans',
 "Screening Soon: 'The Pathfinders' Trains Lens on Mars",
 "NASA's Mars 2020 Rover Completes Its First Drive",
 "NASA's Briefcase-Size MarCO Satellite Picks Up Honors",
 'NASA Wins Two Emmy Awards for Interactive Mission Coverage',
 'Mars Is Getting a New Robotic Meteorologist',
 "NASA-JPL Names 'Rolling Stones Rock' on Mars",
 'With Mars Methane Mystery Unsolved, Curiosity Serves Scientists a New One: Oxygen',
 "NASA's New Mars Rover Will Use X-Rays to Hunt Fossils",
 "NASA's MAVEN Observes Martian Night Sky Pulsing in Ultraviolet Light",
 "NASA Engineers Checking InSight's Weather Sensors",
 "Mars Helicopter Attached to NASA's Perseverance Rover",
 "The Extraordinary Sample-Gathering System of NASA's Perseverance Mars Rover"]

In [6]:
# Every teaser sits in a <div class="article_teaser_body">; keep just the text of each
paragraphs = soup.find_all('div', class_='article_teaser_body')
news_p = [paragraph.text for paragraph in paragraphs]

news_p

['Bill Allen has thrived as the mechanical systems design lead for three Mars rover missions, but he got his start as a teenager sorting letters for the NASA center.',
 "Aiming to pinpoint the Martian vehicle's center of gravity, engineers took NASA's 2,300-pound Mars 2020 rover for a spin in the clean room at JPL. ",
 'The board will assist with analysis of current plans and goals for one of the most difficult missions humanity has ever undertaken.',
 'With the Mars 2020 mission ramping up, the documentary — the first of four about past JPL missions to the Red Planet to be shown at Caltech — tells a gripping backstory.',
 'In a 10-plus-hour marathon, the rover steered, turned and drove in 3-foot (1-meter) increments over small ramps.',
 'The twin spacecraft, the first of their kind to fly into deep space, earn a Laureate from Aviation Week & Space Technology.',
 "NASA-JPL's coverage of the Mars InSight landing earns one of the two wins, making this the NASA center's second Emmy.",
 "S

# JPL Mars Space Images - Featured Image
Visit the specified URL, use Splinter to navigate the site, and find the image URL for the current Featured Mars Image.

In [7]:
# Featured-image site; load it in the Splinter-controlled browser
url = 'https://spaceimages-mars.com/'
browser.visit(url)

In [8]:
# Parse the page currently loaded in the browser
html = browser.html
soup = BeautifulSoup(html, 'html.parser')

# Locate the container(s) that hold the featured-image link
div_data = soup.find_all('div', class_='floating_text_area')

# Keep only links that actually carry an href, so a missing element produces a
# clear error here instead of a NameError/TypeError further down the cell.
candidate_links = (div.find('a', href=True) for div in div_data)
image_links = [link for link in candidate_links if link is not None]
if not image_links:
    raise ValueError(f"No featured image link found on {url}")

# Same choice the earlier loop made implicitly: the last matching container wins
image_url = image_links[-1]['href']

# urljoin handles relative, root-relative and absolute hrefs alike,
# which plain string concatenation does not
featured_image_url = urljoin(url, image_url)

print(featured_image_url)

https://spaceimages-mars.com/image/featured/mars3.jpg


# Mars Facts
Visit the specified URL and scrape the table containing facts about the planet, including Diameter, Mass, etc. Use Pandas to convert the data to an HTML table string.

In [9]:
# Mars facts site; read directly by pandas in the next cell (no browser visit needed)
url = 'https://galaxyfacts-mars.com/'

In [10]:
# Let pandas fetch the page and parse every HTML table on it into a list of DataFrames
tables = pd.read_html(url)
tables

[                         0                1                2
 0  Mars - Earth Comparison             Mars            Earth
 1                Diameter:         6,779 km        12,742 km
 2                    Mass:  6.39 × 10^23 kg  5.97 × 10^24 kg
 3                   Moons:                2                1
 4       Distance from Sun:   227,943,824 km   149,598,262 km
 5          Length of Year:   687 Earth days      365.24 days
 6             Temperature:     -87 to -5 °C      -88 to 58°C,
                       0                              1
 0  Equatorial Diameter:                       6,792 km
 1       Polar Diameter:                       6,752 km
 2                 Mass:  6.39 × 10^23 kg (0.11 Earths)
 3                Moons:          2 ( Phobos & Deimos )
 4       Orbit Distance:       227,943,824 km (1.38 AU)
 5         Orbit Period:           687 days (1.9 years)
 6  Surface Temperature:                   -87 to -5 °C
 7         First Record:              2nd millennium BC

In [11]:
# Use the first table found (the Mars - Earth comparison) to operate on
df = tables[0]
df

Unnamed: 0,0,1,2
0,Mars - Earth Comparison,Mars,Earth
1,Diameter:,"6,779 km","12,742 km"
2,Mass:,6.39 × 10^23 kg,5.97 × 10^24 kg
3,Moons:,2,1
4,Distance from Sun:,"227,943,824 km","149,598,262 km"
5,Length of Year:,687 Earth days,365.24 days
6,Temperature:,-87 to -5 °C,-88 to 58°C


In [12]:
# Rename columns, reset the index, and drop the first row, which only repeats
# the column headings.
# The frame is rebuilt from the raw scrape (tables[0]) rather than from `df`
# itself, and without inplace=True, so re-running this cell always gives the
# same result; the earlier in-place drop removed one more data row per re-run.
df = (
    tables[0]
    .rename(columns={0: "Description", 1: "Mars", 2: "Earth"})
    .reset_index(drop=True)
    .iloc[1:]
)
df.head()

Unnamed: 0,Description,Mars,Earth
1,Diameter:,"6,779 km","12,742 km"
2,Mass:,6.39 × 10^23 kg,5.97 × 10^24 kg
3,Moons:,2,1
4,Distance from Sun:,"227,943,824 km","149,598,262 km"
5,Length of Year:,687 Earth days,365.24 days


In [15]:
# Render the facts table as an HTML string, stripping the newlines pandas puts between tags
html_table = df.to_html().replace('\n', '')
html_table

'<table border="1" class="dataframe">  <thead>    <tr style="text-align: right;">      <th></th>      <th>Description</th>      <th>Mars</th>      <th>Earth</th>    </tr>  </thead>  <tbody>    <tr>      <th>1</th>      <td>Diameter:</td>      <td>6,779 km</td>      <td>12,742 km</td>    </tr>    <tr>      <th>2</th>      <td>Mass:</td>      <td>6.39 × 10^23 kg</td>      <td>5.97 × 10^24 kg</td>    </tr>    <tr>      <th>3</th>      <td>Moons:</td>      <td>2</td>      <td>1</td>    </tr>    <tr>      <th>4</th>      <td>Distance from Sun:</td>      <td>227,943,824 km</td>      <td>149,598,262 km</td>    </tr>    <tr>      <th>5</th>      <td>Length of Year:</td>      <td>687 Earth days</td>      <td>365.24 days</td>    </tr>    <tr>      <th>6</th>      <td>Temperature:</td>      <td>-87 to -5 °C</td>      <td>-88 to 58°C</td>    </tr>  </tbody></table>'

# Mars Hemispheres
Visit the specified URL and click each hemisphere link in order to find the URL of the full-resolution image. Save the image URL string and the hemisphere title in a Python dictionary, and append that dictionary to a list. The list will contain one dictionary for each hemisphere.

In [None]:
# Open the Mars hemispheres site and parse the page currently loaded in the browser
url='https://marshemispheres.com/'
browser.visit(url)
html = browser.html
soup = BeautifulSoup(html, 'html.parser')

In [None]:
# Collect every <div class="item"> on the page; the loop below treats each one as a hemisphere entry
items=soup.find_all('div', class_='item')

In [None]:
hemisphere_image_urls = []

# Loop through each hemisphere item found on the index page
for item in items:
    try:
        # The title is the <h3> inside the item's description block
        hemisphere_title = item.find('div', class_='description').h3.text

        # Link from the index page to this hemisphere's detail page
        hemisphere_url = item.find('a')['href']

        # Visit the detail page and parse it into its own soup object, so the
        # notebook-level `html`/`soup` of the index page are not overwritten
        # (re-running the `items` cell afterwards would otherwise parse the
        # last detail page instead of the index).
        browser.visit(urljoin(url, hemisphere_url))
        detail_soup = BeautifulSoup(browser.html, 'html.parser')

        # The image link is taken from the first <li> on the detail page
        image_url = detail_soup.find('li').a['href']

        # One dictionary per hemisphere; urljoin copes with relative and
        # absolute hrefs alike, unlike plain string concatenation
        hemisphere_image_urls.append({
            "title": hemisphere_title,
            "img_url": urljoin(url, image_url),
        })

    except Exception as e:
        # Best effort: report the problem and carry on with the remaining items
        print(f"Skipping hemisphere item: {e!r}")

print(hemisphere_image_urls)