In [1]:
from urllib.parse import urljoin

import pandas as pd
from bs4 import BeautifulSoup
from splinter import Browser
from webdriver_manager.chrome import ChromeDriverManager

In [2]:
# Let webdriver_manager locate the chromedriver binary, then open a visible
# (non-headless) Chrome window that splinter drives for the rest of the notebook.
executable_path = dict(executable_path=ChromeDriverManager().install())
browser = Browser('chrome', headless=False, **executable_path)



Current google-chrome version is 91.0.4472
Get LATEST driver version for 91.0.4472
Driver [C:\Users\tpatel\.wdm\drivers\chromedriver\win32\91.0.4472.19\chromedriver.exe] found in cache


# NASA Mars News
Scrape the Mars News Site and collect the latest News Title and Paragraph Text to be referenced later.

In [3]:
# Mars news site to scrape; load it in the splinter-controlled browser
url = 'https://redplanetscience.com/'
browser.visit(url)

In [4]:
# Snapshot the HTML the browser currently holds and parse it with BeautifulSoup.
# NOTE(review): the snapshot is taken right after visit(); if the site fills in
# its articles via JavaScript a short wait may be needed first - confirm.
html = browser.html
soup = BeautifulSoup(html, 'html.parser')


In [5]:
# Gather the text of every <div class="content_title"> on the news page
titles = soup.find_all('div', class_='content_title')
news_title = [title_div.text for title_div in titles]

news_title

['NASA to Hold Mars 2020 Perseverance Rover Launch Briefing',
 "Screening Soon: 'The Pathfinders' Trains Lens on Mars",
 'Join NASA for the Launch of the Mars 2020 Perseverance Rover',
 "NASA's Mars 2020 Rover Goes Coast-to-Coast to Prep for Launch",
 "A Martian Roundtrip: NASA's Perseverance Rover Sample Tubes",
 "NASA's New Mars Rover Is Ready for Space Lasers",
 'NASA Administrator Statement on Moon to Mars Initiative, FY 2021 Budget',
 "8 Martian Postcards to Celebrate Curiosity's Landing Anniversary",
 'Mars 2020 Unwrapped and Ready for More Testing',
 "NASA-JPL Names 'Rolling Stones Rock' on Mars",
 "6 Things to Know About NASA's Ingenuity Mars Helicopter",
 "The Extraordinary Sample-Gathering System of NASA's Perseverance Mars Rover",
 'New Selfie Shows Curiosity, the Mars Chemist',
 "Alabama High School Student Names NASA's Mars Helicopter",
 'NASA Perseverance Mars Rover Scientists Train in the Nevada Desert']

In [6]:
# Gather the teaser text of every <div class="article_teaser_body"> on the news page
paragraphs = soup.find_all('div', class_='article_teaser_body')
news_p = [teaser_div.text for teaser_div in paragraphs]

news_p

["Learn more about the agency's next Red Planet mission during a live event on June 17.",
 'With the Mars 2020 mission ramping up, the documentary — the first of four about past JPL missions to the Red Planet to be shown at Caltech — tells a gripping backstory.',
 'No matter where you live, choose from a menu of activities to join NASA as we "Countdown to Mars" and launch the Perseverance rover to the Red Planet.',
 "The agency's first step in returning rocks from Mars just arrived at Kennedy Space Center. The Mars 2020 team now begins readying for a launch to the Red Planet this July.",
 "Marvels of engineering, the rover's sample tubes must be tough enough to safely bring Red Planet samples on the long journey back to Earth in immaculate condition. ",
 'Perseverance is one of a few Mars spacecraft carrying laser retroreflectors. The devices could provide new science and safer Mars landings in the future.',
 "Jim Bridenstine addresses NASA's ambitious plans for the coming years, inclu

# JPL Mars Space Images - Featured Image
Visit the specified URL, use splinter to navigate the site, and find the image URL for the current Featured Mars Image.

In [7]:
# Space images site; `url` is reused below as the base for the featured image link
url = 'https://spaceimages-mars.com/'
browser.visit(url)

In [8]:
# Parse the page the browser is currently showing
html = browser.html
soup = BeautifulSoup(html, 'html.parser')

# The featured image link is the <a> inside the "floating_text_area" div
div_data = soup.find_all('div', class_='floating_text_area')
featured_link = div_data[0].find('a') if div_data else None
if featured_link is None or not featured_link.get('href'):
    # Fail loudly instead of hitting a NameError, or silently reusing a stale
    # `image_url` that another cell may have left in the kernel
    raise ValueError(f"No featured image link found on {url}")
image_url = featured_link['href']

# Resolve the (possibly relative) href against the site url to get the
# fully qualified image url
featured_image_url = urljoin(url, image_url)

print(featured_image_url)

https://spaceimages-mars.com/image/featured/mars1.jpg


# Mars Facts
Visit the specified URL and scrape the table containing facts about the planet, including Diameter, Mass, etc. Use Pandas to convert the data to an HTML table string.

In [9]:
# Mars facts page; it is handed straight to pd.read_html, so no browser.visit here
url = 'https://galaxyfacts-mars.com/'

In [10]:
# Read every HTML table on the facts page into a list of DataFrames
tables = pd.read_html(url)
tables

[                         0                1                2
 0  Mars - Earth Comparison             Mars            Earth
 1                Diameter:         6,779 km        12,742 km
 2                    Mass:  6.39 × 10^23 kg  5.97 × 10^24 kg
 3                   Moons:                2                1
 4       Distance from Sun:   227,943,824 km   149,598,262 km
 5          Length of Year:   687 Earth days      365.24 days
 6             Temperature:     -87 to -5 °C      -88 to 58°C,
                       0                              1
 0  Equatorial Diameter:                       6,792 km
 1       Polar Diameter:                       6,752 km
 2                 Mass:  6.39 × 10^23 kg (0.11 Earths)
 3                Moons:          2 ( Phobos & Deimos )
 4       Orbit Distance:       227,943,824 km (1.38 AU)
 5         Orbit Period:           687 days (1.9 years)
 6  Surface Temperature:                   -87 to -5 °C
 7         First Record:              2nd millennium BC

In [11]:
# Use the first table found (the Mars - Earth comparison) to operate on
df = tables[0]
df

Unnamed: 0,0,1,2
0,Mars - Earth Comparison,Mars,Earth
1,Diameter:,"6,779 km","12,742 km"
2,Mass:,6.39 × 10^23 kg,5.97 × 10^24 kg
3,Moons:,2,1
4,Distance from Sun:,"227,943,824 km","149,598,262 km"
5,Length of Year:,687 Earth days,365.24 days
6,Temperature:,-87 to -5 °C,-88 to 58°C


In [12]:
# Name the columns and drop the first row, which only repeats the column captions.
# The frame is rebuilt from tables[0] on every run so the cell is idempotent: the
# previous in-place drop on `df` removed one more data row each time it was re-run.
# The remaining rows keep their original index labels (1..6).
df = (
    tables[0]
    .rename(columns={0: "Description", 1: "Mars", 2: "Earth"})
    .iloc[1:]
)
df.head()

Unnamed: 0,Description,Mars,Earth
1,Diameter:,"6,779 km","12,742 km"
2,Mass:,6.39 × 10^23 kg,5.97 × 10^24 kg
3,Moons:,2,1
4,Distance from Sun:,"227,943,824 km","149,598,262 km"
5,Length of Year:,687 Earth days,365.24 days


In [13]:
# Convert the dataframe into an HTML table string (the index is emitted as the first column)
html_table = df.to_html()
html_table

'<table border="1" class="dataframe">\n  <thead>\n    <tr style="text-align: right;">\n      <th></th>\n      <th>Description</th>\n      <th>Mars</th>\n      <th>Earth</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>1</th>\n      <td>Diameter:</td>\n      <td>6,779 km</td>\n      <td>12,742 km</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>Mass:</td>\n      <td>6.39 × 10^23 kg</td>\n      <td>5.97 × 10^24 kg</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>Moons:</td>\n      <td>2</td>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>Distance from Sun:</td>\n      <td>227,943,824 km</td>\n      <td>149,598,262 km</td>\n    </tr>\n    <tr>\n      <th>5</th>\n      <td>Length of Year:</td>\n      <td>687 Earth days</td>\n      <td>365.24 days</td>\n    </tr>\n    <tr>\n      <th>6</th>\n      <td>Temperature:</td>\n      <td>-87 to -5 °C</td>\n      <td>-88 to 58°C</td>\n    </tr>\n  </tbody>\n</table>'

# Mars Hemispheres
Visit the specified URL and follow each hemisphere link to find the URL of the full-resolution image. Save the image URL string and the hemisphere title into a Python dictionary, and append each dictionary to a list. The list will contain one dictionary for each hemisphere.

In [14]:
# Load the hemispheres index page and parse it; `url` doubles as the base for
# the relative links followed in the loop below
url='https://marshemispheres.com/'
browser.visit(url)
html = browser.html
soup = BeautifulSoup(html, 'html.parser')

In [15]:
# One <div class="item"> per hemisphere on the index page
items=soup.find_all('div', class_='item')

In [17]:
hemisphere_image_urls = []

# Loop through each hemisphere item found on the index page
for item in items:
    # Initialised up front so the error message below can name the failing item
    hemisphere_title = None
    try:
        # The title is the <h3> text inside the item's "description" div
        hemisphere_title = item.find('div', class_='description').h3.text

        # Link to this hemisphere's detail page (first <a> in the item)
        hemisphere_url = item.find('a')['href']

        # Visit the detail page and parse it under its own names, so the index-page
        # `soup`/`html` and the featured-image `image_url` are not overwritten
        browser.visit(urljoin(url, hemisphere_url))
        hemisphere_soup = BeautifulSoup(browser.html, 'html.parser')

        # The first <li> on the detail page is expected to link to the full-size image
        full_image_href = hemisphere_soup.find('li').a['href']

        # Add title and fully qualified image url as one dictionary per hemisphere
        hemisphere_image_urls.append({
            "title": hemisphere_title,
            "img_url": urljoin(url, full_image_href),
        })

    except Exception as e:
        # Best-effort: report which hemisphere failed and carry on with the rest
        print(f"Skipping hemisphere {hemisphere_title!r}: {e}")

print(hemisphere_image_urls)

[{'title': 'Cerberus Hemisphere Enhanced', 'img_url': 'https://marshemispheres.com/images/full.jpg'}, {'title': 'Schiaparelli Hemisphere Enhanced', 'img_url': 'https://marshemispheres.com/images/schiaparelli_enhanced-full.jpg'}, {'title': 'Syrtis Major Hemisphere Enhanced', 'img_url': 'https://marshemispheres.com/images/syrtis_major_enhanced-full.jpg'}, {'title': 'Valles Marineris Hemisphere Enhanced', 'img_url': 'https://marshemispheres.com/images/valles_marineris_enhanced-full.jpg'}]
