In [1]:
# Dependencies
import pandas as pd

from splinter import Browser
from bs4 import BeautifulSoup
from webdriver_manager.chrome import ChromeDriverManager

In [2]:
htmlMarsTable = './assets/pages/table.html'

In [3]:
# Setup splinter
executable_path = {'executable_path': ChromeDriverManager().install()}
browser = Browser('chrome', **executable_path, headless=False)



Current google-chrome version is 91.0.4472
Get LATEST driver version for 91.0.4472
Driver [/Users/stevewalker/.wdm/drivers/chromedriver/mac64/91.0.4472.19/chromedriver] found in cache


## NASA Mars News

In [4]:
# URL of page to be scraped
urlRPS = 'https://redplanetscience.com'
browser.visit(urlRPS)

In [5]:
# User BeautifulSoup to go to URL and get elements
html = browser.html
soup = BeautifulSoup(html, 'html.parser')

# Error handling
try:
    # Identify and return news title
    newsT = soup.find('div', class_='content_title').text
    # Identify and return news paragraph
    newsP = soup.find('div',class_='article_teaser_body').text
    # Identify and return news date
    newsD = soup.find('div',class_='list_date').text
    

    # Print results only if title and paragraph exist
    if (newsT and newsP):
        print('-------------')
        print(newsD)
        print(newsT)
        print(newsP)
except AttributeError as e:
    print(e)

-------------
June 8, 2021
Follow NASA's Perseverance Rover in Real Time on Its Way to Mars
A crisply rendered web application can show you where the agency's Mars 2020 mission is right now as it makes its way to the Red Planet for a Feb. 18, 2021, landing.


## JPL Mars Space Images - Featured Image

In [6]:
# URL of page to be scraped
urlSIM = 'https://spaceimages-mars.com'
browser.visit(urlSIM)

In [7]:
# Get HTML
html = browser.html
soup = BeautifulSoup(html, 'html.parser')

# Capture image information 
featureImageSrc = soup.find('img', class_='headerimage')['src']
#featureImageSrc #sanitycheck

# Capture image URL 
featureImageUrl = f"{urlSIM}/{featureImageSrc}"
featureImageUrl

'https://spaceimages-mars.com/image/featured/mars3.jpg'

## Mars Facts

In [20]:
# URL of page to be scraped
urlGF = 'https://galaxyfacts-mars.com'

# Load URL into pandas
tables = pd.read_html(urlGF)

#sanity checks
#tables 
#type(tables) #list

In [33]:
# Create df of the first table on the page
df = tables[0]

df=df.rename(columns={0:"Description",1:"Mars",2:"Earth"},errors="raise")
df.set_index("Description",inplace=True)
#df #sanity check    

# convert df to html
marsDataHtml = df.to_html(classes=['table','table striped','table hover']).replace('\n','')
marsDataHtml

'<table border="1" class="dataframe table table striped table hover">  <thead>    <tr style="text-align: right;">      <th></th>      <th>Mars</th>      <th>Earth</th>    </tr>    <tr>      <th>Description</th>      <th></th>      <th></th>    </tr>  </thead>  <tbody>    <tr>      <th>Mars - Earth Comparison</th>      <td>Mars</td>      <td>Earth</td>    </tr>    <tr>      <th>Diameter:</th>      <td>6,779 km</td>      <td>12,742 km</td>    </tr>    <tr>      <th>Mass:</th>      <td>6.39 × 10^23 kg</td>      <td>5.97 × 10^24 kg</td>    </tr>    <tr>      <th>Moons:</th>      <td>2</td>      <td>1</td>    </tr>    <tr>      <th>Distance from Sun:</th>      <td>227,943,824 km</td>      <td>149,598,262 km</td>    </tr>    <tr>      <th>Length of Year:</th>      <td>687 Earth days</td>      <td>365.24 days</td>    </tr>    <tr>      <th>Temperature:</th>      <td>-87 to -5 °C</td>      <td>-88 to 58°C</td>    </tr>  </tbody></table>'

## Mars Hemispheres

In [10]:
# URL of page to be scraped
urlMH = 'https://marshemispheres.com'
browser.visit(urlMH)

In [11]:
# User BSoup to go to URL and get elements
html = browser.html
soup = BeautifulSoup(html, 'html.parser')

# Create variable to hold the list of links
items = soup.find_all('div', class_='item')
#items #sanity check

# Capture list of URLS for the pictures 
# by looping through tags
urlList = []

for item in items:
    picDict = {} # going to pass in both the title and URL into dict
    
    picUrl = item.find('a')['href'] #the URL
     
    picTitle = item.find('h3').text
    picTitle = picTitle.replace(' Enhanced','') #removed unneeded syntax
    
    picDict[picTitle]=f"{urlMH}/{picUrl}"
    urlList.append(picDict)  #combines the parent URL to pic URL

#urlList #sanity check

In [12]:
# Create list of pic URLS for the full resolution pics
picsList = []

for url in urlList:
    #print(url) #sanity check
    for key, value in url.items():
        #print(key +" - " + value)
        browser.visit(value)
        
        html = browser.html
        soup = BeautifulSoup(html, 'html.parser')
        
        picsDict = {}
        
        # Capture image information 
        imageSrc = soup.find('img', class_='wide-image')['src']
        #print(ImageSrc) #sanitycheck

        # Capture full image URL 
        imageUrl = f"{urlMH}/{imageSrc}"
        #print(ImageUrl) #sanitycheck
        
        # Add above to dict
        picsDict["title"] = key
        picsDict["img_url"] = imageUrl
        
        # Add dict to list
        picsList.append(picsDict)
        
picsList

[{'title': 'Cerberus Hemisphere',
  'img_url': 'https://marshemispheres.com/images/f5e372a36edfa389625da6d0cc25d905_cerberus_enhanced.tif_full.jpg'},
 {'title': 'Schiaparelli Hemisphere',
  'img_url': 'https://marshemispheres.com/images/3778f7b43bbbc89d6e3cfabb3613ba93_schiaparelli_enhanced.tif_full.jpg'},
 {'title': 'Syrtis Major Hemisphere',
  'img_url': 'https://marshemispheres.com/images/555e6403a6ddd7ba16ddb0e471cadcf7_syrtis_major_enhanced.tif_full.jpg'},
 {'title': 'Valles Marineris Hemisphere',
  'img_url': 'https://marshemispheres.com/images/b3c7c6c9138f57b4756be9b9c43e3a48_valles_marineris_enhanced.tif_full.jpg'}]

In [13]:
browser.quit()