In [1]:
# Import dependencies
from bs4 import BeautifulSoup as bs
import requests
from splinter import Browser
from webdriver_manager.chrome import ChromeDriverManager
import pandas as pd

In [2]:
# Start a visible (non-headless) Chrome session for scraping.
# WebDriver Manager downloads (or reuses from its cache) a chromedriver
# and returns the path to it.
executable_path = dict(executable_path=ChromeDriverManager().install())
browser = Browser('chrome', headless=False, **executable_path)



Current google-chrome version is 91.0.4472
Get LATEST driver version for 91.0.4472
Driver [/Users/juliabrunett/.wdm/drivers/chromedriver/mac64/91.0.4472.19/chromedriver] found in cache


## NASA Mars News

In [3]:
# Define the url of the Mars news site that the following cells visit and scrape
url = 'https://redplanetscience.com/'

In [4]:
# Visit the website using splinter: loads `url` in the browser opened above
browser.visit(url)

In [5]:
# Grab the latest news title and teaser paragraph from the page that is
# currently loaded in the browser.

# Snapshot the rendered page and parse it
html = browser.html
soup = bs(html, 'html.parser')

# Define the container: find() returns the first matching <section>,
# i.e. the first article listed on the page
container = soup.find('section', class_='image_and_description_container')

# Grab the title and paragraph text from inside that container
news_title = container.find('div', class_='content_title').text
news_p = container.find('div', class_='article_teaser_body').text

print(news_title)
print(news_p)
    

A Year of Surprising Science From NASA's InSight Mars Mission
A batch of new papers summarizes the lander's findings above and below the surface of the Red Planet.


## JPL Mars Space Images

In [6]:
# Url for jpl images (rebinds `url`; the next cell uses it as the base of the image link)
url = 'https://spaceimages-mars.com/'
# Visit the website in the splinter-controlled browser
browser.visit(url)

In [7]:
# Find the url of the featured image on the page currently loaded in the browser.

# Snapshot the rendered page and parse it
html = browser.html
soup = bs(html, 'html.parser')

# Define the container that holds the featured-image link
container = soup.find('div', class_='floating_text_area')

# Click on the full image button so the browser shows the full image.
# NOTE: the link below is read from the snapshot taken *before* this click,
# so the click changes what the browser displays but not the scraped href.
browser.links.find_by_partial_text('FULL IMAGE').click()

# Define the box: the anchor whose href points at the image file
box = container.find('a', class_='showimg fancybox-thumbs')

# The href is site-relative; prefix it with the site url (which ends in '/')
featured_image_url = url + box['href']

print(featured_image_url)

https://spaceimages-mars.com/image/featured/mars3.jpg


## Mars Facts:

In [8]:
# Url for table facts (rebinds `url`; read directly by pandas in the next cell, no browser visit needed)
url = 'https://galaxyfacts-mars.com/'

In [9]:
# Find table data and load into pandas:
# read_html returns a list with one DataFrame per table found at the url
tables = pd.read_html(url)
# Display the list to see which entry holds the Mars - Earth comparison
tables

[                         0                1                2
 0  Mars - Earth Comparison             Mars            Earth
 1                Diameter:         6,779 km        12,742 km
 2                    Mass:  6.39 × 10^23 kg  5.97 × 10^24 kg
 3                   Moons:                2                1
 4       Distance from Sun:   227,943,824 km   149,598,262 km
 5          Length of Year:   687 Earth days      365.24 days
 6             Temperature:     -87 to -5 °C      -88 to 58°C,
                       0                              1
 0  Equatorial Diameter:                       6,792 km
 1       Polar Diameter:                       6,752 km
 2                 Mass:  6.39 × 10^23 kg (0.11 Earths)
 3                Moons:          2 ( Phobos & Deimos )
 4       Orbit Distance:       227,943,824 km (1.38 AU)
 5         Orbit Period:           687 days (1.9 years)
 6  Surface Temperature:                   -87 to -5 °C
 7         First Record:              2nd millennium BC

In [10]:
# Build the Mars/Earth comparison frame from the first scraped table.
# The table arrives with integer column labels and its real header as row 0,
# so: promote row 0 to column names, index by the description column,
# then drop the now-redundant header row.
comparison_raw = tables[0]
mars_earth_df = (
    comparison_raw
    .rename(columns=comparison_raw.iloc[0])
    .set_index("Mars - Earth Comparison")
    .iloc[1:]
)
mars_earth_df

Unnamed: 0_level_0,Mars,Earth
Mars - Earth Comparison,Unnamed: 1_level_1,Unnamed: 2_level_1
Diameter:,"6,779 km","12,742 km"
Mass:,6.39 × 10^23 kg,5.97 × 10^24 kg
Moons:,2,1
Distance from Sun:,"227,943,824 km","149,598,262 km"
Length of Year:,687 Earth days,365.24 days
Temperature:,-87 to -5 °C,-88 to 58°C


In [23]:
# Render the comparison frame as an HTML table string.
# The extra CSS classes are emitted alongside pandas' default "dataframe" class.
table_classes = ["table", "table-striped", "table-bordered", "table-hover"]
html_table = mars_earth_df.to_html(
    classes=table_classes,
    border=0,
    justify="center",
    index_names=False,  # leave out the index name ("Mars - Earth Comparison")
)

In [24]:
# Print html table for mars facts (print shows the raw markup with real line breaks)
print(html_table)

<table border="0" class="dataframe table table-striped table-bordered table-hover">
  <thead>
    <tr style="text-align: center;">
      <th></th>
      <th>Mars</th>
      <th>Earth</th>
    </tr>
  </thead>
  <tbody>
    <tr>
      <th>Diameter:</th>
      <td>6,779 km</td>
      <td>12,742 km</td>
    </tr>
    <tr>
      <th>Mass:</th>
      <td>6.39 × 10^23 kg</td>
      <td>5.97 × 10^24 kg</td>
    </tr>
    <tr>
      <th>Moons:</th>
      <td>2</td>
      <td>1</td>
    </tr>
    <tr>
      <th>Distance from Sun:</th>
      <td>227,943,824 km</td>
      <td>149,598,262 km</td>
    </tr>
    <tr>
      <th>Length of Year:</th>
      <td>687 Earth days</td>
      <td>365.24 days</td>
    </tr>
    <tr>
      <th>Temperature:</th>
      <td>-87 to -5 °C</td>
      <td>-88 to 58°C</td>
    </tr>
  </tbody>
</table>


In [58]:
# Split the generated facts table back into flat lists of cell text.

# Parse the HTML string produced by to_html; a dedicated name keeps this
# from overwriting the `soup` used for the scraped web pages
table_soup = bs(html_table, 'html.parser')

# <th> cells hold the column and row labels, <td> cells hold the values
header_list = [th.text for th in table_soup.find_all('th')]
data_list = [td.text for td in table_soup.find_all('td')]

data_list

['6,779 km',
 '12,742 km',
 '6.39 × 10^23 kg',
 '5.97 × 10^24 kg',
 '2',
 '1',
 '227,943,824 km',
 '149,598,262 km',
 '687 Earth days',
 '365.24 days',
 '-87 to -5 °C',
 '-88 to 58°C']

## Mars Hemispheres:

In [13]:
# Define the url of the Mars hemispheres site (rebinds `url`; used later as the base of each image link)
url = 'https://marshemispheres.com/'

# Visit the url in the splinter-controlled browser
browser.visit(url)

In [14]:
# Results container: one {"title", "img_url"} dict per hemisphere gets appended later
hemisphere_image_urls = list()

# Snapshot the hemispheres landing page and parse it
html = browser.html
soup = bs(html, 'html.parser')

# The first four <h3> tags on the page, taken as the hemisphere descriptions
descriptions = soup.find_all('h3', limit=4)

In [15]:
# Collect the title and full-size image url for each hemisphere.

# Start from an empty list here so that re-running this cell rebuilds the
# results instead of appending a second copy of every hemisphere
hemisphere_image_urls = []

# Iterate through the descriptions
for title in descriptions:

    # Click on the description to open that hemisphere's detail page
    browser.links.find_by_partial_text(title.text).click()

    # Parse the detail page that is now loaded
    html = browser.html
    soup = bs(html, 'html.parser')

    # Find the downloads box and its first list item
    downloads = soup.find('div', class_='downloads')
    img_list = downloads.find('li')

    # The first item's link is site-relative; prefix it with the site url
    img_url = url + img_list.a['href']

    # Record this hemisphere
    hemisphere_image_urls.append({
        "title": title.text,
        "img_url": img_url
    })

    # Navigate back to the main page so the next description can be clicked
    browser.back()

In [16]:
# Show the collected hemisphere titles and image urls
print(hemisphere_image_urls)

# Scraping is finished: close the automated browser so the Chrome and
# chromedriver processes started at the top of the notebook do not linger
browser.quit()

[{'title': 'Cerberus Hemisphere Enhanced', 'img_url': 'https://marshemispheres.com/images/full.jpg'}, {'title': 'Schiaparelli Hemisphere Enhanced', 'img_url': 'https://marshemispheres.com/images/schiaparelli_enhanced-full.jpg'}, {'title': 'Syrtis Major Hemisphere Enhanced', 'img_url': 'https://marshemispheres.com/images/syrtis_major_enhanced-full.jpg'}, {'title': 'Valles Marineris Hemisphere Enhanced', 'img_url': 'https://marshemispheres.com/images/valles_marineris_enhanced-full.jpg'}]
