## Scraping

In [1]:
# Dependencies & Set-up
from bs4 import BeautifulSoup as bs
import requests
from splinter import Browser
from webdriver_manager.chrome import ChromeDriverManager
import pandas as pd


### NASA Mars News

In [2]:
# Set-up splinter
executable_path = {'executable_path': ChromeDriverManager().install()}
browser = Browser('chrome', **executable_path, headless=False)

# Set URL
browser.visit('https://mars.nasa.gov/news/')

html = browser.html
soup = bs(html, 'html.parser')


[WDM] - Current google-chrome version is 88.0.4324
[WDM] - Get LATEST driver version for 88.0.4324
[WDM] - Driver [/Users/mori/.wdm/drivers/chromedriver/mac64/88.0.4324.96/chromedriver] found in cache






In [3]:
# Search for news titles and teaser paragraphs
results = soup.find_all('ul', class_='item_list')

# Loop through results
for result in results:
    
    title = result.find_all('div', class_='content_title')
    paragraph = result.find_all('div', class_='article_teaser_body')
    
    # Extract the first title and paragraph, and assign to variables
    news_title = title[0].text
    news_paragraph = paragraph[0].text
    
    # Print results
    print(news_title)
    print(news_paragraph)

NASA's Perseverance Mars Rover Mission Honors Navajo Language
Working with the Navajo Nation, the rover team has named features on Mars with words from the Navajo language.


### JPL Mars Space Images - Featured Image

In [4]:
# Open browser to JPL Featured Image

# Set URL
browser.visit('https://data-class-jpl-space.s3.amazonaws.com/JPL_Space/index.html')

html = browser.html
soup = bs(html, 'html.parser')


In [5]:
# Find image relative path
image = soup.find('a', class_='showimg')['href']
print(image)


image/featured/mars3.jpg


In [6]:
# Add relative path to full URL string
featured_image_url = 'https://data-class-jpl-space.s3.amazonaws.com/JPL_Space/' + image
print(featured_image_url)

https://data-class-jpl-space.s3.amazonaws.com/JPL_Space/image/featured/mars3.jpg


### Mars Facts

In [7]:
# Establish Mars facts url
url = 'https://space-facts.com/mars/'

# Use Pandas to parse the url
facts = pd.read_html(url)

# Set the data frame
mars_facts = facts[0]

# Assign the column headers
mars_facts.columns = ['Description', 'Value']

# Set Index to Description column without row indexing
mars_facts.set_index('Description', inplace=True)

# Display
mars_facts


Unnamed: 0_level_0,Value
Description,Unnamed: 1_level_1
Equatorial Diameter:,"6,792 km"
Polar Diameter:,"6,752 km"
Mass:,6.39 × 10^23 kg (0.11 Earths)
Moons:,2 (Phobos & Deimos)
Orbit Distance:,"227,943,824 km (1.38 AU)"
Orbit Period:,687 days (1.9 years)
Surface Temperature:,-87 to -5 °C
First Record:,2nd millennium BC
Recorded By:,Egyptian astronomers


In [8]:
# Convert to html

mars_facts_table = [mars_facts.to_html(classes='data table table-borderless', index=False, header=False, border=0)]
mars_facts_table

['<table border="0" class="dataframe data table table-borderless">\n  <tbody>\n    <tr>\n      <td>6,792 km</td>\n    </tr>\n    <tr>\n      <td>6,752 km</td>\n    </tr>\n    <tr>\n      <td>6.39 × 10^23 kg (0.11 Earths)</td>\n    </tr>\n    <tr>\n      <td>2 (Phobos &amp; Deimos)</td>\n    </tr>\n    <tr>\n      <td>227,943,824 km (1.38 AU)</td>\n    </tr>\n    <tr>\n      <td>687 days (1.9 years)</td>\n    </tr>\n    <tr>\n      <td>-87 to -5 °C</td>\n    </tr>\n    <tr>\n      <td>2nd millennium BC</td>\n    </tr>\n    <tr>\n      <td>Egyptian astronomers</td>\n    </tr>\n  </tbody>\n</table>']

### Mars Hemispheres

In [9]:
# Open browser to USGS Astrogeology site
browser.visit('https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars')


In [13]:
# Set up
html = browser.html
soup = bs(html, 'html.parser')

hemi_names = []

# Search for names of all 4 hemispheres
results = soup.find_all('div', class_='collapsible results')
hemispheres = results[0].find_all('h3')

# Get text and store in list
for name in hemispheres:
    hemi_names.append(name.text)
    
hemi_names

['Cerberus Hemisphere Enhanced',
 'Schiaparelli Hemisphere Enhanced',
 'Syrtis Major Hemisphere Enhanced',
 'Valles Marineris Hemisphere Enhanced']

In [14]:
# Click through thumbnail links
thumbnail_results = results[0].find_all('a')
thumbnail_links = []

for thumbnail in thumbnail_results:
    
    if (thumbnail.img):
        thumbnail_url = 'https://astrogeology.usgs.gov/' + thumbnail['href']
        thumbnail_links.append(thumbnail_url)

thumbnail_links      

['https://astrogeology.usgs.gov//search/map/Mars/Viking/cerberus_enhanced',
 'https://astrogeology.usgs.gov//search/map/Mars/Viking/schiaparelli_enhanced',
 'https://astrogeology.usgs.gov//search/map/Mars/Viking/syrtis_major_enhanced',
 'https://astrogeology.usgs.gov//search/map/Mars/Viking/valles_marineris_enhanced']

In [16]:
# Extract image source of full-sized images
full_imgs = []

for url in thumbnail_links:
    browser.visit(url)
    
    html = browser.html
    soup = bs(html, 'html.parser')
    
    results = soup.find_all('img', class_='wide-image')
    relative_path = results[0]['src']
    
    img_link = 'https://astrogeology.usgs.gov/' + relative_path
    
    full_imgs.append(img_link)

full_imgs

['https://astrogeology.usgs.gov//cache/images/f5e372a36edfa389625da6d0cc25d905_cerberus_enhanced.tif_full.jpg',
 'https://astrogeology.usgs.gov//cache/images/3778f7b43bbbc89d6e3cfabb3613ba93_schiaparelli_enhanced.tif_full.jpg',
 'https://astrogeology.usgs.gov//cache/images/555e6403a6ddd7ba16ddb0e471cadcf7_syrtis_major_enhanced.tif_full.jpg',
 'https://astrogeology.usgs.gov//cache/images/b3c7c6c9138f57b4756be9b9c43e3a48_valles_marineris_enhanced.tif_full.jpg']

In [19]:
# Store as a list of dictionaries

mars_hemi_zip = zip(hemi_names, full_imgs)

hemisphere_image_urls = []

for title, img in mars_hemi_zip:
    
    mars_hemi_dict = {}
    mars_hemi_dict['title'] = title
    mars_hemi_dict['img_url'] = img
    
    hemisphere_image_urls.append(mars_hemi_dict)

hemisphere_image_urls

[{'title': 'Cerberus Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov//cache/images/f5e372a36edfa389625da6d0cc25d905_cerberus_enhanced.tif_full.jpg'},
 {'title': 'Schiaparelli Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov//cache/images/3778f7b43bbbc89d6e3cfabb3613ba93_schiaparelli_enhanced.tif_full.jpg'},
 {'title': 'Syrtis Major Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov//cache/images/555e6403a6ddd7ba16ddb0e471cadcf7_syrtis_major_enhanced.tif_full.jpg'},
 {'title': 'Valles Marineris Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov//cache/images/b3c7c6c9138f57b4756be9b9c43e3a48_valles_marineris_enhanced.tif_full.jpg'}]

In [20]:
browser.quit()