In [2]:
# dependencies 
import pandas as pd
from bs4 import BeautifulSoup as bs
import os 
import time
from splinter import Browser
from urllib.parse import urlsplit
import requests

In [3]:
# chromedriver executable path
executable_path = {'executable_path': 'chromedriver'}
browser = Browser('chrome', **executable_path, headless=False)

In [4]:
# url to visit 
news_url = 'https://mars.nasa.gov/news/'
browser.visit(news_url)

In [5]:
# write to html with bs
html = browser.html
soup = bs(html,'html.parser')

In [6]:
# collect news_title and <p> text, assign the text to variables
news_title = soup.find('div',class_='content_title').text
news_p = soup.find('div', class_='article_teaser_body').text
print(f'news_title: {news_title}')
print(f'news_p: {news_p}')

news_title: For InSight, Dust Cleanings Will Yield New Science
news_p: Wind can be crucial to clearing dust from spacecraft solar panels on Mars. With InSight's meteorological sensors, scientists get their first measurements of wind and dust interacting "live" on the Martian surface.  


In [7]:
# visit the url for JPL Featured Space Image
img_url_featured = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'
browser.visit(img_url_featured)

# find the featured image fullsize url with bs
html_image = browser.html
soup = bs(html_image, 'html.parser')

featured_img_url = soup.find('article')['style'].replace('background-image: url(','').replace(');', '')[1:-1]
main_url = 'https://www.jpl.nasa.gov'
featured_image_url = main_url + featured_img_url
featured_image_url

'https://www.jpl.nasa.gov/spaceimages/images/wallpaper/PIA18899-1920x1200.jpg'

In [8]:
# visit url to collect Mars weather
weather_url = 'https://twitter.com/marswxreport?lang=en'
browser.visit(weather_url)

In [9]:
# scrape the latest weather with bs
weather_html = browser.html
soup = bs(weather_html, 'html.parser')
mars_weather = soup.find("p", class_="TweetTextSize TweetTextSize--normal js-tweet-text tweet-text").text
print(mars_weather)

InSight sol 156 (2019-05-05) low -99.2ºC (-146.6ºF) high -18.1ºC (-0.6ºF)
winds from the SW at 4.7 m/s (10.5 mph) gusting to 13.8 m/s (30.8 mph)
pressure at 7.40 hPapic.twitter.com/FlGDeYg8hi


In [10]:
# set url where we will scrape Mars facts
facts_url = 'http://space-facts.com/mars/'

In [11]:
scrape_table = pd.read_html(facts_url)
scrape_table[0]

Unnamed: 0,0,1
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.42 x 10^23 kg (10.7% Earth)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.52 AU)"
5,Orbit Period:,687 days (1.9 years)
6,Surface Temperature:,-153 to 20 °C
7,First Record:,2nd millennium BC
8,Recorded By:,Egyptian astronomers


In [12]:
# load scraped table into dataframe, set titles
mars_facts_df = scrape_table[0]
mars_facts_df.rename(columns={0: 'Fact', 1: "Value"}, inplace=True)
mars_facts_df.set_index('Fact')
mars_facts_df

Unnamed: 0,Fact,Value
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.42 x 10^23 kg (10.7% Earth)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.52 AU)"
5,Orbit Period:,687 days (1.9 years)
6,Surface Temperature:,-153 to 20 °C
7,First Record:,2nd millennium BC
8,Recorded By:,Egyptian astronomers


In [13]:
# transform to html table
mars_html_table = mars_facts_df.to_html()
mars_html_table.replace('\n', '')
print(mars_html_table)
browser.quit()

<table border="1" class="dataframe">
  <thead>
    <tr style="text-align: right;">
      <th></th>
      <th>Fact</th>
      <th>Value</th>
    </tr>
  </thead>
  <tbody>
    <tr>
      <th>0</th>
      <td>Equatorial Diameter:</td>
      <td>6,792 km</td>
    </tr>
    <tr>
      <th>1</th>
      <td>Polar Diameter:</td>
      <td>6,752 km</td>
    </tr>
    <tr>
      <th>2</th>
      <td>Mass:</td>
      <td>6.42 x 10^23 kg (10.7% Earth)</td>
    </tr>
    <tr>
      <th>3</th>
      <td>Moons:</td>
      <td>2 (Phobos &amp; Deimos)</td>
    </tr>
    <tr>
      <th>4</th>
      <td>Orbit Distance:</td>
      <td>227,943,824 km (1.52 AU)</td>
    </tr>
    <tr>
      <th>5</th>
      <td>Orbit Period:</td>
      <td>687 days (1.9 years)</td>
    </tr>
    <tr>
      <th>6</th>
      <td>Surface Temperature:</td>
      <td>-153 to 20 °C</td>
    </tr>
    <tr>
      <th>7</th>
      <td>First Record:</td>
      <td>2nd millennium BC</td>
    </tr>
    <tr>
      <th>8</th>
      <td>

In [25]:
# chromedriver executable path
executable_path = {'executable_path': 'chromedriver'}
browser = Browser('chrome', **executable_path, headless=False)
# set url where we will scrape hemispheres images
base_url = 'https://astrogeology.usgs.gov'
url = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'

# Retrieve page with the requests module
response = requests.get(url)

# create bs object and visit url
soup = bs(response.text, 'html.parser')
browser.visit(url)

# parse html with bs
html = browser.html
soup = bs(html, 'html.parser')

# results are returned as an iterable list
hemispheres = soup.find_all('div', class_='item')

# Loop through returned results
hemisphere_img_urls = []


In [26]:
for result in hemispheres:

    try:
        # find title

        title = result.find('h3').text

        # find link
        link = result.find('a', class_='itemLink product-item')['href']
        print("title: ", title)

        # use the full url
        full_link = base_url + link
        print("full-link: ", full_link)

        # go to the link to get to the page with the full image
        response = requests.get(full_link)
        
        # parse with bs 
        soup = bs(response.text, 'html.parser')

        try:

            # get full image url from href
            download = soup.find('div', class_='downloads')
            full_href = download.find('a')['href']
            print('full_href: ', full_href)

            # put title and image url into dictionary
            hemisphere_img_urls.append({'title': title, 'img_url': full_href})


        except Exception as f:
            print('f: ', f)

    except Exception as e:
        print('e: ', e)

hemisphere_img_urls

title:  Cerberus Hemisphere Enhanced
full-link:  https://astrogeology.usgs.gov/search/map/Mars/Viking/cerberus_enhanced
full_href:  http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg
title:  Schiaparelli Hemisphere Enhanced
full-link:  https://astrogeology.usgs.gov/search/map/Mars/Viking/schiaparelli_enhanced
full_href:  http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg
title:  Syrtis Major Hemisphere Enhanced
full-link:  https://astrogeology.usgs.gov/search/map/Mars/Viking/syrtis_major_enhanced
full_href:  http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg
title:  Valles Marineris Hemisphere Enhanced
full-link:  https://astrogeology.usgs.gov/search/map/Mars/Viking/valles_marineris_enhanced
full_href:  http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg


[{'title': 'Cerberus Hemisphere Enhanced',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg'},
 {'title': 'Schiaparelli Hemisphere Enhanced',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg'},
 {'title': 'Syrtis Major Hemisphere Enhanced',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg'},
 {'title': 'Valles Marineris Hemisphere Enhanced',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg'}]