In [1]:
# Dependencies
from bs4 import BeautifulSoup
import requests
from splinter import Browser
import pandas as pd
import time

In [2]:
# Start a visible (non-headless) Chrome session driven by the local chromedriver binary
executable_path = dict(executable_path='chromedriver.exe')
browser = Browser('chrome', headless=False, **executable_path)

In [3]:
# Getting NASA Mars News

# URL of page to be scraped
url = 'https://mars.nasa.gov/news/?page=0&per_page=40&order=publish_date+desc%2Ccreated_at+desc&search=&category=19%2C165%2C184%2C204&blank_scope=Latest'

# Load the page in the Splinter-driven browser
browser.visit(url)

# Wait (up to 10 s) until an 'li.slide' element is present, so the HTML
# snapshot below is not taken before the article entries exist in the DOM.
# The return value is not checked: if the element never shows up, the lookup
# in the following cell fails exactly as it did before.
browser.is_element_present_by_css('li.slide', wait_time=10)

# Snapshot the current page source and parse it
html = browser.html
soup = BeautifulSoup(html, 'html.parser')

In [4]:
# First <li> element in the parsed page that carries the "slide" class
result = soup.find('li', attrs={'class': 'slide'})

In [5]:
# News title: text of the "content_title" div inside the first slide
news_title = result.find('div', attrs={'class': 'content_title'}).get_text()
news_title

'NASA Seeking Partner in Contest to Name Next Mars Rover'

In [6]:
# News paragraph: text of the "article_teaser_body" div inside the first slide
news_p = result.find('div', attrs={'class': 'article_teaser_body'}).get_text()
news_p

'NASA has a class assignment for corporations, nonprofits and educational organizations involved in science and space exploration: partner with the agency to inspire future engineers and scientists by sponsoring a contest to name the next rover to venture to the Red Planet.'

In [7]:
# Getting JPL Mars Space Images - Featured Image

# Reuse the Chrome session opened at the top of the notebook. Starting a second
# Browser here would rebind `browser` and leave the first Chrome window running
# with no handle left to quit it.

# URL of page to be scraped
url = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'

# visit the page
browser.visit(url)

# click "FULL IMAGE"
browser.click_link_by_partial_text('FULL IMAGE')

# wait for 1 second so the page can react to the click before the next link is looked up
time.sleep(1)

# click "more info"
browser.click_link_by_partial_text('more info')

# Wait (up to 10 s) for the <figure class="lede"> element that the next cell
# reads, so the snapshot is not taken before the detail page has loaded.
browser.is_element_present_by_css('figure.lede', wait_time=10)

# Retrieve page
html = browser.html
soup = BeautifulSoup(html, 'html.parser')

In [8]:
# The link inside <figure class="lede"> holds a site-relative href;
# prefix it with the JPL origin to obtain the absolute image URL
featured_image_url_element = soup.find('figure', attrs={'class': 'lede'}).a['href']
featured_image_url = f'https://www.jpl.nasa.gov{featured_image_url_element}'
featured_image_url

'https://www.jpl.nasa.gov/spaceimages/images/largesize/PIA19046_hires.jpg'

In [9]:
# Getting Mars Weather

# URL of page to be scraped
url = 'https://twitter.com/marswxreport?lang=en'

# Retrieve page with the requests module. requests applies no timeout unless
# one is given, so set it explicitly to keep an unresponsive server from
# hanging the notebook.
response = requests.get(url, timeout=30)

# Fail here on a 4xx/5xx answer instead of parsing an error page
response.raise_for_status()

# Create BeautifulSoup object; parse with 'html.parser'
soup = BeautifulSoup(response.text, 'html.parser')

In [10]:
# Collect the text of every tweet that looks like a weather report, then keep
# the first one in page order.
tweets = soup.find_all('div', class_='tweet')

weather_list = []

for tweet in tweets:
    # A tweet container without a <p> has no text to inspect; skip it instead
    # of failing with AttributeError on None.
    if tweet.p is None:
        continue

    tweet_text = tweet.p.text

    # A weather report must mention all of these words (case-sensitive)
    if all(keyword in tweet_text for keyword in ("Sol", "high", "low", "pressure", "daylight")):
        weather_list.append(tweet_text)

# Same exception type as indexing an empty list, but with an actionable message
if not weather_list:
    raise IndexError("No weather-report tweet found in the scraped page")

mars_weather = weather_list[0]

In [11]:
# Display the weather-report text selected in the previous cell
mars_weather

'Sol 2171 (2018-09-14), high -12C/10F, low -65C/-84F, pressure at 8.79 hPa, daylight 05:43-17:59'

In [12]:
# Getting Mars Facts
# Parse every HTML table found on the page into a list of DataFrames
url = 'https://space-facts.com/mars/'

tables = pd.read_html(io=url)
tables

[                      0                              1
 0  Equatorial Diameter:                       6,792 km
 1       Polar Diameter:                       6,752 km
 2                 Mass:  6.42 x 10^23 kg (10.7% Earth)
 3                Moons:            2 (Phobos & Deimos)
 4       Orbit Distance:       227,943,824 km (1.52 AU)
 5         Orbit Period:           687 days (1.9 years)
 6  Surface Temperature:                  -153 to 20 °C
 7         First Record:              2nd millennium BC
 8          Recorded By:           Egyptian astronomers]

In [13]:
# Work on a copy of the first scraped table. Renaming the columns and setting
# the index directly on tables[0] would change the object stored in `tables`,
# and a second run of this cell would then fail because only one column would
# be left to rename.
mars_df = tables[0].copy()
mars_df.columns = ['Description', 'Value']

# set_index returns a new frame; rebinding keeps the cell re-runnable
mars_df = mars_df.set_index('Description')
mars_df

Unnamed: 0_level_0,Value
Description,Unnamed: 1_level_1
Equatorial Diameter:,"6,792 km"
Polar Diameter:,"6,752 km"
Mass:,6.42 x 10^23 kg (10.7% Earth)
Moons:,2 (Phobos & Deimos)
Orbit Distance:,"227,943,824 km (1.52 AU)"
Orbit Period:,687 days (1.9 years)
Surface Temperature:,-153 to 20 °C
First Record:,2nd millennium BC
Recorded By:,Egyptian astronomers


In [14]:
# Render the facts table as HTML and drop the newline characters so the
# markup is a single line
html_table = ''.join(mars_df.to_html().split('\n'))
html_table

'<table border="1" class="dataframe">  <thead>    <tr style="text-align: right;">      <th></th>      <th>Value</th>    </tr>    <tr>      <th>Description</th>      <th></th>    </tr>  </thead>  <tbody>    <tr>      <th>Equatorial Diameter:</th>      <td>6,792 km</td>    </tr>    <tr>      <th>Polar Diameter:</th>      <td>6,752 km</td>    </tr>    <tr>      <th>Mass:</th>      <td>6.42 x 10^23 kg (10.7% Earth)</td>    </tr>    <tr>      <th>Moons:</th>      <td>2 (Phobos &amp; Deimos)</td>    </tr>    <tr>      <th>Orbit Distance:</th>      <td>227,943,824 km (1.52 AU)</td>    </tr>    <tr>      <th>Orbit Period:</th>      <td>687 days (1.9 years)</td>    </tr>    <tr>      <th>Surface Temperature:</th>      <td>-153 to 20 °C</td>    </tr>    <tr>      <th>First Record:</th>      <td>2nd millennium BC</td>    </tr>    <tr>      <th>Recorded By:</th>      <td>Egyptian astronomers</td>    </tr>  </tbody></table>'

In [15]:
# Getting Mars Hemispheres

# Main page URL
url = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'

# Retrieve page with the requests module. requests applies no timeout unless
# one is given, so set it explicitly; then fail on a 4xx/5xx answer instead of
# parsing an error page.
response = requests.get(url, timeout=30)
response.raise_for_status()

# Create BeautifulSoup object; parse with 'html.parser'
soup = BeautifulSoup(response.text, 'html.parser')

# Making product links: each result is a <div class="item"> whose first <a>
# holds a site-relative href, so prefix it with the USGS origin
products = soup.find_all('div', class_='item')

product_links = [
    'https://astrogeology.usgs.gov' + product.a['href']
    for product in products
]

product_links

['https://astrogeology.usgs.gov/search/map/Mars/Viking/cerberus_enhanced',
 'https://astrogeology.usgs.gov/search/map/Mars/Viking/schiaparelli_enhanced',
 'https://astrogeology.usgs.gov/search/map/Mars/Viking/syrtis_major_enhanced',
 'https://astrogeology.usgs.gov/search/map/Mars/Viking/valles_marineris_enhanced']

In [16]:
# Empty List to collect one {"title", "img_url", "jpg_img_url"} dict per product page
hemisphere_image_urls = []

# Scraping Title and Image link in each product link
for product_link in product_links:
    url = product_link

    # Retrieve page with the requests module. Set a timeout so one stalled
    # request cannot hang the loop, and fail on a 4xx/5xx answer instead of
    # parsing an error page.
    response = requests.get(url, timeout=30)
    response.raise_for_status()

    # Create BeautifulSoup object; parse with 'html.parser'
    soup = BeautifulSoup(response.text, 'html.parser')

    # Getting Title (text of the first <h2> on the page)
    title = soup.find('h2').text

    # Look up the anchors of the "downloads" div once and reuse them for both links
    download_links = soup.find('div', class_='downloads').find_all('a')

    # Getting the Link of full resolution image (second anchor)
    image_link = download_links[1]['href']

    # Getting the Link of jpg image (first anchor)
    jpg_image_link = download_links[0]['href']

    # Making dictionary
    product_dic = {"title": title, "img_url": image_link, "jpg_img_url": jpg_image_link}

    # Add the dictionary to list
    hemisphere_image_urls.append(product_dic)

hemisphere_image_urls

[{'title': 'Cerberus Hemisphere Enhanced',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif',
  'jpg_img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg'},
 {'title': 'Schiaparelli Hemisphere Enhanced',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif',
  'jpg_img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg'},
 {'title': 'Syrtis Major Hemisphere Enhanced',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif',
  'jpg_img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg'},
 {'title': 'Valles Marineris Hemisphere Enhanced',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif',
  'jpg_img_url': 'http://astropedia.astrogeology.usgs