In [1]:
#import dependencies
import shutil
import time
from urllib.parse import urljoin

import pandas as pd
import requests
from bs4 import BeautifulSoup
from splinter import Browser

In [2]:
#path to the chromedriver
!which chromedriver

/usr/local/bin/chromedriver


In [3]:
# Resolve chromedriver from PATH so the notebook is not tied to one machine's
# install location; fall back to the previously hard-coded path if the lookup
# finds nothing.
chromedriver_path = shutil.which('chromedriver') or '/usr/local/bin/chromedriver'
executable_path = {'executable_path': chromedriver_path}

# Single Chrome session driven through splinter.
browser = Browser('chrome', **executable_path)

### NASA Mars News

In [4]:
# Visit the NASA Mars news listing
url_marsnews = 'https://mars.nasa.gov/news/'
browser.visit(url_marsnews)
# Wait (up to 10 s) for the first news item to be present instead of sleeping
# a fixed second, which races against the page's rendering. The trailing
# semicolon suppresses the boolean result as cell output.
browser.is_element_present_by_css('li.slide', wait_time=10);

In [5]:
# Capture the page source as currently held by the browser session.
html_content = browser.html

In [6]:
# Build a BeautifulSoup tree from the captured page source using the lxml parser.
soup = BeautifulSoup(markup=html_content, features='lxml')

In [7]:
# Every <li class="slide"> in the parsed page is treated as one news item.
news_articles = soup.find_all('li', class_='slide')
if not news_articles:
    # Same exception type as the bare [0] lookup would raise, but with a
    # message that points at the likely cause.
    raise IndexError(
        "No 'li.slide' news items found in the parsed page; "
        "the page may not have finished loading or its markup has changed."
    )
first_article = news_articles[0]

In [8]:
# Date text of the first listed article, exactly as rendered on the page.
news_date = first_article.find('div', class_='list_date').get_text()

In [9]:
# Title text of the first listed article, with surrounding whitespace removed.
news_title = first_article.find('div', class_='content_title').get_text().strip()

In [10]:
# Teaser paragraph of the first listed article, with surrounding whitespace removed.
news_para = first_article.find('div', class_='article_teaser_body').get_text().strip()

In [11]:
# Bundle the three scraped fields of the first article into one record.
marsNews = dict(
    datePublished=news_date,
    title=news_title,
    contents=news_para,
)

In [12]:
# Display the assembled news record to check the scraped values.
marsNews

{'contents': "Scientists with NASA's Mars orbiters have been waiting years for an event like the current Mars global dust storm.",
 'datePublished': 'July 19, 2018',
 'title': "'Storm Chasers' on Mars Searching for Dusty Secrets"}

### JPL Mars Space Images - Featured Image

In [13]:
# Visit the NASA Jet Propulsion Laboratory space-images page, filtered to Mars
url_jplMars = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'
browser.visit(url_jplMars)
# Wait (up to 10 s) for the element with id "full_image" to be present instead
# of sleeping a fixed second. The trailing semicolon suppresses the boolean
# result as cell output.
browser.is_element_present_by_id('full_image', wait_time=10);

In [14]:
# Open the full-size view of the featured image via splinter
image = browser.find_by_id('full_image')
image.click()
# Wait (up to 10 s) for the lightbox <img class="fancybox-image"> to be present
# rather than sleeping a fixed second. The trailing semicolon suppresses the
# boolean result as cell output.
browser.is_element_present_by_css('img.fancybox-image', wait_time=10);

In [15]:
# Snapshot the current page source, then parse it with the lxml parser.
html = browser.html
soup = BeautifulSoup(markup=html, features='lxml')

In [16]:
# Locate the lightbox container that wraps the featured image. At this point
# the name holds the container element, not yet a URL string.
# CSS path of the target image, kept for reference:
#fancybox-lock > div > div.fancybox-inner.fancybox-skin.fancybox-dark-skin.fancybox-dark-skin-open > img
featured_image_url = soup.find('div', attrs={'class': 'fancybox-inner'})

In [17]:
# Look the image up from the parsed page each time instead of reading the
# previous value of featured_image_url: that name is overwritten with a string
# below, so deriving the URL from it made this cell fail on a second run.
featured_image_src = (
    soup.find('div', class_='fancybox-inner')
    .find('img', class_='fancybox-image')['src']
)
# urljoin yields the same result as concatenation for a root-relative src and
# also copes with a src that is already an absolute URL.
featured_image_url = urljoin('https://www.jpl.nasa.gov', featured_image_src)
featured_image_url

'https://www.jpl.nasa.gov/spaceimages/images/mediumsize/PIA16726_ip.jpg'

### Mars Weather

In [18]:
# Visit the Mars Weather twitter account
url_tweetMars = 'https://twitter.com/marswxreport?lang=en'
browser.visit(url_tweetMars)
# Wait (up to 10 s) for a tweet list item to be present instead of sleeping a
# fixed second. The trailing semicolon suppresses the boolean result as cell
# output.
browser.is_element_present_by_css('li.js-stream-item', wait_time=10);

In [19]:
# Take the page source from the browser and turn it into a BeautifulSoup tree (lxml parser).
html = browser.html
soup = BeautifulSoup(html, features='lxml')

In [20]:
# Collect the tweet list items inside the stream container and take the first.
# CSS path of the container, kept for reference:
#timeline > div > div.stream
tweets = soup.find('div', class_='stream').find_all('li', class_='js-stream-item')
first_tweet = tweets[0]
# Match on the single 'tweet-text' class. Passing the whole multi-class string
# to class_ only matches when the element's class attribute is exactly that
# string, so any added or reordered class would make the lookup return None.
first_tweet_text = first_tweet.find('p', class_='tweet-text').get_text()
first_tweet_text

'L-2 years. #Mars2020'

### Mars Facts

In [21]:
# Mars facts page on space-facts.com
url_marsFacts = 'https://space-facts.com/mars/'

# pandas.read_html returns a list of DataFrames parsed from the page's tables;
# keep the first one and give its two columns descriptive names.
mars_tables = pd.read_html(url_marsFacts)
df = mars_tables[0]
df.columns = ['Description', 'Value']
df

Unnamed: 0,Description,Value
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.42 x 10^23 kg (10.7% Earth)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.52 AU)"
5,Orbit Period:,687 days (1.9 years)
6,Surface Temperature:,-153 to 20 °C
7,First Record:,2nd millennium BC
8,Recorded By:,Egyptian astronomers


In [22]:
# Render the facts table as an HTML string (header row kept, index column
# dropped) and remove the newline characters pandas inserts between tags.
html_table = df.to_html(header=True, index=False).replace('\n', '')
html_table

'<table border="1" class="dataframe">  <thead>    <tr style="text-align: right;">      <th>Description</th>      <th>Value</th>    </tr>  </thead>  <tbody>    <tr>      <td>Equatorial Diameter:</td>      <td>6,792 km</td>    </tr>    <tr>      <td>Polar Diameter:</td>      <td>6,752 km</td>    </tr>    <tr>      <td>Mass:</td>      <td>6.42 x 10^23 kg (10.7% Earth)</td>    </tr>    <tr>      <td>Moons:</td>      <td>2 (Phobos &amp; Deimos)</td>    </tr>    <tr>      <td>Orbit Distance:</td>      <td>227,943,824 km (1.52 AU)</td>    </tr>    <tr>      <td>Orbit Period:</td>      <td>687 days (1.9 years)</td>    </tr>    <tr>      <td>Surface Temperature:</td>      <td>-153 to 20 °C</td>    </tr>    <tr>      <td>First Record:</td>      <td>2nd millennium BC</td>    </tr>    <tr>      <td>Recorded By:</td>      <td>Egyptian astronomers</td>    </tr>  </tbody></table>'

### Mars Hemispheres

In [23]:
# USGS Astrogeology search results for the enhanced Mars hemisphere images
url_usgs = (
    'https://astrogeology.usgs.gov/search/results'
    '?q=hemisphere+enhanced&k1=target&v1=Mars'
)

# Accumulator for the scraped hemisphere entries; starts out empty.
hemisphere_image_urls = list()

In [24]:
# Rebuild the result list from scratch so that re-running this cell does not
# append duplicate entries to a list created by an earlier cell.
hemisphere_image_urls = []

# Load the search results once and record the target of every hemisphere link,
# rather than assuming exactly four results and reloading the search page
# before each click.
browser.visit(url_usgs)
time.sleep(1)
links = browser.find_by_css('div.item > div.description > a')
hemisphere_page_urls = [link['href'] for link in links]

for page_url in hemisphere_page_urls:
    browser.visit(page_url)
    time.sleep(1)
    html = browser.html
    soup = BeautifulSoup(html, 'lxml')
    # CSS path of the heading, kept for reference:
    #splashy > div.wrapper > div.container > div.content > section > h2.title
    title = soup.find('h2', class_='title').get_text()
    # CSS path of the image, kept for reference:
    #wide-image > img
    img_src = soup.find(id="wide-image").find('img', class_='wide-image')['src']
    # urljoin matches plain concatenation for a root-relative src and also
    # copes with a src that is already an absolute URL.
    img_url = urljoin('https://astrogeology.usgs.gov', img_src)
    hemisphere_image_urls.append({'title': title, 'img_url': img_url})

In [25]:
# Display the collected title / image-URL entries to check the scrape.
hemisphere_image_urls

[{'img_url': 'https://astrogeology.usgs.gov/cache/images/cfa62af2557222a02478f1fcd781d445_cerberus_enhanced.tif_full.jpg',
  'title': 'Cerberus Hemisphere Enhanced'},
 {'img_url': 'https://astrogeology.usgs.gov/cache/images/3cdd1cbf5e0813bba925c9030d13b62e_schiaparelli_enhanced.tif_full.jpg',
  'title': 'Schiaparelli Hemisphere Enhanced'},
 {'img_url': 'https://astrogeology.usgs.gov/cache/images/ae209b4e408bb6c3e67b6af38168cf28_syrtis_major_enhanced.tif_full.jpg',
  'title': 'Syrtis Major Hemisphere Enhanced'},
 {'img_url': 'https://astrogeology.usgs.gov/cache/images/7cf2da4bf549ed01c17f206327be4db7_valles_marineris_enhanced.tif_full.jpg',
  'title': 'Valles Marineris Hemisphere Enhanced'}]