In [37]:
import requests
from bs4 import BeautifulSoup as BS
from splinter import Browser
import pandas as pd
import time

## Step 1 - Scraping

### NASA Mars News

In [38]:
# latest_news_raw = requests.get('https://mars.nasa.gov/news/?page=0&per_page=40&order=publish_date+desc%2Ccreated_at+desc&search=&category=19%2C165%2C184%2C204&blank_scope=Latest')
# soup = BS(latest_news_raw.text,'html.parser')

executable_path = {'executable_path': 'chromedriver.exe'}
browser = Browser('chrome', **executable_path, headless=False)

In [39]:
mars_news_url = 'https://mars.nasa.gov/news/?page=0&per_page=40&order=publish_date+desc%2Ccreated_at+desc&search=&category=19%2C165%2C184%2C204&blank_scope=Latest'
browser.visit(mars_news_url)

In [40]:
html = browser.html
soup = BS(html, 'html.parser')
# extract the news items from the html
news_items = soup.find('ul', class_='item_list')
latest_news = news_items.find_all('li', class_='slide')[0]
# retrieve the tile and paragraph
news_title = latest_news.find('div', class_='content_title').text
news_p = latest_news.find('div', class_='article_teaser_body').text

In [41]:
news_title

"'Storm Chasers' on Mars Searching for Dusty Secrets"

In [42]:
news_p

"Scientists with NASA's Mars orbiters have been waiting years for an event like the current Mars global dust storm."

### JPL Mars Space Images 

In [43]:
mars_images_url = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'
browser.visit(mars_images_url)

In [44]:
# click on the 'full image' button to inspect the image
browser.find_by_xpath('//*[@id="full_image"]').click()
time.sleep(2)

In [45]:
# click on 'more info' button to get the full-size version of the image
time.sleep(5)
browser.find_by_xpath("//div[@class='buttons']/a[@class='button']").click()

In [46]:
# get the url of the large version of the image
img_html = browser.html
soup = BS(img_html, 'html.parser')
img_url = soup.find('img', class_='main_image')['src']
featured_image_url = 'https://www.jpl.nasa.gov/' + img_url

In [47]:
featured_image_url

'https://www.jpl.nasa.gov//spaceimages/images/largesize/PIA18851_hires.jpg'

### Mars Weather

In [48]:
mars_weather_url = 'https://twitter.com/marswxreport?lang=en'
browser.visit(mars_weather_url)

In [49]:
mars_weather_html = browser.html
soup = BS(mars_weather_html, 'html.parser')

In [50]:
tweets =soup.find('div', class_='stream').find_all('li')

In [51]:
mars_weather = None

for tweet in tweets:
    tweet_account = tweet.find('span',class_='FullNameGroup')
    if tweet_account:
        tweet_account_text= tweet_account.text.lstrip()
        if 'mars weather' in tweet_account_text.lower():
            # the latest weather report on Mars
            mars_weather = tweet.find('div', class_='js-tweet-text-container').find('p').text
            break

In [52]:
mars_weather

'Sol 2108 (2018-07-12), Sunny, high -24C/-11F, low -65C/-84F, pressure at 8.06 hPa, daylight 05:19-17:27'

### Mars Facts

In [53]:
# use pandas to scrape the html page
mars_facts_url = 'https://space-facts.com/mars/'
mars_facts_table = pd.read_html(mars_facts_url)

In [54]:
df = mars_facts_table[0]
# add columns to the dataframe
df.columns = ['description','value']
# set the alignment to left
df.style.set_properties(**{'text-align':'left'})
# set the index to the `description` column
# df.set_index('description', inplace=True)
df

Unnamed: 0,description,value
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.42 x 10^23 kg (10.7% Earth)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.52 AU)"
5,Orbit Period:,687 days (1.9 years)
6,Surface Temperature:,-153 to 20 °C
7,First Record:,2nd millennium BC
8,Recorded By:,Egyptian astronomers


In [55]:
# convert to HTML string
mars_facts_table_string = df.to_html(index=False)

In [56]:
mars_facts_table_string

'<table border="1" class="dataframe">\n  <thead>\n    <tr style="text-align: right;">\n      <th>description</th>\n      <th>value</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <td>Equatorial Diameter:</td>\n      <td>6,792 km</td>\n    </tr>\n    <tr>\n      <td>Polar Diameter:</td>\n      <td>6,752 km</td>\n    </tr>\n    <tr>\n      <td>Mass:</td>\n      <td>6.42 x 10^23 kg (10.7% Earth)</td>\n    </tr>\n    <tr>\n      <td>Moons:</td>\n      <td>2 (Phobos &amp; Deimos)</td>\n    </tr>\n    <tr>\n      <td>Orbit Distance:</td>\n      <td>227,943,824 km (1.52 AU)</td>\n    </tr>\n    <tr>\n      <td>Orbit Period:</td>\n      <td>687 days (1.9 years)</td>\n    </tr>\n    <tr>\n      <td>Surface Temperature:</td>\n      <td>-153 to 20 °C</td>\n    </tr>\n    <tr>\n      <td>First Record:</td>\n      <td>2nd millennium BC</td>\n    </tr>\n    <tr>\n      <td>Recorded By:</td>\n      <td>Egyptian astronomers</td>\n    </tr>\n  </tbody>\n</table>'

### Mars Hemispheres

In [57]:
mars_hemispheres_url = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'
browser.visit(mars_hemispheres_url)

In [58]:
# extract the elements with BS
mars_hemispheres_html = browser.html
soup = BS(mars_hemispheres_html, 'html.parser')

In [59]:
hemisphere_image_urls =list()
hemisphere_image_items = soup.find_all('div',class_='item')

In [60]:
for item in hemisphere_image_items:
    
    # extract the title of the image
    title = item.find('h3').text
    # go to the link to get the details
    browser.find_by_text(title).click()
    # extract the url of the image
    img_html = browser.html
    soup = BS(img_html, 'html.parser')
    img_url = soup.find('div', class_='downloads').find_all('li')[0].find('a')['href']
    # add the `title` and `img_url` to the list
    hemisphere_image_urls.append({'title':title, 'img_url':img_url})
    # return to the previous webpage 
    browser.visit(mars_hemispheres_url)

In [61]:
hemisphere_image_urls

[{'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg',
  'title': 'Cerberus Hemisphere Enhanced'},
 {'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg',
  'title': 'Schiaparelli Hemisphere Enhanced'},
 {'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg',
  'title': 'Syrtis Major Hemisphere Enhanced'},
 {'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg',
  'title': 'Valles Marineris Hemisphere Enhanced'}]