In [1]:
from bs4 import BeautifulSoup
from splinter import Browser
import pandas as pd
import requests
import pprint

In [2]:
# URL of page to be scraped
url = 'https://mars.nasa.gov/news/'

In [3]:
# Retrieve page with the requests module
response = requests.get(url)

In [4]:
#Create BeautifulSoup object; Parse with html parser
soup = BeautifulSoup(response.text, 'html.parser')

In [5]:
# Examine the results, then determine element that contains sought info
resultsTitle = soup.find_all('div', class_='content_title')
resultsPara = soup.find_all('div', class_='rollover_description_inner')

In [6]:
!which chromedriver

/usr/local/bin/chromedriver


# JPL Mars Space Images - Featured Image

In [7]:
executable_path = {'executable_path':'/usr/local/bin/chromedriver'}
browser = Browser('chrome', **executable_path, headless=False)

In [8]:
#Use Splinter to visit the Mars Image URL
image_url = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'
browser.visit(image_url)

In [9]:
xpath = '//*[@id="page"]/section[3]/div/ul/li[29]/a/div/div[2]/img'
#Use splinter to click on the mars featured image
#to bring the full resolution image
results = browser.find_by_xpath(xpath)
img = results[0]
img.click()

In [10]:
base_url = image_url.rsplit('/',2)[0]

In [12]:
#get image url using BeautifulSoup
html_image = browser.html
soup = BeautifulSoup(html_image, "html.parser")
img_url = soup.find("img", class_="fancybox-image")['src']
featured_image_url = base_url + img_url
print(f"The featured Image URL is : {featured_image_url}")

The featured Image URL is : https://www.jpl.nasa.gov/spaceimages/images/largesize/PIA22978_hires.jpg


# Mars Weather

In [13]:
#Use Splinter to visit Mars URL.
mars_url = 'https://twitter.com/marswxreport?lang=en'
browser.visit(mars_url)

In [14]:
#Construct a Soap object on the Mars URL
html_mars = browser.html
soup_mars = BeautifulSoup(html_mars, 'html.parser')

In [15]:
#Find the weather tweet
weather_tweet = soup_mars.find('p', class_='TweetTextSize TweetTextSize--normal js-tweet-text tweet-text')
mars_weather = weather_tweet.text
mars_weather

'Sol 2312 (2019-02-06), high -13C/8F, low -72C/-97F, pressure at 8.13 hPa, daylight 06:47-18:53pic.twitter.com/QpQemcmmJW'

# Mars Facts

In [16]:
#Scrape Mars Facts URL
mars_facts_url = 'https://space-facts.com/mars/'

#Create a Pandas Dataframe on the URL
mars_facts_df = pd.read_html(mars_facts_url)
mars_facts_df = mars_facts_df[0]

#Supplement Facts and Values as 2 columns to hold titles and values.
mars_facts_df.columns = ['Facts','Values']

#Convert the DataFrame to an HTML table of type String
mars_facts_df_table = mars_facts_df.to_html()
mars_facts_df_table = mars_facts_df_table.replace('\n','')
print(mars_facts_df_table)

<table border="1" class="dataframe">  <thead>    <tr style="text-align: right;">      <th></th>      <th>Facts</th>      <th>Values</th>    </tr>  </thead>  <tbody>    <tr>      <th>0</th>      <td>Equatorial Diameter:</td>      <td>6,792 km</td>    </tr>    <tr>      <th>1</th>      <td>Polar Diameter:</td>      <td>6,752 km</td>    </tr>    <tr>      <th>2</th>      <td>Mass:</td>      <td>6.42 x 10^23 kg (10.7% Earth)</td>    </tr>    <tr>      <th>3</th>      <td>Moons:</td>      <td>2 (Phobos &amp; Deimos)</td>    </tr>    <tr>      <th>4</th>      <td>Orbit Distance:</td>      <td>227,943,824 km (1.52 AU)</td>    </tr>    <tr>      <th>5</th>      <td>Orbit Period:</td>      <td>687 days (1.9 years)</td>    </tr>    <tr>      <th>6</th>      <td>Surface Temperature:</td>      <td>-153 to 20 °C</td>    </tr>    <tr>      <th>7</th>      <td>First Record:</td>      <td>2nd millennium BC</td>    </tr>    <tr>      <th>8</th>      <td>Recorded By:</td>      <td>Egyptian astronomers</

# Mars Hemisphere

In [17]:
url_hemisphere = "https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars"

#Base URL
base_url_hemisphere = url_hemisphere.rsplit('/',2)[0]

#Request and Receive the Hemisphere Response Page
hemisphere_response = requests.get(url_hemisphere)

#Create a Beautiful Soap on the response text and parse the html
hemisphere_soap = BeautifulSoup(hemisphere_response.text, 'html.parser')

#Search for all the Image Items - Tag div and class item
imageItems = hemisphere_soap.find_all('div', class_='item')

#Construct a Hemisphere List
hemisphere_image_urls = []

'''
Iterate the Image Items to find all the product links and title.
Use Splinter to visit every product and scrape for the full Image link.
Append the Image Title and Link into a list.
'''
for imageItem in imageItems:
    imageSrc = imageItem.find('a', class_='itemLink product-item')['href']
    title = imageItem.find('div', class_='description').text
    
    browser.visit(base_url_hemisphere + imageSrc)
    enhanced_soup = BeautifulSoup(browser.html, 'html.parser')
    
    downloadTag = enhanced_soup.find('div', class_='downloads')
    enhancedImageSrc = downloadTag.find('a')['href']
    
    hemisphere_image_urls.append({
        "title":title,
        "img_url":enhancedImageSrc
    })
    
print("The following list displays all the Image titles and the corresponding urls.")
print("==========================================================================================================")
print(f"hemisphere_image_urls = {pprint.pprint(hemisphere_image_urls, indent=1)} ")
print("==========================================================================================================")

The following list displays all the Image titles and the corresponding urls.
[{'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg',
  'title': 'Cerberus Hemisphere Enhanced'},
 {'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg',
  'title': 'Schiaparelli Hemisphere Enhanced'},
 {'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg',
  'title': 'Syrtis Major Hemisphere Enhanced'},
 {'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg',
  'title': 'Valles Marineris Hemisphere Enhanced'}]
hemisphere_image_urls = None 
