In [1]:
# Dependencies
from bs4 import BeautifulSoup as bs
from splinter import Browser
import requests
import pandas as pd
import time
import re

 ## NASA Mars News

In [8]:
!which chromedriver

/usr/local/bin/chromedriver


In [9]:
executable_path = {'executable_path': '/usr/local/bin/chromedriver'}
browser = Browser('chrome', **executable_path, headless=False)

In [10]:
# URL of page to be scraped
news_url = "https://mars.nasa.gov/news/?page=0&per_page=40&order=publish_date+desc%2Ccreated_at+desc&search=&category=19%2C165%2C184%2C204&blank_scope=Latest"

# Use splinter to go to the url
browser.visit(news_url)

In [11]:
html = browser.html
news_soup = bs(html, 'html.parser')

In [12]:
# Examine the results, then determine element that contains sought info
# results are returned as an iterable list

# Identify and return news title of listing
news_title = news_soup.find('div', class_='content_title').text
        
# Identify and return news content of listing
news_p = news_soup.find('div', class_='article_teaser_body').text

print(f'news_title = "{news_title}"')
print(f'news_p = "{news_p}"')

news_title = "Mars Now"
news_p = "To go along with the stunning 1.8-billion-pixel image, a new video offers a sweeping view of the Red Planet."


## JPL Mars Space Images - Featured Image

In [22]:
!which chromedriver

/usr/local/bin/chromedriver


In [23]:
executable_path = {'executable_path': '/usr/local/bin/chromedriver'}
browser = Browser('chrome', **executable_path, headless=False)

In [24]:
# URL of page to be scraped
img_url = "https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars"

# Use splinter to go to the url
browser.visit(img_url)

In [25]:
# Use splinter to click on the 'FULL IMAGE' button
full_image_button = browser.find_by_css('.button.fancybox')
full_image_button.click()

In [26]:
# Use splinter to click on the 'more info' button
more_info_button = browser.links.find_by_partial_text('more info')
more_info_button.click()

In [27]:
# Use BeautifulSoup to scrape the featured image url
html = browser.html
featured_img_soup = bs(html, 'html.parser')

featured_img = featured_img_soup.find('img', class_='main_image')['src']
featured_image_url = f'https://www.jpl.nasa.gov{featured_img}'
print(f'featured_image_url = "{featured_image_url}"')

featured_image_url = "https://www.jpl.nasa.gov/spaceimages/images/largesize/PIA11591_hires.jpg"


## Mars Weather

In [2]:
!which chromedriver

/usr/local/bin/chromedriver


In [3]:
executable_path = {'executable_path': '/usr/local/bin/chromedriver'}
browser = Browser('chrome', **executable_path, headless=False)

In [4]:
# URL of page to be scraped
weather_url = "https://twitter.com/marswxreport?lang=en"

# Use splinter to go to the url
browser.visit(weather_url)

In [5]:
# Create BeautifulSoup object; parse with ''html.parser'
html = browser.html
weather_soup = bs(html, 'html.parser')


In [7]:
# Examine the results, then determine element that contains sought info
# results are returned as an iterable list
weather_result = weather_soup.find('div', attrs ={"class": "tweets", "data-name": "Mars Weather"})

try:
    mars_weather = weather_result.find('p', 'tweet-text').get_text()
    mars_weather
except AttributeError:
    pattern = re.compile(r'sol')
    mars_weather = weather_soup.find('span', text=pattern).get_text()
    mars_weather

print(f'mars_weather = "{mars_weather}"')

mars_weather = "InSight sol 451 (2020-03-03) low -93.6ºC (-136.4ºF) high -10.3ºC (13.4ºF)
winds from the SSE at 6.3 m/s (14.2 mph) gusting to 19.9 m/s (44.5 mph)
pressure at 6.30 hPa"


## Mars Facts

In [2]:
# URL of page to be scraped
facts_url = "https://space-facts.com/mars/"

# Retrieve page with the requests module
response = requests.get(facts_url)

# Create BeautifulSoup object; parse with ''html.parser'
facts_scrape = bs(response.content, 'html.parser')


In [14]:
# print(facts_scrape.prettify())

In [7]:
# Scrape the table of Mars Facts
table = facts_scrape.find('table', class_='tablepress-id-p-mars')

col_1 = []
col_2 = []

for data in table.find_all('tr'):
    col_1.append(data.find('td', class_='column-1').text)
    col_2.append(data.find('td', class_='column-2').text)
    
print(col_1)
print(col_2)

['Equatorial Diameter:', 'Polar Diameter:', 'Mass:', 'Moons:', 'Orbit Distance:', 'Orbit Period:', 'Surface Temperature: ', 'First Record:', 'Recorded By:']
['6,792 km', '6,752 km', '6.39 × 10^23 kg (0.11 Earths)', '2 (Phobos & Deimos)', '227,943,824 km (1.38 AU)', '687 days (1.9 years)', '-87 to -5 °C', '2nd millennium BC', 'Egyptian astronomers']


In [8]:
# Insert into a DataFrame
mars_table = pd.DataFrame(columns = ["Characteristics", "Mars"])
mars_table["Characteristics"] = col_1
mars_table["Mars"] = col_2

mars_table

Unnamed: 0,Characteristics,Mars
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.39 × 10^23 kg (0.11 Earths)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.38 AU)"
5,Orbit Period:,687 days (1.9 years)
6,Surface Temperature:,-87 to -5 °C
7,First Record:,2nd millennium BC
8,Recorded By:,Egyptian astronomers


In [10]:
# Convert the data frame to html format
html = mars_table.to_html(index=False)
html

'<table border="1" class="dataframe">\n  <thead>\n    <tr style="text-align: right;">\n      <th>Characteristics</th>\n      <th>Mars</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <td>Equatorial Diameter:</td>\n      <td>6,792 km</td>\n    </tr>\n    <tr>\n      <td>Polar Diameter:</td>\n      <td>6,752 km</td>\n    </tr>\n    <tr>\n      <td>Mass:</td>\n      <td>6.39 × 10^23 kg (0.11 Earths)</td>\n    </tr>\n    <tr>\n      <td>Moons:</td>\n      <td>2 (Phobos &amp; Deimos)</td>\n    </tr>\n    <tr>\n      <td>Orbit Distance:</td>\n      <td>227,943,824 km (1.38 AU)</td>\n    </tr>\n    <tr>\n      <td>Orbit Period:</td>\n      <td>687 days (1.9 years)</td>\n    </tr>\n    <tr>\n      <td>Surface Temperature:</td>\n      <td>-87 to -5 °C</td>\n    </tr>\n    <tr>\n      <td>First Record:</td>\n      <td>2nd millennium BC</td>\n    </tr>\n    <tr>\n      <td>Recorded By:</td>\n      <td>Egyptian astronomers</td>\n    </tr>\n  </tbody>\n</table>'

## Mars Hemispheres

In [17]:
!which chromedriver

/usr/local/bin/chromedriver


In [18]:
executable_path = {'executable_path': '/usr/local/bin/chromedriver'}
browser = Browser('chrome', **executable_path, headless=False)

In [20]:
# Visit the website of Mars Hemispheres using Splinter
hemisphere_link = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'
browser.visit(hemisphere_link)

# Getting the links of each hemisphere products
html = browser.html
hemispheres_img_soup = bs(html, 'html.parser')

hemispheres_img = hemispheres_img_soup.find_all('div', class_='description')
hemispheres_img

img_links = []

for div in hemispheres_img:
    link = div.find('a', class_='itemLink product-item')['href']
    img_links.append(f'https://astrogeology.usgs.gov{link}')
img_links

['https://astrogeology.usgs.gov/search/map/Mars/Viking/cerberus_enhanced',
 'https://astrogeology.usgs.gov/search/map/Mars/Viking/schiaparelli_enhanced',
 'https://astrogeology.usgs.gov/search/map/Mars/Viking/syrtis_major_enhanced',
 'https://astrogeology.usgs.gov/search/map/Mars/Viking/valles_marineris_enhanced']

In [22]:
# Visit each hemisphere products' links
img_hem_title = []
img_hem_href = []

for img_link in img_links:
    browser.visit(img_link)
    
    html = browser.html
    img_hem_soup = bs(html, 'html.parser')
    
    # Scrape Image Title and Link
    
    img_hem_title.append(img_hem_soup.find('h2', class_="title").text)
        
    img_hem_div = img_hem_soup.find_all('div', class_="downloads")   
    
    for img in img_hem_div:  
        img_hem_href.append(img.find('a')['href'])
        
print(img_hem_title)
print(img_hem_href)

['Cerberus Hemisphere Enhanced', 'Schiaparelli Hemisphere Enhanced', 'Syrtis Major Hemisphere Enhanced', 'Valles Marineris Hemisphere Enhanced']
['http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg', 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg', 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg', 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg']


In [56]:
# Insert the Mars Hemispheres Title and URLs to DataFrame
img_hem_df = pd.DataFrame(columns=['title', 'img_url'])
img_hem_df['title'] = img_hem_title
img_hem_df['img_url'] = img_hem_href
img_hem_df

Unnamed: 0,title,img_url
0,Cerberus Hemisphere Enhanced,http://astropedia.astrogeology.usgs.gov/downlo...
1,Schiaparelli Hemisphere Enhanced,http://astropedia.astrogeology.usgs.gov/downlo...
2,Syrtis Major Hemisphere Enhanced,http://astropedia.astrogeology.usgs.gov/downlo...
3,Valles Marineris Hemisphere Enhanced,http://astropedia.astrogeology.usgs.gov/downlo...


In [57]:
hemisphere_image_url = img_hem_df.to_dict('records')
hemisphere_image_url

[{'title': 'Cerberus Hemisphere Enhanced',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg'},
 {'title': 'Schiaparelli Hemisphere Enhanced',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg'},
 {'title': 'Syrtis Major Hemisphere Enhanced',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg'},
 {'title': 'Valles Marineris Hemisphere Enhanced',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg'}]