In [1]:
# Dependencies
import webbrowser
from pathlib import Path

import pandas as pd
import pymongo
import requests
from bs4 import BeautifulSoup as bs
from splinter import Browser

In [2]:
# Scrape the NASA Mars news page for the most recent headline and its teaser.
url = 'https://mars.nasa.gov/news'
# Without a timeout, requests can block indefinitely on a stalled connection.
response = requests.get(url, timeout=30)
# Stop here on an HTTP error rather than parsing an error page below.
response.raise_for_status()
soup = bs(response.text, 'html.parser')

# The first "slide" container is treated as the most recent article.
result = soup.find('div', class_='slide')
if result is None:
    raise ValueError(f"No <div class='slide'> found at {url}; the page layout may have changed.")

title_div = result.find('div', class_='content_title')
if title_div is None or title_div.a is None or result.a is None:
    raise ValueError(f"The latest news slide at {url} is missing its title or teaser link.")

news_title = title_div.a.text.strip()
# The teaser is read from the first anchor inside the slide.
news_p = result.a.text.strip()

news = {"news_title": news_title, "news_p": news_p}
news

{'news_title': 'Opportunity Hunkers Down During Dust Storm',
 'news_p': "It's the beginning of the end for the planet-encircling dust storm on Mars. But it could still be weeks, or even months, before skies are clear enough for NASA's Opportunity rover to recharge its batteries and phone home."}

In [3]:
# Get the featured image URL from the JPL space-images page.
url = "https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars"
base_url = "https://www.jpl.nasa.gov"

# Without a timeout, requests can block indefinitely on a stalled connection.
response = requests.get(url, timeout=30)
# Stop here on an HTTP error rather than parsing an error page below.
response.raise_for_status()
soup = bs(response.text, 'lxml')

# The first <article> element is expected to wrap the featured image link.
article = soup.find("article")
if article is None or article.a is None:
    raise ValueError(f"No featured image link found at {url}; the page layout may have changed.")

# The anchor's data-fancybox-href is appended to the host to build the full URL.
featured_img_url = base_url + article.a['data-fancybox-href']
featured_img_url

'https://www.jpl.nasa.gov/spaceimages/images/mediumsize/PIA17175_ip.jpg'

In [4]:
# Scrape the latest Mars weather tweet.
url = "https://twitter.com/marswxreport?lang=en"
# Without a timeout, requests can block indefinitely on a stalled connection.
response = requests.get(url, timeout=30)
# Stop here on an HTTP error rather than parsing an error page below.
response.raise_for_status()
soup = bs(response.text, 'lxml')

# A multi-class string passed to class_ matches the exact class attribute value;
# find() returns the first such paragraph, i.e. the same element as find_all(...)[0].
tweet = soup.find("p", class_="TweetTextSize TweetTextSize--normal js-tweet-text tweet-text")
if tweet is None:
    raise ValueError(f"No tweet text found at {url}; the page layout may have changed.")

mars_weather = tweet.text
mars_weather

'Sol 2227 (2018-11-11), high -3C/26F, low -72C/-97F, pressure at 8.63 hPa, daylight 06:21-18:38'

## Mars Facts

In [5]:
# Mars Facts
import pandas as pd
# Pull every HTML table from the page into DataFrames; the first one is used.
url = 'http://space-facts.com/mars/'
tables = pd.read_html(url)

# df is the same object as tables[0], so the relabelling and re-indexing
# below are also visible through tables[0].
df = tables[0]
df.columns = ['description', 'value']
df.set_index(keys='description', inplace=True)

# Render the frame as HTML and drop every newline so it is a single-line string.
html_table = ''.join(df.to_html().split('\n'))
html_table

'<table border="1" class="dataframe">  <thead>    <tr style="text-align: right;">      <th></th>      <th>value</th>    </tr>    <tr>      <th>description</th>      <th></th>    </tr>  </thead>  <tbody>    <tr>      <th>Equatorial Diameter:</th>      <td>6,792 km</td>    </tr>    <tr>      <th>Polar Diameter:</th>      <td>6,752 km</td>    </tr>    <tr>      <th>Mass:</th>      <td>6.42 x 10^23 kg (10.7% Earth)</td>    </tr>    <tr>      <th>Moons:</th>      <td>2 (Phobos &amp; Deimos)</td>    </tr>    <tr>      <th>Orbit Distance:</th>      <td>227,943,824 km (1.52 AU)</td>    </tr>    <tr>      <th>Orbit Period:</th>      <td>687 days (1.9 years)</td>    </tr>    <tr>      <th>Surface Temperature:</th>      <td>-153 to 20 °C</td>    </tr>    <tr>      <th>First Record:</th>      <td>2nd millennium BC</td>    </tr>    <tr>      <th>Recorded By:</th>      <td>Egyptian astronomers</td>    </tr>  </tbody></table>'

In [6]:
# Write the facts table to disk and open it for a quick visual check.
tables[0].to_html('table.html')
# webbrowser works on every platform, unlike the Windows-only `!explorer` shell call;
# an absolute file:// URI is passed because bare relative paths are not reliably handled.
# The trailing semicolon suppresses the boolean return value in the cell output.
webbrowser.open(Path('table.html').resolve().as_uri());

## Mars Hemispheres

In [7]:
from splinter import Browser
from bs4 import BeautifulSoup

# Collect the hemisphere titles from the USGS Astrogeology search results.
# The page is fetched with requests, so no browser is launched in this cell
# (previously a Chrome window was opened here and never used or closed).
url = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'
# Without a timeout, requests can block indefinitely on a stalled connection.
response = requests.get(url, timeout=30)
# Stop here on an HTTP error rather than parsing an error page below.
response.raise_for_status()
soup = bs(response.text, 'lxml')

# Each result's "description" block carries the hemisphere title in its <h3>.
results = soup.find_all("div", class_="description")
hemispheres = [result.h3.text for result in results]
if not hemispheres:
    raise ValueError(f"No hemisphere results found at {url}; the page layout may have changed.")

hemispheres

['Cerberus Hemisphere Enhanced',
 'Schiaparelli Hemisphere Enhanced',
 'Syrtis Major Hemisphere Enhanced',
 'Valles Marineris Hemisphere Enhanced']

In [11]:
# Visit each hemisphere's detail page on the USGS Astrogeology site and
# record its title together with the URL of the wide image shown there.
hemisphere_info = []
executable_path = {'executable_path': 'chromedriver.exe'}
browser = Browser('chrome', **executable_path, headless=False)
url = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'

# try/finally guarantees the Chrome window is closed even if a click or parse fails.
try:
    browser.visit(url)

    for hemi in hemispheres:
        # Click the link whose text contains the hemisphere title.
        browser.click_link_by_partial_text(hemi)

        # Parse the page the browser is currently showing.
        soup = BeautifulSoup(browser.html, 'html.parser')

        image = soup.find('img', class_='wide-image')
        if image is None:
            raise ValueError(f"No 'wide-image' element found on the page for {hemi!r}.")

        # The src attribute is an incomplete URL; prefix the host to complete it.
        img_url = 'https://astrogeology.usgs.gov' + image['src']

        # One dictionary per hemisphere, appended to the tracking list.
        hemisphere_info.append({"title": hemi, "img_url": img_url})

        # Return to the results page before handling the next hemisphere.
        browser.click_link_by_partial_text('Back')
finally:
    browser.quit()

hemisphere_info

[{'title': 'Cerberus Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/cfa62af2557222a02478f1fcd781d445_cerberus_enhanced.tif_full.jpg'},
 {'title': 'Schiaparelli Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/3cdd1cbf5e0813bba925c9030d13b62e_schiaparelli_enhanced.tif_full.jpg'},
 {'title': 'Syrtis Major Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/ae209b4e408bb6c3e67b6af38168cf28_syrtis_major_enhanced.tif_full.jpg'},
 {'title': 'Valles Marineris Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/7cf2da4bf549ed01c17f206327be4db7_valles_marineris_enhanced.tif_full.jpg'}]

In [12]:
# Gather every scraped piece into a single dictionary.
mars_facts = {
    "news_title": news["news_title"],
    "news_p": news["news_p"],
    "featured_image_url": featured_img_url,
    "mars_weather": mars_weather,
    "mars_facts_table": html_table,
    "hemisphere_info": hemisphere_info,
}
# Display the assembled dictionary.
mars_facts

{'news_title': 'Opportunity Hunkers Down During Dust Storm',
 'news_p': "It's the beginning of the end for the planet-encircling dust storm on Mars. But it could still be weeks, or even months, before skies are clear enough for NASA's Opportunity rover to recharge its batteries and phone home.",
 'featured_image_url': 'https://www.jpl.nasa.gov/spaceimages/images/mediumsize/PIA17175_ip.jpg',
 'mars_weather': 'Sol 2227 (2018-11-11), high -3C/26F, low -72C/-97F, pressure at 8.63 hPa, daylight 06:21-18:38',
 'mars_facts_table': '<table border="1" class="dataframe">  <thead>    <tr style="text-align: right;">      <th></th>      <th>value</th>    </tr>    <tr>      <th>description</th>      <th></th>    </tr>  </thead>  <tbody>    <tr>      <th>Equatorial Diameter:</th>      <td>6,792 km</td>    </tr>    <tr>      <th>Polar Diameter:</th>      <td>6,752 km</td>    </tr>    <tr>      <th>Mass:</th>      <td>6.42 x 10^23 kg (10.7% Earth)</td>    </tr>    <tr>      <th>Moons:</th>      <td>2 (