In [25]:
# Dependencies
import pandas as pd
from splinter import Browser
from bs4 import BeautifulSoup

### Windows Chrome Driver

In [17]:
# Function to choose the executable path to driver
def init_browser(executable_path="chromedriver.exe", headless=False):
    """Start a Chrome browser session through splinter.

    Args:
        executable_path: Location of the chromedriver binary handed to
            splinter. Defaults to "chromedriver.exe", the value this
            notebook has always used.
        headless: Forwarded to splinter's ``headless`` option. Defaults to
            False, matching the original behaviour.

    Returns:
        The splinter ``Browser`` instance. The caller is responsible for
        calling ``quit()`` on it when finished.
    """
    return Browser("chrome", executable_path=executable_path, headless=headless)

### NASA Mars News

In [22]:
# Run init_browser/driver.
browser = init_browser()

try:
    # Visit Nasa news url.
    news_url = "https://mars.nasa.gov/news/"
    browser.visit(news_url)

    # Give the article list up to a second to appear before reading the page:
    # select_one() below returns None when the element is missing, which would
    # make the following .find() calls fail.
    browser.is_element_present_by_css("ul.item_list li.slide", wait_time=1)

    # HTML Object.
    html = browser.html
finally:
    # Exit Browser, even if visiting or reading the page raised.
    browser.quit()

# Parse HTML with Beautiful Soup
news_soup = BeautifulSoup(html, "html.parser")

# The first slide in the item list is treated as the most recent article.
slide_element = news_soup.select_one("ul.item_list li.slide")

# Scrape the latest news title and its teaser paragraph from that slide.
news_title = slide_element.find("div", class_="content_title").get_text()
news_paragraph = slide_element.find("div", class_="article_teaser_body").get_text()

print(news_title)
print(news_paragraph)

Sensors on Mars 2020 Spacecraft Answer Long-Distance Call From Earth
Instruments tailored to collect data during the descent of NASA's next rover through the Red Planet's atmosphere have been checked in flight.


In [10]:
# Show the scraped headline and teaser, each on its own labelled line.
print(f'Title: {news_title}\nText: {news_paragraph}')

Title: Sensors on Mars 2020 Spacecraft Answer Long-Distance Call From Earth
Text: Instruments tailored to collect data during the descent of NASA's next rover through the Red Planet's atmosphere have been checked in flight.


### JPL Mars Space Images - Featured Image

In [11]:
# Run init_browser/driver.
browser = init_browser()

try:
    # Visit the url for JPL Featured Space Image.
    jpl_url = "https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars"
    browser.visit(jpl_url)

    # Select "FULL IMAGE".
    browser.click_link_by_partial_text("FULL IMAGE")

    # Wait up to a second for the "more info" link, then follow it to the
    # image's detail page.
    browser.is_element_present_by_text("more info", wait_time=1)
    more_info_element = browser.find_link_by_partial_text("more info")
    more_info_element.click()

    # HTML Object.
    html = browser.html
finally:
    # Exit Browser, even if one of the navigation steps raised.
    browser.quit()

# Parse HTML with Beautiful Soup
image_soup = BeautifulSoup(html, "html.parser")

# Scrape image URL: the href of the link inside <figure class="lede">.
image_url = image_soup.find("figure", class_="lede").a["href"]

# Concatenate https://www.jpl.nasa.gov with image_url.
featured_image_url = f'https://www.jpl.nasa.gov{image_url}'

In [12]:
# Show the absolute URL built for the featured image.
print(featured_image_url)

https://www.jpl.nasa.gov/spaceimages/images/largesize/PIA17470_hires.jpg


### Mars Weather

### Mars Facts

In [21]:
# Scrape the Mars facts table and render it as an HTML string.
def mars_facts():
    """Return the first table found at space-facts.com/mars as HTML.

    The table's two columns are labelled "Description" and "Value",
    "Description" becomes the index, and the generated markup carries the
    CSS classes "table table-striped".
    """
    facts_table = pd.read_html("https://space-facts.com/mars/")[0]
    facts_table.columns = ["Description", "Value"]
    indexed_facts = facts_table.set_index("Description")
    return indexed_facts.to_html(classes="table table-striped")

In [10]:
# Build the Mars facts DataFrame in this cell: no visible earlier cell assigns
# df_mars_facts (mars_facts() returns an HTML string, not the frame), so a
# fresh kernel would otherwise raise NameError here.
df_mars_facts = pd.read_html("https://space-facts.com/mars/")[0]
df_mars_facts.columns = ["Description", "Value"]
df_mars_facts = df_mars_facts.set_index("Description")

# Convert DF to html and save in Resources Folder.
df_mars_facts.to_html('Resources/mars_facts.html')

In [11]:
# Convert DF to HTML string.
# NOTE(review): this rebinds the name `mars_facts`, which an earlier cell
# defines as a function. After this cell runs, `mars_facts` is the HTML string
# (the value later stored in mars_data), and the function is no longer
# reachable under that name.
mars_facts = df_mars_facts.to_html(header=True, index=True)
print(mars_facts)

<table border="1" class="dataframe">
  <thead>
    <tr style="text-align: right;">
      <th></th>
      <th>Value</th>
    </tr>
    <tr>
      <th>Description</th>
      <th></th>
    </tr>
  </thead>
  <tbody>
    <tr>
      <th>Equatorial Diameter:</th>
      <td>6,792 km</td>
    </tr>
    <tr>
      <th>Polar Diameter:</th>
      <td>6,752 km</td>
    </tr>
    <tr>
      <th>Mass:</th>
      <td>6.39 × 10^23 kg (0.11 Earths)</td>
    </tr>
    <tr>
      <th>Moons:</th>
      <td>2 (Phobos &amp; Deimos)</td>
    </tr>
    <tr>
      <th>Orbit Distance:</th>
      <td>227,943,824 km (1.38 AU)</td>
    </tr>
    <tr>
      <th>Orbit Period:</th>
      <td>687 days (1.9 years)</td>
    </tr>
    <tr>
      <th>Surface Temperature:</th>
      <td>-87 to -5 °C</td>
    </tr>
    <tr>
      <th>First Record:</th>
      <td>2nd millennium BC</td>
    </tr>
    <tr>
      <th>Recorded By:</th>
      <td>Egyptian astronomers</td>
    </tr>
  </tbody>
</table>


### Mars Hemispheres

In [12]:
# Run init_browser/driver.
browser = init_browser()

try:
    # Visit the url for USGS Astrogeology.
    astrogeo_url = "https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars"
    browser.visit(astrogeo_url)

    # HTML Object.
    html = browser.html
finally:
    # Exit Browser, even if loading the search page raised.
    browser.quit()

# Parse HTML with Beautiful Soup
astrogeo_soup = BeautifulSoup(html, "html.parser")

# Store main URL in a variable so that each relative 'href' can be appended to it later.
main_astrogeo_url = "https://astrogeology.usgs.gov"

# Each result sits in a 'div' tag with class "item"; collect all of them.
hems_url = astrogeo_soup.find_all("div", class_="item")

# Relative link (href of the first <a>) for each hemisphere result.
hemis_url = [hem.find('a')['href'] for hem in hems_url]

In [13]:
# Show the relative hemisphere page links collected from the search results.
print(hemis_url)

['/search/map/Mars/Viking/cerberus_enhanced', '/search/map/Mars/Viking/schiaparelli_enhanced', '/search/map/Mars/Viking/syrtis_major_enhanced', '/search/map/Mars/Viking/valles_marineris_enhanced']


In [14]:
# Create list of dictionaries called hemisphere_image_urls.
# For every relative link saved in hemis_url:
#   - concatenate it with main_astrogeo_url (printed so the result can be checked),
#   - visit the page and scrape its title and image link.

hemisphere_image_urls = []

# A single browser session is reused for every page instead of launching a new
# one per URL; the finally clause closes it even if a page fails to parse.
browser = init_browser()
try:
    for hemi in hemis_url:
        hem_astrogeo_url = main_astrogeo_url + hemi
        print(hem_astrogeo_url)

        browser.visit(hem_astrogeo_url)

        # HTML Object.
        html = browser.html

        # Parse HTML with Beautiful Soup
        hemi_soup = BeautifulSoup(html, "html.parser")

        # Locate each title and save to raw_title, to be cleaned.
        raw_title = hemi_soup.find("h2", class_="title").text

        # Remove ' Enhanced' tag text from each "title" via split on ' Enhanced'.
        title = raw_title.split(' Enhanced')[0]

        # The link inside the first <li> on the page is taken as the image URL.
        img_url = hemi_soup.find("li").a['href']

        # Append both title and img_url to 'hemisphere_image_urls'.
        hemisphere_image_urls.append({'title': title, 'img_url': img_url})
finally:
    browser.quit()

https://astrogeology.usgs.gov/search/map/Mars/Viking/cerberus_enhanced
https://astrogeology.usgs.gov/search/map/Mars/Viking/schiaparelli_enhanced
https://astrogeology.usgs.gov/search/map/Mars/Viking/syrtis_major_enhanced
https://astrogeology.usgs.gov/search/map/Mars/Viking/valles_marineris_enhanced


In [15]:
# Show the title / image URL pairs scraped for each hemisphere.
print(hemisphere_image_urls)

[{'title': 'Cerberus Hemisphere', 'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg'}, {'title': 'Schiaparelli Hemisphere', 'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg'}, {'title': 'Syrtis Major Hemisphere', 'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg'}, {'title': 'Valles Marineris Hemisphere', 'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg'}]


### Mars Data Dictionary - MongoDB

In [16]:
# Start the combined Mars record with the latest news headline and teaser.
mars_data = {
    'news_title': news_title,
    'news_paragraph': news_paragraph,
}

In [17]:
# Store the featured image URL in mars_data.
mars_data.update({'featured_image_url': featured_image_url})

In [18]:
# Store mars_weather in mars_data.
# NOTE(review): mars_weather is not assigned in any visible cell (the
# "Mars Weather" section above contains no code) — confirm the scraping cell
# exists, otherwise this line raises NameError on a fresh kernel.
mars_data['mars_weather'] = mars_weather

In [19]:
# Store the Mars facts value (whatever `mars_facts` is bound to at this point) in mars_data.
mars_data.update({'mars_facts': mars_facts})

In [20]:
# Store the list of hemisphere title / image URL dictionaries in mars_data.
mars_data.update({'hemisphere_image_urls': hemisphere_image_urls})

In [21]:
# Display the assembled dictionary as the cell output.
mars_data

{'news_title': "What's Mars Solar Conjunction, and Why Does It Matter?",
 'news_paragraph': 'NASA spacecraft at Mars are going to be on their own for a few weeks when the Sun comes between Mars and Earth, interrupting communications.',
 'featured_image_url': 'https://www.jpl.nasa.gov/spaceimages/images/largesize/PIA18840_hires.jpg',
 'mars_weather': 'InSight sol 261 (2019-08-21) low -102.4ºC (-152.4ºF) high -26.6ºC (-15.8ºF) winds from the SSE at 4.9 m/s (11.0 mph) gusting to 16.0 m/s (35.8 mph) pressure at 7.70 hPa',
 'mars_facts': '<table border="1" class="dataframe">\n  <thead>\n    <tr style="text-align: right;">\n      <th></th>\n      <th>Value</th>\n    </tr>\n    <tr>\n      <th>Description</th>\n      <th></th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>Equatorial Diameter:</th>\n      <td>6,792 km</td>\n    </tr>\n    <tr>\n      <th>Polar Diameter:</th>\n      <td>6,752 km</td>\n    </tr>\n    <tr>\n      <th>Mass:</th>\n      <td>6.39 × 10^23 kg (0.11 Earths)</t