## Mission to Mars


In [1]:
# initialize environment
from urllib.parse import urljoin

import pandas as pd
from bs4 import BeautifulSoup
from splinter import Browser
from webdriver_manager.chrome import ChromeDriverManager

In [2]:
# setup splinter: let webdriver_manager supply the chromedriver path and
# open a visible (non-headless) Chrome window driven by it
executable_path = dict(executable_path=ChromeDriverManager().install())
browser = Browser("chrome", headless=False, **executable_path)

[WDM] - Current google-chrome version is 88.0.4324
[WDM] - Get LATEST driver version for 88.0.4324






[WDM] - Driver [C:\Users\kate_\.wdm\drivers\chromedriver\win32\88.0.4324.96\chromedriver.exe] found in cache


In [5]:
# define scraping function
def make_soup(browser, url, tag, class_name, find_all):
    """Load a page in the browser and search its HTML for tags of a class.

    Parameters
    ----------
    browser : object providing ``visit(url)`` and an ``html`` attribute
    url : address of the page to visit
    tag : name of the HTML tag to search for
    class_name : value passed as the ``class_`` filter of the search
    find_all : when truthy every match is returned, otherwise only the
        result of a single ``find`` call

    Returns
    -------
    Whatever ``soup.find_all(tag, class_=class_name)`` or
    ``soup.find(tag, class_=class_name)`` returns for the visited page.
    """
    browser.visit(url)
    parsed_page = BeautifulSoup(browser.html, "html.parser")

    # pick the search method once, then issue a single call
    search = parsed_page.find_all if find_all else parsed_page.find
    return search(tag, class_=class_name)

In [6]:
# URL of the latest news page, split by query parameter for readability
# (results are ordered by publish date, descending; note the year filter
# is fixed to 2021)
latest_url = (
    "https://mars.nasa.gov/news/"
    "?page=0&per_page=40"
    "&order=publish_date+desc%2Ccreated_at+desc"
    "&search="
    "&year=2021%3Apublish_date"
    "&category=19%2C165%2C184%2C204"
    "&blank_scope=Latest"
)

In [7]:
# scrape every "content_title" div from the news page and keep the text
# of the element at index 1 as the latest article's title
# NOTE(review): index 0 is skipped here; presumably it is not an article
# headline. Confirm against the live page.
latest_soup = make_soup(
    browser=browser,
    url=latest_url,
    tag="div",
    class_name="content_title",
    find_all=True,
)
news_title = latest_soup[1].text
print(news_title)

Testing Proves Its Worth With Successful Mars Parachute Deployment


In [8]:
# scrape every "article_teaser_body" div from the news page and keep the
# text of the first one as the teaser paragraph
teaser_soup = make_soup(
    browser=browser,
    url=latest_url,
    tag="div",
    class_name="article_teaser_body",
    find_all=True,
)
news_p = teaser_soup[0].text
print(news_p)

The giant canopy that helped land Perseverance on Mars was tested here on Earth at NASA’s Wallops Flight Facility in Virginia.


In [9]:
# define URL of the page that displays the featured image, call the
# scraper, and collect the "showimg" anchors found on that page
feat_img_url = "https://data-class-jpl-space.s3.amazonaws.com/JPL_Space/index.html"
img_soup = make_soup(browser, feat_img_url, "a", "showimg", True)

# href of the first matching anchor
picfile = img_soup[0]["href"]

# create full path to full size image file by resolving the href against
# the page URL; unlike substituting the text "index.html", urljoin also
# yields a valid address when the href is root-relative or absolute
featured_image_url = urljoin(feat_img_url, picfile)
print(featured_image_url)

https://data-class-jpl-space.s3.amazonaws.com/JPL_Space/image/featured/mars2.jpg


In [11]:
# define the URL of the Mars facts page and have pandas read the HTML
# tables found there; the first table becomes the working dataframe
tbl_url = "https://space-facts.com/mars/"
tables = pd.read_html(io=tbl_url)
mars_df = tables[0]
mars_df

Unnamed: 0,0,1
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.39 × 10^23 kg (0.11 Earths)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.38 AU)"
5,Orbit Period:,687 days (1.9 years)
6,Surface Temperature:,-87 to -5 °C
7,First Record:,2nd millennium BC
8,Recorded By:,Egyptian astronomers


In [12]:
# give the two integer-labelled columns descriptive names
mars_df = mars_df.rename({0: "Aspect", 1: "Measurement"}, axis="columns")
mars_df

Unnamed: 0,Aspect,Measurement
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.39 × 10^23 kg (0.11 Earths)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.38 AU)"
5,Orbit Period:,687 days (1.9 years)
6,Surface Temperature:,-87 to -5 °C
7,First Record:,2nd millennium BC
8,Recorded By:,Egyptian astronomers


In [13]:
# format the table as an HTML string; index=False drops the row index
# column from the generated markup
mars_tbl_str = mars_df.to_html(index=False)
mars_tbl_str

'<table border="1" class="dataframe">\n  <thead>\n    <tr style="text-align: right;">\n      <th>Aspect</th>\n      <th>Measurement</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <td>Equatorial Diameter:</td>\n      <td>6,792 km</td>\n    </tr>\n    <tr>\n      <td>Polar Diameter:</td>\n      <td>6,752 km</td>\n    </tr>\n    <tr>\n      <td>Mass:</td>\n      <td>6.39 × 10^23 kg (0.11 Earths)</td>\n    </tr>\n    <tr>\n      <td>Moons:</td>\n      <td>2 (Phobos &amp; Deimos)</td>\n    </tr>\n    <tr>\n      <td>Orbit Distance:</td>\n      <td>227,943,824 km (1.38 AU)</td>\n    </tr>\n    <tr>\n      <td>Orbit Period:</td>\n      <td>687 days (1.9 years)</td>\n    </tr>\n    <tr>\n      <td>Surface Temperature:</td>\n      <td>-87 to -5 °C</td>\n    </tr>\n    <tr>\n      <td>First Record:</td>\n      <td>2nd millennium BC</td>\n    </tr>\n    <tr>\n      <td>Recorded By:</td>\n      <td>Egyptian astronomers</td>\n    </tr>\n  </tbody>\n</table>'

In [14]:
# define the URL to the hemisphere search results and scrape every
# "description" div from that page
hemisphere_url = "https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars"
hemisphere_soup = make_soup(browser, hemisphere_url, "div", "description", True)

# for each description keep only the text before the first slash and
# remove the extraneous " Enhancedimage" text, leaving the bare title
hemi_titles = [
    description.text.split("/", 1)[0].replace(" Enhancedimage", "")
    for description in hemisphere_soup
]

print(hemi_titles)

['Cerberus Hemisphere', 'Schiaparelli Hemisphere', 'Syrtis Major Hemisphere', 'Valles Marineris Hemisphere']


In [15]:
# empty list that will collect the hemisphere image URLs, and the site
# base URL that is prefixed to each scraped image path
hemi_img = list()
base_url = "https://astrogeology.usgs.gov"

In [16]:
# create a list of URLs to each hemisphere image page; the pages share
# one address pattern and differ only in the hemisphere slug
hemi_url_list = [
    "https://astrogeology.usgs.gov/search/map/Mars/Viking/{}_enhanced".format(slug)
    for slug in ("cerberus", "schiaparelli", "syrtis_major", "valles_marineris")
]

In [17]:
# start from an empty list every time this cell runs, so that re-running
# the cell does not append a second copy of each image URL
hemi_img = []

# loop through URLs
for url in hemi_url_list:
    # call scraper to get the "wide-image" <img> tag of the page
    hemi_soup = make_soup(browser, url, "img", "wide-image", False)

    # prefix the tag's src with the site base URL and append the full
    # size image URL to the list
    hemi_img.append(base_url + hemi_soup["src"])

print(hemi_img)

['https://astrogeology.usgs.gov/cache/images/f5e372a36edfa389625da6d0cc25d905_cerberus_enhanced.tif_full.jpg', 'https://astrogeology.usgs.gov/cache/images/3778f7b43bbbc89d6e3cfabb3613ba93_schiaparelli_enhanced.tif_full.jpg', 'https://astrogeology.usgs.gov/cache/images/555e6403a6ddd7ba16ddb0e471cadcf7_syrtis_major_enhanced.tif_full.jpg', 'https://astrogeology.usgs.gov/cache/images/b3c7c6c9138f57b4756be9b9c43e3a48_valles_marineris_enhanced.tif_full.jpg']


In [18]:
# pair each hemisphere title with the image URL stored at the same
# position and collect the pairs as a list of dictionaries
hemisphere_image_urls = [
    {"title": title, "img_url": hemi_img[position]}
    for position, title in enumerate(hemi_titles)
]

hemisphere_image_urls

[{'title': 'Cerberus Hemisphere',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/f5e372a36edfa389625da6d0cc25d905_cerberus_enhanced.tif_full.jpg'},
 {'title': 'Schiaparelli Hemisphere',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/3778f7b43bbbc89d6e3cfabb3613ba93_schiaparelli_enhanced.tif_full.jpg'},
 {'title': 'Syrtis Major Hemisphere',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/555e6403a6ddd7ba16ddb0e471cadcf7_syrtis_major_enhanced.tif_full.jpg'},
 {'title': 'Valles Marineris Hemisphere',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/b3c7c6c9138f57b4756be9b9c43e3a48_valles_marineris_enhanced.tif_full.jpg'}]