In [1]:
# Set up dependencies
import pandas as pd
from pprint import pprint 
import time
import requests as req
from bs4 import BeautifulSoup as bs
from splinter import Browser
from webdriver_manager.chrome import ChromeDriverManager

In [2]:
# Set up chromedriver and browser
executable_path = {"executable_path": ChromeDriverManager().install()}
browser = Browser("chrome", **executable_path, headless=False)
time.sleep(5)

[WDM] - Current google-chrome version is 91.0.4472
[WDM] - Get LATEST driver version for 91.0.4472






[WDM] - Driver [C:\Users\rtear\.wdm\drivers\chromedriver\win32\91.0.4472.19\chromedriver.exe] found in cache


## Mars News Scraping 

In [3]:
# set up url for Mars News
news_url = 'https://mars.nasa.gov/news/'
browser.visit(news_url)
time.sleep(2)
news_html = browser.html

In [4]:
# parse with BeautifulSoup
news_soup = bs(news_html, 'html.parser')

In [5]:
# find the latest news article
all_news_title = news_soup.find_all("div", class_="content_title")
latest_title = all_news_title[1].find("a").get_text()

In [6]:
# latest article paragraph text
all_news_para = news_soup.find_all("div", class_="article_teaser_body")
latest_para = all_news_para[0].get_text()

In [7]:
# print article title and  paragraph text
pprint(latest_title)
pprint(latest_para)

"NASA's InSight Mars Lander Gets a Power Boost"
('The spacecraft successfully cleared some dust off its solar panels, helping '
 'to raise its energy and delay when it will need to switch off its science '
 'instruments.')


## Mars Fact Scraping

In [8]:
# set up url for Mars Facts
facts_url = "https://space-facts.com/mars/"
browser.visit(facts_url)
time.sleep(2)

In [9]:
# Parse facts url to find tables
facts_tables = pd.read_html(facts_url)
mars_table = facts_tables[0]

In [10]:
# rename columns
mars_table.columns = ["Description", "Mars"]
mars_table = mars_table.set_index("Description")

In [11]:
# convert data to html
html_table = mars_table.to_html()
html_table = html_table.replace('\n', '')

In [12]:
# print table
pprint(html_table)

('<table border="1" class="dataframe">  <thead>    <tr style="text-align: '
 'right;">      <th></th>      <th>Mars</th>    </tr>    <tr>      '
 '<th>Description</th>      <th></th>    </tr>  </thead>  <tbody>    <tr>      '
 '<th>Equatorial Diameter:</th>      <td>6,792 km</td>    </tr>    <tr>      '
 '<th>Polar Diameter:</th>      <td>6,752 km</td>    </tr>    <tr>      '
 '<th>Mass:</th>      <td>6.39 × 10^23 kg (0.11 Earths)</td>    </tr>    '
 '<tr>      <th>Moons:</th>      <td>2 (Phobos &amp; Deimos)</td>    </tr>    '
 '<tr>      <th>Orbit Distance:</th>      <td>227,943,824 km (1.38 AU)</td>    '
 '</tr>    <tr>      <th>Orbit Period:</th>      <td>687 days (1.9 '
 'years)</td>    </tr>    <tr>      <th>Surface Temperature:</th>      <td>-87 '
 'to -5 °C</td>    </tr>    <tr>      <th>First Record:</th>      <td>2nd '
 'millennium BC</td>    </tr>    <tr>      <th>Recorded By:</th>      '
 '<td>Egyptian astronomers</td>    </tr>  </tbody></table>')


## Mars Image Scraping

In [13]:
# Set up url for Mars Image
image_url = "https://data-class-jpl-space.s3.amazonaws.com/JPL_Space/index.html"
browser.visit(image_url)
time.sleep(2)

In [14]:
# Set new browser link
browser.links.find_by_partial_text('FULL IMAGE').first.click()
time.sleep(2) 
full_image_html = browser.html

In [15]:
# parse with BeautifulSoup
image_soup = bs(full_image_html, "html.parser")

In [16]:
# scrape the URL
feature_url = image_soup.find('img')['src']

In [17]:
# print the url for the full image version of the Featured Mars Image
base_url = 'https://data-class-jpl-space.s3.amazonaws.com/JPL_Space/'
featured_image_url = f'{base_url}{feature_url}'

In [18]:
# print url
pprint(featured_image_url)

'https://data-class-jpl-space.s3.amazonaws.com/JPL_Space/image/nasa.png'


## Mars Hemispheres Scraping

In [19]:
# Set up url for Mars Hemisphere
hemi_url = "https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars"
browser.visit(hemi_url)
time.sleep(2)
hemi_html = browser.html

In [20]:
# list for titles and image urls
hemi_scrape = []

# Loop for hemisphere info
for i in range(4):
    hemi_html = browser.html
    hemi_soup = bs(hemi_html, "html.parser")

    title = hemi_soup.find_all("h3")[i].get_text()
    browser.find_by_tag('h3')[i].click()

    hemi_html = browser.html
    hemi_soup = bs(hemi_html, "html.parser")

    hemi_url = hemi_soup.find("a", text="Sample")["href"]

    # append title and image url to dictionary
    hemi_scrape.append({
        "Title": title,
        "Link to image": hemi_url
    })

In [21]:
# print the hemispheres images
pprint(hemi_scrape)

[{'Link to image': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg',
  'Title': 'Cerberus Hemisphere Enhanced'},
 {'Link to image': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg',
  'Title': 'FGDC: xml metadata'},
 {'Link to image': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg',
  'Title': 'Related Products'},
 {'Link to image': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_unenhanced.tif/full.jpg',
  'Title': 'Valles Marineris Hemisphere Unenhanced'}]


In [22]:
# close the browser
browser.quit()