### Import Dependencies

In [11]:
from splinter import Browser
from bs4 import BeautifulSoup as bs
from webdriver_manager.chrome import ChromeDriverManager
import pandas as pd

### Mars News Scrape

In [41]:
def news_scrape():
    """Scrape the most recent Mars news stories from redplanetscience.com.

    Opens a Chrome window through splinter, loads the news page, and parses
    the rendered HTML with BeautifulSoup.

    Returns:
        list[dict]: One dict per story with the keys ``"headline"`` and
        ``"tagline"``, in page order. At most four stories are returned;
        containers missing either element are skipped.
    """
    executable_path = {'executable_path': ChromeDriverManager().install()}
    browser = Browser('chrome', **executable_path, headless=False)

    # Always release the browser, even if the page load or parse fails,
    # so a failed run does not leave a Chrome process behind.
    try:
        news_url = "https://redplanetscience.com/"
        browser.visit(news_url)

        # Parse the HTML as rendered by the browser.
        news_soup = bs(browser.html, "html.parser")
    finally:
        browser.quit()

    # Each story on the page lives in a <div class="list_text"> container.
    results = news_soup.find_all('div', class_='list_text')

    articles = []
    for item in results[:4]:
        headline = item.find('div', class_="content_title")
        tagline = item.find("div", class_="article_teaser_body")
        # Skip a malformed container rather than crash on None.get_text().
        if headline is None or tagline is None:
            continue
        articles.append({
            "headline": headline.get_text(),
            "tagline": tagline.get_text(),
        })

    return articles

In [42]:
# Run the news scrape; the cell output is the returned list of story dicts.
news_scrape()



Current google-chrome version is 93.0.4577
Get LATEST driver version for 93.0.4577
Driver [C:\Users\tayly\.wdm\drivers\chromedriver\win32\93.0.4577.63\chromedriver.exe] found in cache


[{'headline': "NASA's Perseverance Rover Is Midway to Mars ",
  'tagline': "Sometimes half measures can be a good thing – especially on a journey this long. The agency's latest rover only has about 146 million miles left to reach its destination."},
 {'headline': "Deadline Closing for Names to Fly on NASA's Next Mars Rover",
  'tagline': 'You have until Sept. 30 to send your names to Mars aboard the Mars 2020 rover. '},
 {'headline': "NASA's MAVEN Explores Mars to Understand Radio Interference at Earth",
  'tagline': 'NASA’s MAVEN spacecraft has discovered “layers” and “rifts” in the electrically charged part of the upper atmosphere of Mars.'},
 {'headline': "NASA's Perseverance Rover Bringing 3D-Printed Metal Parts to Mars",
  'tagline': "For hobbyists and makers, 3D printing expands creative possibilities; for specialized engineers, it's also key to next-generation spacecraft design."}]

### Hemisphere Scrape

In [43]:
def hemi_scrape():
    """Scrape the title and image URL of each Mars hemisphere.

    Opens a Chrome window through splinter, loads marshemispheres.com, and
    for every hemisphere listed on the landing page follows its link to the
    detail page to read the image link.

    Returns:
        list[dict]: One dict per hemisphere with the keys ``"title"``
        (the heading text with " Enhanced" removed) and ``"image_src"``
        (the site base URL joined with the link's relative href).
    """
    executable_path = {'executable_path': ChromeDriverManager().install()}
    browser = Browser('chrome', **executable_path, headless=False)

    hemi_url = "https://marshemispheres.com/"
    hemispheres = []

    # Always release the browser, even if navigation or parsing fails
    # part-way through the loop.
    try:
        browser.visit(hemi_url)

        # Parse the landing page; each hemisphere has a
        # <div class="description"> holding its <h3> title.
        hemi_soup = bs(browser.html, "html.parser")
        hemi_results = hemi_soup.find_all('div', class_='description')

        for item in hemi_results:
            title = item.find('h3').get_text().replace(' Enhanced', '')

            # Follow the hemisphere's link to its detail page.
            # browser.links replaces the deprecated click_link_by_partial_text.
            browser.links.find_by_partial_text(title).click()

            detail_soup = bs(browser.html, 'html.parser')
            # Takes the anchor in the first <li> of the detail page;
            # presumably the full-size image link -- confirm against the site markup.
            image_src = hemi_url + detail_soup.find('li').a['href']
            hemispheres.append({'title': title, 'image_src': image_src})

            # Return to the landing page so the next link can be clicked.
            browser.back()
    finally:
        browser.quit()

    return hemispheres

In [44]:
# Run the hemisphere scrape; the cell output is the list of title/image_src dicts.
hemi_scrape()



Current google-chrome version is 93.0.4577
Get LATEST driver version for 93.0.4577
Driver [C:\Users\tayly\.wdm\drivers\chromedriver\win32\93.0.4577.63\chromedriver.exe] found in cache


[{'title': 'Cerberus Hemisphere',
  'image_src': 'https://marshemispheres.com/images/full.jpg'},
 {'title': 'Schiaparelli Hemisphere',
  'image_src': 'https://marshemispheres.com/images/schiaparelli_enhanced-full.jpg'},
 {'title': 'Syrtis Major Hemisphere',
  'image_src': 'https://marshemispheres.com/images/syrtis_major_enhanced-full.jpg'},
 {'title': 'Valles Marineris Hemisphere',
  'image_src': 'https://marshemispheres.com/images/valles_marineris_enhanced-full.jpg'}]

In [45]:
def jpl_scrape():
    """Scrape the featured image URL from spaceimages-mars.com.

    Opens a Chrome window through splinter, loads the page, and reads the
    href of the link inside the element with class ``floating_text_area``.

    Returns:
        str: The site base URL joined with that link's relative href.
    """
    executable_path = {'executable_path': ChromeDriverManager().install()}
    browser = Browser('chrome', **executable_path, headless=False)

    jpl_url = "https://spaceimages-mars.com/"

    # Always release the browser, even if the page load fails.
    try:
        browser.visit(jpl_url)
        jpl_soup = bs(browser.html, "html.parser")
    finally:
        browser.quit()

    # The link's href is relative, so prefix it with the site URL.
    featured_img_url = jpl_url + jpl_soup.find(class_='floating_text_area').a['href']

    return featured_img_url

In [46]:
# Run the featured-image scrape; the cell output is the image URL string.
jpl_scrape()



Current google-chrome version is 93.0.4577
Get LATEST driver version for 93.0.4577
Driver [C:\Users\tayly\.wdm\drivers\chromedriver\win32\93.0.4577.63\chromedriver.exe] found in cache


'https://spaceimages-mars.com/image/featured/mars2.jpg'

In [53]:
def table_scrape():
    """Fetch the Mars/Earth facts table and return it as an HTML string.

    pandas downloads and parses the page itself, so no browser session is
    needed; the previous version opened Chrome but never used its HTML.

    Returns:
        str: ``DataFrame.to_html()`` output for the first table on
        galaxyfacts-mars.com, indexed by its first column, with the value
        columns named "Mars" and "Earth" and the
        "Mars - Earth Comparison" row removed.
    """
    table_url = "https://galaxyfacts-mars.com/"

    # read_html returns one DataFrame per <table>; the first is the one used here.
    table_df = pd.read_html(table_url)[0]

    table_df = (
        table_df
        .set_index([0])
        .rename(columns={1: "Mars", 2: "Earth"})
        # This row repeats the column titles, so drop it from the data.
        .drop(index='Mars - Earth Comparison')
    )

    return table_df.to_html()

In [54]:
# Run the facts-table scrape; the cell output is the table as an HTML string.
table_scrape()




Current google-chrome version is 93.0.4577
Get LATEST driver version for 93.0.4577
Driver [C:\Users\tayly\.wdm\drivers\chromedriver\win32\93.0.4577.63\chromedriver.exe] found in cache


'<table border="1" class="dataframe">\n  <thead>\n    <tr style="text-align: right;">\n      <th></th>\n      <th>Mars</th>\n      <th>Earth</th>\n    </tr>\n    <tr>\n      <th>0</th>\n      <th></th>\n      <th></th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>Diameter:</th>\n      <td>6,779 km</td>\n      <td>12,742 km</td>\n    </tr>\n    <tr>\n      <th>Mass:</th>\n      <td>6.39 × 10^23 kg</td>\n      <td>5.97 × 10^24 kg</td>\n    </tr>\n    <tr>\n      <th>Moons:</th>\n      <td>2</td>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>Distance from Sun:</th>\n      <td>227,943,824 km</td>\n      <td>149,598,262 km</td>\n    </tr>\n    <tr>\n      <th>Length of Year:</th>\n      <td>687 Earth days</td>\n      <td>365.24 days</td>\n    </tr>\n    <tr>\n      <th>Temperature:</th>\n      <td>-87 to -5 °C</td>\n      <td>-88 to 58°C</td>\n    </tr>\n  </tbody>\n</table>'

In [42]:
# table_df is a local variable of table_scrape(), so it has to be rebuilt at
# notebook scope before it can be inspected. `columns` is an attribute, not a
# method, so it is read without parentheses.
table_df = pd.read_html("https://galaxyfacts-mars.com/")[0]
table_df.columns