<h1>Web Scraping</h1>

<h4>Import Dependencies</h4>

In [1]:
from bs4 import BeautifulSoup
import requests
from splinter import Browser
import pandas as pd

<h4>Initialize a Browser</h4>

In [2]:
# Driver options for splinter: points at the ChromeDriver binary by file name.
# NOTE(review): presumably resolved relative to the notebook's working
# directory or PATH - confirm before running on another machine.
executable_path = dict(executable_path='chromedriver.exe')

# Launch a visible (non-headless) Chrome session that later cells drive.
browser = Browser('chrome', headless=False, **executable_path)

<h4>NASA Mars News - Access and Store News Title and Paragraph</h4>

In [3]:
# Open the NASA Mars news listing in the automated browser, then parse the
# page source the browser reports into a BeautifulSoup tree.
news_url = "https://mars.nasa.gov/news/?page=0&per_page=40&order=publish_date+desc%2Ccreated_at+desc&search=&category=19%2C165%2C184%2C204&blank_scope=Latest"
browser.visit(news_url)

html = browser.html
soup = BeautifulSoup(markup=html, features="html.parser")

In [4]:
# The first <li class="slide"> in the parsed page is treated as the top article.
top_article_sect = soup.find("li", class_="slide")

# Headline: text of the link inside the article's "content_title" div.
title_div = top_article_sect.find("div", class_="content_title")
news_title = title_div.find("a").text
print(news_title, "==================", sep="\n")

# Teaser paragraph: text of the article's "article_teaser_body" div.
teaser_div = top_article_sect.find("div", class_="article_teaser_body")
news_p = teaser_div.text
print(news_p)

Independent Review Indicates NASA Prepared for Mars Sample Return Campaign
NASA released an independent review report Tuesday indicating the agency is well positioned for its Mars Sample Return campaign to bring pristine samples from Mars to Earth for scientific study.


<h4>JPL Mars Space Images - Access and Store the Featured Image's URL</h4>

In [5]:
# Open the JPL space-images page filtered to Mars, then parse the page source
# the browser reports into a BeautifulSoup tree.
jpl_url = "https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars"
browser.visit(jpl_url)

html = browser.html
soup = BeautifulSoup(markup=html, features="html.parser")

In [6]:
# The link's "data-fancybox-href" attribute holds a site-relative image path,
# so it is prefixed with the JPL host to form an absolute URL.
base_url = "https://www.jpl.nasa.gov"

fancybox_link = soup.find("a", class_="button fancybox")
featured_image_url_end = fancybox_link["data-fancybox-href"]

featured_image_url = base_url + featured_image_url_end
print(featured_image_url)

https://www.jpl.nasa.gov/spaceimages/images/mediumsize/PIA16101_ip.jpg


<h4>Mars Facts - Scrape Facts table using Pandas</h4>

In [7]:
# Let pandas download the page and return every HTML table it finds as a
# list of DataFrames.
facts_url = "https://space-facts.com/mars/"
facts_tables = pd.read_html(io=facts_url)

In [8]:
# Build the Mars facts table from the first table scraped from the page.
#
# Work on a copy and use the non-inplace set_index so this cell never mutates
# facts_tables[0]. The previous in-place version aliased that frame, so a
# second run of the cell failed: the frame had already lost its 'Description'
# column and could no longer accept two column names.
facts_df = facts_tables[0].copy()
facts_df.columns = ['Description', 'Mars']
facts_df = facts_df.set_index('Description')

In [9]:
# Write the facts table out as an HTML <table> file next to the notebook.
facts_df.to_html(buf="facts_table.html")

<h4>Mars Hemispheres - Scrape Images of Mars' Hemispheres</h4>

In [10]:
# Open the USGS Astrogeology search results for enhanced Mars hemisphere
# images, then parse the page source the browser reports.
hemispheres_url = "https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars"
browser.visit(hemispheres_url)

html = browser.html
soup = BeautifulSoup(markup=html, features="html.parser")


In [11]:
# Collect a {"title", "img_url"} record for every result on the hemisphere
# search page parsed in the previous cell.
hemi_descs = soup.find_all("div", class_="description")
base_url = "https://astrogeology.usgs.gov"
hemisphere_image_urls = []
for desc in hemi_descs:
    title = desc.find("h3").text

    # Follow the result's link; the page it leads to is where the
    # "wide-image" <img> is looked up.
    browser.click_link_by_partial_text(title)

    # Name the parser explicitly, as every other cell does: without it
    # BeautifulSoup guesses based on what is installed and emits a warning.
    # A separate name keeps `soup` pointing at the results listing.
    detail_soup = BeautifulSoup(browser.html, "html.parser")
    img_path = detail_soup.find("img", class_="wide-image")["src"]

    # The src is site-relative, so prefix it with the USGS host.
    hemisphere_image_urls.append(
        {"title": title, "img_url": base_url + img_path}
    )

    # Return to the results page so the next title's link can be clicked.
    browser.back()

print(hemisphere_image_urls)
    



[{'title': 'Cerberus Hemisphere Enhanced', 'img_url': 'https://astrogeology.usgs.gov/cache/images/f5e372a36edfa389625da6d0cc25d905_cerberus_enhanced.tif_full.jpg'}, {'title': 'Schiaparelli Hemisphere Enhanced', 'img_url': 'https://astrogeology.usgs.gov/cache/images/3778f7b43bbbc89d6e3cfabb3613ba93_schiaparelli_enhanced.tif_full.jpg'}, {'title': 'Syrtis Major Hemisphere Enhanced', 'img_url': 'https://astrogeology.usgs.gov/cache/images/555e6403a6ddd7ba16ddb0e471cadcf7_syrtis_major_enhanced.tif_full.jpg'}, {'title': 'Valles Marineris Hemisphere Enhanced', 'img_url': 'https://astrogeology.usgs.gov/cache/images/b3c7c6c9138f57b4756be9b9c43e3a48_valles_marineris_enhanced.tif_full.jpg'}]


In [37]:
# Shut down the automated browser session created in the browser-setup cell.
browser.quit()