## Imports

In [63]:
#Imports
import re
import shutil

import pandas as pd
import requests
from bs4 import BeautifulSoup
from splinter import Browser
from splinter.exceptions import ElementDoesNotExist

## Mechanism to Open Webpage on Chrome

In [64]:
#Splinter Chrome driver setup guide: https://splinter.readthedocs.io/en/latest/drivers/chrome.html
#Installed selenium and chromedriver using Homebrew
#Located the chromedriver binary on PATH (shell command run through IPython's `!` syntax)
!which chromedriver

/usr/local/bin/chromedriver


In [65]:
#Resolved chromedriver from PATH so the notebook is not tied to one machine's layout;
#fell back to the previously hard-coded location if it is not found on PATH
chromedriver_path = shutil.which('chromedriver') or '/usr/local/bin/chromedriver'
#Passed the executable path as a dictionary to the **kwargs argument
executable_path = {'executable_path': chromedriver_path}
#headless=False is forwarded to the Chrome driver unchanged
browser = Browser('chrome', **executable_path, headless=False)

## Red Planet Science Web Scraping 

In [66]:
#Address of the Mars news site to scrape
science_url = 'https://redplanetscience.com'
#Pointed the automated browser at that address
browser.visit(science_url)

In [67]:
#Checked that the news site answers a plain HTTP request, independent of the browser session
#timeout keeps the cell from hanging indefinitely if the server never responds
response = requests.get(science_url, timeout=10)
#Left as the last expression so the response status is displayed
response

<Response [200]>

In [68]:
#Captured the HTML of the page currently loaded in the browser
science_html = browser.html
#Parsed that HTML with Beautiful Soup using the "html.parser" backend
science_bs = BeautifulSoup(markup=science_html, features="html.parser")
#Printed an indented rendering of the parsed document for easier reading
print(science_bs.prettify())

<html>
 <head>
  <meta charset="utf-8"/>
  <meta content="width=device-width, initial-scale=1" name="viewport"/>
  <link crossorigin="anonymous" href="https://cdn.jsdelivr.net/npm/bootstrap@5.0.0-beta1/dist/css/bootstrap.min.css" integrity="sha384-giJF6kkoqNQ00vy+HMDP7azOuL0xtbfIcaT9wjKHr8RbDVddVHyTfAAsrekwKmP1" rel="stylesheet"/>
  <link href="css/font.css" rel="stylesheet" type="text/css"/>
  <link href="css/app.css" rel="stylesheet" type="text/css"/>
  <link crossorigin="anonymous" href="https://pro.fontawesome.com/releases/v5.10.0/css/all.css" integrity="sha384-AYmEC3Yw5cVb3ZcuHtOA93w35dYTsvhLPVnYs9eStHfGJvOvKxVfELGroGkvsg+p" rel="stylesheet"/>
  <title>
   News - Mars Exploration Program
  </title>
 </head>
 <body>
  <div class="col-md-12">
   <div class="row">
    <nav class="navbar navbar-expand-lg navbar-light fixed-top">
     <div class="container-fluid">
      <a class="navbar-brand" href="#">
       <img src="image/nasa.png" width="80"/>
       <span class="logo">
        MA

In [69]:
#Scraped the Mars news site for the first news title and its teaser paragraph
#find() returns the first matching <div>, or None when nothing matches
title_tag = science_bs.find("div", class_="content_title")
teaser_tag = science_bs.find("div", class_="article_teaser_body")
#Failed with a clear message instead of an opaque AttributeError on None.text
#(e.g. when the HTML was captured before the article list was present)
if title_tag is None or teaser_tag is None:
    raise RuntimeError(
        "Could not find 'content_title' / 'article_teaser_body' in the parsed page; "
        "re-capture browser.html once the page has finished loading."
    )
#Assigned the text to variables for later reference
science_title = title_tag.text
science_paragraph = teaser_tag.text
print(f"1. Latest News Title: {science_title}")
print(f"2. Corresponding Paragraph Text: {science_paragraph}")

1. Latest News Title: NASA's Curiosity Mars Rover Takes a New Selfie Before Record Climb
2. Corresponding Paragraph Text: Along with capturing an image before its steepest ascent ever, the robotic explorer filmed its "selfie stick," or robotic arm, in action.


## Mars Space Images Web Scraping

In [70]:
#Address of the Mars space images site to scrape
space_url = 'https://spaceimages-mars.com'
#Pointed the automated browser at that address
browser.visit(space_url)

In [71]:
#Checked that the space images site answers a plain HTTP request
#(this cell previously re-requested science_url, so it never tested this site)
#timeout keeps the cell from hanging indefinitely if the server never responds
response = requests.get(space_url, timeout=10)
#Left as the last expression so the response status is displayed
response

<Response [200]>

In [72]:
#Captured the HTML of the page currently loaded in the browser
space_html = browser.html
#Parsed that HTML with Beautiful Soup using the "html.parser" backend
space_bs = BeautifulSoup(markup=space_html, features="html.parser")
#Printed an indented rendering of the parsed document for easier reading
print(space_bs.prettify())

<html class="">
 <head>
  <meta charset="utf-8"/>
  <meta content="width=device-width, initial-scale=1" name="viewport"/>
  <link href="https://maxcdn.bootstrapcdn.com/bootstrap/4.0.0/css/bootstrap.min.css" rel="stylesheet"/>
  <!-- <link rel="stylesheet" type="text/css" href="css/font.css"> -->
  <link href="css/app.css" rel="stylesheet" type="text/css"/>
  <link href="https://stackpath.bootstrapcdn.com/font-awesome/4.7.0/css/font-awesome.min.css" rel="stylesheet" type="text/css"/>
  <title>
   Space Image
  </title>
  <style type="text/css">
   .fancybox-margin{margin-right:0px;}
  </style>
 </head>
 <body>
  <div class="header">
   <nav class="navbar navbar-expand-lg">
    <a class="navbar-brand" href="#">
     <img id="logo" src="image/nasa.png"/>
     <span class="logo">
      Jet Propulsion Laboratory
     </span>
     <span class="logo1">
      California Institute of Technology
     </span>
    </a>
    <button aria-controls="navbarNav" aria-expanded="false" aria-label="Toggle 

In [73]:
#Selected every element whose src attribute ends in ".jpg"
jpeg_tags = space_bs.select("[src$='.jpg']")
#Collected the src value of each match, in document order
jpeg_list = [tag['src'] for tag in jpeg_tags]
print(jpeg_list)

['image/featured/mars3.jpg', 'image/mars/Icaria Fossae7.jpg', 'image/mars/Proctor Crater Dunes 7.jpg', 'image/mars/Icaria Fossae7.jpg', 'image/mars/Proctor Crater Dunes 7.jpg', 'image/mars/Proctor Crater Dunes 7.jpg', 'image/mars/Icaria Fossae7.jpg', 'image/mars/Icaria Fossae.jpg', 'image/mars/Ariadnes Colles4.jpg', 'image/mars/Niger Vallis.jpg', 'image/mars/Proctor Crater Dunes.jpg', 'image/mars/Niger Vallis.jpg', 'image/mars/Daedalia Planum.jpg', 'image/mars/Sirenum Fossae.jpg', 'image/mars/Ariadnes Colles4.jpg', 'image/mars/South Polar Cap.jpg', 'image/mars/Daedalia Planum.jpg', 'image/mars/Ariadnes Colles3.jpg', 'image/mars/Atlantis Chaos.jpg', 'image/mars/Daedalia Planum.jpg', 'image/mars/Icaria Fossae.jpg', 'image/mars/Niger Vallis.jpg', 'image/mars/Proctor Crater Dunes.jpg', 'image/mars/Reull Vallis.jpg', 'image/mars/Ariadnes Colles3.jpg', 'image/mars/Sirenum Fossae.jpg', 'image/mars/South Polar Cap.jpg', 'image/mars/Niger Vallis.jpg', 'image/mars/Daedalia Planum.jpg', 'image/ma

In [74]:
#Selected the first element whose src ends in ".jpg"; select_one() stops at the first
#match instead of building the full list only to index element 0
first_jpeg = space_bs.select_one("[src$='.jpg']")
#Failed with a clear message rather than a bare IndexError when the page has no JPEGs
if first_jpeg is None:
    raise RuntimeError("No element with a '.jpg' src was found in the parsed page.")
#Assigned the JPEG path to a variable for later reference
featured_image = first_jpeg['src']
print(featured_image)

image/featured/mars3.jpg


In [75]:
#Built the full URL from space_url rather than repeating the site address,
#so the image URL always matches the page that was scraped
#rstrip('/') avoids a double slash if space_url is ever given a trailing '/'
featured_image_url = f"{space_url.rstrip('/')}/{featured_image}"
#Printed full URL of the first JPEG
print(f"Complete URL String:\n{featured_image_url}")

Complete URL String:
https://spaceimages-mars.com/image/featured/mars3.jpg
