In [1]:
# Import Splinter, BeautifulSoup, ChromeDriverManager, and pandas
from urllib.parse import urljoin

from splinter import Browser
from bs4 import BeautifulSoup as soup
from webdriver_manager.chrome import ChromeDriverManager
import pandas as pd

In [2]:
# Ask webdriver-manager for the ChromeDriver path and start a visible
# (headless=False) Chrome session that Splinter will drive.
executable_path = dict(executable_path=ChromeDriverManager().install())
browser = Browser('chrome', headless=False, **executable_path)

In [3]:
# Visit the Mars NASA news site in the Splinter-controlled browser.
url = 'https://redplanetscience.com'
browser.visit(url)
# Optional delay for loading the page: check for the 'div.list_text' element,
# passing wait_time=1. The boolean result is displayed as the cell output.
browser.is_element_present_by_css('div.list_text', wait_time=1)

True

In [4]:
# Set up the HTML parser: grab the page source currently loaded in the browser.
html = browser.html

# Parse the page source with BeautifulSoup using the 'html.parser' backend.
news_soup = soup(html, 'html.parser')

# Keep the first element matching 'div.list_text'; the title and teaser are
# looked up inside this parent element in the following cells.
slide_elem = news_soup.select_one('div.list_text')

In [5]:
# Begin scraping: look inside the parent element for the <div> whose class is
# 'content_title' and display the matching tag as the cell output.

slide_elem.find('div', class_='content_title')

<div class="content_title">While Stargazing on Mars, NASA's Curiosity Rover Spots Earth and Venus</div>

In [6]:
# Starting from the parent element, locate the 'content_title' <div> and keep
# only its text as `news_title`.
title_elem = slide_elem.find('div', class_='content_title')
news_title = title_elem.get_text()

news_title

"While Stargazing on Mars, NASA's Curiosity Rover Spots Earth and Venus"

In [7]:
# Starting from the same parent element, locate the 'article_teaser_body'
# <div> and keep only its text as `news_p`.
teaser_elem = slide_elem.find('div', class_='article_teaser_body')
news_p = teaser_elem.get_text()

news_p

"This new portrait of the Red Planet's neighbors was taken during a time when there's more dust in the air on Mars."

### Featured Images 

In [8]:
# Visit URL

url = 'https://spaceimages-mars.com'
# This is the URL of the website we will be scraping for the featured image.

browser.visit(url)
# Tells Splinter to load the URL in the automated browser.
# Because the browser was started with headless=False, the page should open
# in a separate browser window when this cell is executed.

In [9]:
# Find and click the full image button.
#
# Splinter looks up the page's elements by tag name ('button'); index [1]
# selects the second match, which this notebook treats as the "full image"
# button. Clicking it makes the browser show the featured image at full size.
buttons = browser.find_by_tag('button')
full_image_elem = buttons[1]
full_image_elem.click()

In [12]:
# Parse the resulting html with soup.
# Re-read the page source after the click so that the full-size image markup
# looked up in the next cell is part of the parsed document.

html = browser.html

img_soup = soup(html, 'html.parser')

In [13]:
# Find the relative image URL.
#
# BeautifulSoup is told to look for an <img> tag with the class
# 'fancybox-image'; .get('src') then pulls the link to the image from that tag.
fancybox_img = img_soup.find('img', class_='fancybox-image')
img_url_rel = fancybox_img.get('src')

img_url_rel

'image/featured/mars2.jpg'

In [14]:
# Use the base URL to create an absolute URL.
# urljoin (rather than plain string formatting) resolves the scraped 'src'
# against the site root, so the result stays well-formed whether the value is
# relative ('image/...'), root-relative ('/image/...'), or already absolute.
img_url = urljoin('https://spaceimages-mars.com/', img_url_rel)
img_url

'https://spaceimages-mars.com/image/featured/mars2.jpg'

In [15]:
# pandas' read_html searches the page for HTML tables and returns them as a
# list of DataFrames; index [0] keeps only the first table it encounters.
tables = pd.read_html('https://galaxyfacts-mars.com')
df = tables[0]

# Assign column names to the new DataFrame for additional clarity.
df.columns = ['description', 'Mars', 'Earth']

# Turn the description column into the DataFrame's index. set_index returns a
# new DataFrame, which is assigned back to `df`.
df = df.set_index('description')

df


Unnamed: 0_level_0,Mars,Earth
description,Unnamed: 1_level_1,Unnamed: 2_level_1
Mars - Earth Comparison,Mars,Earth
Diameter:,"6,779 km","12,742 km"
Mass:,6.39 × 10^23 kg,5.97 × 10^24 kg
Moons:,2,1
Distance from Sun:,"227,943,824 km","149,598,262 km"
Length of Year:,687 Earth days,365.24 days
Temperature:,-87 to -5 °C,-88 to 58°C


In [16]:
# Convert the DataFrame back into HTML-ready code. to_html() returns the table
# markup as a string, which is shown as the cell output.
df.to_html()

'<table border="1" class="dataframe">\n  <thead>\n    <tr style="text-align: right;">\n      <th></th>\n      <th>Mars</th>\n      <th>Earth</th>\n    </tr>\n    <tr>\n      <th>description</th>\n      <th></th>\n      <th></th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>Mars - Earth Comparison</th>\n      <td>Mars</td>\n      <td>Earth</td>\n    </tr>\n    <tr>\n      <th>Diameter:</th>\n      <td>6,779 km</td>\n      <td>12,742 km</td>\n    </tr>\n    <tr>\n      <th>Mass:</th>\n      <td>6.39 × 10^23 kg</td>\n      <td>5.97 × 10^24 kg</td>\n    </tr>\n    <tr>\n      <th>Moons:</th>\n      <td>2</td>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>Distance from Sun:</th>\n      <td>227,943,824 km</td>\n      <td>149,598,262 km</td>\n    </tr>\n    <tr>\n      <th>Length of Year:</th>\n      <td>687 Earth days</td>\n      <td>365.24 days</td>\n    </tr>\n    <tr>\n      <th>Temperature:</th>\n      <td>-87 to -5 °C</td>\n      <td>-88 to 58°C</td>\n    </tr>\n  </tbody>

In [17]:
# End the automated browser session now that scraping is finished.
browser.quit()