# Web Scraping Homework - Mission to Mars

In [1]:
# Importing the relevant modules and packages
from splinter import Browser
from bs4 import BeautifulSoup
import pandas as pd
import time

In [2]:
# Launch a visible (non-headless) Chrome window through splinter.
# 'chromedriver.exe' is a relative path to the driver executable (Windows build).
executable_path = dict(executable_path='chromedriver.exe')
browser = Browser('chrome', headless=False, **executable_path)

In [3]:
# NASA Mars news listing page
url = 'https://mars.nasa.gov/news/'

# Load the URL in the open browser session
browser.visit(url)

# The recorded run of this notebook hit an IndexError on the first parse and only
# succeeded after a 10-second pause, i.e. the <li class="slide"> news items are not
# in the DOM immediately after visit() returns. Wait (up to 10 seconds) for the
# first one to appear before any later cell snapshots browser.html.
news_list_loaded = browser.is_element_present_by_css('li.slide', wait_time=10)

# NASA Mars News

In [4]:
# Snapshot the page source as currently rendered by the browser
html = browser.html

# Build the BeautifulSoup tree with Python's built-in HTML parser
soup = BeautifulSoup(markup=html, features='html.parser')


In [5]:
# Extract the first news headline (news_title) and its teaser paragraph (news_p).
#
# If the page snapshot does not yet contain any <li class="slide"> / teaser <div>,
# indexing [0] raises IndexError. In that case pause, take a fresh snapshot of the
# page from the browser and parse again, up to max_attempts times in total.
# If the last attempt still fails, the IndexError is re-raised.
max_attempts = 3
retry_delay_seconds = 10

for attempt in range(1, max_attempts + 1):
    try:
        li_slide = soup.find_all('li', class_='slide')
        news_title = li_slide[0].find('div', class_='content_title').text.strip()
        news_p = soup.find_all('div', class_='article_teaser_body')[0].text
    except IndexError:
        if attempt == max_attempts:
            # Out of retries: surface the failure instead of continuing with stale data
            raise
        print('Index error, retrying web parsing')
        time.sleep(retry_delay_seconds)

        # Re-read the (hopefully now fully rendered) page and rebuild the soup
        html = browser.html
        soup = BeautifulSoup(html, 'html.parser')
    else:
        # Only report a successful retry when at least one attempt failed before
        if attempt > 1:
            print('Retry successfull')
        break

print('Parsing successfull')


Index error, retrying web parsing
Retry successfull
Parsing successfull


In [6]:
# Show the scraped headline and teaser paragraph
report = f'Headline:\n{news_title}\n__________________________\n\nBody of Article:\n{news_p}'
print(report)

# The news page is no longer needed, so close this browser session
browser.quit()

Headline:
NASA's Curiosity Takes Selfie With 'Mary Anning' on the Red Planet
__________________________

Body of Article:
The Mars rover has drilled three samples of rock in this clay-enriched region since arriving in July.


# JPL Mars Space Images - Featured Image

In [7]:
# Launch a fresh visible (non-headless) Chrome window through splinter.
# 'chromedriver.exe' is a relative path to the driver executable (Windows build).
executable_path = dict(executable_path='chromedriver.exe')
browser = Browser('chrome', headless=False, **executable_path)

In [8]:
# JPL space-images page; the query string restricts the listing to the Mars category
fi_url = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'

# Navigate the open browser session to that page
browser.visit(fi_url)

In [9]:
# Base URL for Image (the style attribute scraped later holds a site-relative path)
base_url = 'https://www.jpl.nasa.gov'

# Pause for 10 seconds BEFORE reading the page source, so the pause actually gives
# the page time to finish rendering. Sleeping after the snapshot has been taken
# cannot change what was captured.
time.sleep(10)

# Snapshot the page source as currently rendered by the browser
fi_html = browser.html

# Parsing with BeautifulSoup
fi_soup = BeautifulSoup(fi_html, 'html.parser')

In [10]:
# Obtaining the featured image: the carousel <article> carries the image path in its
# inline style, e.g. "background-image: url('/spaceimages/...jpg');"
carousel_style = fi_soup.find('article', class_='carousel_item')['style'].strip()

# Drop the leading CSS property text. str.strip(chars) treats its argument as a SET of
# characters to remove from both ends rather than as a prefix, so it can also eat
# unrelated leading/trailing characters; slice the exact prefix off instead.
css_prefix = 'background-image: url'
if carousel_style.startswith(css_prefix):
    featured_image_pre = carousel_style[len(css_prefix):]
else:
    featured_image_pre = carousel_style
featured_image_pre

"('/spaceimages/images/wallpaper/PIA18914-1920x1200.jpg');"

In [11]:
# The string still carries the surrounding ('...') wrapper, so cut out the text
# between the opening "('" and the closing "')".
open_marker = "('"
close_marker = "')"

start = featured_image_pre.find(open_marker)
if start == -1:
    # str.find returns -1 when absent; slicing with that would silently yield garbage
    raise ValueError(f'Opening {open_marker!r} not found in {featured_image_pre!r}')
start += len(open_marker)

# Search for the closing marker only after the opening one
end = featured_image_pre.find(close_marker, start)
if end == -1:
    raise ValueError(f'Closing {close_marker!r} not found in {featured_image_pre!r}')

substring = featured_image_pre[start:end]
print(substring)

/spaceimages/images/wallpaper/PIA18914-1920x1200.jpg


In [12]:
# The page content has already been captured, so the browser session can be closed
browser.quit()

# Featured Image URL: site root + the site-relative path extracted from the style attribute.
# The featured image on the website keeps changing, so this value varies between runs.
featured_image = f'{base_url}{substring}'
featured_image

'https://www.jpl.nasa.gov/spaceimages/images/wallpaper/PIA18914-1920x1200.jpg'

In [13]:
# Download the featured image to a local file and display it in Jupyter
import requests
import shutil
from IPython.display import Image

# The context manager releases the streamed connection once the copy is done
with requests.get(featured_image, stream=True) as response:
    # Fail loudly on 4xx/5xx instead of saving an HTML error page as the "image"
    response.raise_for_status()

    # response.raw yields the bytes exactly as sent on the wire; ask urllib3 to undo
    # any gzip/deflate transfer encoding so the file on disk is the real image data
    response.raw.decode_content = True

    # NOTE(review): the URL above ends in .jpg but the file is saved with a .png
    # name - confirm whether the extension should follow the source image.
    with open('img.png', 'wb') as out_file:
        shutil.copyfileobj(response.raw, out_file)

# Displaying the featured image
Image(url='img.png')

# Mars Facts

In [14]:
# Page holding the Mars fact tables
table_url = 'https://space-facts.com/mars/'

# pd.read_html returns a LIST of DataFrames, one per table it finds on the page
# (despite the name, table_df is that list, not a single DataFrame)
table_df = pd.read_html(table_url)


In [15]:
# Viewing the tables: show the whole list of DataFrames parsed from the page
table_df

[                      0                              1
 0  Equatorial Diameter:                       6,792 km
 1       Polar Diameter:                       6,752 km
 2                 Mass:  6.39 × 10^23 kg (0.11 Earths)
 3                Moons:            2 (Phobos & Deimos)
 4       Orbit Distance:       227,943,824 km (1.38 AU)
 5         Orbit Period:           687 days (1.9 years)
 6  Surface Temperature:                   -87 to -5 °C
 7         First Record:              2nd millennium BC
 8          Recorded By:           Egyptian astronomers,
   Mars - Earth Comparison             Mars            Earth
 0               Diameter:         6,779 km        12,742 km
 1                   Mass:  6.39 × 10^23 kg  5.97 × 10^24 kg
 2                  Moons:                2                1
 3      Distance from Sun:   227,943,824 km   149,598,262 km
 4         Length of Year:   687 Earth days      365.24 days
 5            Temperature:     -87 to -5 °C      -88 to 58°C,
           

In [16]:
# Separating the tables: the first parsed table is the Mars profile.
# Its columns arrive labelled 0 and 1, so give them meaningful names and
# use the description column as the index.
mars_profile_1 = table_df[0]
mars_profile = (
    mars_profile_1
    .rename(columns={0: 'Description', 1: 'Value'})
    .set_index('Description')
)
mars_profile

Unnamed: 0_level_0,Value
Description,Unnamed: 1_level_1
Equatorial Diameter:,"6,792 km"
Polar Diameter:,"6,752 km"
Mass:,6.39 × 10^23 kg (0.11 Earths)
Moons:,2 (Phobos & Deimos)
Orbit Distance:,"227,943,824 km (1.38 AU)"
Orbit Period:,687 days (1.9 years)
Surface Temperature:,-87 to -5 °C
First Record:,2nd millennium BC
Recorded By:,Egyptian astronomers


In [17]:
# The second parsed table compares Mars to Earth; index it by its label column
comparison_table = table_df[1]
mars_vs_earth = comparison_table.set_index('Mars - Earth Comparison')
mars_vs_earth

Unnamed: 0_level_0,Mars,Earth
Mars - Earth Comparison,Unnamed: 1_level_1,Unnamed: 2_level_1
Diameter:,"6,779 km","12,742 km"
Mass:,6.39 × 10^23 kg,5.97 × 10^24 kg
Moons:,2,1
Distance from Sun:,"227,943,824 km","149,598,262 km"
Length of Year:,687 Earth days,365.24 days
Temperature:,-87 to -5 °C,-88 to 58°C


In [18]:
# Write each table back out as an HTML file under ../Resources
html_exports = (
    (mars_profile, '../Resources/mars_profile.html'),
    (mars_vs_earth, '../Resources/mars_vs_earth.html'),
)
for frame, target_path in html_exports:
    frame.to_html(target_path)

# Mars Hemispheres

In [19]:
# Launch a fresh visible (non-headless) Chrome window through splinter.
# 'chromedriver.exe' is a relative path to the driver executable (Windows build).
executable_path = dict(executable_path='chromedriver.exe')
browser = Browser('chrome', headless=False, **executable_path)

In [20]:
# USGS Astrogeology search results for enhanced Mars hemisphere images
img_url = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'

# Navigate the open browser session to the search results.
# NOTE(review): img_browser holds whatever browser.visit() returns; the following
# cells read the page through `browser`, not `img_browser` - confirm whether this
# binding is needed at all.
img_browser = browser.visit(img_url)

In [21]:
# Collect the hemisphere titles shown on the search-results page into title_name.
# The browser is closed in a `finally` so the Chrome window is not left running
# if anything in the parsing step raises.
try:
    # Setting up the HTML for images
    img_html = browser.html

    # Setting up Beautiful Soup to obtain images
    img_soup = BeautifulSoup(img_html, 'html.parser')

    # List that receives one title per result
    title_name = []

    # Each search result is described by a <div class="description">
    results = img_soup.find_all('div', class_='description')

    # Iterating through the results to append the titles
    for result in results:
        heading = result.find('h3')
        if heading is None:
            # A description block without an <h3> has no title to record; skip it
            # rather than failing with AttributeError on None.text
            continue
        h3 = heading.text
        print(h3)
        title_name.append(h3)
finally:
    browser.quit()
    

Cerberus Hemisphere Enhanced
Schiaparelli Hemisphere Enhanced
Syrtis Major Hemisphere Enhanced
Valles Marineris Hemisphere Enhanced


In [22]:
# Build a list of dictionaries, one per hemisphere, pairing each scraped title
# (by position in title_name) with the link to its full-resolution image.
hemisphere_tif_links = (
    'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif',
    'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif',
    'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif',
    'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif',
)
hemisphere_image_urls = [
    {'Title': title_name[position], "img_url": tif_link}
    for position, tif_link in enumerate(hemisphere_tif_links)
]

In [23]:
# Display the assembled list of {'Title', 'img_url'} dictionaries
hemisphere_image_urls

[{'Title': 'Cerberus Hemisphere Enhanced',
  'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif'},
 {'Title': 'Schiaparelli Hemisphere Enhanced',
  'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif'},
 {'Title': 'Syrtis Major Hemisphere Enhanced',
  'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif'},
 {'Title': 'Valles Marineris Hemisphere Enhanced',
  'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif'}]