# Mission to Mars Web Scraping

by Mary Brown

In [1]:
# Dependencies
from bs4 import BeautifulSoup
import requests
from splinter import Browser
import pandas as pd

### NASA Mars News

In [2]:
page = requests.get("https://mars.nasa.gov/news/")

In [3]:
soup = BeautifulSoup(page.content, 'html.parser')

In [4]:
news_title=soup.find_all('div',class_='content_title')[0].text
news_title = news_title.replace('\n','')
news_title

'NASA Invites Students to Name Mars 2020 Rover'

In [5]:
news_p=soup.find_all('div',class_='rollover_description_inner')[0].text
news_p = news_p.replace('\n','')
news_p

"Through Nov. 1, K-12 students in the U.S. are encouraged to enter an essay contest to name NASA's next Mars rover."

### JPL Mars Space Images - Featured Image

In [6]:
# https://splinter.readthedocs.io/en/latest/drivers/chrome.html
!which chromedriver

/usr/local/bin/chromedriver


In [7]:
executable_path = {'executable_path': '/usr/local/bin/chromedriver'}
browser = Browser('chrome', **executable_path, headless=False)

In [8]:
urlDomain = 'https://www.jpl.nasa.gov'
urlPath = '/spaceimages/?search=&category=Mars'
urlFull = urlDomain + urlPath
browser.visit(urlFull)

In [9]:
html = browser.html
soup = BeautifulSoup(html, 'html.parser')
browser.click_link_by_partial_text('FULL IMAGE')
fullImage=soup.find_all(id="full_image")
fullImage   

[<a class="button fancybox" data-description="This image of Ceres is part of a sequence taken by NASA's Dawn spacecraft April 24 to 26, 2015, from a distance of 8,500 miles (13,500 kilometers)." data-fancybox-group="images" data-fancybox-href="/spaceimages/images/mediumsize/PIA19323_ip.jpg" data-link="/spaceimages/details.php?id=PIA19323" data-title="Dawn RC3 Image 4" id="full_image">
 					FULL IMAGE
 				  </a>]

In [10]:
imagePath=fullImage[0]['data-fancybox-href']

In [11]:
featured_image_url= urlDomain + imagePath

In [12]:
featured_image_url

'https://www.jpl.nasa.gov/spaceimages/images/mediumsize/PIA19323_ip.jpg'

### Mars Weather

In [13]:
tpage = requests.get("https://twitter.com/marswxreport?lang=en")
tsoup = BeautifulSoup(tpage.content, 'html.parser')

In [14]:
latestTweet=tsoup.find_all('p',class_='TweetTextSize')
latestTweet[0].text

'InSight sol 335 (2019-11-05) low -100.6ºC (-149.1ºF) high -24.3ºC (-11.8ºF)\nwinds from the SSE at 4.9 m/s (10.9 mph) gusting to 19.9 m/s (44.5 mph)\npressure at 6.90 hPa'

In [15]:
mars_weather=latestTweet[0].text

### Mars Facts


* Visit the Mars Facts webpage [here](https://space-facts.com/mars/) and use Pandas to scrape the table containing facts about the planet including Diameter, Mass, etc.

* Use Pandas to convert the data to a HTML table string.

In [16]:
url = 'https://space-facts.com/mars'

In [17]:
tables = pd.read_html(url)
tables

[                      0                              1
 0  Equatorial Diameter:                       6,792 km
 1       Polar Diameter:                       6,752 km
 2                 Mass:  6.39 × 10^23 kg (0.11 Earths)
 3                Moons:            2 (Phobos & Deimos)
 4       Orbit Distance:       227,943,824 km (1.38 AU)
 5         Orbit Period:           687 days (1.9 years)
 6  Surface Temperature:                   -87 to -5 °C
 7         First Record:              2nd millennium BC
 8          Recorded By:           Egyptian astronomers,
   Mars - Earth Comparison             Mars            Earth
 0               Diameter:         6,779 km        12,742 km
 1                   Mass:  6.39 × 10^23 kg  5.97 × 10^24 kg
 2                  Moons:                2                1
 3      Distance from Sun:   227,943,824 km   149,598,262 km
 4         Length of Year:   687 Earth days      365.24 days
 5            Temperature:    -153 to 20 °C      -88 to 58°C,
           

In [18]:
df=tables[2]
df

Unnamed: 0,0,1
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.39 × 10^23 kg (0.11 Earths)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.38 AU)"
5,Orbit Period:,687 days (1.9 years)
6,Surface Temperature:,-87 to -5 °C
7,First Record:,2nd millennium BC
8,Recorded By:,Egyptian astronomers


In [19]:
df=df.rename(columns={0: "description",1: "value"})
df

Unnamed: 0,description,value
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.39 × 10^23 kg (0.11 Earths)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.38 AU)"
5,Orbit Period:,687 days (1.9 years)
6,Surface Temperature:,-87 to -5 °C
7,First Record:,2nd millennium BC
8,Recorded By:,Egyptian astronomers


In [20]:
df=df.set_index('description')
df

Unnamed: 0_level_0,value
description,Unnamed: 1_level_1
Equatorial Diameter:,"6,792 km"
Polar Diameter:,"6,752 km"
Mass:,6.39 × 10^23 kg (0.11 Earths)
Moons:,2 (Phobos & Deimos)
Orbit Distance:,"227,943,824 km (1.38 AU)"
Orbit Period:,687 days (1.9 years)
Surface Temperature:,-87 to -5 °C
First Record:,2nd millennium BC
Recorded By:,Egyptian astronomers


In [21]:
df.to_html('table.html')

### Mars Hemispheres


* Visit the USGS Astrogeology site [here](https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars) to obtain high resolution images for each of Mar's hemispheres.

* You will need to click each of the links to the hemispheres in order to find the image url to the full resolution image.

* Save both the image url string for the full resolution hemisphere image, and the Hemisphere title containing the hemisphere name. Use a Python dictionary to store the data using the keys `img_url` and `title`.

* Append the dictionary with the image url string and the hemisphere title to a list. This list will contain one dictionary for each hemisphere.

In [22]:
urlDomain = 'https://astrogeology.usgs.gov'
urlPath = '/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'
urlFull = urlDomain + urlPath
browser.visit(urlFull)

In [24]:
html = browser.html
soup = BeautifulSoup(html, 'html.parser')
itemLinks = soup.find_all('a',class_="itemLink product-item")

In [39]:
# itemMax = len(itemLinks)
# itemLinks[0].attrs
# itemLinks[0]['href']
itemLinks[7].text

'Valles Marineris Hemisphere Enhanced'

In [40]:
title = []
image_url = []
for item in itemLinks:
    myLink = urlDomain + item['href']
    if item.text != "":
        if item.text == 'Cerberus Hemisphere Enhanced':
            title.append(item.text)
            print(item.text)
            browser.visit(myLink)
            html = browser.html
            soup = BeautifulSoup(html, 'html.parser')
            #subItem = soup.find_all('span', class_='text')


    

Cerberus Hemisphere Enhanced


In [42]:
soup

<html lang="en"><head>
<link href="//ajax.googleapis.com/ajax/libs/jqueryui/1.11.4/themes/smoothness/jquery-ui.css" rel="stylesheet" type="text/css"/>
<title>Cerberus Hemisphere Enhanced | USGS Astrogeology Science Center</title>
<meta content="Mosaic of the Cerberus hemisphere of Mars projected into point perspective, a view similar to that which one would see from…" name="description"/>
<meta content="USGS,Astrogeology Science Center,Cartography,Geology,Space,Geological Survey,Mapping" name="keywords"/>
<meta content="IE=edge" http-equiv="X-UA-Compatible"/>
<meta content="text/html; charset=utf-8" http-equiv="Content-Type"/>
<meta content="width=device-width, initial-scale=1, maximum-scale=1" name="viewport"/>
<meta content="x61hXXVj7wtfBSNOPnTftajMsZ5yB2W-qRoyr7GtOKM" name="google-site-verification"/>
<!--<link rel="stylesheet" href="http://fonts.googleapis.com/css?family=Open+Sans:400italic,400,bold"/>-->
<link href="/css/main.css" media="screen" rel="stylesheet"/>
<link href="/css

In [None]:

Like this:

>>> keys = ['a', 'b', 'c']
>>> values = [1, 2, 3]
>>> dictionary = dict(zip(keys, values))
>>> print(dictionary)
{'a': 1, 'b': 2, 'c': 3}