# Mission to Mars Web Scraping

by Mary Brown

In [3]:
# Dependencies
import shutil

import pandas as pd
import requests
from bs4 import BeautifulSoup
from splinter import Browser

### NASA Mars News

In [4]:
# Fetch the NASA Mars news landing page.
# The timeout stops the cell from hanging indefinitely if the server never
# answers; raise_for_status() surfaces an HTTP error here instead of as a
# confusing parse failure in a later cell.
page = requests.get("https://mars.nasa.gov/news/", timeout=30)
page.raise_for_status()

In [5]:
# Parse the downloaded response body using the 'html.parser' backend.
soup = BeautifulSoup(page.content, 'html.parser')

In [6]:
# Title text of the first `content_title` div on the page, with newline
# characters removed.
title_divs = soup.find_all('div', class_='content_title')
news_title = title_divs[0].text.replace('\n', '')
news_title

'NASA Invites Students to Name Mars 2020 Rover'

In [7]:
# Teaser paragraph: text of the first `rollover_description_inner` div, with
# newline characters removed.
teaser_divs = soup.find_all('div', class_='rollover_description_inner')
news_p = teaser_divs[0].text.replace('\n', '')
news_p

"Through Nov. 1, K-12 students in the U.S. are encouraged to enter an essay contest to name NASA's next Mars rover."

### JPL Mars Space Images - Featured Image

In [8]:
# Print where the chromedriver binary lives on PATH (shell command).
# Splinter Chrome driver docs: https://splinter.readthedocs.io/en/latest/drivers/chrome.html
!which chromedriver

/usr/local/bin/chromedriver


In [9]:
# Start a visible (non-headless) Chrome session driven by chromedriver.
# The driver is looked up on PATH so the notebook is not tied to one machine's
# layout; if the lookup finds nothing, fall back to the previously hard-coded
# location.
chromedriver_path = shutil.which('chromedriver') or '/usr/local/bin/chromedriver'
executable_path = {'executable_path': chromedriver_path}
browser = Browser('chrome', **executable_path, headless=False)

In [10]:
# Assemble the JPL space-images search URL (Mars category) from its domain and
# path, then load it in the browser session.
urlDomain = 'https://www.jpl.nasa.gov'
urlPath = '/spaceimages/?search=&category=Mars'
urlFull = f'{urlDomain}{urlPath}'
browser.visit(urlFull)

In [11]:
# Snapshot the page HTML and parse it. This happens before the click below, so
# `soup` holds the page as it was when first loaded.
html = browser.html
soup = BeautifulSoup(html, 'html.parser')
# NOTE(review): the click runs after the snapshot, so it has no effect on
# `soup` or `fullImage` — confirm whether it is still needed.
browser.click_link_by_partial_text('FULL IMAGE')
# Collect every element with id="full_image" (find_all returns a list) and
# display it.
fullImage=soup.find_all(id="full_image")
fullImage   

[<a class="button fancybox" data-description="Using publicly available data, astronomers have confirmed K2's first exoplanet discovery proving Kepler can still find planets." data-fancybox-group="images" data-fancybox-href="/spaceimages/images/mediumsize/PIA18904_ip.jpg" data-link="/spaceimages/details.php?id=PIA18904" data-title="Reborn Kepler Discovers First K2 Exoplanet (Artist Concept)" id="full_image">
 					FULL IMAGE
 				  </a>]

In [12]:
# Read the image path from the first match's `data-fancybox-href` attribute.
# The value is site-relative (it starts with "/"), so it still needs the domain.
imagePath=fullImage[0]['data-fancybox-href']

In [13]:
# Prefix the site domain to turn the relative path into an absolute image URL.
featured_image_url= urlDomain + imagePath

In [14]:
# Display the assembled URL as a quick visual check.
featured_image_url

'https://www.jpl.nasa.gov/spaceimages/images/mediumsize/PIA18904_ip.jpg'

### Mars Weather

In [15]:
# Fetch the Mars weather Twitter timeline and parse it.
# The timeout stops the cell from hanging indefinitely if the server never
# answers; raise_for_status() surfaces an HTTP error here instead of as an
# empty search result in the next cell.
tpage = requests.get("https://twitter.com/marswxreport?lang=en", timeout=30)
tpage.raise_for_status()
tsoup = BeautifulSoup(tpage.content, 'html.parser')

In [16]:
# Collect every <p> element carrying the `TweetTextSize` class and display the
# text of the first match.
# NOTE(review): presumably the first match is the most recent tweet — the page
# order is not checked here.
latestTweet=tsoup.find_all('p',class_='TweetTextSize')
latestTweet[0].text

'InSight sol 335 (2019-11-05) low -100.6ºC (-149.1ºF) high -24.3ºC (-11.8ºF)\nwinds from the SSE at 4.9 m/s (10.9 mph) gusting to 19.9 m/s (44.5 mph)\npressure at 6.90 hPa'

In [17]:
# Keep the text of the first matched tweet as the weather report.
mars_weather=latestTweet[0].text

### Mars Facts


* Visit the Mars Facts webpage [here](https://space-facts.com/mars/) and use Pandas to scrape the table containing facts about the planet including Diameter, Mass, etc.

* Use Pandas to convert the data to an HTML table string.

In [18]:
# Page whose tables are scraped in the next cell.
url = 'https://space-facts.com/mars'

In [19]:
# pd.read_html fetches the page and returns a list of DataFrames, one per HTML
# table it finds; display the whole list to see which index holds which table.
tables = pd.read_html(url)
tables

[                      0                              1
 0  Equatorial Diameter:                       6,792 km
 1       Polar Diameter:                       6,752 km
 2                 Mass:  6.39 × 10^23 kg (0.11 Earths)
 3                Moons:            2 (Phobos & Deimos)
 4       Orbit Distance:       227,943,824 km (1.38 AU)
 5         Orbit Period:           687 days (1.9 years)
 6  Surface Temperature:                   -87 to -5 °C
 7         First Record:              2nd millennium BC
 8          Recorded By:           Egyptian astronomers,
   Mars - Earth Comparison             Mars            Earth
 0               Diameter:         6,779 km        12,742 km
 1                   Mass:  6.39 × 10^23 kg  5.97 × 10^24 kg
 2                  Moons:                2                1
 3      Distance from Sun:   227,943,824 km   149,598,262 km
 4         Length of Year:   687 Earth days      365.24 days
 5            Temperature:    -153 to 20 °C      -88 to 58°C,
           

In [20]:
# Select the table at index 2 of the scraped list as the Mars facts table.
# NOTE(review): this relies on the order of tables on the page — confirm index 2
# is still the facts table if the site layout changes.
df=tables[2]
df

Unnamed: 0,0,1
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.39 × 10^23 kg (0.11 Earths)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.38 AU)"
5,Orbit Period:,687 days (1.9 years)
6,Surface Temperature:,-87 to -5 °C
7,First Record:,2nd millennium BC
8,Recorded By:,Egyptian astronomers


In [21]:
# Replace the positional column labels (0 and 1) with descriptive names.
column_names = {0: "description", 1: "value"}
df = df.rename(columns=column_names)
df

Unnamed: 0,description,value
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.39 × 10^23 kg (0.11 Earths)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.38 AU)"
5,Orbit Period:,687 days (1.9 years)
6,Surface Temperature:,-87 to -5 °C
7,First Record:,2nd millennium BC
8,Recorded By:,Egyptian astronomers


In [22]:
# Use the fact names as the row index so the rendered table has no numeric
# index column.
# NOTE: this rebinds `df`, so the cell is not idempotent — running it a second
# time fails because 'description' is no longer a column.
df=df.set_index('description')
df

Unnamed: 0_level_0,value
description,Unnamed: 1_level_1
Equatorial Diameter:,"6,792 km"
Polar Diameter:,"6,752 km"
Mass:,6.39 × 10^23 kg (0.11 Earths)
Moons:,2 (Phobos & Deimos)
Orbit Distance:,"227,943,824 km (1.38 AU)"
Orbit Period:,687 days (1.9 years)
Surface Temperature:,-87 to -5 °C
First Record:,2nd millennium BC
Recorded By:,Egyptian astronomers


In [23]:
# Render the facts table as HTML and write it to `table.html` (path relative to
# the working directory). With a path argument the markup goes to the file
# instead of being returned as a string.
df.to_html('table.html')

### Mars Hemispheres


* Visit the USGS Astrogeology site [here](https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars) to obtain high resolution images for each of Mars's hemispheres.

* You will need to click each of the links to the hemispheres in order to find the image url to the full resolution image.

* Save both the image url string for the full resolution hemisphere image, and the Hemisphere title containing the hemisphere name. Use a Python dictionary to store the data using the keys `img_url` and `title`.

* Append the dictionary with the image url string and the hemisphere title to a list. This list will contain one dictionary for each hemisphere.

In [84]:
# Assemble the USGS Astrogeology search URL for the enhanced Mars hemisphere
# products, then load it in the browser session.
# Note: `urlDomain`, `urlPath`, and `urlFull` are rebound here; from this cell
# on they refer to the USGS site.
urlDomain = 'https://astrogeology.usgs.gov'
urlPath = '/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'
urlFull = f'{urlDomain}{urlPath}'
browser.visit(urlFull)

In [85]:
# Parse the search-results page and collect the anchors whose class attribute is
# "itemLink product-item". Each result contributes two anchors with the same
# href: one wrapping the thumbnail image and one wrapping the <h3> title.
html = browser.html
soup = BeautifulSoup(html, 'html.parser')
itemLinks = soup.find_all('a',class_="itemLink product-item")
itemLinks

[<a class="itemLink product-item" href="/search/map/Mars/Viking/cerberus_enhanced"><img alt="Cerberus Hemisphere Enhanced thumbnail" class="thumb" src="/cache/images/dfaf3849e74bf973b59eb50dab52b583_cerberus_enhanced.tif_thumb.png"/></a>,
 <a class="itemLink product-item" href="/search/map/Mars/Viking/cerberus_enhanced"><h3>Cerberus Hemisphere Enhanced</h3></a>,
 <a class="itemLink product-item" href="/search/map/Mars/Viking/schiaparelli_enhanced"><img alt="Schiaparelli Hemisphere Enhanced thumbnail" class="thumb" src="/cache/images/7677c0a006b83871b5a2f66985ab5857_schiaparelli_enhanced.tif_thumb.png"/></a>,
 <a class="itemLink product-item" href="/search/map/Mars/Viking/schiaparelli_enhanced"><h3>Schiaparelli Hemisphere Enhanced</h3></a>,
 <a class="itemLink product-item" href="/search/map/Mars/Viking/syrtis_major_enhanced"><img alt="Syrtis Major Hemisphere Enhanced thumbnail" class="thumb" src="/cache/images/aae41197e40d6d4f3ea557f8cfe51d15_syrtis_major_enhanced.tif_thumb.png"/></a>,

In [86]:
# Build one {'title': ..., 'img_url': ...} dict per hemisphere by visiting each
# result's detail page and reading its full-size image. Results accumulate in
# `myList`.
myList = []
myKeys = ['title', 'img_url']
myDict = {}
myText = ""
for item in itemLinks:
    # Each result has two anchors with the same href: a thumbnail (empty text)
    # and a heading. Only the heading carries the title, so skip the thumbnail
    # to avoid visiting every detail page twice.
    if item.text == "":
        continue

    myText = item.text
    # Drop the trailing " Enhanced" so the title is just the hemisphere name.
    if myText.endswith(' Enhanced'):
        myText = myText[:-len(' Enhanced')]

    myLink = urlDomain + item['href']
    browser.visit(myLink)

    # Parse the detail page into its own variable so the notebook-level
    # `html` / `soup` holding the results page are not silently overwritten.
    detailSoup = BeautifulSoup(browser.html, 'html.parser')
    wideImage = detailSoup.find('img', class_='wide-image')
    if wideImage is None:
        # Fail with a message that names the offending page rather than a bare
        # "list index out of range".
        raise IndexError('no img.wide-image found on ' + myLink)

    myDict = {myKeys[0]: myText, myKeys[1]: urlDomain + wideImage['src']}
    myList.append(myDict)
        


In [87]:
# Show the collected hemisphere records (one dict per hemisphere).
print(myList)

[{'title': 'Cerberus Hemisphere', 'img_url': 'https://astrogeology.usgs.gov/cache/images/cfa62af2557222a02478f1fcd781d445_cerberus_enhanced.tif_full.jpg'}, {'title': 'Schiaparelli Hemisphere', 'img_url': 'https://astrogeology.usgs.gov/cache/images/3cdd1cbf5e0813bba925c9030d13b62e_schiaparelli_enhanced.tif_full.jpg'}, {'title': 'Syrtis Major Hemisphere', 'img_url': 'https://astrogeology.usgs.gov/cache/images/ae209b4e408bb6c3e67b6af38168cf28_syrtis_major_enhanced.tif_full.jpg'}, {'title': 'Valles Marineris Hemisphere', 'img_url': 'https://astrogeology.usgs.gov/cache/images/7cf2da4bf549ed01c17f206327be4db7_valles_marineris_enhanced.tif_full.jpg'}]
