# Mission to Mars

## Step 1 - Scraping


In [1]:
# import dependencies
from io import StringIO
import time

from bs4 import BeautifulSoup
import pandas as pd
import pymongo
import requests
from splinter import Browser

In [2]:
# Start a visible (non-headless) Chrome session through splinter,
# pointing it at the 'chromedriver' executable
executable_path = {'executable_path': 'chromedriver'}
browser = Browser('chrome', headless=False, **executable_path)

### NASA Mars News

In [3]:
# Open the NASA Mars News site in the automated browser
nasa_url = 'https://mars.nasa.gov/news/'
browser.visit(nasa_url)
# Fixed 5-second pause before the page HTML is read
# (presumably to give the page time to finish rendering)
time.sleep(5)

In [4]:
# Grab the HTML currently held by the browser and parse it
# with Python's built-in 'html.parser'
html = browser.html
soup_nasa = BeautifulSoup(markup=html, features='html.parser')

In [5]:
# List that will receive the scraped news record
news_info = []

# Headline = link text inside the first 'content_title' div on the page
title_block = soup_nasa.find('div', class_='content_title')
news_title = title_block.find('a').text
news_title

"Robotic Toolkit Added to NASA's Mars 2020 Rover"

In [6]:
# Teaser paragraph = text of the first 'article_teaser_body' div on the page
teaser_block = soup_nasa.find('div', class_='article_teaser_body')
news_p = teaser_block.text
news_p

"The bit carousel, which lies at the heart of the rover's Sample Caching System, is now aboard NASA's newest rover. "

In [7]:
# Bundle the headline and teaser into one record and add it to the list
news_record = {"news_title": news_title, "news Paragraph": news_p}
news_info.append(news_record)
news_info

[{'news_title': "Robotic Toolkit Added to NASA's Mars 2020 Rover",
  'news Paragraph': "The bit carousel, which lies at the heart of the rover's Sample Caching System, is now aboard NASA's newest rover. "}]

###  JPL Mars Space Images - Featured Image

In [8]:
# Open the JPL Mars Space Images page (source of the featured image)
jpl_url_image = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'
browser.visit(jpl_url_image)
# Fixed 5-second pause before interacting with the page
# (presumably to give the page time to finish rendering)
time.sleep(5)

In [9]:
# Locate the element with id 'full_image' on the JPL page and click it
# (this is the button that opens the current featured Mars image in full size)
featured_full_image = browser.find_by_id('full_image')
featured_full_image.click()

In [10]:
# Fixed 5-second pause after the previous click, before looking for the next link
time.sleep(5)

# Find the link whose text contains 'more info' and click it to reach the image detail page
more_info_images = browser.find_link_by_partial_text('more info')
more_info_images.click()

In [11]:
# Parse the HTML of the page the browser is now showing (the image detail page)
html = browser.html
image_soup = BeautifulSoup(markup=html, features='html.parser')

In [12]:
# The image path is the 'src' of the <img> inside the first <figure class="lede">
lede_figure = image_soup.find('figure', class_='lede')
image_url = lede_figure.find('img')['src']
image_url

'/spaceimages/images/largesize/PIA18614_hires.jpg'

In [13]:
# Prefix the site-relative image path with the JPL base URL to get a full URL
JPL_link = 'https://www.jpl.nasa.gov'
featured_image_url = ''.join([JPL_link, image_url])
featured_image_url

'https://www.jpl.nasa.gov/spaceimages/images/largesize/PIA18614_hires.jpg'

### Mars Weather

In [14]:
# Open the Mars Weather Twitter timeline in the automated browser
mars_twitter_url = 'https://twitter.com/marswxreport?lang=en'
browser.visit(mars_twitter_url)
# Fixed 5-second pause before the page HTML is read
# (presumably to give the timeline time to finish rendering)
time.sleep(5)

In [15]:
# Grab the timeline HTML currently held by the browser and parse it
# with Python's built-in 'html.parser'
html_weather = browser.html
mars_soup = BeautifulSoup(markup=html_weather, features='html.parser')

In [16]:
# Collect every div that wraps a tweet's text, in page order
latest_tweets = mars_soup.find_all('div', attrs={'class': 'js-tweet-text-container'})
latest_tweets

[<div class="js-tweet-text-container">
 <p class="TweetTextSize TweetTextSize--normal js-tweet-text tweet-text" data-aria-label-part="0" lang="en">InSight sol 261 (2019-08-21) low -102.4ºC (-152.4ºF) high -26.6ºC (-15.8ºF)
 winds from the SSE at 4.9 m/s (11.0 mph) gusting to 16.0 m/s (35.8 mph)
 pressure at 7.70 hPa<a class="twitter-timeline-link u-hidden" data-pre-embedded="true" dir="ltr" href="https://t.co/MhPPOHJg3m">pic.twitter.com/MhPPOHJg3m</a></p>
 </div>, <div class="js-tweet-text-container">
 <p class="TweetTextSize TweetTextSize--normal js-tweet-text tweet-text" data-aria-label-part="0" lang="en">InSight sol 260 (2019-08-20) low -101.7ºC (-151.1ºF) high -28.6ºC (-19.5ºF)
 pressure at 7.60 hPa</p>
 </div>, <div class="js-tweet-text-container">
 <p class="TweetTextSize TweetTextSize--normal js-tweet-text tweet-text" data-aria-label-part="0" lang="en">InSight sol 259 (2019-08-19) low -101.0ºC (-149.8ºF) high -27.1ºC (-16.9ºF)
 winds from the SW at 4.3 m/s (9.6 mph) gusting to 1

In [17]:
# Scan the tweets in page order and stop at the first weather report.
# A weather report mentions both the sol number and the pressure reading.
# Each keyword needs its own `in` test: an expression such as
# `'Sol' and 'pressure' in text` only checks for 'pressure', because a
# non-empty string literal is always truthy. The comparison is done on the
# lowercased text because the tweets write "sol" in lowercase.
for tweet in latest_tweets:

    weather_tweet = tweet.find('p').text
    tweet_lower = weather_tweet.lower()

    if 'sol' in tweet_lower and 'pressure' in tweet_lower:
        print(weather_tweet)
        break

InSight sol 261 (2019-08-21) low -102.4ºC (-152.4ºF) high -26.6ºC (-15.8ºF)
winds from the SSE at 4.9 m/s (11.0 mph) gusting to 16.0 m/s (35.8 mph)
pressure at 7.70 hPapic.twitter.com/MhPPOHJg3m


In [18]:
# Keep everything up to and including the first "hPa", which drops the
# trailing picture link that follows the pressure reading
latest_weather_tweet = weather_tweet.partition("hPa")[0] + "hPa"
print(latest_weather_tweet)

InSight sol 261 (2019-08-21) low -102.4ºC (-152.4ºF) high -26.6ºC (-15.8ºF)
winds from the SSE at 4.9 m/s (11.0 mph) gusting to 16.0 m/s (35.8 mph)
pressure at 7.70 hPa


### Mars Facts

In [19]:
# Open the Mars Facts page in the automated browser
mars_facts_url = 'http://space-facts.com/mars/'
browser.visit(mars_facts_url)
# Fixed 5-second pause before the page HTML is read
# (presumably to give the page time to finish rendering)
time.sleep(5)

In [20]:
# Parse every <table> in the rendered page into a list of DataFrames.
# The markup is wrapped in StringIO because newer pandas releases deprecate
# passing a literal HTML string to `read_html`; a file-like object is accepted
# by old and new versions alike.
mars_facts = pd.read_html(StringIO(browser.html))
mars_facts

[  Mars - Earth Comparison             Mars            Earth
 0               Diameter:         6,779 km        12,742 km
 1                   Mass:  6.39 × 10^23 kg  5.97 × 10^24 kg
 2                  Moons:                2                1
 3      Distance from Sun:   227,943,824 km   149,598,262 km
 4         Length of Year:   687 Earth days      365.24 days
 5            Temperature:    -153 to 20 °C      -88 to 58°C,
                       0                              1
 0  Equatorial Diameter:                       6,792 km
 1       Polar Diameter:                       6,752 km
 2                 Mass:  6.39 × 10^23 kg (0.11 Earths)
 3                Moons:            2 (Phobos & Deimos)
 4       Orbit Distance:       227,943,824 km (1.38 AU)
 5         Orbit Period:           687 days (1.9 years)
 6  Surface Temperature:                   -87 to -5 °C
 7         First Record:              2nd millennium BC
 8          Recorded By:           Egyptian astronomers]

In [21]:
# Pick the second table parsed from the page (index 1) as the Mars facts DataFrame
# NOTE(review): relies on the page's table order staying the same — confirm if the site layout changes
mars_df = mars_facts[1]
mars_df

Unnamed: 0,0,1
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.39 × 10^23 kg (0.11 Earths)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.38 AU)"
5,Orbit Period:,687 days (1.9 years)
6,Surface Temperature:,-87 to -5 °C
7,First Record:,2nd millennium BC
8,Recorded By:,Egyptian astronomers


In [22]:
# Replace the default integer column labels with descriptive names
facts_columns = ['Description', 'Value']
mars_df.columns = facts_columns
mars_df

Unnamed: 0,Description,Value
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.39 × 10^23 kg (0.11 Earths)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.38 AU)"
5,Orbit Period:,687 days (1.9 years)
6,Surface Temperature:,-87 to -5 °C
7,First Record:,2nd millennium BC
8,Recorded By:,Egyptian astronomers


In [23]:
# Promote the 'Description' column to the row index (modifies mars_df in place),
# so the numeric row labels no longer appear
mars_df.set_index(keys='Description', inplace=True)

In [24]:
# Render the facts table as an HTML string and also save it to 'mars_fact_table.html'.
# `to_html()` returns the markup only when no output target is passed; given a
# file path it writes the file and returns None. The string is therefore built
# first and then written out explicitly, so `mars_fact_table` holds the HTML.
mars_fact_table = mars_df.to_html()
# utf-8 matches pandas' default output encoding; newline='' keeps the line
# endings exactly as pandas generated them
with open('mars_fact_table.html', 'w', encoding='utf-8', newline='') as table_file:
    table_file.write(mars_fact_table)
mars_fact_table

### Mars Hemispheres

In [25]:
# Open the USGS Astrogeology search results for Mars "hemisphere enhanced" products
hemispheres_url = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'
browser.visit(hemispheres_url)
# Fixed 5-second pause before the page HTML is read
# (presumably to give the page time to finish rendering)
time.sleep(5)

In [26]:
# Grab the search-results HTML currently held by the browser
html_hemispheres = browser.html

# Parse it with Python's built-in 'html.parser'
hemispheres_soup = BeautifulSoup(markup=html_hemispheres, features='html.parser')

In [27]:
# Base URL used to turn the site-relative links on the results page into full URLs
hemispheres_main_url = 'https://astrogeology.usgs.gov'

# Accumulator for one {title, img_url} dictionary per hemisphere
hemisphere_image_urls = list()

# Every 'item' div on the results page describes one hemisphere product
items = hemispheres_soup.find_all('div', attrs={'class': 'item'})

In [28]:
# For each hemisphere listed on the results page, open its detail page and
# record the title together with the full-size image URL
for item in items:
    # Title is the text of the item's <h3> heading
    title = item.find('h3').text

    # Site-relative link to the hemisphere's detail page
    partial_img_url = item.find('a', class_='itemLink product-item')['href']

    # Navigate to the detail page
    detail_page_url = hemispheres_main_url + partial_img_url
    browser.visit(detail_page_url)

    # Parse the detail page's HTML
    partial_img_html = browser.html
    individual_soup = BeautifulSoup(partial_img_html, 'html.parser')

    # The full image is the <img class="wide-image">; its src is site-relative
    wide_image = individual_soup.find('img', class_='wide-image')
    img_url = hemispheres_main_url + wide_image['src']

    # Store the record for this hemisphere
    hemi_urls = dict(title=title, img_url=img_url)
    hemisphere_image_urls.append(hemi_urls)

# Display the collected records
hemisphere_image_urls


[{'title': 'Cerberus Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/cfa62af2557222a02478f1fcd781d445_cerberus_enhanced.tif_full.jpg'},
 {'title': 'Schiaparelli Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/3cdd1cbf5e0813bba925c9030d13b62e_schiaparelli_enhanced.tif_full.jpg'},
 {'title': 'Syrtis Major Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/ae209b4e408bb6c3e67b6af38168cf28_syrtis_major_enhanced.tif_full.jpg'},
 {'title': 'Valles Marineris Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/7cf2da4bf549ed01c17f206327be4db7_valles_marineris_enhanced.tif_full.jpg'}]