## Mission to Mars Scrapping and API

In [49]:
#Import libraries 
import pandas as pd

from bs4 import BeautifulSoup
import requests
import pymongo

from splinter import Browser
from splinter.exceptions import ElementDoesNotExist

In [50]:
#Splinter is an open source tool for testing web applications using Python. 
#It lets you automate browser actions, such as visiting URLs and interacting with their items.
# https://splinter.readthedocs.io/en/latest/drivers/chrome.html
!which chromedriver
executable_path = {'executable_path': '/usr/local/bin/chromedriver'}
browser = Browser('chrome', **executable_path, headless=False)

/usr/local/bin/chromedriver


## NASA News

In [51]:
#Define and visit the url from the news section
url_news = 'https://mars.nasa.gov/news/'
browser.visit(url_news)

# Create and parse the HTML Object for news
html_news = browser.html
soup = BeautifulSoup(html_news, 'html.parser')

In [52]:
# Collect the latest News Title and Paragraph Text
news_title = soup.find('div', class_='content_title').find('a').text
news_paragraph = soup.find('div', class_='article_teaser_body').text

print(news_title)
print(news_paragraph)

NASA's Mars 2020 Gets a Dose of Space Here on Earth
NASA's Mars 2020 spacecraft has completed tests that are the best Earthly approximations of what the spacecraft will endure during launch and interplanetary cruise.


## Mars Feature Image

In [53]:
#Define and visit the url from the featured images section
image_url_featured = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'
browser.visit(image_url_featured)

In [54]:
# Create and parse the HTML Object for the featured image
html_featured = browser.html
soup = BeautifulSoup(html_featured, 'html.parser')

# Retrievethe current Featured Mars image url  
images = soup.find('div', class_='carousel_items').find("article").get("style")
featured_image_url =images.replace('background-image: url(','').replace(');', '').replace(');', '').replace("'","")
featured_image_url = f"https://www.jpl.nasa.gov{featured_image_url}"
featured_image_url

'https://www.jpl.nasa.gov/spaceimages/images/wallpaper/PIA16883-1920x1200.jpg'

## Mars Weather

In [38]:
# Weather url
weather_url = 'https://twitter.com/marswxreport?lang=en'
browser.visit(weather_url)


In [40]:
# Create and parse the HTML Object for the featured image
html_weather = browser.html
soup = BeautifulSoup(html_weather, 'html.parser')

# Find all tweets
latest_tweets = soup.find_all('div', class_='js-tweet-text-container')

# Get only the first tweet regarding weather
weather=""
for tweet in latest_tweets: 
    weather_tweet = tweet.find('p').text
    if 'Sol' and "high" and "low" and 'pressure' in weather_tweet:
        weather =weather+ weather_tweet
        break
        
weather=weather.replace('\nwinds', ' winds')
weather=weather.replace('\npressure', ' pressure')
weather=weather.replace('.twitter.com/7XARGO6DS6', '')
weather

'InSight sol 174 (2019-05-24) low -101.1ºC (-149.9ºF) high -21.3ºC (-6.4ºF) winds from the SW at 4.3 m/s (9.6 mph) gusting to 16.3 m/s (36.5 mph) pressure at 7.50 hPapic'

## Mars Facts

In [20]:
facts_url = 'http://space-facts.com/mars/'

# Read with pandas
facts = pd.read_html(facts_url)
facts

[                      0                              1
 0  Equatorial Diameter:                       6,792 km
 1       Polar Diameter:                       6,752 km
 2                 Mass:  6.42 x 10^23 kg (10.7% Earth)
 3                Moons:            2 (Phobos & Deimos)
 4       Orbit Distance:       227,943,824 km (1.52 AU)
 5         Orbit Period:           687 days (1.9 years)
 6  Surface Temperature:                  -153 to 20 °C
 7         First Record:              2nd millennium BC
 8          Recorded By:           Egyptian astronomers]

In [22]:
#Extract first element of the list to gather the dataframe
facts_df = facts[0]
facts_df

Unnamed: 0,0,1
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.42 x 10^23 kg (10.7% Earth)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.52 AU)"
5,Orbit Period:,687 days (1.9 years)
6,Surface Temperature:,-153 to 20 °C
7,First Record:,2nd millennium BC
8,Recorded By:,Egyptian astronomers


In [23]:
facts_df = facts_df.rename(columns={0:"Description", 1:"Value"})
facts_df

Unnamed: 0,Description,Value
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.42 x 10^23 kg (10.7% Earth)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.52 AU)"
5,Orbit Period:,687 days (1.9 years)
6,Surface Temperature:,-153 to 20 °C
7,First Record:,2nd millennium BC
8,Recorded By:,Egyptian astronomers


In [27]:
# Set the index to the `Description` column without row indexing
facts_df.set_index("Description", inplace=True)
facts_df

Unnamed: 0_level_0,Value
Description,Unnamed: 1_level_1
Equatorial Diameter:,"6,792 km"
Polar Diameter:,"6,752 km"
Mass:,6.42 x 10^23 kg (10.7% Earth)
Moons:,2 (Phobos & Deimos)
Orbit Distance:,"227,943,824 km (1.52 AU)"
Orbit Period:,687 days (1.9 years)
Surface Temperature:,-153 to 20 °C
First Record:,2nd millennium BC
Recorded By:,Egyptian astronomers


In [29]:
# Save the dataframe to render in html
facts_df_html=facts_df.to_html()

facts_df_html

'<table border="1" class="dataframe">\n  <thead>\n    <tr style="text-align: right;">\n      <th></th>\n      <th>Value</th>\n    </tr>\n    <tr>\n      <th>Description</th>\n      <th></th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>Equatorial Diameter:</th>\n      <td>6,792 km</td>\n    </tr>\n    <tr>\n      <th>Polar Diameter:</th>\n      <td>6,752 km</td>\n    </tr>\n    <tr>\n      <th>Mass:</th>\n      <td>6.42 x 10^23 kg (10.7% Earth)</td>\n    </tr>\n    <tr>\n      <th>Moons:</th>\n      <td>2 (Phobos &amp; Deimos)</td>\n    </tr>\n    <tr>\n      <th>Orbit Distance:</th>\n      <td>227,943,824 km (1.52 AU)</td>\n    </tr>\n    <tr>\n      <th>Orbit Period:</th>\n      <td>687 days (1.9 years)</td>\n    </tr>\n    <tr>\n      <th>Surface Temperature:</th>\n      <td>-153 to 20 °C</td>\n    </tr>\n    <tr>\n      <th>First Record:</th>\n      <td>2nd millennium BC</td>\n    </tr>\n    <tr>\n      <th>Recorded By:</th>\n      <td>Egyptian astronomers</td>\n    </tr>

## Hemisphers titles and images

In [46]:
# url for hemispheres 
hemispheres_url = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'
browser.visit(hemispheres_url)

In [47]:
# Create and parse the HTML Object 
html_hemispheres = browser.html
soup = BeautifulSoup(html_hemispheres, 'html.parser')

# Call all the div containing the image and the description
items = soup.find_all('div', class_='item')
hemisphere_image_urls=[]

In [48]:
for i in items: 
    hemisphere_title = i.find('h3').text
    
    # Store link that leads to full image website
    hemisphere_image = i.find('a', class_='itemLink product-item')['href']
    
    # Call the page were the jpg image is stored
    browser.visit('https://astrogeology.usgs.gov' + hemisphere_image)  
    big_img_html = browser.html
    soup = BeautifulSoup( big_img_html, 'html.parser')
    
    # Get the image
    jpg_image = 'https://astrogeology.usgs.gov' + soup.find('img', class_='wide-image').get('src')
    
    # Append the retreived information into a list of dictionaries 
    hemisphere_image_urls.append({"title" : hemisphere_title, "image" : jpg_image})
    

# Display hemisphere_image_urls
hemisphere_image_urls

[{'image': 'https://astrogeology.usgs.gov/cache/images/cfa62af2557222a02478f1fcd781d445_cerberus_enhanced.tif_full.jpg',
  'title': 'Cerberus Hemisphere Enhanced'},
 {'image': 'https://astrogeology.usgs.gov/cache/images/3cdd1cbf5e0813bba925c9030d13b62e_schiaparelli_enhanced.tif_full.jpg',
  'title': 'Schiaparelli Hemisphere Enhanced'},
 {'image': 'https://astrogeology.usgs.gov/cache/images/ae209b4e408bb6c3e67b6af38168cf28_syrtis_major_enhanced.tif_full.jpg',
  'title': 'Syrtis Major Hemisphere Enhanced'},
 {'image': 'https://astrogeology.usgs.gov/cache/images/7cf2da4bf549ed01c17f206327be4db7_valles_marineris_enhanced.tif_full.jpg',
  'title': 'Valles Marineris Hemisphere Enhanced'}]

In [20]:
storage = {
        "news_title": news_title,
        "news_paragraph": news_paragraph,
        "featured_image": featured_image_url,
        "weather_data": weather,
        "mars_facts": facts_df_html,
        "hemispheres": hemisphere_image_urls
    }