# Mission to Mars

In [1]:
# Dependencies
import pandas as pd
import time as time
from splinter import Browser
from bs4 import BeautifulSoup
from pprint import pprint

In [2]:
# Initialize the Chrome browser that every scraping cell below drives
# @NOTE: Replace the path with your actual path to the chromedriver
# A raw string is used because "\c" in an ordinary string literal is an invalid
# escape sequence (a warning on recent Python versions); the resulting path value
# is unchanged.
executable_path = {"executable_path": r"chrome_driver\chromedriver"}
browser = Browser("chrome", **executable_path, headless=False)

# Seconds to pause after each navigation before reading the page.
# This delay varies based on the user's internet speed and how fast the JavaScript loads on the destination page
# 5 seems to work every time.  3 doesn't work at GT consistently.
sleep_delay = 5

In [22]:
# Navigate the shared browser to a page and parse it (reused by most cells below)
def init_page(url):
    """Visit `url` with the notebook's browser and return the parsed page.

    Args:
        url: Address to open, e.g. "https://mars.nasa.gov/news/".

    Returns:
        A BeautifulSoup object built from `browser.html` with "html.parser".
    """
    browser.visit(url)
    # Pause so the destination page's JavaScript can finish loading
    time.sleep(sleep_delay)

    return BeautifulSoup(browser.html, "html.parser")

# Follow a link on the page currently open in the browser and parse the result
def click_link(button_text):
    """Click the link whose text contains `button_text` and return the parsed page.

    Args:
        button_text: Partial link text passed to
            `browser.click_link_by_partial_text`.

    Returns:
        A BeautifulSoup object built from `browser.html` (read after the
        `sleep_delay` pause) with "html.parser".
    """
    browser.click_link_by_partial_text(button_text)
    # Same pause as after a direct visit, before the page source is read
    time.sleep(sleep_delay)

    page_source = browser.html
    return BeautifulSoup(page_source, "html.parser")

# Normalizes scraped text: un-escapes \', turns " into ', drops " Enhanced" and newlines
def clean_text(text_to_clean):
    """Return `text_to_clean` with quoting and filler text normalized.

    The substitutions are applied in this order:
      1. backslash-apostrophe (\\') becomes a plain apostrophe
      2. double quotes become apostrophes
      3. the substring " Enhanced" is removed
      4. newline characters are removed

    Args:
        text_to_clean: The string to normalize.

    Returns:
        The normalized string; the input is not modified.
    """
    # The first pattern must be written "\\'": the literal "\'" is just an
    # apostrophe, which made the original replacement a no-op.
    replacements = (
        ("\\'", "'"),
        ('"', "'"),
        (" Enhanced", ""),
        ("\n", ""),
    )

    cleaned_text = text_to_clean
    for old, new in replacements:
        cleaned_text = cleaned_text.replace(old, new)

    return cleaned_text

In [4]:
# Load the NASA Mars news listing
soup = init_page("https://mars.nasa.gov/news/")

# Headline: link text inside the second "content_title" div on the page
news_title = clean_text(
    soup.find_all("div", class_="content_title")[1].find("a").text
)
print(news_title)

# Teaser: "article_teaser_body" text of the first article container
news_p = clean_text(
    soup.find_all("div", class_="image_and_description_container")[0]
    .find("div", class_="article_teaser_body")
    .text
)
pprint(news_p)

Virginia Middle School Student Earns Honor of Naming NASA's Next Mars Rover
("NASA chose a seventh-grader from Virginia as winner of the agency's 'Name "
 "the Rover' essay contest. Alexander Mather's entry for 'Perseverance' was "
 'voted tops among 28,000 entries. ')


In [5]:
# JPL site root; the next cell prepends it to the image's relative path
base_url = "https://www.jpl.nasa.gov"
url = f"{base_url}/spaceimages/?search=&category=Mars"

# Open the Mars image gallery, then click through to the image's detail page.
# Each step replaces `soup` with the page that is currently displayed.
soup = init_page(url)
soup = click_link("FULL IMAGE")
soup = click_link("more info")



In [6]:
# The link inside the "lede" figure holds a site-relative path to the image;
# prefix it with the JPL root to get an absolute URL.
featured_image_url = base_url + (
    soup.find("figure", class_="lede").find("a")["href"]
)

print(featured_image_url)

https://www.jpl.nasa.gov/spaceimages/images/largesize/PIA17900_hires.jpg


In [23]:
# Load the Mars weather report Twitter feed
soup = init_page("https://twitter.com/marswxreport?lang=en")

# Candidate spans are matched by this exact class list.
# Query the page once rather than re-running find_all on every loop iteration.
tweet_spans = soup.find_all("span", class_="css-901oao css-16my406 r-1qd0xha r-ad9z0x r-bcqeeo r-qvutc0")

# Take the first span that contains a weather report. Iterating the actual
# result list (instead of indexing 0..499) avoids an opaque IndexError when the
# page has fewer spans, and avoids silently keeping an unrelated span's text
# when nothing matches.
weather_text = next(
    (span.text for span in tweet_spans if "InSight sol" in span.text),
    None,
)
if weather_text is None:
    raise ValueError(
        "No 'InSight sol' weather text found among %d candidate spans; "
        "the page may not have finished loading or its markup changed."
        % len(tweet_spans)
    )

# Remove the "\n" characters (and normalize quotes)
mars_weather = clean_text(weather_text)

pprint(mars_weather)

('InSight sol 455 (2020-03-08) low -95.4ºC (-139.8ºF) high -13.0ºC '
 '(8.5ºF)winds from the SSE at 6.0 m/s (13.5 mph) gusting to 20.7 m/s (46.2 '
 'mph)pressure at 6.40 hPa')


In [24]:
# Load the Mars facts page and keep the facts table as an HTML string
soup = init_page("https://space-facts.com/mars/")

table_tag = soup.find("table", class_="tablepress tablepress-id-p-mars")
if table_tag is None:
    # Without this check str(None) would store the literal text "None" as the
    # table HTML and the failure would go unnoticed downstream.
    raise ValueError(
        "Mars facts table (class 'tablepress tablepress-id-p-mars') not found; "
        "the page may not have finished loading or its markup changed."
    )

mars_table = str(table_tag)
pprint(mars_table)

('<table class="tablepress tablepress-id-p-mars" '
 'id="tablepress-p-mars"><tbody><tr class="row-1 odd"><td '
 'class="column-1"><strong>Equatorial Diameter:</strong></td><td '
 'class="column-2">6,792 km<br/></td></tr><tr class="row-2 even"><td '
 'class="column-1"><strong>Polar Diameter:</strong></td><td '
 'class="column-2">6,752 km<br/></td></tr><tr class="row-3 odd"><td '
 'class="column-1"><strong>Mass:</strong></td><td class="column-2">6.39 × '
 '10^23 kg<br/> (0.11 Earths)</td></tr><tr class="row-4 even"><td '
 'class="column-1"><strong>Moons:</strong></td><td class="column-2">2 (<a '
 'href="https://space-facts.com/moons/phobos/">Phobos</a> &amp; <a '
 'href="https://space-facts.com/moons/deimos/">Deimos</a>)</td></tr><tr '
 'class="row-5 odd"><td class="column-1"><strong>Orbit '
 'Distance:</strong></td><td class="column-2">227,943,824 km<br/> (1.38 '
 'AU)</td></tr><tr class="row-6 even"><td class="column-1"><strong>Orbit '
 'Period:</strong></td><td class="column-2">687 da

In [25]:
# Collect the title and full-size image URL for each of the four Mars hemispheres
hemisphere_image_urls = []

# Use a dedicated name for the USGS root: reusing `base_url` would overwrite the
# JPL root set earlier, so re-running the featured-image cell afterwards would
# build a wrong URL.
usgs_base_url = "https://astrogeology.usgs.gov"
# Loop-invariant search results page, built once
search_url = usgs_base_url + "/search/results?q=hemisphere+enhanced&k1=target&v1=Mars"

for i in range(4):
    # Reload the results list on every pass because following a link below
    # navigates the browser away from it
    soup = init_page(search_url)
    title = soup.find("div", class_="collapsible results").find_all("h3")[i].text

    # The raw (uncleaned) title is the link text to click
    soup = click_link(title)
    img_url = usgs_base_url + soup.find("img", class_="wide-image")["src"]
    title = clean_text(title)

    hemisphere_image_urls.append({"title": title, "img_url": img_url})

pprint(hemisphere_image_urls)

[{'img_url': 'https://astrogeology.usgs.gov/cache/images/cfa62af2557222a02478f1fcd781d445_cerberus_enhanced.tif_full.jpg',
  'title': 'Cerberus Hemisphere'},
 {'img_url': 'https://astrogeology.usgs.gov/cache/images/3cdd1cbf5e0813bba925c9030d13b62e_schiaparelli_enhanced.tif_full.jpg',
  'title': 'Schiaparelli Hemisphere'},
 {'img_url': 'https://astrogeology.usgs.gov/cache/images/ae209b4e408bb6c3e67b6af38168cf28_syrtis_major_enhanced.tif_full.jpg',
  'title': 'Syrtis Major Hemisphere'},
 {'img_url': 'https://astrogeology.usgs.gov/cache/images/7cf2da4bf549ed01c17f206327be4db7_valles_marineris_enhanced.tif_full.jpg',
  'title': 'Valles Marineris Hemisphere'}]


In [33]:
# Bundle every scraped value into a single record
dict_mars = dict(
    news_title=news_title,
    news_p=news_p,
    mars_weather=mars_weather,
    featured_image_url=featured_image_url,
    hemisphere_image_urls=hemisphere_image_urls,
    mars_table=mars_table,
)

pprint(dict_mars)

{'featured_image_url': 'https://www.jpl.nasa.gov/spaceimages/images/largesize/PIA17900_hires.jpg',
 'hemisphere_image_urls': [{'img_url': 'https://astrogeology.usgs.gov/cache/images/cfa62af2557222a02478f1fcd781d445_cerberus_enhanced.tif_full.jpg',
                            'title': 'Cerberus Hemisphere'},
                           {'img_url': 'https://astrogeology.usgs.gov/cache/images/3cdd1cbf5e0813bba925c9030d13b62e_schiaparelli_enhanced.tif_full.jpg',
                            'title': 'Schiaparelli Hemisphere'},
                           {'img_url': 'https://astrogeology.usgs.gov/cache/images/ae209b4e408bb6c3e67b6af38168cf28_syrtis_major_enhanced.tif_full.jpg',
                            'title': 'Syrtis Major Hemisphere'},
                           {'img_url': 'https://astrogeology.usgs.gov/cache/images/7cf2da4bf549ed01c17f206327be4db7_valles_marineris_enhanced.tif_full.jpg',
                            'title': 'Valles Marineris Hemisphere'}],
 'mars_table': '<table class

In [45]:
# pprint(dict_mars["news_title"])
# pprint(dict_mars["news_p"])
# pprint(dict_mars["featured_image_url"])
# pprint(dict_mars["mars_weather"])
# pprint(dict_mars["mars_table"])
# pprint(dict_mars["hemisphere_image_urls"][0]["title"])
# pprint(dict_mars["hemisphere_image_urls"][0]["img_url"])
# pprint(dict_mars["hemisphere_image_urls"][1]["title"])
# pprint(dict_mars["hemisphere_image_urls"][1]["img_url"])
# pprint(dict_mars["hemisphere_image_urls"][2]["title"])
# pprint(dict_mars["hemisphere_image_urls"][2]["img_url"])
# pprint(dict_mars["hemisphere_image_urls"][3]["title"])
# pprint(dict_mars["hemisphere_image_urls"][3]["img_url"])

'Cerberus Hemisphere'
'https://astrogeology.usgs.gov/cache/images/cfa62af2557222a02478f1fcd781d445_cerberus_enhanced.tif_full.jpg'
'Schiaparelli Hemisphere'
'https://astrogeology.usgs.gov/cache/images/3cdd1cbf5e0813bba925c9030d13b62e_schiaparelli_enhanced.tif_full.jpg'
'Syrtis Major Hemisphere'
'https://astrogeology.usgs.gov/cache/images/ae209b4e408bb6c3e67b6af38168cf28_syrtis_major_enhanced.tif_full.jpg'
'Valles Marineris Hemisphere'
'https://astrogeology.usgs.gov/cache/images/7cf2da4bf549ed01c17f206327be4db7_valles_marineris_enhanced.tif_full.jpg'


In [46]:
# import pymongo

In [47]:
# # Initialize PyMongo to work with MongoDB
# conn = 'mongodb://localhost:27017'
# client = pymongo.MongoClient(conn)

# # Define database and collection
# db = client.mars_db

# # Refresh database (i.e. drop table/collection)
# db.mars_data.drop()

# # Re-create the collection
# mars_collection = db.mars_data
# mars_collection.insert_one(dict_mars)

<pymongo.results.InsertOneResult at 0x25bc67cce88>

In [53]:
# conn = 'mongodb://localhost:27017'
# client = pymongo.MongoClient(conn)
# # Define database and collection
# db = client.mars_db

# # Retrieve the documents stored in the collection
# mars_collection = db.mars_data.find()

In [55]:
# pprint(mars_collection[0]["news_title"])

"Virginia Middle School Student Earns Honor of Naming NASA's Next Mars Rover"
