In [63]:
import pymongo
import pandas as pd
from splinter import Browser
from bs4 import BeautifulSoup
import requests
import time

## Splinter scraping

I don't like how this approach works, especially the time spent waiting for the browser to load.

In [64]:
def init_browser(executable_path="/usr/local/bin/chromedriver", headless=True):
    """Start a Splinter-controlled Chrome browser.

    Args:
        executable_path: Path to the chromedriver binary. The default is the
            location that used to be hard-coded here; pass your own path if
            chromedriver is installed somewhere else.
        headless: When True (the default), Chrome is started in headless mode.

    Returns:
        The object returned by ``Browser("chrome", ...)``.
    """
    return Browser("chrome", executable_path=executable_path, headless=headless)


In [65]:
def scrape():
    """Scrape the latest headline and teaser from the NASA Mars news page.

    Drives a browser via ``init_browser()``, loads the news page, and parses
    the rendered HTML with BeautifulSoup.

    Returns:
        dict with keys ``"headline"`` and ``"teaser"`` holding the text of the
        first ``content_title`` and ``rollover_description_inner`` divs.
        Surrounding whitespace is left as-is.

    Raises:
        AttributeError: if either element is not found in the page.
    """
    browser = init_browser()
    try:
        url = "https://mars.nasa.gov/news/"
        browser.visit(url)

        # Wait BEFORE reading the page source so the page has time to finish
        # loading. Sleeping after the HTML was already captured (as before)
        # could not change what gets parsed.
        time.sleep(5)

        html = browser.html
        soup = BeautifulSoup(html, "html.parser")

        listings = {}
        listings["headline"] = soup.find("div", class_="content_title").get_text()
        listings["teaser"] = soup.find("div", class_="rollover_description_inner").get_text()
        return listings
    finally:
        # Always shut the browser down, even when parsing fails; otherwise
        # each call leaves a browser/driver process running.
        browser.quit()


In [66]:
# Run the Splinter-based scraper; the returned dict is displayed as the cell output.
scrape()

{'headline': '\n\nNASA Announces Landing Site for Mars 2020 Rover\n\n',
 'teaser': '\nAfter a five-year search, NASA has chosen Jezero Crater as the landing site for its upcoming Mars 2020 rover mission.\n'}

## Requests scraping of mars.nasa.gov


In [67]:
# Preferred approach: plain HTTP requests, so there is no browser to open or
# wait on.

# Send a desktop-browser User-Agent. Per the original note, the HTML served to
# the default Python client does not contain the CSS classes scraped below.
request_headers = {
    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.131 Safari/537.36",
}

def mars_nasa_scrape(timeout=30):
    """Scrape the latest NASA Mars news headline and teaser using requests.

    Args:
        timeout: Seconds to wait for the server before giving up. requests
            applies no timeout by default, so without this a stalled
            connection would block the notebook indefinitely.

    Returns:
        dict with keys ``"headline"`` and ``"teaser"``, each stripped of
        leading/trailing whitespace.

    Raises:
        requests.exceptions.RequestException: on connection failure, timeout,
            or a 4xx/5xx response.
        AttributeError: if either element is not found in the page.
    """
    url = "https://mars.nasa.gov/news/"
    response = requests.get(url, headers=request_headers, timeout=timeout)
    # Fail loudly on an HTTP error instead of trying to parse an error page.
    response.raise_for_status()

    soup_requests = BeautifulSoup(response.text, 'lxml')
    return {
        "headline": soup_requests.find("div", class_="content_title").get_text().strip(),
        "teaser": soup_requests.find("div", class_="rollover_description_inner").get_text().strip(),
    }

# Run the requests-based scraper; the returned dict is displayed as the cell output.
mars_nasa_scrape()

{'headline': 'Why This Martian Full Moon Looks Like Candy',
 'teaser': "For the first time, NASA's Mars Odyssey orbiter has caught the Martian moon Phobos during a full moon phase. Each color in this new image represents a temperature range detected by Odyssey's infrared camera."}

## JPL Images

In [68]:
# I looked at the full size one and feel that they're too big, so I'm doing the medium size. If I chose to do the full size, 
# I'd just take the ID from the url and substitute it in https://photojournal.jpl.nasa.gov/jpeg/PIA19631.jpg
def jpl_image_scrape():
    """Return the URL of the featured image on the JPL Mars space-images page.

    Loads the page in a browser from ``init_browser()``, finds the
    ``<a id="full_image">`` element, and joins its ``data-fancybox-href``
    path onto the JPL host.

    Returns:
        str: absolute image URL beginning with ``https://www.jpl.nasa.gov``.

    Raises:
        TypeError: if the ``full_image`` anchor is not found in the page.
        KeyError: if the anchor has no ``data-fancybox-href`` attribute.
    """
    browser = init_browser()
    try:
        url = "https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars"
        browser.visit(url)
        html = browser.html
    finally:
        # The page source is all that is needed from the browser; close it
        # even if the visit fails so no browser/driver process is left behind.
        browser.quit()

    soup = BeautifulSoup(html, "html.parser")
    JPL_img = soup.find("a", id="full_image")
    url_path = JPL_img['data-fancybox-href']
    full_url = f"https://www.jpl.nasa.gov{url_path}"
    return full_url



In [69]:
# Run the JPL image scraper; the returned URL is displayed as the cell output.
jpl_image_scrape()

'https://www.jpl.nasa.gov/spaceimages/images/mediumsize/PIA18811_ip.jpg'

## Twitter Weather Scraping

In [70]:

def weather_scrape(timeout=30):
    """Scrape the text of the first tweet on the Mars weather Twitter page.

    Args:
        timeout: Seconds to wait for the server before giving up. requests
            applies no timeout by default, so without this a stalled
            connection would block the notebook indefinitely.

    Returns:
        str: text of the first ``<p class="tweet-text">`` element, cut off
        before any ``pic.twitter.com`` link, with newlines replaced by spaces.

    Raises:
        requests.exceptions.RequestException: on connection failure, timeout,
            or a 4xx/5xx response.
        AttributeError: if no tweet-text element is found in the page.
    """
    url = "https://twitter.com/marswxreport?lang=en"
    response = requests.get(url, timeout=timeout)
    # Fail loudly on an HTTP error instead of trying to parse an error page.
    response.raise_for_status()

    soup = BeautifulSoup(response.text, 'lxml')
    weather = soup.find("p", class_="tweet-text").get_text()
    # Drop the trailing picture link, keeping only the weather report text.
    weather = weather.split("pic.twitter.com")[0]
    weather = weather.replace("\n", " ")
    return weather

# Run the weather scraper; the returned string is displayed as the cell output.
weather_scrape()

'InSight sol 167 (2019-05-17) low -100.5ºC (-148.9ºF) high -20.4ºC (-4.6ºF) winds from the SW at 4.7 m/s (10.6 mph) gusting to 13.5 m/s (30.3 mph) pressure at 7.50 hPa'

## Scrape mars facts

In [71]:
# Page whose first HTML table holds the Mars facts.
pandas_url = "https://space-facts.com/mars/"

# read_html returns one DataFrame per table on the page; take the first.
mars_facts = pd.read_html(pandas_url)[0]
mars_facts.columns = ["Description", "Value"]

# Index the rows by fact name.
mars_facts = mars_facts.set_index("Description")
mars_facts

Unnamed: 0_level_0,Value
Description,Unnamed: 1_level_1
Equatorial Diameter:,"6,792 km"
Polar Diameter:,"6,752 km"
Mass:,6.42 x 10^23 kg (10.7% Earth)
Moons:,2 (Phobos & Deimos)
Orbit Distance:,"227,943,824 km (1.52 AU)"
Orbit Period:,687 days (1.9 years)
Surface Temperature:,-153 to 20 °C
First Record:,2nd millennium BC
Recorded By:,Egyptian astronomers


## Mars Hemisphere

In [56]:

# One entry per Mars hemisphere: a display title plus the URL of its
# full-size image on the USGS Astropedia site. Every URL follows the same
# pattern, so only the hemisphere name and its file slug are listed.
hemisphere_image_urls = [
    {
        "title": f"{name} Hemisphere",
        "img_url": f"https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/{slug}_enhanced.tif/full.jpg",
    }
    for name, slug in (
        ("Valles Marineris", "valles_marineris"),
        ("Cerberus", "cerberus"),
        ("Schiaparelli", "schiaparelli"),
        ("Syrtis Major", "syrtis_major"),
    )
]

