In [1]:
#import dependencies
from splinter import Browser
from bs4 import BeautifulSoup as bs
import pandas as pd
from selenium import webdriver

import requests
import pymongo

In [2]:
#Connect to the MongoDB server on localhost (the non-relational store)
conn = "mongodb://localhost:27017"
client = pymongo.MongoClient(conn)

#Pick the database, then the collection the scraped results are inserted into
db = client["mars_db"]
collection = db["mars_info"]

In [3]:
#URL of page to be scraped
url = 'https://mars.nasa.gov/news/'
#Retrieve the page with requests; the timeout keeps a stalled connection
#from hanging the notebook, and raise_for_status() stops on an HTTP error
#instead of silently parsing an error page
response = requests.get(url, timeout=30)
response.raise_for_status()
#Parse the returned HTML with the 'lxml' parser
soup = bs(response.text, 'lxml')

In [4]:
#get the paragraph: the first element carrying the teaser-text class
news_p_tag = soup.find(class_='rollover_description_inner')
#find() returns None when nothing matches; raise a clear error here
#rather than an AttributeError on `.text`
if news_p_tag is None:
    raise ValueError("No element with class 'rollover_description_inner' found on the news page")
#the element's text is padded with newlines, so trim it before it is stored
news_p = news_p_tag.text.strip()
print(news_p)


For the first time, NASA's Mars Odyssey orbiter has caught the Martian moon Phobos during a full moon phase. Each color in this new image represents a temperature range detected by Odyssey's infrared camera.



In [5]:
#get the title of the paragraph: the first element carrying the title class
news_title_tag = soup.find(class_='content_title')
#find() returns None when nothing matches; raise a clear error here
#rather than an AttributeError on `.text`
if news_title_tag is None:
    raise ValueError("No element with class 'content_title' found on the news page")
#the element's text is padded with newlines, so trim it before it is stored
news_title = news_title_tag.text.strip()
print(news_title)



Why This Martian Full Moon Looks Like Candy




In [6]:
#Point splinter at the chromedriver binary used to drive Chrome
executable_path = dict(executable_path="/usr/local/bin/chromedriver")

#Open a visible (non-headless) Chrome window for the pages visited through splinter
browser = Browser("chrome", headless=False, **executable_path)


In [7]:
#JPL space-images search page (Mars category) the featured image is scraped from
img_url = "https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars"

In [8]:
#get the image page; the timeout keeps a stalled connection from hanging
#the notebook and raise_for_status() surfaces HTTP errors immediately
image_resp = requests.get(img_url, timeout=30)
image_resp.raise_for_status()
image_soup = bs(image_resp.text, 'lxml')
#the first <li class="slide"> wraps a link whose 'data-fancybox-href'
#attribute is the path of the image relative to the JPL host
image = image_soup.find('li', class_ = 'slide')
#fail with a clear message when the slide or its link is missing
if image is None or image.a is None:
    raise ValueError("No <li class='slide'> image link found on the JPL page")
featured_img_url = "https://www.jpl.nasa.gov" + image.a['data-fancybox-href']


In [9]:
#show the absolute URL assembled for the featured image
print(featured_img_url)

https://www.jpl.nasa.gov/spaceimages/images/largesize/PIA23225_hires.jpg


In [10]:
#Scraping Twitter: load the marswxreport timeline in the splinter browser
twitter_url = "https://twitter.com/marswxreport?lang=en"
browser.visit(twitter_url)

#Parse the HTML currently held by the browser.
#NOTE: this rebinds `soup`, which until now held the parsed news page.
html = browser.html
soup = bs(html, features="html.parser")

In [11]:
#Print Mars weather: text of the first tweet-text element on the page
weather_tag = soup.find(class_="TweetTextSize TweetTextSize--normal js-tweet-text tweet-text")
#find() returns None when nothing matches; raise a clear error here
#rather than an AttributeError on `.text`
if weather_tag is None:
    raise ValueError("No tweet text element found on the Twitter page")
mars_weather = weather_tag.text
print(mars_weather)

InSight sol 170 (2019-05-20) low -100.9ºC (-149.6ºF) high -21.2ºC (-6.1ºF)
winds from the SW at 4.7 m/s (10.5 mph) gusting to 14.6 m/s (32.6 mph)
pressure at 7.50 hPapic.twitter.com/md3upbYmBI


In [12]:
#Scrape the facts tables from the space-facts Mars page;
#read_html gives back a list of DataFrames
facts_url = "https://space-facts.com/mars/"

tables = pd.read_html(io=facts_url)
tables

[                      0                              1
 0  Equatorial Diameter:                       6,792 km
 1       Polar Diameter:                       6,752 km
 2                 Mass:  6.42 x 10^23 kg (10.7% Earth)
 3                Moons:            2 (Phobos & Deimos)
 4       Orbit Distance:       227,943,824 km (1.52 AU)
 5         Orbit Period:           687 days (1.9 years)
 6  Surface Temperature:                  -153 to 20 °C
 7         First Record:              2nd millennium BC
 8          Recorded By:           Egyptian astronomers]

In [13]:
#Make the first scraped table into its own dataframe; it is copied so that
#renaming the columns does not also alter the frame still held in `tables`
facts_df = tables[0].copy()
facts_df.columns = ["Feature", "Description"]
facts_df.head()

Unnamed: 0,Feature,Description
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.42 x 10^23 kg (10.7% Earth)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.52 AU)"


In [14]:
#convert the facts dataframe to an HTML table string
html_table = facts_df.to_html()

In [15]:
#visit the site using splinter for the hemispheres
hemi_url = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'

#list of hemi names, used as partial link texts on the results page
h_list = ['Cerberus', 'Schiaparelli', 'Syrtis', 'Valles']
#List for Hemi urls: one {'title': ..., 'img_url': ...} dict per hemisphere
hemisphere_img_urls = []

#try/finally so the browser is closed even when one of the pages fails to scrape
try:
    #open browser
    browser.visit(hemi_url)

    #for loop to grab the hemisphere stuff
    for hemi in h_list:
        #follow the link whose text contains the hemisphere name
        browser.click_link_by_partial_text(hemi)
        hemi_soup = bs(browser.html, 'html.parser')

        #the image link is the first <a> in the first list item of the 'downloads' div
        downloads = hemi_soup.find('div', class_='downloads')
        if downloads is None:
            raise ValueError("No 'downloads' section found on the page for " + hemi)
        #named hemi_img_url so the loop does not overwrite the news-page `url`
        hemi_img_url = downloads.ul.li.a['href']

        #keep the part of the page title that precedes ' Enhanced'
        hemi_title = hemi_soup.title.text.partition(' Enhanced')[0]
        hemisphere_img_urls.append({'title': hemi_title, "img_url": hemi_img_url})

        #return to the results page before following the next link
        browser.back()
finally:
    #close the browser
    browser.quit()


In [16]:
#display the collected hemisphere title / image-URL records
hemisphere_img_urls

[{'title': 'Cerberus Hemisphere',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg'},
 {'title': 'Schiaparelli Hemisphere',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg'},
 {'title': 'Syrtis Major Hemisphere',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg'},
 {'title': 'Valles Marineris Hemisphere',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg'}]

In [17]:
#TODO: for the Flask app, look into Flask-PyMongo

In [18]:
 #Gather every scraped result into a single dictionary
 mars_data = {
     #news headline and its teaser paragraph
     "News_title": news_title,
     "News_paragraph": news_p,
     #featured image URL, weather tweet text and the facts table as HTML
     "JPL_featured_img": featured_img_url,
     "mars_weather": mars_weather,
     "mars_facts": html_table,
     #one title / image-URL record per hemisphere, in scrape order
     "Cerberus": hemisphere_img_urls[0],
     "Schiaparelli": hemisphere_img_urls[1],
     "Syrtis Major": hemisphere_img_urls[2],
     "Valles Marineris": hemisphere_img_urls[3],
 }

In [19]:
#Store each scraped item as its own document in the `mars_info` collection.
#The last entry indexes the hemisphere list itself (hemisphere_img_urls[3]);
#subscripting the one-key dict literal with [3] raised KeyError instead.
mongo = collection.insert_many([
        {"News_title": news_title},
        {"News_paragraph": news_p},
        {"JPL_featured_img": featured_img_url},
        {"mars_weather": mars_weather},
        {"mars_facts": html_table},
        {"Cerberus": hemisphere_img_urls[0]},
        {"Schiaparelli": hemisphere_img_urls[1]},
        {"Syrtis Major": hemisphere_img_urls[2]},
        {"Valles Marineris": hemisphere_img_urls[3]}])

In [20]:
#show the result object returned by insert_many
print(mongo)

<pymongo.results.InsertManyResult object at 0x1240339c8>
