# Mars Web Scraping Project (using Selenium, MongoDB and Flask)

In [178]:
# Dependencies
from bs4 import BeautifulSoup as bs
import requests
import pymongo
from splinter import Browser
import pandas as pd

#Selenium
executable_path = {'executable_path':'C:/ChromeDriver/chromedriver.exe'}
browser = Browser('chrome', **executable_path, headless=False)

#PyMongo
conn = 'mongodb://localhost:27017'
client = pymongo.MongoClient(conn)

# Latest Mars News from Nasa

In [122]:
#Use this url as the scraping target
nasa_news_url = "https://mars.nasa.gov/news/?page=0&per_page=40&order=publish_date+desc%2Ccreated_at+desc&search=&category=19%2C165%2C184%2C204&blank_scope=Latest"

#Use url client
nasaresponse = requests.get(nasa_news_url)

#Just checking to see that it works
#print(nasaresponse)

In [123]:
#Make some Nasa news soup, Mix can contents with 1 can of water
nasa_news_soup = bs(nasaresponse.text, 'html.parser')
#Response for viewing
#print(nasa_news_soup.prettify())

In [124]:
#Getting the list element closest to the p value and storing in a variable typically we would want to get the list, but since
#the project is just calling for the first we can get away with .find instead of .find_all
#Selecting text and stripping it
news_p = nasa_news_soup.find("div", class_="rollover_description_inner").text.strip()
news_p

'Nominees include four JPL projects: the solar system and climate websites, InSight social media, and a 360-degree Earth video. Public voting closes April 18, 2019.'

In [125]:
#Getting the list element closes to the tile and storing in a variable, again we can get away with .find
#Selecting text and stripping it
news_title = nasa_news_soup.find("div", class_="content_title").text.strip()
news_title

'NASA Garners 7 Webby Award Nominations'

#  JPL Featured Mars Image

In [137]:
#Use this url as the scraping target
jpl_url = "https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars#submit"

browser.visit(jpl_url)
html = browser.html
jpl_soup = bs(html, 'html.parser')
#jpl_soup

In [128]:
#looking click 'FULL IMAGE' and then 'more info'
browser.click_link_by_partial_text("FULL IMAGE")
browser.click_link_by_partial_text("more info")

In [130]:
#Feed HTMl and Parse
html = browser.html
jpl_feature_soup = bs(html, 'html.parser')
jpl_feature_image_links = jpl_feature_soup.find_all('div', class_='download_tiff')

In [138]:
#Empty list to store output of for loop
link_list = []

#Getting all links
for item in jpl_feature_image_links:
    link = item.find('a')['href']
    link_list.append(link)

In [139]:
#Getting the second item and appending to the first part of the url
jpgend = link_list[1]
featured_image_url = ('https:' + jpgend)
#Output
featured_image_url

'https://photojournal.jpl.nasa.gov/jpeg/PIA17924.jpg'

# Latest Mars Weather Report - Twitter

In [140]:
#TWITTER

In [141]:
#Acquiring scrape target
mars_twitter = 'https://twitter.com/marswxreport?lang=en'

#Use url client
twitterresponse = requests.get(mars_twitter)
#twitterresponse

In [142]:
#Make some twitter weather soup
mars_weather_parsed = bs(twitterresponse.text, 'html.parser')

In [143]:
#selecting the top tweet and stripping text
mars_weather = mars_weather_parsed.find('p', class_='TweetTextSize TweetTextSize--normal js-tweet-text tweet-text').text.strip()

In [144]:
#Output
mars_weather

'InSight sol 140 (2019-04-19) low -98.6ºC (-145.5ºF) high -18.0ºC (-0.4ºF)\nwinds from the W at 4.0 m/s (8.9 mph) gusting to 14.2 m/s (31.8 mph)\npressure at 7.40 hPapic.twitter.com/4YBCvCijXM'

# Mars Facts from Space-Facts.com

In [173]:
#MARS FACTS

In [174]:
#Acquiring scraping target
mars_facts_url = 'https://space-facts.com/mars/'
factstable = pd.read_html(mars_facts_url)
#reference first item in list
factstable_ = factstable[0]

In [177]:
#df to html
mars_facts = factstable_.to_html('mars_facts.html', index=False, header=False)

# Mars Hemisphere Images from USGS.gov

In [152]:
#HEMISPHERES

In [157]:
#ACQUIRING TARGET URL
mars_hemisphere_url = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'
browser.visit(mars_hemisphere_url)
html = browser.html
hemisphere_soup = bs(html, 'html.parser')
#hemisphere_soup

In [158]:
hemisphere_results = hemisphere_soup.find_all("div", class_="description")
#hemisphere_results

In [163]:
#Holding titles and the links to be used to gather the full image url
title = []
link_list = []
for result in hemisphere_results:
    title_text = result.find('h3').text.strip()
    link = result.a['href']
    
    title.append(title_text)
    link_list.append(link)
title
#link_list

['Cerberus Hemisphere Enhanced',
 'Schiaparelli Hemisphere Enhanced',
 'Syrtis Major Hemisphere Enhanced',
 'Valles Marineris Hemisphere Enhanced']

In [161]:
img_url = []
i = 0
j = 0
while i < 4:
    page = 'https://astrogeology.usgs.gov' + link_list[j]
    browser.visit(page)
    html = browser.html
    hemisphere_page_soup = bs(html, 'html.parser')
    hemisphere_downloads = hemisphere_page_soup.find("div", class_='downloads')
    hemisphere_url = hemisphere_downloads.ul.a['href']
    img_url.append(hemisphere_url)
    i+=1
    j+=1
img_url

['http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg',
 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg',
 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg',
 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg']

In [172]:
#Create a list of dictionaries based on title_list and img_url
dict0 = dict({ "title" : title[0], "img_url": img_url[0]})
dict1 = dict({ "title" : title[1], "img_url": img_url[1]})
dict2 = dict({ "title" : title[2], "img_url": img_url[2]})
dict3 = dict({ "title" : title[3], "img_url": img_url[3]})

hemisphere_image_urls = [dict0,dict1,dict2,dict3]
hemisphere_image_urls

[{'title': 'Cerberus Hemisphere Enhanced',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg'},
 {'title': 'Schiaparelli Hemisphere Enhanced',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg'},
 {'title': 'Syrtis Major Hemisphere Enhanced',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg'},
 {'title': 'Valles Marineris Hemisphere Enhanced',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg'}]

# Store Scraped Data in MongoDB