## HW#13 Web Scraping and Document Databases
## Mission to Mars
### Due February 25, 2018
### Step 1 - Web Scraping and Analysis

In [2]:
#Import dependencies
from bs4 import BeautifulSoup
from splinter import Browser
import pandas as pd
import requests
import time
import re

### NASA Mars News

In [3]:
#Retrieve page with the requests module
browser = Browser('chrome', headless=False) 
url = 'https://mars.nasa.gov/news/'
browser.visit(url)
html=browser.html

In [4]:
#Create a BeautifulSoup object and parse html
soup = BeautifulSoup(html, 'html.parser')

#Extract latest news title and paragraph
#print(soup.prettify())

In [5]:
news_title= soup.find('div', class_='content_title').get_text()
print(news_title)
news_paragraph=soup.find('div', class_='rollover_description_inner').get_text()
print(news_paragraph)

NASA InSight Mission to Mars Arrives at Launch Site
NASA's InSight spacecraft has arrived at Vandenberg Air Force Base in central California to begin final preparations for a launch this May.


### JPL Mars Space Images

In [6]:
#Use splinter to retrieve featured image from JPL page
executable_path = {'executable_path': '/usr/local/bin/chromedriver'} 
browser = Browser('chrome', **executable_path, headless=False)
url = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'
browser.visit(url)

#Click through the pages to reach the link containing the high res jpg image
time.sleep(2)
browser.click_link_by_partial_text('FULL IMAGE')
time.sleep(2)
browser.click_link_by_partial_text('more info')
time.sleep(2)
browser.click_link_by_partial_text('.jpg')

In [7]:
#Retrieve image url
html=browser.html
soup=BeautifulSoup(html,'html.parser')
#print(soup.prettify())
featured_image_url=soup.find('img').get('src')
print(featured_image_url)

https://photojournal.jpl.nasa.gov/jpeg/PIA18886.jpg


### Mars Weather

In [8]:
#Scrape the Mars Weather Twitter page. Save latest weather tweet text as variable
url='https://twitter.com/marswxreport?lang=en'
html=requests.get(url)
soup = BeautifulSoup(html.text, 'html.parser')
#print(soup.prettify())
mars_weather=soup.find_all(string=re.compile("Sol"), class_="TweetTextSize TweetTextSize--normal js-tweet-text tweet-text")[0].text
mars_weather

'Sol 1978 (Feb 28, 2018), Sunny, high -14C/6F, low -78C/-108F, pressure at 7.27 hPa, daylight 05:37-17:25'

### Mars Facts

In [9]:
#Scrape HTML Mars Planet Profile
url='https://space-facts.com/mars/'
table=pd.read_html(url)
#table
#type(table)
table_df=table[0]
table_df=table_df.rename(columns={0:'Mars Planet Profile', 1: ''})
table_df.set_index('Mars Planet Profile', inplace=True)
table_df

Mars Planet Profile,Unnamed: 1
Equatorial Diameter:,"6,792 km"
Polar Diameter:,"6,752 km"
Mass:,6.42 x 10^23 kg (10.7% Earth)
Moons:,2 (Phobos & Deimos)
Orbit Distance:,"227,943,824 km (1.52 AU)"
Orbit Period:,687 days (1.9 years)
Surface Temperature:,-153 to 20 °C
First Record:,2nd millennium BC
Recorded By:,Egyptian astronomers


In [10]:
#Convert table to html table
table=table_df.to_html()
#table

facts_table=table.replace('\n','')
#facts_table

table_df.to_html('facts_table.html')
#!open facts_table.html

In [20]:
# facts_table.html.replace('/n',"")
    #Strip items from html table
soup=BeautifulSoup(open('facts_table.html'), 'html.parser')
table_description=[]
table_values={}
for item in soup.table('tr'):
    #print(item.text)
    table_description.append(item.text.strip(":"))
table_values=dict([(k,v) for k,v in zip (table_description[::2], table_description[1::2])])
print (table_values)
#print(table_description)

{'\n\n\n': '\nMars Planet Profile\n\n', '\nEquatorial Diameter:\n6,792 km\n': '\nPolar Diameter:\n6,752 km\n', '\nMass:\n6.42 x 10^23 kg (10.7% Earth)\n': '\nMoons:\n2 (Phobos & Deimos)\n', '\nOrbit Distance:\n227,943,824 km (1.52 AU)\n': '\nOrbit Period:\n687 days (1.9 years)\n', '\nSurface Temperature:\n-153 to 20 °C\n': '\nFirst Record:\n2nd millennium BC\n'}


### Mars Hemispheres

In [18]:
#Scrape Mars's hemispheres images from USGS Astrogeology site
executable_path = {'executable_path': '/usr/local/bin/chromedriver'} 
browser = Browser('chrome', **executable_path, headless=False)
base_url='https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'
# landing_url ='https://astrogeology.usgs.gov'
#url="https://astrogeology.usgs.gov/search/map/Mars/Viking/cerberus_enhanced"
browser.visit(base_url)

html=browser.html
soup = BeautifulSoup(html, 'html.parser')

In [19]:
#Create a dictionary containing hemisphere titles and images urls
hemisphere_image_urls=[]
hemisphere_dict={'title':[], 'img_url':[]}
x=soup.find_all('h3')
#print(x)

for i in x:
    y=i.get_text()
    title=y.strip('Enhanced')
    browser.click_link_by_partial_text(y)
    url=browser.find_link_by_partial_href('download')['href']
    hemisphere_dict={'title':title, 'img_url':url}
    hemisphere_image_urls.append(hemisphere_dict)
    browser.visit(base_url)
print(hemisphere_image_urls)

[{'title': 'Cerberus Hemisphere ', 'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg'}, {'title': 'Schiaparelli Hemisphere ', 'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg'}, {'title': 'Syrtis Major Hemisphere ', 'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg'}, {'title': 'Valles Marineris Hemisphere ', 'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg'}]
