In [11]:
from bs4 import BeautifulSoup as bs 
import pandas as pd 
import requests 
import time

# import splinter and set the chromedriver path
from splinter import Browser 


### Define useful functions
* visit_scrape_soup - Visit the url and scrape into soup
* scrape_hemisphere - Visit the USGS Astrogeology site to obtain high resolution images for the requested Mars hemisphere.

In [12]:
# Pause, in seconds, applied after each navigation before the page is read.
sleeptime = 0.5


def visit_scrape_soup(b, url):
    """Navigate the browser to ``url`` and return the page parsed as soup.

    Parameters
    ----------
    b
        Browser object exposing ``visit(url)`` and an ``html`` attribute
        (the notebook passes a splinter ``Browser``).
    url : str
        Address to load.

    Returns
    -------
    BeautifulSoup
        ``b.html`` parsed with the ``lxml`` parser.
    """
    b.visit(url)
    # Fixed pause rather than an explicit wait on an element; presumably this
    # gives the page time to render before its HTML is captured.
    time.sleep(sleeptime)
    return bs(b.html, 'lxml')

def scrape_hemisphere(b, num):
    """Scrape the title and image link of one Mars hemisphere from the USGS site.

    The browser must already be showing the hemisphere search-results page.
    The function clicks the requested thumbnail, reads the detail page, and
    then navigates back so the browser is on the results page again.

    Parameters
    ----------
    b
        Browser object exposing ``find_by_xpath``, ``html`` and ``back``
        (the notebook passes a splinter ``Browser``).
    num : int
        Zero-based position of the thumbnail to open among the results.

    Returns
    -------
    dict
        ``{"title": <hemisphere name>, "url": <href of the first download link>}``.

    Raises
    ------
    AttributeError
        If the detail page lacks the expected ``h2.title`` or ``div.downloads``
        elements (e.g. the page had not finished loading).
    """
    # XPath selecting the thumbnail image of every search result
    xpath = '//div[@class="collapsible results"]/div[@class="item"]/a/img'

    # Find the hemisphere thumbnails and open the requested one
    hresults = b.find_by_xpath(xpath)
    hresults[num].click()
    time.sleep(sleeptime)

    # Parse the detail page
    soup = bs(b.html, 'lxml')

    # Drop the trailing word "Enhanced" from the heading. str.strip("Enhanced")
    # must not be used here: it treats its argument as a set of characters and
    # would also eat matching letters from the hemisphere name itself.
    suffix = "Enhanced"
    title = soup.find("h2", class_="title").text.strip()
    if title.endswith(suffix):
        title = title[:-len(suffix)].strip()

    # Save title and url in dict; the url is the first link in the downloads list
    imgdict = {
        "title": title,
        "url": soup.find("div", class_="downloads").ul.li.a["href"],
    }

    # Go back to the results page so the next thumbnail can be clicked
    b.back()
    return imgdict

# NASA Mars News 
* Scraped the [NASA Mars News Site](https://mars.nasa.gov/news/) and collected the latest News Title and Paragraph Text.

In [13]:
# Launch a visible (non-headless) Chrome session driven by the chromedriver
# binary located next to the notebook.
executable_path = dict(executable_path='chromedriver.exe')
browser = Browser('chrome', headless=False, **executable_path)

In [14]:
# Load the NASA Mars news listing and parse it
mnewsurl = "https://mars.nasa.gov/news/"
nsoup = visit_scrape_soup(browser, mnewsurl)

# The first "slide" list item on the page is the news entry we report
news = nsoup.find("li", class_="slide")

# Pull the headline and the teaser text out of their respective <div>s
news_title, news_p = (
    news.find('div', class_=css_class).text
    for css_class in ('content_title', 'rollover_description_inner')
)

print(f'{news_title} : \n{news_p}')

InSight Is the Newest Mars Weather Service : 
By collecting data around the clock, NASA's lander will provide unique science about the Martian surface.


### Mars Space Images - Featured Image
* Visited the url for JPL Featured Space Image [here](https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars).
* Used splinter to navigate the site and find the image url for the current Featured Mars Image and assign the url string to a variable called featured_image_url.

* Made sure to find the image url to the full size .jpg image.

* Made sure to save a complete url string for this image.

In [15]:
jplurl = "https://www.jpl.nasa.gov"
# Visit the URL and scrape
img_search_url = f"{jplurl}/spaceimages/?search=&category=Mars"
imgsoup = visit_scrape_soup(browser, img_search_url)

# Find path to wallpaper size image of the current Featured Mars Image.
# The path is the text between the first pair of single quotes in the
# carousel item's inline style attribute (presumably a CSS url('...') value).
imgitem = imgsoup.find("article", class_="carousel_item")
imgpath = imgitem['style'].split("'")[1]

# Complete url string; named featured_image_url as described in the notes above.
featured_image_url = f"{jplurl}{imgpath}"
# Earlier name kept as an alias for anything that still refers to it.
imgurl = featured_image_url

print(featured_image_url)

https://www.jpl.nasa.gov/spaceimages/images/wallpaper/PIA18289-1920x1200.jpg


## Mars Weather
* Visit the Mars Weather Twitter account and scrape the latest Mars weather tweet from the page.

In [16]:
# Visit the URL
wurl = "https://twitter.com/marswxreport?lang=en"
wsoup = visit_scrape_soup(browser, wurl)

# Get list of tweets
tlist = wsoup.find_all("li", class_="js-stream-item")

# Collect the text of every tweet on the page, in page order.
# The loop variable must be the one used in the body; the earlier version
# iterated as `i` but read `t`, which raises NameError on a fresh kernel.
w_text = []
for t in tlist:
    tweet_node = t.find(class_="tweet-text")
    # Skip stream items that carry no tweet text instead of failing on None
    if tweet_node is not None:
        w_text.append(tweet_node.get_text())

# First tweet on the page (presumably the most recent one), or None if the
# page yielded no tweets, e.g. because it had not finished loading.
wtext = w_text[0] if w_text else None

print(wtext if wtext is not None else "No tweets found on the page")

InSight sol 80 (2019-02-16), high -16/3F, low -95/-139F, pressure at 7.23hPa, winds from the WNW at 10.7 mph gusting to 32.3 mph



## Mars Facts
* Visit the Mars Facts webpage and use Pandas to scrape the table containing facts about the planet including Diameter, Mass, etc.

In [31]:
# Let pandas' `read_html` fetch the page and parse its tables into DataFrames
furl = "http://space-facts.com/mars/"
ftables = pd.read_html(furl)

# Take the first table on the page and give its two columns readable names
fdf = ftables[0]
fdf.columns = ['Parameter', 'Value']

# Render the frame as a styled HTML table and drop the newlines that
# to_html inserts, so the markup is a single line.
fhtml = fdf.to_html(
    classes="table table-striped table-hover table-dark table-bordered table-sm",
    justify="center",
    index=False,
).replace('\n', '')

# Display the frame as the cell output
fdf

Unnamed: 0,Parameter,Value
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.42 x 10^23 kg (10.7% Earth)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.52 AU)"
5,Orbit Period:,687 days (1.9 years)
6,Surface Temperature:,-153 to 20 °C
7,First Record:,2nd millennium BC
8,Recorded By:,Egyptian astronomers


In [20]:
# url = "http://space-facts.com/mars/"

In [29]:
# df = pd.read_html(furl)[0]
# df

Unnamed: 0,0,1
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.42 x 10^23 kg (10.7% Earth)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.52 AU)"
5,Orbit Period:,687 days (1.9 years)
6,Surface Temperature:,-153 to 20 °C
7,First Record:,2nd millennium BC
8,Recorded By:,Egyptian astronomers


In [30]:
# df.columns = ["descr", "value"]
# df.set_index("descr", inplace=True)
# df

Unnamed: 0_level_0,value
descr,Unnamed: 1_level_1
Equatorial Diameter:,"6,792 km"
Polar Diameter:,"6,752 km"
Mass:,6.42 x 10^23 kg (10.7% Earth)
Moons:,2 (Phobos & Deimos)
Orbit Distance:,"227,943,824 km (1.52 AU)"
Orbit Period:,687 days (1.9 years)
Surface Temperature:,-153 to 20 °C
First Record:,2nd millennium BC
Recorded By:,Egyptian astronomers


In [None]:
# df.to_html(class)

## Mars Hemispheres
* Visit the USGS Astrogeology site to obtain high-resolution images for each of Mars's hemispheres.

In [19]:
# Open the USGS search-results page that lists the enhanced hemisphere images
hurl = "https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars"
browser.visit(hurl)
time.sleep(sleeptime)

# Scrape the first four results in page order, one dict per hemisphere
himglist = []
for hnum in range(4):
    himglist.append(scrape_hemisphere(browser, hnum))

print(himglist)

[{'title': 'Cerberus Hemisphere', 'url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg'}, {'title': 'Schiaparelli Hemisphere', 'url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg'}, {'title': 'Syrtis Major Hemisphere', 'url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg'}, {'title': 'Valles Marineris Hemisphere', 'url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg'}]


In [32]:
# Close the browser after scraping. Every cell above that uses `browser`
# must be run before this one; once quit() has been called the session
# cannot be used for further visits.
browser.quit()