# Dependencies

In [220]:
# Import Package
import time
from urllib.parse import urljoin

import pandas as pd
from bs4 import BeautifulSoup as bs
from splinter import Browser

In [221]:
# Chrome Driver: open a visible (non-headless) Chrome session through splinter.
# The chromedriver location is kept in a dict and unpacked into keyword arguments.
executable_path = dict(executable_path='/usr/local/bin/chromedriver')
browser = Browser('chrome', headless=False, **executable_path)

# 1 - Web Scraping

## Instruction

### 1.1 - NASA Mars News
* Scrape the [NASA Mars News Site](https://mars.nasa.gov/news/?page=0&per_page=40&order=publish_date+desc%2Ccreated_at+desc&search=&category=19%2C165%2C184%2C204&blank_scope=Latest) and collect the latest News Title and Paragraph Text. 

### 1.2 - Mars Space Images
* Visit the url for JPL Featured Space Image [here](https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars).
* Use splinter to navigate the site and find the image url for the current Featured Mars Image and assign the url string to a variable called featured_image_url.
* Make sure to find the image url to the full size .jpg image.
* Make sure to save a complete url string for this image.

### 1.3 - Mars Weather
* Visit the Mars Weather twitter account [here](https://twitter.com/marswxreport?lang=en) and scrape the latest Mars weather tweet from the page. Save the tweet text for the weather report as a variable called mars_weather.

### 1.4 - Mars Facts
* Visit the Mars Facts webpage [here](https://space-facts.com/mars/) and use Pandas to scrape the table containing facts about the planet including Diameter, Mass, etc.
* Use Pandas to convert the data to a HTML table string.

### 1.5 - Mars Hemispheres
* Visit the USGS Astrogeology site [here](https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars) to obtain high resolution images for each of Mars's hemispheres.
* Click into each of the links to the hemispheres in order to find the image url to the full resolution image.
* Save both the image url string for the full resolution hemisphere image, and the Hemisphere title containing the hemisphere name. Use a Python dictionary to store the data using the keys img_url and title.
* Append the dictionary with the image url string and the hemisphere title to a list. This list will contain one dictionary for each hemisphere.

## 1.1 - NASA Mars News

In [222]:
## Scraping URL for the NASA Mars News listing (query string split for readability)
Mars_News_URL = (
    "https://mars.nasa.gov/news/"
    "?page=0&per_page=40"
    "&order=publish_date+desc%2Ccreated_at+desc"
    "&search=&category=19%2C165%2C184%2C204"
    "&blank_scope=Latest"
)

In [223]:
## Find the latest news information, including the news date, title and article teaser.

def get_mars_news(URL):
    """Scrape the first article listed on the NASA Mars News page.

    Uses the module-level ``browser`` to load ``URL`` and parses the rendered
    HTML with BeautifulSoup.

    Parameters
    ----------
    URL : str
        Address of the news listing page. The page is assumed to list the
        newest article first -- confirm against the ``order=`` query used.

    Returns
    -------
    dict or None
        ``{"news_date", "news_title", "news_article"}`` with surrounding
        whitespace removed, or ``None`` (after printing a failure message)
        when the page cannot be loaded or the expected elements are missing.
    """
    try:
        browser.visit(URL)
        soup = bs(browser.html, 'lxml')

        # Drill down: <ul class="item_list"> -> first <li class="slide"> -> <div class="list_text">
        news_list = soup.body.find('ul', class_='item_list')
        latest_news = news_list.find('li', class_='slide')
        latest_news_content = latest_news.find('div', class_='list_text')

        # Date, title and teaser paragraph of that first entry
        latest_news_date = latest_news_content.find('div', class_='list_date').text.strip()
        latest_news_title = latest_news_content.find('div', class_='content_title').text.strip()
        latest_news_article = latest_news_content.find('div', class_='article_teaser_body').text.strip()

        return {
            "news_date": latest_news_date,
            "news_title": latest_news_title,
            "news_article": latest_news_article,
        }

    except Exception as error:
        # `Exception` (not a bare except) so Ctrl-C / kernel interrupt still works;
        # a missing element surfaces here as AttributeError on None.
        print(f"Web Scraping Fail: {error!r}")
        return None

In [224]:
# Run the news scraper against the configured URL and show the result.
print(get_mars_news(URL=Mars_News_URL))

{'news_date': 'February 13, 2019', 'news_title': "Six Things to Know About NASA's Opportunity Rover", 'news_article': "Opportunity's mission is complete. Here are highlights from its time on Mars."}


## 1.2 - Mars Space Images

In [225]:
## Scraping URL for the JPL space images page, filtered to the Mars category
Mars_Image_URL = (
    "https://www.jpl.nasa.gov/spaceimages/"
    "?search=&category=Mars"
)

In [238]:
## Function to scrape the image URL from the JPL page.
def get_mars_image(URL):
    """Return the URL of the first usable image shown after clicking "full image".

    Uses the module-level ``browser`` to open ``URL``, clicks the element with
    id ``full_image``, then reads the first ``<a class="ready">`` thumbnail
    inside ``<div id="fancybox-thumbs">`` that carries an ``<img src=...>``.

    NOTE(review): the recorded run returned a ``mediumsize`` path, so this may
    not be the full-resolution image the notebook instructions ask for --
    confirm against the live page.

    Parameters
    ----------
    URL : str
        Address of the JPL space-images page.

    Returns
    -------
    str or None
        Image URL resolved against ``https://www.jpl.nasa.gov``, or ``None``
        when the thumbnail container or a usable image is not found.
    """
    # Press the full image button
    browser.visit(URL)
    browser.find_by_id("full_image").click()
    time.sleep(1)  # brief pause after the click, presumably so the overlay can render

    soup = bs(browser.html, 'lxml')

    fancy_box = soup.find('div', id='fancybox-thumbs')
    if fancy_box is None:
        print("Web Scraping Fail")
        return None

    # Return on the first thumbnail that actually has an <img src>; report each miss.
    for attempt, image_object in enumerate(fancy_box.find_all('a', class_='ready'), start=1):
        try:
            # urljoin keeps root-relative paths identical to plain concatenation
            # and leaves an already-absolute src untouched.
            return urljoin("https://www.jpl.nasa.gov", image_object.img['src'])
        except (TypeError, KeyError):
            # TypeError: the anchor has no <img>; KeyError: the <img> has no src.
            print(f"Try the {attempt}-th time, fail")

    return None

In [239]:
# Run the image scraper against the configured URL and show the result.
print(get_mars_image(URL=Mars_Image_URL))

https://www.jpl.nasa.gov/spaceimages/images/mediumsize/PIA20063_ip.jpg


## 1.3 - Mars Weather

In [228]:
## Scraping URL for the Mars Weather twitter account (English locale)
Mars_Weather_URL = (
    "https://twitter.com/marswxreport"
    "?lang=en"
)

In [229]:
## Function to scrape the Mars weather information.
def get_mars_weather(URL):
    """Return the text of the first tweet on the page published by "Mars Weather".

    Uses the module-level ``browser`` to load ``URL`` and scans every
    ``<li class="js-stream-item">`` in page order.

    Parameters
    ----------
    URL : str
        Address of the twitter timeline to scrape.

    Returns
    -------
    str or None
        The tweet text with any trailing ``pic.twitter.com/...`` link removed,
        or ``None`` (after printing a failure message) when no matching tweet
        is found or the page cannot be scraped.
    """
    try:
        browser.visit(URL)
        soup = bs(browser.html, 'lxml')

        for weather_container in soup.find_all('li', class_='js-stream-item'):
            publisher = weather_container.find('strong', class_='fullname')
            tweet_text = weather_container.find('div', class_='js-tweet-text-container')

            # Skip stream items that are not regular tweets instead of aborting the scan.
            if publisher is None or tweet_text is None:
                continue

            if publisher.text.strip() == "Mars Weather":
                # Cut at the attached-picture link only; splitting on bare 'pic'
                # would also truncate tweets that merely contain those letters.
                return tweet_text.text.strip().split('pic.twitter.com')[0].strip()

        print("Web Scraping Fail: no tweet from 'Mars Weather' found")
        return None

    except Exception as error:
        # `Exception` (not a bare except) so Ctrl-C / kernel interrupt still works.
        print(f"Web Scraping Fail: {error!r}")
        return None

In [230]:
# Run the weather scraper against the configured URL and show the result.
print(get_mars_weather(URL=Mars_Weather_URL))

Sol 2319 (2019-02-13), high -17C/1F, low -72C/-97F, pressure at 8.12 hPa, daylight 06:46-18:52


## 1.4 - Mars Facts

In [231]:
## Scraping URL for the Mars facts page
Mars_Facts_URL = (
    "https://space-facts.com"
    "/mars/"
)

In [232]:
## Function to scrape the Mars Facts information.
def get_mars_facts(URL):
    """Scrape the ``tablepress-mars`` facts table into a dictionary.

    Uses the module-level ``browser`` to load ``URL``. For every table row
    with at least two ``<td>`` cells, the first cell (colon removed) becomes
    the key and the second cell the value.

    Parameters
    ----------
    URL : str
        Address of the Mars facts page.

    Returns
    -------
    dict or None
        Mapping of fact name to fact value in table order, or ``None`` (after
        printing a failure message) when the page or table cannot be scraped.
    """
    try:
        browser.visit(URL)
        soup = bs(browser.html, 'lxml')

        table = soup.find('table', id='tablepress-mars')

        mars_fact = {}
        for table_row in table.find_all('tr'):
            cells = table_row.find_all('td')

            # Rows without a label/value pair (e.g. header rows) are skipped
            # rather than failing the whole scrape with an IndexError.
            if len(cells) < 2:
                continue

            key = cells[0].text.strip().replace(':', '')
            mars_fact[key] = cells[1].text.strip()

        return mars_fact

    except Exception as error:
        # `Exception` (not a bare except) so Ctrl-C / kernel interrupt still works;
        # a missing table surfaces here as AttributeError on None.
        print(f"Web Scraping Fail: {error!r}")
        return None

In [233]:
# Run the facts scraper against the configured URL and show the result.
print(get_mars_facts(URL=Mars_Facts_URL))

{'Equatorial Diameter': '6,792 km', 'Polar Diameter': '6,752 km', 'Mass': '6.42 x 10^23 kg (10.7% Earth)', 'Moons': '2 (Phobos & Deimos)', 'Orbit Distance': '227,943,824 km (1.52 AU)', 'Orbit Period': '687 days (1.9 years)', 'Surface Temperature': '-153 to 20 °C', 'First Record': '2nd millennium BC', 'Recorded By': 'Egyptian astronomers'}


## 1.5 - Mars Hemispheres

In [234]:
## Scraping URL for the USGS Astrogeology search results (enhanced Mars hemispheres)
Mars_Hemispheres_URL = (
    "https://astrogeology.usgs.gov/search/results"
    "?q=hemisphere+enhanced"
    "&k1=target&v1=Mars"
)

In [235]:
## Function to scrape the images for each of Mars's hemispheres.
def get_mars_hemispheres(URL):
    """Collect the title and wide-image URL for every hemisphere search result.

    Uses the module-level ``browser`` to load ``URL``, then visits the detail
    page linked from each ``<div class="item">`` inside
    ``<div class="collapsible results">`` and reads the ``src`` of its
    ``<img class="wide-image">``.

    Parameters
    ----------
    URL : str
        Address of the USGS Astrogeology search-results page.

    Returns
    -------
    list of dict or None
        One ``{"image_title": ..., "image_url": ...}`` dictionary per result,
        in page order, or ``None`` (after printing a failure message) when any
        step of the scrape fails. Partial results are not returned.
    """
    site_root = 'https://astrogeology.usgs.gov'
    mars_hemispheres = []

    try:
        browser.visit(URL)
        soup = bs(browser.html, 'lxml')

        image_group = soup.find('div', class_='collapsible results')

        for image_object in image_group.find_all('div', class_='item'):

            image_title = image_object.find('h3').text.strip()

            # Follow the first link in the result to its detail page.
            # urljoin matches plain concatenation for root-relative hrefs and
            # also copes with absolute or slash-less ones.
            full_image_url = urljoin(site_root, image_object.find('a')['href'])
            browser.visit(full_image_url)

            full_image_soup = bs(browser.html, 'lxml')

            # A bare string as the second argument to find() matches on CSS class.
            image_url = urljoin(site_root, str(full_image_soup.find('img', 'wide-image')['src']))

            mars_hemispheres.append({"image_title": image_title, "image_url": image_url})

            # Navigate back to the results page before handling the next item.
            browser.back()

        return mars_hemispheres

    except Exception as error:
        # `Exception` (not a bare except) so Ctrl-C / kernel interrupt still works.
        print(f"Web Scraping Fail: {error!r}")
        return None

In [236]:
# Run the hemisphere scraper against the configured URL and show the result.
print(get_mars_hemispheres(URL=Mars_Hemispheres_URL))

[{'image_title': 'Cerberus Hemisphere Enhanced', 'image_url': 'https://astrogeology.usgs.gov/cache/images/cfa62af2557222a02478f1fcd781d445_cerberus_enhanced.tif_full.jpg'}, {'image_title': 'Schiaparelli Hemisphere Enhanced', 'image_url': 'https://astrogeology.usgs.gov/cache/images/3cdd1cbf5e0813bba925c9030d13b62e_schiaparelli_enhanced.tif_full.jpg'}, {'image_title': 'Syrtis Major Hemisphere Enhanced', 'image_url': 'https://astrogeology.usgs.gov/cache/images/ae209b4e408bb6c3e67b6af38168cf28_syrtis_major_enhanced.tif_full.jpg'}, {'image_title': 'Valles Marineris Hemisphere Enhanced', 'image_url': 'https://astrogeology.usgs.gov/cache/images/7cf2da4bf549ed01c17f206327be4db7_valles_marineris_enhanced.tif_full.jpg'}]
