# Import Dependencies

In [1]:
import json
import time
from datetime import datetime
from io import StringIO

import pandas as pd
import pymongo
from bs4 import BeautifulSoup
from splinter import Browser

# Create Functions to Web Scrape Individual Pages

### Function 1: Mars Exploration Program News

In [2]:
def scrape_URL_1(chromedriver_path=r"/home/bdr/Desktop/chromedriver"):
    """Scrape the first linked headline from the NASA Mars news page.

    Parameters
    ----------
    chromedriver_path : str, optional
        Path of the chromedriver executable handed to Splinter. Defaults to
        the location this notebook was originally written against.

    Returns
    -------
    dict
        ``newsTitle`` (headline text), ``newsLink`` (absolute article URL)
        and ``newsText`` (text of the first teaser element).

    Raises
    ------
    ValueError
        If the page contains no ``content_title`` element with a link, or
        no ``article_teaser_body`` element.
    """
    # Leverage Splinter to Web Scrape
    executable_path = {'executable_path': chromedriver_path}
    browser = Browser('chrome', **executable_path, headless=True)

    # *** URL 1: NASA Mars News ****
    url = 'https://mars.nasa.gov/news/'
    try:
        browser.visit(url)
        # Fixed pause before reading the page source; presumably gives the
        # page time to finish rendering.
        time.sleep(4)
        html = browser.html
    finally:
        # Always release the browser, even when the visit fails.
        browser.quit()

    # Parse HTML
    soup = BeautifulSoup(html, "lxml")
    titles = soup.find_all(class_="content_title")
    texts = soup.find_all(class_="article_teaser_body")

    # Filter for First Title with a Link
    firstTitle = next((title for title in titles if title.a), None)
    if firstTitle is None:
        raise ValueError("No linked 'content_title' element found at " + url)
    if not texts:
        raise ValueError("No 'article_teaser_body' element found at " + url)

    # Create Dictionary with Target Information
    return {
        "newsTitle": firstTitle.a.text.strip(),
        "newsLink": "https://mars.nasa.gov" + firstTitle.a['href'],
        "newsText": texts[0].text.strip(),
    }

In [3]:
# Run the news scraper and show the returned dictionary as the cell output.
mars_news = scrape_URL_1()
mars_news

{'newsTitle': "How NASA's Mars Helicopter Will Reach the Red Planet's Surface",
 'newsLink': 'https://mars.nasa.gov/news/8699/how-nasas-mars-helicopter-will-reach-the-red-planets-surface/',
 'newsText': 'The small craft will seek to prove that powered, controlled flight is possible on another planet. But just getting it onto the surface of Mars will take a whole lot of ingenuity.'}

### Function 2: JPL Image

In [4]:
def scrape_URL_2(chromedriver_path=r"/home/bdr/Desktop/chromedriver"):
    """Scrape the featured image URL from the JPL Mars space-images page.

    Parameters
    ----------
    chromedriver_path : str, optional
        Path of the chromedriver executable handed to Splinter. Defaults to
        the location this notebook was originally written against.

    Returns
    -------
    dict
        A single key, ``featureImageURL``, holding the absolute image URL.

    Raises
    ------
    ValueError
        If no ``carousel_item`` element is present, or its ``style``
        attribute does not contain a quoted path in the expected position.
    """
    # Leverage Splinter to Web Scrape
    executable_path = {'executable_path': chromedriver_path}
    browser = Browser('chrome', **executable_path, headless=True)

    # *** URL 2: JPL Mars Space Images ****
    url = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'
    try:
        browser.visit(url)
        # Fixed pause before reading the page source; presumably gives the
        # page time to finish rendering.
        time.sleep(4)
        html = browser.html
    finally:
        # Always release the browser, even when the visit fails.
        browser.quit()

    # Parse HTML
    soup = BeautifulSoup(html, "lxml")

    # Capture Image
    images = soup.find_all(class_="carousel_item")
    if not images:
        raise ValueError("No 'carousel_item' element found at " + url)

    # The path is taken from the second space-separated token of the style
    # attribute, between the first pair of single quotes (presumably a
    # "background-image: url('...')" declaration -- confirm against the page).
    style = images[0].get("style", "")
    try:
        relativePath = style.split(" ")[1].split("'")[1]
    except IndexError:
        raise ValueError(
            "Unexpected 'style' value on carousel item: " + repr(style)
        ) from None

    # Create Dictionary with Target Information
    return {
        "featureImageURL": 'https://www.jpl.nasa.gov' + relativePath
    }

In [5]:
# Run the JPL image scraper and show the returned dictionary as the cell output.
jpl_image = scrape_URL_2()
jpl_image

{'featureImageURL': 'https://www.jpl.nasa.gov/spaceimages/images/wallpaper/PIA12826-1920x1200.jpg'}

### Function 3: Mars Weather

In [6]:
def scrape_URL_3(chromedriver_path=r"/home/bdr/Desktop/chromedriver"):
    """Scrape the first weather tweet and status link from @MarsWxReport.

    Parameters
    ----------
    chromedriver_path : str, optional
        Path of the chromedriver executable handed to Splinter. Defaults to
        the location this notebook was originally written against.

    Returns
    -------
    dict
        ``tweetWeatherText`` is the text of the first ``<span>`` containing
        "InSight sol"; ``tweetWeatherURL`` is the first link whose ``href``
        contains "status". Either value is an empty string when nothing
        matches (best-effort, no exception is raised).
    """
    # Leverage Splinter to Web Scrape
    executable_path = {'executable_path': chromedriver_path}
    browser = Browser('chrome', **executable_path, headless=True)

    # *** URL 3: Mars Weather ****
    url = 'https://twitter.com/marswxreport?lang=en'
    try:
        browser.visit(url)
        # Fixed pause before reading the page source; presumably gives the
        # page time to finish rendering.
        time.sleep(4)
        html = browser.html
    finally:
        # Always release the browser, even when the visit fails.
        browser.quit()

    # Parse HTML
    soup = BeautifulSoup(html, "lxml")

    # Capture Weather: first span whose text mentions an InSight sol report.
    tweetText = next(
        (span.text for span in soup.find_all("span") if "InSight sol" in span.text),
        "",
    )

    # href=True skips anchors that have no href attribute at all; indexing
    # link['href'] on those would raise KeyError.
    tweetLink = next(
        (
            "https://www.twitter.com" + link["href"]
            for link in soup.find_all("a", href=True)
            if "status" in link["href"]
        ),
        "",
    )

    # Create Dictionary with Target Information
    return {
        "tweetWeatherURL": tweetLink,
        "tweetWeatherText": tweetText
    }

In [7]:
# Run the weather scraper and show the returned dictionary as the cell output.
mars_weather = scrape_URL_3()
mars_weather

{'tweetWeatherURL': 'https://www.twitter.com/MarsWxReport/status/1277283412705972226',
 'tweetWeatherText': 'InSight sol 564 (2020-06-28) low -89.3ºC (-128.8ºF) high -4.3ºC (24.2ºF)\nwinds from the SSW at 5.0 m/s (11.3 mph) gusting to 15.4 m/s (34.4 mph)\npressure at 7.70 hPa'}

### Function 4: Mars Facts

In [8]:
def scrape_URL_4(chromedriver_path=r"/home/bdr/Desktop/chromedriver"):
    """Scrape the first HTML table from the space-facts Mars page.

    Parameters
    ----------
    chromedriver_path : str, optional
        Path of the chromedriver executable handed to Splinter. Defaults to
        the location this notebook was originally written against.

    Returns
    -------
    dict
        ``marsStats`` is a list of ``{"Attribute": ..., "Value": ...}``
        records built from the first table on the page; ``dateScraped`` is
        the ``datetime.now()`` timestamp taken when the result is built.

    Raises
    ------
    ValueError
        Propagated from pandas when the page has no table or the first
        table does not have exactly two columns.
    """
    # Leverage Splinter to Web Scrape
    executable_path = {'executable_path': chromedriver_path}
    browser = Browser('chrome', **executable_path, headless=True)

    # *** URL 4: Mars Facts ****
    url = 'https://space-facts.com/mars/'
    try:
        browser.visit(url)
        # Fixed pause before reading the page source; presumably gives the
        # page time to finish rendering.
        time.sleep(4)
        html = browser.html
    finally:
        # Close the browser before parsing so a parse failure cannot leak it.
        browser.quit()

    # Parse HTML
    # Returns List of Tables: Target Table is the First
    # Wrapped in StringIO because passing literal HTML to read_html is deprecated.
    dfs = pd.read_html(StringIO(html))
    stats = dfs[0]
    stats.columns = ["Attribute", "Value"]

    # JSONify Table: round-trip through JSON to get plain dict records.
    data_stats = json.loads(stats.to_json(orient="records"))

    # Create Dictionary with Target Information
    return {
        "marsStats": data_stats,
        "dateScraped": datetime.now()
    }

In [9]:
# Run the facts scraper and show the returned dictionary as the cell output.
mars_facts = scrape_URL_4()
mars_facts

{'marsStats': [{'Attribute': 'Equatorial Diameter:', 'Value': '6,792 km'},
  {'Attribute': 'Polar Diameter:', 'Value': '6,752 km'},
  {'Attribute': 'Mass:', 'Value': '6.39 × 10^23 kg (0.11 Earths)'},
  {'Attribute': 'Moons:', 'Value': '2 (Phobos & Deimos)'},
  {'Attribute': 'Orbit Distance:', 'Value': '227,943,824 km (1.38 AU)'},
  {'Attribute': 'Orbit Period:', 'Value': '687 days (1.9 years)'},
  {'Attribute': 'Surface Temperature:', 'Value': '-87 to -5 °C'},
  {'Attribute': 'First Record:', 'Value': '2nd millennium BC'},
  {'Attribute': 'Recorded By:', 'Value': 'Egyptian astronomers'}],
 'dateScraped': datetime.datetime(2020, 6, 28, 22, 37, 44, 43735)}

# Merge Dictionaries

In [10]:
def Merge(dict1, dict2, dict3, dict4):
    """Combine four mappings into one new dictionary.

    Keys are taken left to right; when a key appears in more than one
    input, the value from the right-most input wins. None of the inputs
    is modified.
    """
    combined = {}
    for source in (dict1, dict2, dict3, dict4):
        # Rebuild with unpacking so each input is handled exactly as in a
        # single {**a, **b, ...} literal.
        combined = {**combined, **source}
    return combined

In [11]:
# Combine the four individual scrape results and show the merged dictionary.
mars_info = Merge(mars_news, jpl_image, mars_weather, mars_facts)
mars_info

{'newsTitle': "How NASA's Mars Helicopter Will Reach the Red Planet's Surface",
 'newsLink': 'https://mars.nasa.gov/news/8699/how-nasas-mars-helicopter-will-reach-the-red-planets-surface/',
 'newsText': 'The small craft will seek to prove that powered, controlled flight is possible on another planet. But just getting it onto the surface of Mars will take a whole lot of ingenuity.',
 'featureImageURL': 'https://www.jpl.nasa.gov/spaceimages/images/wallpaper/PIA12826-1920x1200.jpg',
 'tweetWeatherURL': 'https://www.twitter.com/MarsWxReport/status/1277283412705972226',
 'tweetWeatherText': 'InSight sol 564 (2020-06-28) low -89.3ºC (-128.8ºF) high -4.3ºC (24.2ºF)\nwinds from the SSW at 5.0 m/s (11.3 mph) gusting to 15.4 m/s (34.4 mph)\npressure at 7.70 hPa',
 'marsStats': [{'Attribute': 'Equatorial Diameter:', 'Value': '6,792 km'},
  {'Attribute': 'Polar Diameter:', 'Value': '6,752 km'},
  {'Attribute': 'Mass:', 'Value': '6.39 × 10^23 kg (0.11 Earths)'},
  {'Attribute': 'Moons:', 'Value': '

# Web Scrape Function

### Leveraging individual functions

In [12]:
def webSrapeMars():
    """Run the four page scrapers in order and return one merged dictionary."""
    # Tuple elements are evaluated left to right, so the pages are visited
    # in the order news, image, weather, facts.
    scraped_parts = (
        scrape_URL_1(),
        scrape_URL_2(),
        scrape_URL_3(),
        scrape_URL_4(),
    )

    # Consolidate the individual dictionaries
    return Merge(*scraped_parts)

In [13]:
# Run every scraper through the combined entry point and show the result.
full_scrape = webSrapeMars()
full_scrape

{'newsTitle': "How NASA's Mars Helicopter Will Reach the Red Planet's Surface",
 'newsLink': 'https://mars.nasa.gov/news/8699/how-nasas-mars-helicopter-will-reach-the-red-planets-surface/',
 'newsText': 'The small craft will seek to prove that powered, controlled flight is possible on another planet. But just getting it onto the surface of Mars will take a whole lot of ingenuity.',
 'featureImageURL': 'https://www.jpl.nasa.gov/spaceimages/images/wallpaper/PIA12826-1920x1200.jpg',
 'tweetWeatherURL': 'https://www.twitter.com/MarsWxReport/status/1277283412705972226',
 'tweetWeatherText': 'InSight sol 564 (2020-06-28) low -89.3ºC (-128.8ºF) high -4.3ºC (24.2ºF)\nwinds from the SSW at 5.0 m/s (11.3 mph) gusting to 15.4 m/s (34.4 mph)\npressure at 7.70 hPa',
 'marsStats': [{'Attribute': 'Equatorial Diameter:', 'Value': '6,792 km'},
  {'Attribute': 'Polar Diameter:', 'Value': '6,752 km'},
  {'Attribute': 'Mass:', 'Value': '6.39 × 10^23 kg (0.11 Earths)'},
  {'Attribute': 'Moons:', 'Value': '

# Function: Publish Dictionary to Mongo Database

In [14]:
def publishToMongo(dict):
    """Insert one document into the ``mars`` collection of ``mars_news``.

    Connects to the MongoDB server at ``mongodb://localhost:27017``, inserts
    a copy of the given dictionary, and closes the connection.

    Parameters
    ----------
    dict : dict
        Document to store. It is not modified. (The parameter name shadows
        the ``dict`` builtin inside this function.)
    """
    # Create Connection
    conn = 'mongodb://localhost:27017'
    client = pymongo.MongoClient(conn)

    try:
        # Connect to Database
        db = client.mars_news

        # Insert Document into Collection.
        # insert_one adds an "_id" key to the mapping it receives; passing a
        # shallow copy keeps the caller's dictionary unchanged so the same
        # object can be published again without a duplicate-key error.
        # {**dict} is used because the builtin dict() is shadowed here.
        db.mars.insert_one({**dict})
    finally:
        # Disconnect from Database (close the client, not the URI string).
        client.close()

In [15]:
# Store the combined scrape result in MongoDB.
publishToMongo(full_scrape)

# Function: Validate Records in Mongo Database

In [16]:
def outputFieldsMongo():
    """Print every document in the ``mars`` collection of ``mars_news``.

    Connects to the MongoDB server at ``mongodb://localhost:27017``, prints
    each document returned by an unfiltered ``find()``, and closes the
    connection. Returns ``None``.
    """
    # Create Connection
    conn = 'mongodb://localhost:27017'
    client = pymongo.MongoClient(conn)

    try:
        # Connect to Database
        db = client.mars_news

        # Read All Documents from Collection.
        # The cursor is consumed inside the try block so the client is
        # still open while documents are fetched.
        for record in db.mars.find():
            print(record)
    finally:
        # Disconnect from Database (close the client, not the URI string).
        client.close()

In [17]:
# Print the documents currently stored in the mars collection.
outputFieldsMongo()

{'_id': ObjectId('5ef7ee9ed816f0537a4a2c8c'), 'newsTitle': "How NASA's Mars Helicopter Will Reach the Red Planet's Surface", 'newsLink': 'https://mars.nasa.gov/news/8699/how-nasas-mars-helicopter-will-reach-the-red-planets-surface/', 'newsText': 'The small craft will seek to prove that powered, controlled flight is possible on another planet. But just getting it onto the surface of Mars will take a whole lot of ingenuity.', 'featureImageURL': 'https://www.jpl.nasa.gov/spaceimages/images/wallpaper/PIA17832-1920x1200.jpg', 'tweetWeatherURL': 'https://www.twitter.com/MarsWxReport/status/1276921019371053056', 'tweetWeatherText': 'InSight sol 563 (2020-06-27) low -89.5ºC (-129.1ºF) high -3.9ºC (25.0ºF)\nwinds from the SW at 5.5 m/s (12.3 mph) gusting to 18.5 m/s (41.3 mph)\npressure at 7.60 hPa', 'marsStats': [{'Attribute': 'Equatorial Diameter:', 'Value': '6,792 km'}, {'Attribute': 'Polar Diameter:', 'Value': '6,752 km'}, {'Attribute': 'Mass:', 'Value': '6.39 × 10^23 kg (0.11 Earths)'}, {'