# Import Dependencies

In [12]:
from bs4 import BeautifulSoup
import pandas as pd
from splinter import Browser
from datetime import datetime
import json
import time
import pymongo

# MARS NEWS

# Web Scrape: Using Splinter 

In [2]:
#browser.quit()

In [3]:
def _fetch_html(browser, url, wait_seconds):
    """Visit ``url`` in ``browser``, pause ``wait_seconds``, and return the page HTML."""
    browser.visit(url)
    # Fixed pause before reading the page; presumably this gives dynamically
    # loaded content time to render -- tune via wait_seconds if pages load slowly.
    time.sleep(wait_seconds)
    return browser.html


def scrapeMarsInfo(driver_path=r"/home/bdr/Desktop/chromedriver", headless=False, wait_seconds=4):
    """Scrape Mars news, a featured image, a weather tweet and a facts table.

    Parameters
    ----------
    driver_path : str
        Path to the chromedriver executable handed to Splinter. Defaults to
        the location originally hard-coded in this notebook.
    headless : bool
        Whether Chrome is started without a visible window.
    wait_seconds : int or float
        Seconds to sleep after each page visit before reading its HTML.

    Returns
    -------
    dict
        Keys: ``newsTitle``, ``newsLink``, ``newsText``, ``featureImageURL``,
        ``tweetWeatherURL``, ``tweetWeatherText``, ``marsStats`` (list of
        ``{"Attribute", "Value"}`` records) and ``dateScraped``
        (``datetime.now()`` at the time the dictionary is built).
        News, image and tweet fields are empty strings when the matching
        element is not found on the page.

    Notes
    -----
    The browser is always closed, even when one of the scrape steps raises.
    """
    # Local import: read_html is given a file-like object instead of a literal
    # HTML string, which newer pandas versions deprecate.
    from io import StringIO

    browser = Browser('chrome', executable_path=driver_path, headless=headless)
    try:
        #*** URL 1: NASA Mars News ****
        soup = BeautifulSoup(_fetch_html(browser, 'https://mars.nasa.gov/news/', wait_seconds), "lxml")
        titles = soup.find_all(class_="content_title")
        texts = soup.find_all(class_="article_teaser_body")

        # Only titles that wrap a link are usable; take the first such title.
        firstTitle = next((title for title in titles if title.a), None)
        if firstTitle is None:
            # No linked title found: fall back to empty strings, the same
            # default the tweet fields below use.
            newsTitle = ""
            newsLink = ""
        else:
            newsTitle = firstTitle.a.text.strip()
            newsLink = "https://mars.nasa.gov" + firstTitle.a['href']
        newsText = texts[0].text.strip() if texts else ""

        #*** URL 2: JPL Mars Space Images ****
        soup = BeautifulSoup(
            _fetch_html(browser, 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars', wait_seconds),
            "lxml",
        )
        images = soup.find_all(class_="carousel_item")
        if images:
            # The image path is the single-quoted value inside the element's
            # inline style attribute (second space-separated token).
            imageURL = 'https://www.jpl.nasa.gov' + images[0]["style"].split(" ")[1].split("'")[1]
        else:
            imageURL = ""

        #*** URL 3: Mars Weather ****
        soup = BeautifulSoup(
            _fetch_html(browser, 'https://twitter.com/marswxreport?lang=en', wait_seconds),
            "lxml",
        )

        # First <span> whose text mentions "InSight sol" is taken as the weather report.
        tweetText = next(
            (span.text for span in soup.find_all("span") if span.text and "InSight sol" in span.text),
            "",
        )

        # First link whose href contains "status" is taken as the tweet link.
        tweetLink = ""
        for link in soup.find_all("a"):
            # .get() avoids a KeyError on anchors that have no href attribute.
            href = link.get("href")
            if href and "status" in href:
                tweetLink = "https://www.twitter.com" + href
                break

        #*** URL 4: Mars Facts ****
        html = _fetch_html(browser, 'https://space-facts.com/mars/', wait_seconds)
        stats = pd.read_html(StringIO(html))[0]
        stats.columns = ["Attribute", "Value"]

        # Round-trip through JSON to get a plain list of record dicts.
        data_stats = json.loads(stats.to_json(orient="records"))

        # GET NEW INFORMATION HERE
    finally:
        # Close Browser, even if one of the scrape steps above failed.
        browser.quit()

    # Create Dictionary with Target Information
    rtnDict = {
        "newsTitle": newsTitle,
        "newsLink": newsLink,
        "newsText": newsText,
        "featureImageURL": imageURL,
        "tweetWeatherURL": tweetLink,
        "tweetWeatherText": tweetText,
        "marsStats": data_stats,
        "dateScraped": datetime.now()
    }

    return rtnDict
    

In [4]:
# Run the scraper once and keep the resulting dictionary for the cells below.
marsNews = scrapeMarsInfo()

In [5]:
# Display the scraped dictionary to inspect its fields.
marsNews

{'newsTitle': "How NASA's Mars Helicopter Will Reach the Red Planet's Surface",
 'newsLink': 'https://mars.nasa.gov/news/8699/how-nasas-mars-helicopter-will-reach-the-red-planets-surface/',
 'newsText': 'The small craft will seek to prove that powered, controlled flight is possible on another planet. But just getting it onto the surface of Mars will take a whole lot of ingenuity.',
 'featureImageURL': 'https://www.jpl.nasa.gov/spaceimages/images/wallpaper/PIA17832-1920x1200.jpg',
 'tweetWeatherURL': 'https://www.twitter.com/MarsWxReport/status/1276921019371053056',
 'tweetWeatherText': 'InSight sol 563 (2020-06-27) low -89.5ºC (-129.1ºF) high -3.9ºC (25.0ºF)\nwinds from the SW at 5.5 m/s (12.3 mph) gusting to 18.5 m/s (41.3 mph)\npressure at 7.60 hPa',
 'marsStats': [{'Attribute': 'Equatorial Diameter:', 'Value': '6,792 km'},
  {'Attribute': 'Polar Diameter:', 'Value': '6,752 km'},
  {'Attribute': 'Mass:', 'Value': '6.39 × 10^23 kg (0.11 Earths)'},
  {'Attribute': 'Moons:', 'Value': '2

# Function 1: Web Scrape Mars News Portal

# Publish JSON to Mongo Database

### Create connection variable

In [13]:
# Connection string for the MongoDB server on this machine (port 27017).
conn = 'mongodb://localhost:27017'
# Build the client from the connection string, then select the mars_news
# database by name.
client = pymongo.MongoClient(conn)
db = client["mars_news"]

### Connect to a database. One will be created if it is not already available.

### Insert Document into Database

In [17]:
# Check the type of the scraped result before it is passed to insert_one.
type(marsNews)

dict

In [19]:
# insert_one() adds an "_id" key to the dict it is given. Inserting a shallow
# copy leaves marsNews unchanged, so this cell can be re-run without raising
# a DuplicateKeyError on the second insert.
db.mars.insert_one(dict(marsNews))

<pymongo.results.InsertOneResult at 0x7fb9a07190f0>