In [1]:
#*******************************************************************************************
 #
 #  File Name:  MarsNewsPy.ipynb
 #
 #  File Description:
 #      This interactive Python notebook, MarsNewsPy.ipynb, uses the Python modules
 #      Splinter and Beautiful Soup to scrape the titles and previews of news articles
 #      from a Mars news website.
 #
 #      The following command in Mac Terminal will suppress the warning/information 
 #      message upon launch of the Google Chrome browser:
 #
 #      xattr -d com.apple.quarantine /usr/local/bin/chromedriver
 #
 #
 #  Date            Description                             Programmer
 #  ----------      ------------------------------------    ------------------
 #  09/13/2023      Initial Development                     N. James George
 #
 #******************************************************************************************/

import PyLogFunctions as log_function
import PyLogSubRoutines as log_subroutine

import json
    
from bs4 import BeautifulSoup as soup
from splinter import Browser

In [2]:
# File name of this notebook.
CONSTANT_LOCAL_FILE_NAME = 'MarsNewsPy.ipynb'

# URL of the Mars news page visited by the browser.
# Alternate URL, currently disabled:
#     'https://static.bc-edx.com/data/web/mars_news/index.html'
CONSTANT_MARS_NEWS_URL = 'https://mars.nasa.gov/news/'

# Path of the JSON file that receives the scraped articles.
CONSTANT_MARS_NEWS_JSON_FILE = './Resources/MarsNewsData.json'


# Pass False to the log, debug, and image mode setters of the project
# logging module (presumably switching each mode off -- confirm against
# PyLogSubRoutines).
log_subroutine.SetLogMode(False)
log_subroutine.SetDebugMode(False)
log_subroutine.SetImageMode(False)


# Announce the start of this program to the logging module.
log_subroutine.BeginProgramExecution('MarsNewsPy')

# **Section 1: Visit the [Mars News Website](https://mars.nasa.gov/news/)**

## **1.1: Splinter Web Driver**

In [3]:
# Create the Splinter browser object, requesting the 'chrome' driver.
chromeBrowserSplinterWebDriver = Browser('chrome')


# Hand the new browser object to the project's debug-logging helper.
log_function.DebugReturnObjectWriteObject(chromeBrowserSplinterWebDriver)

## **1.2: Automated Browsing with Splinter**

In [4]:
# Navigate the automated browser to the Mars news page.
chromeBrowserSplinterWebDriver \
    .visit \
        (CONSTANT_MARS_NEWS_URL)

# The article list may be rendered after the initial page load, so wait
# (up to 5 seconds) for the first 'list_text' element to appear before
# the page's HTML is captured.  The call returns False on timeout rather
# than raising, so a slow page does not abort the notebook at this point.
# The result is assigned to a name so the cell displays no output.
articleListPresentBooleanVariable \
    = chromeBrowserSplinterWebDriver \
        .is_element_present_by_css \
            ('div.list_text',
             wait_time = 5)

# **Section 2: Scrape the [Mars News Website](https://mars.nasa.gov/news/)**

## **2.1: Scrape HTML to String**

In [5]:
# Read the HTML of the page currently loaded in the browser.
webPageHTMLStringVariable = chromeBrowserSplinterWebDriver.html


# Hand the captured HTML to the project's debug-logging helper.
log_function.DebugReturnObjectWriteObject(webPageHTMLStringVariable)

## **2.2: Beautiful Soup Object**

In [6]:
# Parse the captured HTML with Python's built-in 'html.parser' backend.
marsNewsBeautifulSoupObject = soup(webPageHTMLStringVariable, 'html.parser')


# Hand the parsed document to the project's debug-logging helper.
log_function.DebugReturnObjectWriteObject(marsNewsBeautifulSoupObject)

## **2.3: Text Element Retrieval**

In [7]:
# Collect every <div> whose CSS class is 'list_text'; each match is later
# searched for an article title and preview.
textElementsBSResultSetObject = marsNewsBeautifulSoupObject.find_all(
    'div',
    class_='list_text',
)


# Hand the matched elements to the project's debug-logging helper.
log_function.DebugReturnObjectWriteObject(textElementsBSResultSetObject)

# **Section 3: Store the Results**

## **3.1: Text Extraction and Storage in Dictionary List**

In [8]:
# Build a List holding one Dictionary per scraped element.  For each element,
# 'Title' is the text of its 'content_title' div and 'Preview' is the text
# of its 'article_teaser_body' div.
marsNewsDictionaryList = [
    {
        'Title': htmlElement.find('div', class_='content_title').text,
        'Preview': htmlElement.find('div', class_='article_teaser_body').text,
    }
    for htmlElement in textElementsBSResultSetObject
]


# Hand the finished List to the project's debug-logging helper.
log_function.DebugReturnObjectWriteObject(marsNewsDictionaryList)

## **3.2: Display Dictionary List**

In [9]:
# Pass each stored article, numbered from 1, to the project's
# PrintAndLogWriteText routine as a three-line text block.
for articleNumber, newsArticle in enumerate(marsNewsDictionaryList, start=1):
    articleText = (
        f'NEWS ARTICLE #{articleNumber}:\n'
        f"TITLE: {newsArticle['Title']}\n"
        f"PREVIEW: {newsArticle['Preview']}\n"
    )
    log_subroutine.PrintAndLogWriteText(articleText)

NEWS ARTICLE #1:
TITLE: NASA Uses Two Worlds to Test Future Mars Helicopter Designs
PREVIEW: Engineers will go beyond the ends of the Earth to find more performance for future Mars helicopters.

NEWS ARTICLE #2:
TITLE: NASA's Mars Fleet Will Still Conduct Science While Lying Low
PREVIEW: Rovers and orbiters will continue collecting limited data during a two-week communications pause due to the position of Earth, the Sun, and the Red Planet.

NEWS ARTICLE #3:
TITLE: NASA's Curiosity Rover Clocks 4,000 Days on Mars
PREVIEW: The mission team is making sure the robotic scientist is staying strong, despite wear and tear from its 11-year journey.

NEWS ARTICLE #4:
TITLE: NASA Technologies Receive Multiple Nods in TIME Inventions of 2023
PREVIEW: MOXIE, the oxygen-generating experiment aboard NASA’s Perseverance Mars rover, is among the technologies honored by the magazine.

NEWS ARTICLE #5:
TITLE: NASA Is Locating Ice on Mars With This New Map
PREVIEW: The map could help the agency decide wh

# **Section 4: Save the Data**

## **4.1: Export Dictionary List to JSON file**

In [10]:
# Export the whole List of Dictionaries as ONE JSON array.
#
# Writing each Dictionary separately with json.dumps and no separator
# produces '{...}{...}', which is not a valid JSON document and cannot be
# read back with json.load.  Dumping the List in a single call yields a
# well-formed array of objects.
#
# The 'with' statement closes the file on exit, so no explicit close()
# call is needed.  The encoding is set explicitly so the result does not
# depend on the platform default.
with open(CONSTANT_MARS_NEWS_JSON_FILE, 'w', encoding = 'utf-8') as jsonFile:

    json \
        .dump \
            (marsNewsDictionaryList,
             jsonFile,
             indent = 4)

## **4.2: Close Browser**

In [11]:
# Shut down the automated browser session now that scraping is finished.
chromeBrowserSplinterWebDriver.quit()

In [12]:
#log_subroutine \
#    .EndProgramExecution()