In [1]:
#*******************************************************************************************
 #
 #  File Name:  MarsNewsPy.ipynb
 #
 #  File Description:
 #      This interactive Python notebook, MarsNewsPy.ipynb, uses the Python modules
 #      Splinter and Beautiful Soup to scrape titles and previews from news articles
 #      on a Mars news website.
 #
 #      The following command in Mac Terminal will suppress the warning/information 
 #      message upon launch of the Google Chrome browser:
 #
 #      xattr -d com.apple.quarantine /usr/local/bin/chromedriver
 #
 #
 #  Date            Description                             Programmer
 #  ----------      ------------------------------------    ------------------
 #  09/13/2023      Initial Development                     N. James George
 #
 #******************************************************************************************/

import PyLogFunctions as log_function
import PyLogSubRoutines as log_subroutine

import json
    
from bs4 import BeautifulSoup as soup
from splinter import Browser

In [2]:
# Name of this notebook file.
CONSTANT_LOCAL_FILE_NAME = 'MarsNewsPy.ipynb'

# Page to scrape. An alternative source is kept below, disabled:
# CONSTANT_MARS_NEWS_URL = 'https://static.bc-edx.com/data/web/mars_news/index.html'
CONSTANT_MARS_NEWS_URL = 'https://mars.nasa.gov/news/'

# Destination path for the exported JSON data.
CONSTANT_MARS_NEWS_JSON_FILE = './Resources/MarsNewsData.json'


# Logging helper configuration: every mode flag is passed False
# (presumably disabling that mode -- confirm against PyLogSubRoutines).
log_subroutine.SetLogMode(False)
log_subroutine.SetDebugMode(False)
log_subroutine.SetImageMode(False)

# Hands the program name to the logging helper's begin-of-execution routine.
log_subroutine.BeginProgramExecution('MarsNewsPy')

# **Section 1: Visit the [Mars News Website](https://static.bc-edx.com/data/web/mars_news/index.html)**

## **Splinter Web Driver**

In [3]:
# Start a Splinter browser session using the 'chrome' driver.
chromeBrowserSplinterWebDriver = Browser('chrome')

# Pass the driver object to the debug helper
# (presumably written out only in debug mode -- confirm in PyLogFunctions).
log_function.DebugReturnObjectWriteObject(chromeBrowserSplinterWebDriver)

## **Automated Browsing with Splinter**

In [4]:
# Point the automated browser at the configured Mars news page.
chromeBrowserSplinterWebDriver.visit(CONSTANT_MARS_NEWS_URL)

# **Section 2: Scrape the [Mars News Website](https://static.bc-edx.com/data/web/mars_news/index.html)**

## **Scrape HTML to String**

In [5]:
# Capture the HTML of the page currently loaded in the browser.
webPageHTMLStringVariable = chromeBrowserSplinterWebDriver.html

# Pass the captured HTML to the debug helper.
log_function.DebugReturnObjectWriteObject(webPageHTMLStringVariable)

## **Beautiful Soup Object**

In [6]:
# Parse the captured HTML with Python's built-in 'html.parser' backend.
marsNewsBeautifulSoupObject = soup(webPageHTMLStringVariable, 'html.parser')

# Pass the parsed document to the debug helper.
log_function.DebugReturnObjectWriteObject(marsNewsBeautifulSoupObject)

## **Text Element Retrieval**

In [7]:
# Collect every <div class="list_text"> element in the parsed page; the
# extraction step reads one title and one preview from each of them.
textElementsBSResultSetObject = marsNewsBeautifulSoupObject.find_all('div', class_ = 'list_text')

# Pass the result set to the debug helper.
log_function.DebugReturnObjectWriteObject(textElementsBSResultSetObject)

# **Section 2: Store the Results**

## **Text Extraction and Storage in Dictionary List**

In [8]:
# Accumulates one Dictionary per article, each holding its title and preview
# text. Re-created on every run so re-executing this cell never duplicates rows.
marsNewsDictionaryList = []


# Walk every 'list_text' div collected from the page.
for htmlElement in textElementsBSResultSetObject:

    titleElement = htmlElement.find('div', class_ = 'content_title')
    previewElement = htmlElement.find('div', class_ = 'article_teaser_body')

    # find() returns None when no matching tag exists; skip an incomplete
    # entry instead of letting `.text` raise AttributeError and abort the
    # whole scrape.
    if titleElement is None or previewElement is None:
        continue

    # strip() drops the leading/trailing spaces and newlines that the page
    # markup leaves around the text, so stored values are clean.
    titleStringVariable = titleElement.text.strip()
    previewStringVariable = previewElement.text.strip()

    # One Dictionary per article, appended to the List of Dictionaries.
    marsNewsDictionary \
        = {'Title': titleStringVariable,
           'Preview': previewStringVariable}

    marsNewsDictionaryList.append(marsNewsDictionary)


# Pass the finished List to the debug helper.
log_function \
    .DebugReturnObjectWriteObject \
        (marsNewsDictionaryList)

## **Display Dictionary List**

In [9]:
# Report every scraped article, numbered from 1, through the logging helper.
for articleNumber, marsNewsDictionary in enumerate(marsNewsDictionaryList, start = 1):

    # Adjacent f-string literals are joined into a single message.
    articleSummaryString = (
        f'NEWS ARTICLE #{articleNumber}:\n'
        f"TITLE: {marsNewsDictionary['Title']}\n"
        f"PREVIEW: {marsNewsDictionary['Preview']}\n"
    )

    log_subroutine.PrintAndLogWriteText(articleSummaryString)

NEWS ARTICLE #1:
TITLE: NASA's Perseverance Captures Dust-Filled Martian Whirlwind 
PREVIEW: The six-wheeled geologist spotted the twister as part of an atmospheric exploration of Jezero Crater.

NEWS ARTICLE #2:
TITLE: Historic Wind Tunnel Facility Testing NASA's Mars Ascent Vehicle Rocket
PREVIEW: The same facility that provided valuable testing for NASA missions to low-Earth orbit and the Moon is now helping the agency prepare to launch the first rocket from Mars.


NEWS ARTICLE #3:
TITLE: NASA Releases Independent Review's Mars Sample Return Report
PREVIEW: The agency established the board in May 2023 to evaluate the technical, cost, and schedule plans prior to confirmation of the mission’s design.

NEWS ARTICLE #4:
TITLE: Autonomous Systems Help NASA's Perseverance Do More Science on Mars
PREVIEW: A computer pilot helps NASA’s six-wheeled geologist as it searches for rock samples that could be brought to Earth for deeper investigation.

NEWS ARTICLE #5:
TITLE: NASA's Curiosity Re

# **Section 3: Save the Data**

## **Export Dictionary List to JSON file**

In [10]:
# Serialize the whole List in one call so the file holds a single valid JSON
# array. Writing each Dictionary's json.dumps() output back-to-back would
# produce concatenated objects ('{...}{...}') that json.load() cannot parse.
# The with-statement closes the file on exit, so no explicit close() is needed.
with open(CONSTANT_MARS_NEWS_JSON_FILE, 'w', encoding = 'utf-8') as jsonFile:

    json.dump(marsNewsDictionaryList, jsonFile, indent = 4)

## **Close Browser**

In [11]:
# Shut down the Splinter browser session now that scraping is finished.
chromeBrowserSplinterWebDriver.quit()

In [12]:
#log_subroutine \
#    .EndProgramExecution()