In [1]:
#*******************************************************************************************
 #
 #  File Name:  MarsNewsPy.ipynb
 #
 #  File Description:
 #      This interactive Python notebook, MarsNewsPy.ipynb, uses Python, Splinter, 
 #      and Beautiful Soup to scrape titles and previews from news articles about 
 #      Mars from a single website.
 #
 #      The following command in Mac Terminal will suppress the warning/information 
 #      message upon launch of the Google Chrome browser:
 #
 #      xattr -d com.apple.quarantine /usr/local/bin/chromedriver
 #
 #
 #  Date            Description                             Programmer
 #  ----------      ------------------------------------    ------------------
 #  09/13/2023      Initial Development                     N. James George
 #
 #******************************************************************************************/

import PyLogFunctions as log_function
import PyLogSubRoutines as log_subroutine

import json
    
from bs4 import BeautifulSoup as soup
from splinter import Browser

In [2]:
# Name of this notebook file.
CONSTANT_LOCAL_FILE_NAME = 'MarsNewsPy.ipynb'

# Page the browser visits and whose HTML is scraped for article text.
CONSTANT_MARS_NEWS_URL = 'https://static.bc-edx.com/data/web/mars_news/index.html'

# Path of the JSON file that receives the scraped articles.
CONSTANT_MARS_NEWS_JSON_FILE = './Resources/MarsNewsData.json'


# Pass False to each of the three mode setters of the project's logging module
# (presumably disabling log-file, debug, and image output -- confirm in
# PyLogSubRoutines).
log_subroutine.SetLogMode(False)
log_subroutine.SetDebugMode(False)
log_subroutine.SetImageMode(False)

# Announce the start of the run under the program name 'MarsNewsPy'.
log_subroutine.BeginProgramExecution('MarsNewsPy')

# **Section 1: Visit the [Mars News Website](https://static.bc-edx.com/data/web/mars_news/index.html)**

## **Splinter Web Driver**

In [3]:
# Start a Splinter browser session using the 'chrome' driver.
chromeBrowserSplinterWebDriver = Browser('chrome')

# Hand the driver to the project's debug helper (behavior defined in PyLogFunctions).
log_function.DebugReturnObjectWriteObject(chromeBrowserSplinterWebDriver)

## **Automated Browsing with Splinter for the [Mars News Website](https://static.bc-edx.com/data/web/mars_news/index.html)**

In [4]:
# Navigate the browser session to the Mars news page.
chromeBrowserSplinterWebDriver.visit(CONSTANT_MARS_NEWS_URL)

# **Section 2: Scrape the [Mars News Website](https://static.bc-edx.com/data/web/mars_news/index.html)**

## **Scrape HTML to String**

In [5]:
# Capture the HTML of the page currently loaded in the browser.
webPageHTMLStringVariable = chromeBrowserSplinterWebDriver.html

# Hand the HTML to the project's debug helper (behavior defined in PyLogFunctions).
log_function.DebugReturnObjectWriteObject(webPageHTMLStringVariable)

## **Beautiful Soup Object**

In [6]:
# Parse the captured HTML with Python's built-in 'html.parser' backend.
marsNewsBeautifulSoupObject = soup(webPageHTMLStringVariable, features='html.parser')

# Hand the parsed document to the project's debug helper (behavior defined in PyLogFunctions).
log_function.DebugReturnObjectWriteObject(marsNewsBeautifulSoupObject)

## **Text Element Retrieval**

In [7]:
# Collect every <div class="list_text"> element; the next cell reads a title
# and a preview out of each one.
textElementsBSResultSetObject = marsNewsBeautifulSoupObject.find_all('div', class_='list_text')

# Hand the result set to the project's debug helper (behavior defined in PyLogFunctions).
log_function.DebugReturnObjectWriteObject(textElementsBSResultSetObject)

# **Section 2 (continued): Store the Results**

## **Text Extraction and Storage in Dictionary List**

In [8]:
# This line of code creates an empty list to store the dictionaries holding title 
# and preview text.
# Accumulates one {'Title': ..., 'Preview': ...} dictionary per article.
marsNewsDictionaryList = []

for htmlElement in textElementsBSResultSetObject:

    # The title and the teaser each live in their own nested <div>; take the
    # text content of each.
    titleStringVariable = htmlElement.find('div', class_='content_title').text
    previewStringVariable = htmlElement.find('div', class_='article_teaser_body').text

    # Pair the two strings up and add the record to the running list.
    marsNewsDictionary = {
        'Title': titleStringVariable,
        'Preview': previewStringVariable,
    }
    marsNewsDictionaryList.append(marsNewsDictionary)

# Hand the finished list to the project's debug helper (behavior defined in PyLogFunctions).
log_function.DebugReturnObjectWriteObject(marsNewsDictionaryList)

## **Display Dictionary List**

In [9]:
# Emit each article's number (counting from 1), title, and preview through the
# project's PrintAndLogWriteText helper.
for articleNumber, marsNewsDictionary in enumerate(marsNewsDictionaryList, start=1):
    articleText = (
        f'NEWS ARTICLE #{articleNumber}:\n'
        f"TITLE: {marsNewsDictionary['Title']}\n"
        f"PREVIEW: {marsNewsDictionary['Preview']}\n"
    )
    log_subroutine.PrintAndLogWriteText(articleText)

NEWS ARTICLE #1:
TITLE: NASA's MAVEN Observes Martian Light Show Caused by Major Solar Storm
PREVIEW: For the first time in its eight years orbiting Mars, NASA’s MAVEN mission witnessed two different types of ultraviolet aurorae simultaneously, the result of solar storms that began on Aug. 27.

NEWS ARTICLE #2:
TITLE: NASA Prepares to Say 'Farewell' to InSight Spacecraft
PREVIEW: A closer look at what goes into wrapping up the mission as the spacecraft’s power supply continues to dwindle.

NEWS ARTICLE #3:
TITLE: NASA and ESA Agree on Next Steps to Return Mars Samples to Earth
PREVIEW: The agency’s Perseverance rover will establish the first sample depot on Mars.

NEWS ARTICLE #4:
TITLE: NASA's InSight Lander Detects Stunning Meteoroid Impact on Mars
PREVIEW: The agency’s lander felt the ground shake during the impact while cameras aboard the Mars Reconnaissance Orbiter spotted the yawning new crater from space.

NEWS ARTICLE #5:
TITLE: NASA To Host Briefing on InSight, Mars Reconnaiss

# **Section 3: Save the Data**

## **Export Dictionary List to JSON file**

In [10]:
# Write the whole list as ONE JSON document: an array of article objects.
# Dumping each dictionary separately and writing the strings back-to-back
# produces '{...}{...}', which json.load() rejects as "Extra data", so the
# list is serialized in a single call instead.
# The `with` block closes the file on exit, so no explicit close() is needed.
with open(CONSTANT_MARS_NEWS_JSON_FILE, 'w', encoding='utf-8') as jsonFile:
    json.dump(marsNewsDictionaryList, jsonFile, indent=4)

## **Close Browser**

In [11]:
# End the browser session started earlier in the notebook.
chromeBrowserSplinterWebDriver.quit()

In [12]:
#log_subroutine \
#    .EndProgramExecution()