In [1]:
#*******************************************************************************************
 #
 #  File Name:  PyAviationAccidentsExtract.ipynb
 #
 #  File Description:
 #      This interactive Python notebook, PyAviationAccidentsExtract.ipynb, uses
 #      the Python modules Splinter and Beautiful Soup to web scrape aviation accident
 #      information from the Aviation Safety Network website.
 #
 #
 #  Date            Description                             Programmer
 #  ----------      ------------------------------------    ------------------
 #  10/05/2023      Initial Development                     N. James George
 #
 #******************************************************************************************/

import PyFunctions as function
import PyLogFunctions as log_function
import PyLogSubRoutines as log_subroutine
import PySubRoutines as subroutine

import PyAviationAccidentsConstants as local_constant
import PyAviationAccidentsFunctions as local_function

import pandas as pd

import os
import re

from bs4 import BeautifulSoup
from splinter import Browser

from datetime import datetime as dt

In [2]:
# File name of this notebook.
CONSTANT_LOCAL_FILE_NAME = 'PyAviationAccidentsExtract.ipynb'

In [3]:
# Pass False to each of the three mode setters of PyLogSubRoutines
# (log, debug and image) before the program starts.
log_subroutine.SetLogMode(False)
log_subroutine.SetDebugMode(False)
log_subroutine.SetImageMode(False)

# Announce the start of execution under this program's name.
log_subroutine.BeginProgramExecution('PyAviationAccidentsExtract')

## **1.1: Visit and Scrape the [Aviation Safety Network](https://aviation-safety.net)**

In [4]:
# Print and log a bold banner carrying the wall-clock time at which the
# scraping process starts.
nowDatetimeObject = dt.now()

log_subroutine.PrintAndLogWriteText(
    ''.join(
        ['\033[1m',
         'WEBPAGE SCRAPING PROCESS BEGINS... ',
         nowDatetimeObject.strftime('%H:%M:%S'),
         '\n\n',
         '\033[0m']))

# Template row for one accident: every column starts out as None and is
# filled in (then copied) once per scraped entry.
aviationAccidentDictionary = dict.fromkeys(
    ['accident_id',
     'status',
     'acc_date',
     'acc_time',
     'acc_datetime',
     'type',
     'operator',
     'registration',
     'msn',
     'first_flight',
     'crew_fatalities',
     'crew_occupants',
     'passenger_fatalities',
     'passenger_occupants',
     'total_fatalities',
     'total_occupants',
     'aircraft_damage',
     'phase',
     'nature',
     'depart_airport',
     'destination_airport',
     'flight_number',
     'location',
     'classifications'])

# Template row for one accident narrative document.
aviationAccidentDocumentDictionary = dict.fromkeys(
    ['accident_id',
     'narrative'])

# Accumulators for the completed accident rows and narrative rows.
aviationAccidentDictionaryList = list()
aviationAccidentNarrativesDictionaryList = list()

# Splinter web driver for the Google Chrome browser.
chromeBrowserSplinterWebDriver = Browser('chrome')

log_function.DebugReturnObjectWriteObject(chromeBrowserSplinterWebDriver)

# Numeric part of the next accident ID; the first ID issued is 'ACC100'.
accidentIDInteger = 100

# Years processed since the last intermediate write, and the number of
# intermediate writes made so far.
yearCounterInteger = 0
decadeCounterInteger = 0


# Substrings that mark a scraped value as missing or unknown.  The raw
# values are wrapped in str(), so a None lookup result becomes the text
# 'None' and is caught by the same markers.
UNKNOWN_MARKERS_STRING_TUPLE = ('unknown', 'Unknown', 'none', 'None')

# Stricter set used for fields where '-' and '?' are also treated as
# placeholders for a missing value.
UNKNOWN_OR_PLACEHOLDER_MARKERS_STRING_TUPLE \
    = UNKNOWN_MARKERS_STRING_TUPLE + ('-', '?')


def ReturnIsKnownValue(textString: str, markersStringTuple: tuple) -> bool:
    """Return True when textString contains none of the marker substrings."""
    return not any(markerString in textString
                   for markerString in markersStringTuple)


def ReturnFieldTextString(textElementsStringList: list,
                          labelString: str,
                          *flagArguments) -> str:
    """Return the unstripped str() of a labelled field from the details page.

    The lookup is delegated to local_function.ReturnHtmlElementTextFromList;
    any extra positional arguments are passed through to it unchanged.
    """
    return str(
        local_function.ReturnHtmlElementTextFromList(
            textElementsStringList, labelString, *flagArguments))


def PopulateAccidentDictionary(accidentDictionary: dict,
                               textElementsStringList: list,
                               detailsBeautifulSoupObject,
                               accidentIDString: str,
                               accidentDateObject) -> bool:
    """Fill accidentDictionary in place from one accident details page.

    Parameters:
        accidentDictionary: row Dictionary to fill (mutated in place).
        textElementsStringList: texts of the details table's child elements.
        detailsBeautifulSoupObject: parsed details page, searched for the
            classification links.
        accidentIDString: unique accident ID for this entry.
        accidentDateObject: accident date taken from the listing page.

    Returns:
        True when the entry is complete and should be kept.  False when the
        aircraft type, operator, departure airport, destination airport or
        location is unknown, or when the crew, passenger and total occupant
        counts are all zero; the caller then skips the entry.
    """
    accidentDictionary['accident_id'] = accidentIDString

    accidentDictionary['status'] \
        = ReturnFieldTextString(
            textElementsStringList, 'Status:', True).strip()

    accidentDictionary['acc_date'] = accidentDateObject

    # An unknown time falls back to midnight so that a datetime can still
    # be built for the entry.
    timeString \
        = ReturnFieldTextString(textElementsStringList, 'Time:', False)

    if ReturnIsKnownValue(timeString, UNKNOWN_MARKERS_STRING_TUPLE):
        accidentDictionary['acc_time'] = timeString.strip()
    else:
        accidentDictionary['acc_time'] = '00:00'

    accidentDictionary['acc_datetime'] \
        = local_function.ReturnDateTimeFromString(
            accidentDateObject, accidentDictionary['acc_time'])

    # Aircraft type and operator are mandatory: an unknown value rejects
    # the entry.
    for keyString, labelString in (('type', 'Type:'),
                                   ('operator', 'Operator:')):

        valueString \
            = ReturnFieldTextString(textElementsStringList, labelString, True)

        if not ReturnIsKnownValue(valueString, UNKNOWN_MARKERS_STRING_TUPLE):
            return False

        accidentDictionary[keyString] = valueString.strip()

    # Registration and manufacturer's serial number are stored as found.
    for keyString, labelString in (('registration', 'Registration:'),
                                   ('msn', 'MSN:')):

        accidentDictionary[keyString] \
            = ReturnFieldTextString(
                textElementsStringList, labelString).strip()

    # Only the first four characters of the first-flight value are kept;
    # slicing leaves shorter values untouched.
    accidentDictionary['first_flight'] \
        = ReturnFieldTextString(
            textElementsStringList, 'First flight:').strip()[0:4]

    # Crew, passenger and total counts: element 0 of the parsed result is
    # the fatality count and element 1 is the occupant count.
    for prefixString, labelString in (('crew', 'Crew:'),
                                      ('passenger', 'Passengers:'),
                                      ('total', 'Total:')):

        resultsIntegerList \
            = local_function.ReturnFatalitiesAndOccupantsFromString(
                ReturnFieldTextString(textElementsStringList, labelString))

        accidentDictionary[prefixString + '_fatalities'] \
            = resultsIntegerList[0]

        accidentDictionary[prefixString + '_occupants'] \
            = resultsIntegerList[1]

    # An entry with no recorded occupants at all is rejected.
    if accidentDictionary['crew_occupants'] == 0 \
            and accidentDictionary['passenger_occupants'] == 0 \
            and accidentDictionary['total_occupants'] == 0:
        return False

    # Optional descriptive fields: an unknown value is replaced by a
    # default label instead of rejecting the entry.
    for keyString, labelString, defaultString in (
            ('aircraft_damage', 'Aircraft damage:', 'Unknown'),
            ('phase', 'Phase:', 'Unknown (UNK)'),
            ('nature', 'Nature:', 'Unknown')):

        valueString \
            = ReturnFieldTextString(textElementsStringList, labelString)

        if ReturnIsKnownValue(valueString,
                              UNKNOWN_OR_PLACEHOLDER_MARKERS_STRING_TUPLE):
            accidentDictionary[keyString] = valueString.strip()
        else:
            accidentDictionary[keyString] = defaultString

    # Departure and destination airports are mandatory.
    for keyString, labelString in (
            ('depart_airport', 'Departure airport:'),
            ('destination_airport', 'Destination airport:')):

        valueString \
            = ReturnFieldTextString(textElementsStringList, labelString)

        if not ReturnIsKnownValue(valueString,
                                  UNKNOWN_OR_PLACEHOLDER_MARKERS_STRING_TUPLE):
            return False

        accidentDictionary[keyString] = valueString.strip()

    accidentDictionary['flight_number'] \
        = ReturnFieldTextString(
            textElementsStringList, 'Flightnumber:').strip()

    # The location is passed to the formatter as returned by the lookup
    # (not converted to str first); an unknown location rejects the entry.
    locationString \
        = str(
            local_function.ReturnFormattedAccidentLocationString(
                local_function.ReturnHtmlElementTextFromList(
                    textElementsStringList, 'Location:')))

    if not ReturnIsKnownValue(locationString, UNKNOWN_MARKERS_STRING_TUPLE):
        return False

    accidentDictionary['location'] = locationString.strip()

    # Classifications come from the links that point at database events.
    # No classification is stored as the text 'None', exactly one as a
    # stripped string, and several as the List itself.
    classificationsHtmlList \
        = detailsBeautifulSoupObject.find_all(
            'a', href = re.compile('/database/event/'))

    classificationsList \
        = local_function.ReturnClassificationsListFromHTML(
            classificationsHtmlList)

    if len(classificationsList) == 0:
        accidentDictionary['classifications'] = 'None'
    elif len(classificationsList) == 1:
        accidentDictionary['classifications'] = classificationsList[0].strip()
    else:
        accidentDictionary['classifications'] = classificationsList

    return True


# This repetition loop runs from the first year to the last year.
for yearIndexInteger in range(local_constant.BEGIN_YEAR,
                              local_constant.END_YEAR + 1):

    pageIndexInteger = 1

    yearCounterInteger += 1

    # This repetition loop runs through all the pages of entries
    # for each year and has an upper limit of 100 pages.
    while pageIndexInteger <= 100:

        # URL of the current listing page: <main><primary path><year>/<page>.
        primaryDatabaseURLString = (
            local_constant.AVIATION_SAFETY_NET_MAIN_URL
            + local_constant.AVIATION_SAFETY_NET_DATABASE_PRIMARY_URL
            + str(yearIndexInteger)
            + '/'
            + str(pageIndexInteger))

        log_subroutine.PrintAndLogWriteText(
            '\033[1m'
            + f'{primaryDatabaseURLString}\n'
            + '\033[0m')

        # Visit the listing page with Splinter and parse its HTML.
        chromeBrowserSplinterWebDriver.visit(primaryDatabaseURLString)

        primaryWebPageHTMLStringVariable = chromeBrowserSplinterWebDriver.html

        log_function.DebugReturnObjectWriteObject(
            primaryWebPageHTMLStringVariable)

        primaryAviationSafetyBeautifulSoupObject \
            = BeautifulSoup(primaryWebPageHTMLStringVariable, 'html.parser')

        # The accident entries live in the one table with class, hp.
        primaryAviationAccidentsTableBSElementObject \
            = primaryAviationSafetyBeautifulSoupObject.find(
                'table', class_ = 'hp')

        # A page without that table ends the paging for this year.
        if primaryAviationAccidentsTableBSElementObject is None:
            break

        tableDataBSResultSetObject \
            = primaryAviationAccidentsTableBSElementObject.find_all(
                'tr', class_ = 'list')

        # This repetition loop moves through all the aviation accident
        # entries in the table.
        for rowBSElement in tableDataBSResultSetObject:

            rowBSResultSetObject = rowBSElement.find_all('td')

            # The first cell holds the accident date, e.g. '05 Oct 2023'.
            accidentDateObject \
                = dt.strptime(rowBSResultSetObject[0].text, '%d %b %Y').date()

            # Skip entries whose date falls outside the requested years.
            if not (int(local_constant.BEGIN_YEAR)
                    <= accidentDateObject.year
                    <= int(local_constant.END_YEAR)):
                continue

            # The last link in the row supplies the details-page suffix.
            # The row is searched directly instead of being re-parsed, and
            # a row without any link is skipped so that the previous
            # row's URL is never reused.
            anchorBSResultSetObject = rowBSElement.find_all('a', href = True)

            if len(anchorBSResultSetObject) == 0:
                continue

            secondaryDatabaseURLSuffixString \
                = anchorBSResultSetObject[-1]['href']

            secondaryDatabaseURLString \
                = local_constant.AVIATION_SAFETY_NET_MAIN_URL \
                  + secondaryDatabaseURLSuffixString

            # Visit the details page with Splinter and parse its HTML.
            chromeBrowserSplinterWebDriver.visit(secondaryDatabaseURLString)

            secondaryWebPageHTMLStringVariable \
                = chromeBrowserSplinterWebDriver.html

            secondaryAviationSafetyBeautifulSoupObject \
                = BeautifulSoup(secondaryWebPageHTMLStringVariable,
                                'html.parser')

            # The details are read from the first tbody element; a page
            # without one cannot be parsed, so the entry is skipped.
            secondaryAviationAccidentsTableBSElementObject \
                = secondaryAviationSafetyBeautifulSoupObject.find('tbody')

            if secondaryAviationAccidentsTableBSElementObject is None:
                continue

            # Texts of the tbody's child elements, without the bare
            # newline separators.
            textElementsStringList \
                = [element.text
                   for element in secondaryAviationAccidentsTableBSElementObject
                   if element.text != '\n']

            accidentIDString = 'ACC' + str(accidentIDInteger)

            # Incomplete entries are skipped without consuming an ID.
            if not PopulateAccidentDictionary(
                    aviationAccidentDictionary,
                    textElementsStringList,
                    secondaryAviationSafetyBeautifulSoupObject,
                    accidentIDString,
                    accidentDateObject):
                continue

            # A copy is appended because the template Dictionary is reused
            # for the next entry.
            aviationAccidentDictionaryList.append(
                aviationAccidentDictionary.copy())

            # The narrative is the text of the span marked lang="en-US".
            # A page without that span gets an empty narrative so that the
            # two Lists stay aligned by accident ID.
            narrativeBSElementObject \
                = secondaryAviationSafetyBeautifulSoupObject.find(
                    'span', lang = 'en-US')

            aviationAccidentDocumentDictionary['accident_id'] \
                = accidentIDString

            if narrativeBSElementObject is None:
                aviationAccidentDocumentDictionary['narrative'] = ''
            else:
                aviationAccidentDocumentDictionary['narrative'] \
                    = str(narrativeBSElementObject.text)

            aviationAccidentNarrativesDictionaryList.append(
                aviationAccidentDocumentDictionary.copy())

            # This line of code increments the unique accident identifier
            # for the next entry.
            accidentIDInteger += 1

        # This line of code increments the webpage number for a single year.
        pageIndexInteger += 1

    # Every 10 years, the script writes the Lists to intermediate files
    # (numbered 1, 2, ...) and resets the data structures.
    if yearCounterInteger == 10:

        decadeCounterInteger += 1

        aviationAccidentsDataFrame \
            = pd.DataFrame.from_dict(aviationAccidentDictionaryList)

        aviationAccidentsNarrativesDataFrame \
            = pd.DataFrame.from_dict(aviationAccidentNarrativesDictionaryList)

        aviationAccidentsDataFrame.to_csv(
            local_constant.AVIATION_ACCIDENTS_DATA_ONE_CSV_FILE
            + str(decadeCounterInteger))

        aviationAccidentsNarrativesDataFrame.to_csv(
            local_constant.AVIATION_ACCIDENTS_NARRATIVE_ONE_CSV_FILE
            + str(decadeCounterInteger))

        aviationAccidentDictionaryList = []

        aviationAccidentNarrativesDictionaryList = []

        aviationAccidentsDataFrame = None

        aviationAccidentsNarrativesDataFrame = None

        yearCounterInteger = 0

        
# Print and log a bold banner carrying the wall-clock time at which the
# scraping process finished.
nowDatetimeObject = dt.now()

log_subroutine.PrintAndLogWriteText(
    ''.join(
        ['\033[1m',
         'WEBPAGE SCRAPING PROCESS COMPLETE... ',
         nowDatetimeObject.strftime('%H:%M:%S'),
         '\n\n',
         '\033[0m']))

[1mWEBPAGE SCRAPING PROCESS BEGINS... 14:49:19

[0m
[1mhttps://aviation-safety.net/database/year/1970/1
[0m
[1mhttps://aviation-safety.net/database/year/1970/2
[0m
[1mhttps://aviation-safety.net/database/year/1970/3
[0m
[1mhttps://aviation-safety.net/database/year/1970/4
[0m
[1mhttps://aviation-safety.net/database/year/1970/5
[0m
[1mhttps://aviation-safety.net/database/year/1971/1
[0m
[1mhttps://aviation-safety.net/database/year/1971/2
[0m
[1mhttps://aviation-safety.net/database/year/1971/3
[0m
[1mhttps://aviation-safety.net/database/year/1971/4
[0m
[1mhttps://aviation-safety.net/database/year/1972/1
[0m
[1mhttps://aviation-safety.net/database/year/1972/2
[0m
[1mhttps://aviation-safety.net/database/year/1972/3
[0m
[1mhttps://aviation-safety.net/database/year/1972/4
[0m
[1mhttps://aviation-safety.net/database/year/1972/5
[0m
[1mhttps://aviation-safety.net/database/year/1973/1
[0m
[1mhttps://aviation-safety.net/database/year/1973/2
[0m
[1mhttps://aviati

[1mhttps://aviation-safety.net/database/year/2004/4
[0m
[1mhttps://aviation-safety.net/database/year/2005/1
[0m
[1mhttps://aviation-safety.net/database/year/2005/2
[0m
[1mhttps://aviation-safety.net/database/year/2005/3
[0m
[1mhttps://aviation-safety.net/database/year/2005/4
[0m
[1mhttps://aviation-safety.net/database/year/2006/1
[0m
[1mhttps://aviation-safety.net/database/year/2006/2
[0m
[1mhttps://aviation-safety.net/database/year/2006/3
[0m
[1mhttps://aviation-safety.net/database/year/2006/4
[0m
[1mhttps://aviation-safety.net/database/year/2007/1
[0m
[1mhttps://aviation-safety.net/database/year/2007/2
[0m
[1mhttps://aviation-safety.net/database/year/2007/3
[0m
[1mhttps://aviation-safety.net/database/year/2007/4
[0m
[1mhttps://aviation-safety.net/database/year/2008/1
[0m
[1mhttps://aviation-safety.net/database/year/2008/2
[0m
[1mhttps://aviation-safety.net/database/year/2008/3
[0m
[1mhttps://aviation-safety.net/database/year/2008/4
[0m
[1mhttps://av

## **1.2: Merge CSV Files into Single DataFrames**

In [5]:
# Seed the two final DataFrames before the intermediate files are merged.
if yearCounterInteger != 0:

    # Years scraped since the last intermediate write are still held in
    # the in-memory Lists, so the DataFrames start from those and every
    # intermediate file (from number 1) remains to be merged.
    aviationAccidentsDataFrame \
        = pd.DataFrame.from_dict(aviationAccidentDictionaryList)

    aviationAccidentsNarrativesDataFrame \
        = pd.DataFrame.from_dict(aviationAccidentNarrativesDictionaryList)

    firstIndexInteger = 1

else:

    # Nothing is left in memory, so the DataFrames start from the first
    # pair of intermediate files.
    firstDataFilePathString \
        = local_constant.AVIATION_ACCIDENTS_DATA_ONE_CSV_FILE + '1'

    firstNarrativeFilePathString \
        = local_constant.AVIATION_ACCIDENTS_NARRATIVE_ONE_CSV_FILE + '1'

    aviationAccidentsDataFrame \
        = function.ReturnCSVFileAsDataFrame(firstDataFilePathString)

    aviationAccidentsNarrativesDataFrame \
        = function.ReturnCSVFileAsDataFrame(firstNarrativeFilePathString)

    # Drop the first column of each frame -- presumably the index column
    # that to_csv wrote; confirm against ReturnCSVFileAsDataFrame.
    aviationAccidentsDataFrame \
        = aviationAccidentsDataFrame.drop(
            aviationAccidentsDataFrame.columns[0], axis = 1)

    aviationAccidentsNarrativesDataFrame \
        = aviationAccidentsNarrativesDataFrame.drop(
            aviationAccidentsNarrativesDataFrame.columns[0], axis = 1)

    # The first pair of intermediate files is deleted once loaded.
    os.remove(firstDataFilePathString)

    os.remove(firstNarrativeFilePathString)

    # Merging continues with the second pair of files.
    firstIndexInteger = 2
    
    
for indexInteger in range (firstIndexInteger, decadeCounterInteger+1):
    
    tempDataFrame \
        = function. \
            ReturnCSVFileAsDataFrame \
                (local_constant \
                    .AVIATION_ACCIDENTS_DATA_ONE_CSV_FILE \
                        + str(indexInteger))
    
    tempDataFrame \
        = tempDataFrame \
            .drop \
                (tempDataFrame.columns[0], 
                 axis = 1)
    
    aviationAccidentsDataFrame \
        = pd.concat \
            ([aviationAccidentsDataFrame.copy(), 
              tempDataFrame.copy()])
    
    
    tempDataFrame \
        = function. \
            ReturnCSVFileAsDataFrame \
                (local_constant \
                    .AVIATION_ACCIDENTS_NARRATIVE_ONE_CSV_FILE \
                        + str(indexInteger))
  
    tempDataFrame \
        = tempDataFrame \
            .drop \
                (tempDataFrame.columns[0], 
                 axis = 1)
    
    aviationAccidentsNarrativesDataFrame \
        = pd.concat \
            ([aviationAccidentsNarrativesDataFrame.copy(), 
              tempDataFrame.copy()])
    
    
    os.remove \
        (local_constant \
            .AVIATION_ACCIDENTS_DATA_ONE_CSV_FILE \
                + str(indexInteger))

    os.remove \
        (local_constant \
            .AVIATION_ACCIDENTS_NARRATIVE_ONE_CSV_FILE \
                + str(indexInteger))
    
    
log_function \
    .DebugReturnObjectWriteObject \
        (aviationAccidentsDataFrame)

log_function \
    .DebugReturnObjectWriteObject \
        (aviationAccidentsNarrativesDataFrame)

In [6]:
# Report how many accident records were merged and the year range they span.
recordCountTextString \
    = 'There are {:,} complete aviation accident records ' \
        .format(len(aviationAccidentsDataFrame))

yearRangeTextString \
    = 'from {:} '.format(local_constant.BEGIN_YEAR) \
      + 'to {:}.'.format(local_constant.END_YEAR)

# '\033[1m' and '\033[0m' are the ANSI escape sequences that switch bold
# text on and back off around the message.
log_subroutine.PrintAndLogWriteText(
    ''.join(
        ['\033[1m',
         recordCountTextString,
         yearRangeTextString,
         '\n\n',
         '\033[0m']))

[1mThere are 2,304 complete aviation accident records from 1970 to 2022.

[0m


## **1.3: Display DataFrames**

In [7]:
# Table 1.3.1: style the first five rows of the merged accidents DataFrame
# and hand the styled table to the logging helper under the same caption.
captionString = 'Table 1.3.1: Aviation Accident Scraped Data (1970-2022) - First'

currentStylerObject = function.ReturnStylerObjectStandardFormat(
    aviationAccidentsDataFrame.head(5),
    captionString)

# Kept as the last expression so its return value is the cell output.
log_function.ReturnStylerObjectSavePNGImage(
    currentStylerObject,
    captionString)

accident_id,status,acc_date,acc_time,acc_datetime,type,operator,registration,msn,first_flight,crew_fatalities,crew_occupants,passenger_fatalities,passenger_occupants,total_fatalities,total_occupants,aircraft_damage,phase,nature,depart_airport,destination_airport,flight_number,location,classifications
ACC2278,,2020-01-09,00:00,2020-01-09 00:00:00,Lockheed C-130BZ Hercules,South African Air Force - SAAF,403,3750,1962,0,8,0,59,0,67,Substantial,Landing (LDG),Military,"Beni Airport (BNC/FZNP), Congo (Democratic Republic)","Goma Airport (GOM/FZNA), Congo (Democratic Republic)",,Goma Airport (GOM) (Congo (Democratic Republic)),
ACC2279,Information verified through authorities or other official sources.,2020-01-19,16:50,2020-01-19 16:50:00,Fairchild SA227-AC Metro III,Perimeter Aviation,C-GWVH,AC-714B,1988,0,2,0,12,0,14,Substantial,Landing (LDG),Domestic Scheduled Passenger,"Thompson Airport, MB (YTH/CYTH), Canada","Shamattawa Airport, MB (ZTM/CZTM), Canada",YP415,"Shamattawa Airport, MB (ZTM) (Canada)",Runway excursion (veer-off)
ACC2280,Accident investigation report completed and information captured,2020-01-20,11:09,2020-01-20 11:09:00,de Havilland Canada DHC-8-314 Dash 8,Air Inuit,C-GXAI,481,1997,0,3,0,42,0,45,Substantial,Landing (LDG),Domestic Scheduled Passenger,"Québec City Jean Lesage International Airport, QC (YQB/CYQB), Canada","Schefferville Airport, QC (YKL/CYKL), Canada",AIE820,"Schefferville Airport, QC (YKL) (Canada)","['Landing after unstabilized approach', 'Tailstrike', 'Runway mishap']"
ACC2281,Accident investigation report completed and information captured,2020-01-23,10:53,2020-01-23 10:53:00,Cessna S550 Citation S/II,South African Civil Aviation Authority,ZS-CAR,S550-0078,1986,3,3,0,0,3,3,Destroyed,En route (ENR),Survey/research,"George Airport (GRJ/FAGG), South Africa","George Airport (GRJ/FAGG), South Africa",,5 km (3.1 mls) NW of Friemersheim (South Africa),"['VFR flight in IMC', 'Loss of control']"
ACC2282,Accident investigation report completed and information captured,2020-01-23,13:15,2020-01-23 13:15:00,Lockheed EC-130Q Hercules,Coulson Aviation,N134CG,4904,1981,3,3,0,0,3,3,Destroyed,Maneuvering (MNV),Fire fighting,"Richmond RAAF Base, NSW (XRH/YSRI), Australia","Richmond RAAF Base, NSW (XRH/YSRI), Australia",,"near Cooma, NSW (Australia)",Loss of control


In [8]:
# Table 1.3.2: style the last five rows of the merged accidents DataFrame
# and hand the styled table to the logging helper under the same caption.
captionString = 'Table 1.3.2: Aviation Accident Scraped Data (1970-2022) - Last'

currentStylerObject = function.ReturnStylerObjectStandardFormat(
    aviationAccidentsDataFrame.tail(5),
    captionString)

# Kept as the last expression so its return value is the cell output.
log_function.ReturnStylerObjectSavePNGImage(
    currentStylerObject,
    captionString)

accident_id,status,acc_date,acc_time,acc_datetime,type,operator,registration,msn,first_flight,crew_fatalities,crew_occupants,passenger_fatalities,passenger_occupants,total_fatalities,total_occupants,aircraft_damage,phase,nature,depart_airport,destination_airport,flight_number,location,classifications
ACC2273,Information verified through authorities or other official sources.,2019-11-24,09:08,2019-11-24 09:08:00,Dornier 228-201,Busy Bee Congo,9S-GNH,8030,1984,2,2,19,20,21,22,Destroyed,Takeoff (TOF),Domestic Scheduled Passenger,"Goma Airport (GOM/FZNA), Congo (Democratic Republic)","Beni Airport (BNC/FZNP), Congo (Democratic Republic)",,1 km (0.6 mls) S of Goma Airport (GOM) (Congo (Democratic Republic)),Loss of control
ACC2274,Accident investigation report completed and information captured,2019-12-01,08:30,2019-12-01 08:30:00,Beechcraft B99 Airliner,Flamingo Air Charter,C6-FER,U-164,1975,0,2,0,12,0,14,Substantial,Landing (LDG),Non Scheduled Passenger,"Staniel Cay Airport (TYM/MYES), Bahamas","Black Point Airstrip (MYEB), Bahamas",,Black Point Airstrip (Bahamas),Undershoot/overshoot
ACC2275,Accident investigation report completed and information captured,2019-12-03,09:10,2019-12-03 09:10:00,Basler BT-67 Turbo 67 (DC-3T),North Star Air,C-FKAL,13840,1943,0,2,0,0,0,2,Substantial,Approach (APR),Cargo,"Red Lake Airport, ON (YRL/CYRL), Canada","Sachigo Lake Airport, ON (ZPB/CZPB), Canada",,"0,5 km (0.3 mls) SW of Sachigo Lake Airport, ON (ZPB) (Canada)","['VFR flight in IMC', 'Controlled Flight Into Terrain (CFIT) - Ground']"
ACC2276,Accident investigation report completed and information captured,2019-12-27,07:21,2019-12-27 07:21:00,Fokker 100,Bek Air,UP-F1007,11496,1996,1,5,11,93,12,98,Destroyed,Takeoff (TOF),Domestic Scheduled Passenger,"Almaty Airport (ALA/UAAA), Kazakhstan","Nursultan Nazarbayev International Airport (NQZ/UACC), Kazakhstan",Z92100,near Almaty Airport (ALA) (Kazakhstan),"['Icing', 'Loss of control']"
ACC2277,,2019-12-28,14:34,2019-12-28 14:34:00,Let L-410UVP-E,Air Fast Congo,9S-GDX,871816,1987,0,2,0,16,0,18,Substantial,Landing (LDG),Domestic Scheduled Passenger,"Lubumbashi International Airport (FBM/FZQA), Congo (Democratic Republic)","Kamina Airport (KMN/FZSA), Congo (Democratic Republic)",,Kamina Airport (KMN) (Congo (Democratic Republic)),Runway excursion (veer-off)


In [9]:
# Table 1.3.3: style the first five rows of the merged narratives DataFrame
# and hand the styled table to the logging helper under the same caption.
captionString = 'Table 1.3.3: Aviation Accident Narratives (1970-2022) - First'

currentStylerObject = function.ReturnStylerObjectStandardFormat(
    aviationAccidentsNarrativesDataFrame.head(5),
    captionString)

# Kept as the last expression so its return value is the cell output.
log_function.ReturnStylerObjectSavePNGImage(
    currentStylerObject,
    captionString)

accident_id,narrative
ACC2278,"A South African Air Force (SAAF) Lockheed C-130BZ Hercules of SAAF 28 sqn crash landed at Goma International Airport (GOM/FZNA). A fire erupted after the left hand wing, outboard of the no.1 engine, broke. The fire was quickly contained.There were no fatalities. The military transport plane was carrying 8 crew members and 59 South African troops."
ACC2279,"A Metro III operated by Perimeter Aviation as flight 415, departed Thompson (CYTH) for Shamattawa (CZTM), Canada, with 12 passengers and 2 crew members.During the landing rollout on runway 19 at approximately 60 knots, aircraft directional control was lost and the aircraft encountered a runway excursion to the right and collided with a snow berm at a groundspeed of less than 20 knots. The right hand propeller made contact with a snow berm and shattered upon impact. There were no injuries. A NOTAM was issued closing the airport until the aircraft was removed from the runway.The aircraft sustained damage to the left engine nacelle, nose gear doors, and right propeller. The reported runway condition at the time of occurrence was 100% compacted snow. Data retrieved from the aircraft's navigation system indicated the winds were 255 degrees at 18 knots on short final prior to touchdown."
ACC2280,"Air Inuit flight 820, a DHC-8-314, suffered a tail strike on landing at Schefferville Airport, Canada. The aircraft had departed Montréal-Pierre Elliott Trudeau Airport (CYUL) at 07:06 hours local time on a multi-stop flight to Salluit. The accident occurred on the second leg, from Montréal to Schefferville."
ACC2281,"A Cessna S550 Citation S/II of the South African Civil Aviation Authority crashed into the Outeniqua mountains, near the town of Friemersheim. The three occupants were killed and the aircraft was destroyed.The Citation departed Port Elizabeth Airport (FAPE) on a positioning flight to George Airport (FAGG). On approach to FAGG, the flying crew requested to carry out a calibration flight for the very high frequency omnidirectional range (VOR) beacon at FAGG. Due to inclement weather conditions at the time, they were not cleared to conduct VOR calibration. As a result, they decided to land and refuel the aircraft before commencing with the calibration of the Instrument Landing System (ILS) on runway 11 at FAGG. The flying crew requested take-off from runway 11 and an early right turn to intercept radial 250°, 17 nautical miles (nm) DME arc to radial 330° at 3000 feet (ft) climbing to 4000ft. The air traffic control (ATC) granted their request.Radar data indicated that at 10:42, the aircraft took off from runway 11 and, once airborne, made a right-hand turn to intercept radial 250° using the George VOR (GRV VOR). The aircraft climbed to 3000ft. Once the aircraft reached 17nm on the DME from the GRV VOR (DME is co-located with the VOR), it commenced with a right-hand turn to intercept radial 330° while maintaining 17nm DME arc.At 10:46, the ATC at FAGG advised the flying crew that they were now exiting controlled airspace and were advised to broadcast on the special rules frequency. The crew acknowledged the advisory to change frequency and there was no further communication. The aircraft was still being monitored by ATC using secondary surveillance radar.At 10:50, radar data showed the aircraft crossing radial 310° and entered a climb from 3000ft, reaching 3900ft. As the aircraft levelled off at 3900ft, a rapid descent occurred, and the aircraft lost 1500ft in approximately 9 seconds. 
Three seconds prior to impact, the aircraft nose pitched up before impacting a ridge at 2192ft."
ACC2282,"A Lockheed C-130 Hercules firefighting aircraft impacted terrain and burst into flames near Cooma, north-east of the Snowy Mountains in Australia. All three crew members died in the accident.The aircraft, contracted to the New South Wales Rural Fire Service, departed Richmond RAAF Base, Australia at 12:05 local time. The crew had been tasked with a fire retardant drop over the Adaminaby Complex bush fire.After approaching the Adaminaby complex fire, the drop was unable to be completed and the aircraft was diverted to a secondary tasking, to drop retardant on the Good Good fire. Witnesses reported seeing the aircraft complete a number of circuits, prior to completing the retardant drop. The drop was conducted on a heading of about 190°, at about 200 ft above ground level, with a drop time of approximately 2 seconds. The crew released about 1,200 US gallons (4,500 L) of fire retardant during the drop.Witness videos taken of the aircraft leading up to the accident showed a number of passes conducted at varying heights prior to the retardant drop. Following the retardant drop, the aircraft was observed to bank left, before becoming obscured by smoke after about 5 seconds. A further 15 seconds after this, the aircraft was seen flying at a very low height above the ground, in a left wing down attitude. Shortly after, at about 13:16, the aircraft collided with terrain and a post-impact fuel-fed fire ensued. The three crew were fatally injured and the aircraft was destroyed."


In [10]:
captionString \
    = 'Table 1.3.4: Aviation Accident Narratives (1970-2022) - Last'

currentStylerObject \
    = function \
        .ReturnStylerObjectStandardFormat \
            (aviationAccidentsNarrativesDataFrame.tail(5),
             captionString)

log_function \
    .ReturnStylerObjectSavePNGImage \
        (currentStylerObject,
         captionString)

accident_id,narrative
ACC2273,"A Busy Bee Congo Dornier 228-200 aircraft crashed shortly after takeoff from Goma, D.R. Congo. The aircraft crashed onto buildings in the Birere neighbourhood south of the airport and a fire erupted.Of the 20 passengers and two crew members on board, just one passenger survived. On the ground a family of six was killed. The air traffic controller reported that he noticed the wings were rocking shortly after the aircraft became airborne from runway 17. When climbing at a shallow angle, the flight contacted the controller, requesting a turn back to runway 35, without stating the nature of their emergency."
ACC2274,"A Beechcraft 99 suffered a runway excursion after landing on runway 12 at Black Point, Bahamas.On touchdown on the area preceding the useable portion of runway 11 at Black Point, the right main gear entered an area approximately 1-3 inches in depth. It subsequently struck an exposed section of asphalt (46 mm thick) on the unusable portion of the beginning of the runway.Given the speed of the aircraft at the time of the touch down the right main landing gear collapsed after striking the exposed surface. The disabled aircraft traveled an additional distance of approximately 1,527 ft down the runway before exiting the runway and coming to a stop approximately 20 feet in bushes lining the side of the runway.The aircraft sustained damages to the right wing, trailing and leading edges, right propeller, right main gear and fuselage. There were no injuries reported."
ACC2275,"The Basler BT-67 aircraft operated by North Star Air, collided with terrain approximately 500 metres southwest of the threshold of runway 10 of Sachigo Lake Airport, Ontario, Canada. The aircraft sustained substantial damage. The 2 flight crew members received no injuries.The aircraft was operated on a daytime VFR cargo flight from Red Lake Airport (CYRL) to Sachigo Lake Airport (CZPB). No weather reports for the destination were available, but the captain learned that an airport 30 nm to the southeast reported IMC conditions with an overcast ceiling at 500 feet AGL, with an expected improval to 1500 feet AGL with a possible fluctuation to 700 feet AGL and decreased visibility by 09:00. The flight departed CYRL under visual flight rules (VFR) at 08:00. Shortly after takeoff, the aircraft entered, and climbed above, the cloud layers before reaching the planned cruising altitude, which was not in accordance with the applicable regulations for VFR flights. Before commencing the descent to CZPB, the pilots obtained the 08:00 hourly weather report at at the nearby airport, which had remained generally unchanged, and elected to carry out a visual approach to runway 10. The captain initiated a descent through the cloud layers by reference to the flight instruments.Once the aircraft broke out of cloud at very low level, the aircraft was not in a position to continue with the planned visual approach. The captain made low-level manoeuvres in an attempt to land, flying a large 360° turn, as low as 100 feet AGL (i.e. about 400 feet below the required minimum altitude), and then flew a manoeuvre similar to a left-hand circuit, which brought the aircraft within close proximity to a significant obstacle (a 150-foot tall tower), in meteorological conditions below the VFR minimum requirements. Given that the captain had not briefed the first officer, the latter was unaware of the captains intentions and began calling out airspeed and altitudes. 
On the last attempt, during the low-level downwind leg, when the aircraft passed abeam the threshold of runway 10, the captain initiated a left-hand turn and began descending. About 10 seconds later, the aircraft collided with terrain, in a near wings-level attitude, approximately 650 feet southwest of the threshold of runway 10. The aircraft slid 350 feet southward along the ground before it came to a rest on a southwesterly heading.The captain likely experienced attentional narrowing while carrying out a high-workload visual approach at very low altitude in IMC. This most likely resulted in an inadvertent but controlled descent that was not detected until the aircraft collided with terrain.The uninjured pilots evacuated the aircraft via the right-hand cockpit window. The aircraft sustained substantial damage; however, there was no post-impact fire."
ACC2276,"Bek Air flight 2100, a Fokker 100, crashed into a building during takeoff from Almaty Airport, Kazakhstan. The aircraft was operating a scheduled domestic service to Nursultan. The captain was Pilot Flying. Air temperature was -12°C and prior to departure the horizontal stabilizer was de-iced; the wings were not de-iced. Flaps were set at 0° and the aircraft commenced the takeoff roll from runway 05R at 07:20 local time.The aircraft took off at 07:20:36 at a speed of 148 knots. Immediately after the separation of the aircraft from the runway, it rolled from 5° right to 19° to the left without an increase in indicated airspeed and with an increase in pitch angle to 14°.At 07:20:42, gaining a height of 20 feet (6 meters), the aircraft lost altitude and the IAS decreased to 130 knots as it rolled from 14° to the right to 11° to the left.At 07:20:50, the plane contacted the runway with its tail and landed on the main landing gear. The aircraft ran along the runway for 15 seconds with the nose gear raised, without a significant increase in speed.The aircraft became airborne again at a speed of 138 knots with the tail touching the runway. The crew retracted the landing gear. The aircraft again rolled from left to right with the pitch angle increasing to 19°. The IAS decreased to 130 knots and at 07:21:09, after gaining a height of 11 feet (3.3 meters), the aircraft banked right, descended and slid over the ground until it went through the airport perimeter fence. At 07:21:12, at a speed of 147 knots, the aircraft collided with a house.The Vice-Minister of Health reported that 12 occupants had died and 47 were injured, of which 8 in extremely serious condition."
ACC2277,"A Let L-410 suffered a runway excursion while landing on the wet gravel airstrip of Kamina Airport in D.R. Congo.The aircraft swung to the right and went off the runway. The nose landing gear collapsed and the aircraft sustained damage to the nose section, left main landing gear and left hand wing tip tank.The passengers, provincial deputies, were not injured in the accident."


## **1.4: Export DataFrames to CSV Files**

In [11]:
# Write the merged accidents DataFrame to its final CSV file. The default
# to_csv options are used, so the row index is written as the first column.
aviationAccidentsDataFrame.to_csv(
    local_constant.AVIATION_ACCIDENTS_DATA_ONE_CSV_FILE)

In [12]:
# Write the merged narratives DataFrame to its final CSV file. The default
# to_csv options are used, so the row index is written as the first column.
aviationAccidentsNarrativesDataFrame.to_csv(
    local_constant.AVIATION_ACCIDENTS_NARRATIVE_ONE_CSV_FILE)

## **1.5: Close Browser**

In [13]:
# Scraping is finished: shut down the Splinter browser session.
chromeBrowserSplinterWebDriver.quit()

In [14]:
#log_subroutine \
#    .EndProgramExecution()