In [1]:
#*******************************************************************************************
 #
 #  File Name:  PyAviationAccidentsSetup.ipynb
 #
 #  File Description:
 #      This interactive Python notebook, PyAviationAccidentsSetup, uses the Python
 #      modules Splinter and Beautiful Soup to scrape aviation accident information
 #      from the Aviation Safety Network website.
 #
 #
 #  Date            Description                             Programmer
 #  ----------      ------------------------------------    ------------------
 #  10/05/2023      Initial Development                     N. James George
 #
 #******************************************************************************************/

import PyFunctions as function
import PySubRoutines as subroutine
import PyLogFunctions as log_function
import PyLogSubRoutines as log_subroutine
import PyAviationAccidentsSetupConstants as local_constant

import datetime as dt
import pandas as pd

from bs4 import BeautifulSoup as soup
from splinter import Browser

In [2]:
# File name recorded for this notebook as a module-level constant.
CONSTANT_LOCAL_FILE_NAME = 'PyAviationAccidentsSetup.ipynb'

In [3]:
# Set the log, debug, and image modes to False, then signal the start of
# program execution under this notebook's program name.
log_subroutine.SetLogMode(False)
log_subroutine.SetDebugMode(False)
log_subroutine.SetImageMode(False)

log_subroutine.BeginProgramExecution('PyAviationAccidentsSetup')

## **1.1: Visit and Scrape the [Aviation Safety Network](https://aviation-safety.net)**

In [4]:
# Scrape the yearly listing pages of the accident database into two lists:
#   headersStringList        - column names taken from the first results table found
#   tableDataDictionaryList  - one list of cell texts per table row
# Both lists are consumed by the DataFrame cell further down.

# Becomes True once the column headers have been captured (done only once).
headerFlagBoolean = False

headersStringList = []

# NOTE: despite its name, this holds plain lists (one per row), not dictionaries.
tableDataDictionaryList = []

# Upper bound on the pages requested per year; the loop normally stops earlier,
# at the first page that has no results table.
maximumPageIndexInteger = 100


chromeBrowserSplinterWebDriver \
    = Browser \
        ('chrome')


log_subroutine \
    .PrintAndLogWriteText \
        ('\033[1m' \
         + 'WEBPAGE SCRAPING PROCESS BEGINS...\n\n' \
         + '\033[0m')


try:

    for yearIndex in range (local_constant.BEGIN_YEAR, local_constant.END_YEAR+1):

        pageIndex = 1


        while pageIndex <= maximumPageIndexInteger:

            urlString \
                = local_constant.AVIATION_ACCIDENTS_URL + str(yearIndex) + '/' + str(pageIndex)


            log_subroutine \
                .PrintAndLogWriteText \
                    ('\033[1m' \
                     + f'{urlString}\n' \
                     + '\033[0m')


            chromeBrowserSplinterWebDriver \
                .visit \
                    (urlString)

            webPageHTMLStringVariable \
                = chromeBrowserSplinterWebDriver \
                    .html

            aviationSafetyBeautifulSoupObject \
                = soup \
                    (webPageHTMLStringVariable,
                     'html.parser')

            aviationSafetyTableDataBSElementsObject \
                = aviationSafetyBeautifulSoupObject \
                    .find \
                        ('table',
                         class_ = 'hp')

            # No results table on this page: treat it as the end of this year's pages.
            if aviationSafetyTableDataBSElementsObject is None:

                break


            # Capture the column headers from the first table only, dropping the
            # blank ('\xa0') header and the 'reg.' header.
            if not headerFlagBoolean:

                headerBSResultSetObject \
                    = aviationSafetyTableDataBSElementsObject \
                        .find_all('th')

                for headerElement in headerBSResultSetObject:

                    if headerElement.text != '\xa0' \
                        and headerElement.text != 'reg.':

                        headersStringList \
                            .append \
                                (headerElement.text)

                headerFlagBoolean = True


            tableDataBSResultSetObject \
                = aviationSafetyTableDataBSElementsObject \
                    .find_all \
                        ('tr',
                         class_ = 'list')


            for row in tableDataBSResultSetObject:

                rowDataBSResultSetObject \
                    = row \
                        .find_all \
                            ('td')

                dataElementList \
                    = []

                for index, dataElement in enumerate(rowDataBSResultSetObject):

                    # Cell 2 is never kept (presumably the 'reg.' column -- confirm
                    # against the page layout) and cells past index 7 are ignored.
                    if index == 2 or index > 7:

                        continue

                    cellTextString = dataElement.text

                    if len(cellTextString) != 0:

                        dataElementList \
                            .append \
                                (cellTextString)

                    elif index == 4:

                        # Empty cell 4 gets the placeholder 'n/a'.
                        dataElementList \
                            .append \
                                ('n/a')

                    elif index == 5:

                        # Empty cell 5 gets the placeholder 'unknown'.
                        dataElementList \
                            .append \
                                ('unknown')

                    # NOTE(review): an empty cell at any other kept index is skipped
                    # entirely, so that row comes out shorter and any later values
                    # shift one column to the left. This looks intended for a
                    # normally-empty column, but it would also misalign a row whose
                    # other kept cells are empty -- confirm against the page.


                tableDataDictionaryList \
                    .append \
                        (dataElementList)

            pageIndex += 1

except BaseException:

    # A failed or interrupted scrape would otherwise leave the Chrome driver
    # running, because the only quit() call lives in a later cell that is never
    # reached. Shut it down here and let the original error propagate.
    chromeBrowserSplinterWebDriver \
        .quit()

    raise


log_subroutine \
    .PrintAndLogWriteText \
        ('\033[1m' \
         + '\nWEBPAGE SCRAPING PROCESS COMPLETE\n' \
         + '\033[0m')


log_function \
    .DebugReturnObjectWriteObject \
        (headersStringList)

log_function \
    .DebugReturnObjectWriteObject \
        (tableDataDictionaryList)

[1mWEBPAGE SCRAPING PROCESS BEGINS...

[0m
[1mhttps://aviation-safety.net/database/year/1972/1
[0m
[1mhttps://aviation-safety.net/database/year/1972/2
[0m
[1mhttps://aviation-safety.net/database/year/1972/3
[0m
[1mhttps://aviation-safety.net/database/year/1972/4
[0m
[1mhttps://aviation-safety.net/database/year/1972/5
[0m
[1mhttps://aviation-safety.net/database/year/1973/1
[0m
[1mhttps://aviation-safety.net/database/year/1973/2
[0m
[1mhttps://aviation-safety.net/database/year/1973/3
[0m
[1mhttps://aviation-safety.net/database/year/1973/4
[0m
[1mhttps://aviation-safety.net/database/year/1974/1
[0m
[1mhttps://aviation-safety.net/database/year/1974/2
[0m
[1mhttps://aviation-safety.net/database/year/1974/3
[0m
[1mhttps://aviation-safety.net/database/year/1974/4
[0m
[1mhttps://aviation-safety.net/database/year/1975/1
[0m
[1mhttps://aviation-safety.net/database/year/1975/2
[0m
[1mhttps://aviation-safety.net/database/year/1975/3
[0m
[1mhttps://aviation-safety

[1mhttps://aviation-safety.net/database/year/2007/1
[0m
[1mhttps://aviation-safety.net/database/year/2007/2
[0m
[1mhttps://aviation-safety.net/database/year/2007/3
[0m
[1mhttps://aviation-safety.net/database/year/2007/4
[0m
[1mhttps://aviation-safety.net/database/year/2008/1
[0m
[1mhttps://aviation-safety.net/database/year/2008/2
[0m
[1mhttps://aviation-safety.net/database/year/2008/3
[0m
[1mhttps://aviation-safety.net/database/year/2008/4
[0m
[1mhttps://aviation-safety.net/database/year/2009/1
[0m
[1mhttps://aviation-safety.net/database/year/2009/2
[0m
[1mhttps://aviation-safety.net/database/year/2009/3
[0m
[1mhttps://aviation-safety.net/database/year/2009/4
[0m
[1mhttps://aviation-safety.net/database/year/2010/1
[0m
[1mhttps://aviation-safety.net/database/year/2010/2
[0m
[1mhttps://aviation-safety.net/database/year/2010/3
[0m
[1mhttps://aviation-safety.net/database/year/2011/1
[0m
[1mhttps://aviation-safety.net/database/year/2011/2
[0m
[1mhttps://av

## **1.2: Save to DataFrame**

In [5]:
# Build the accident DataFrame: one row per scraped row list, labelled with the
# header texts collected during scraping.
aviationAccidentDataFrame = pd.DataFrame(
    tableDataDictionaryList,
    columns=headersStringList,
)

log_function.DebugReturnObjectWriteObject(aviationAccidentDataFrame)

## **1.3: Display DataFrame**

In [6]:
# Format the first 12 rows with the project's standard styler and pass the
# styled table, with its caption, to the PNG-saving log helper.
captionString = 'Table 1.3.1: Aviation Accident Data (First)'

firstRowsDataFrame = aviationAccidentDataFrame.head(12)

currentStylerObject = function.ReturnStylerObjectStandardFormat(
    firstRowsDataFrame,
    captionString,
)

log_function.ReturnStylerObjectSavePNGImage(currentStylerObject, captionString)

acc. date,type,operator,fat.,location,dmg
5 Jan 1972,Douglas C-47B-20-DK Dakota 4MFP,McKenzie Air,0.0,"Norman Wells Airport, NT (YVQ)",sub
6 Jan 1972,Hawker Siddeley HS-748-230 Srs. 2A,SAESA - Servicios Aéreos Especiales SA,23.0,40 km NW of Chetumal,w/o
7 Jan 1972,Boeing 727-200,Pacific Southwest Airlines - PSA,0.0,Havana-José Martí International Airport (HAV),non
7 Jan 1972,Sud Aviation SE-210 Caravelle VI-R,Iberia,104.0,ca 15 km W of Ibiza Airport (IBZ),dst
7 Jan 1972,Convair CV-340-68B,Saudi Arabian Airlines,0.0,Jeddah International Airport,w/o
8 Jan 1972,de Havilland Canada U-1A Otter (DHC-3),Khmer Air Force,,within Cambodia,w/o
9 Jan 1972,Lockheed L-188A Electra,Air Manila International,0.0,Manila International Airport (MNL),w/o
12 Jan 1972,Boeing 727,Braniff International Airways,0.0,"Dallas-Love Field, TX (DAL)",unk
15 Jan 1972,Lockheed EC-130G Hercules,United States Navy,0.0,"near Patuxent River-NAS, MD (NHK)",w/o
15 Jan 1972,Lockheed KC-130F Hercules,United States Marine Corps,,"Lake City, FL",w/o


In [7]:
# Format the last 12 rows with the project's standard styler and pass the
# styled table, with its caption, to the PNG-saving log helper.
captionString = 'Table 1.3.2: Aviation Accident Data (Last)'

lastRowsDataFrame = aviationAccidentDataFrame.tail(12)

currentStylerObject = function.ReturnStylerObjectStandardFormat(
    lastRowsDataFrame,
    captionString,
)

log_function.ReturnStylerObjectSavePNGImage(currentStylerObject, captionString)

acc. date,type,operator,fat.,location,dmg
20 Nov 2022,Boeing 737-4Q8 (SF),Bluebird Cargo,0,Paris-Charles de Gaulle Airport (CDG),sub
25 Nov 2022,Cirrus SF50 Vision Jet G2,Verijet,0,"1,6 km SW of Indianapolis Regional Airport, IN",sub
29 Nov 2022,Learjet 45,Jett Aircraft,0,"Batesville Regional Airport, AR (BVX)",sub
30 Nov 2022,Learjet 36,Aery Aviation,0,"Newport News/Williamsburg International Airport, VA (PHF)",sub
1 Dec 2022,Learjet 35A,"REVA, Inc.",0,Guantánamo Naval Air Station (NBW),sub
15 Dec 2022,Swearingen SA226-TC Metro II,Key Lime Air,0,"Wichita Dwight D. Eisenhower National Airport, KS (ICT)",sub
17 Dec 2022,de Havilland Canada DHC-8-402QPF Dash 8,Blue Bird Aviation,0,Abudwak Airstrip,sub
23 Dec 2022,de Havilland Canada DHC-6 Twin Otter 300,Rimbun Air,0,Moanemani Airport (ONI),sub
26 Dec 2022,Boeing 737-3Z0,Tarco Aviation,0,Mogadishu Aden Adde International Airport (MGQ),sub
27 Dec 2022,Embraer EMB-505 Phenom 300,Skystallion LLC,0,"Hawthorne Airport, CA (HHR)",sub


## **1.4: Export DataFrame to CSV file**

In [8]:
# Write the accident DataFrame to the CSV path configured in the local constants module.
aviationAccidentDataFrame.to_csv(local_constant.AVIATION_ACCIDENTS_DATA_FILE)

## **1.5: Close the Browser**

In [9]:
# Shut down the Splinter-controlled Chrome browser now that scraping is finished.
chromeBrowserSplinterWebDriver.quit()

In [10]:
#log_subroutine \
#    .EndProgramExecution()