TITLE: NOTAM Scraper
DATE: February 2023
VERSION: 1.0
DEVELOPER: Parker Hornstein (phornstein@esri.com)
REQUIREMENTS: ArcGIS Notebooks
LICENSE:

Copyright © 2023 Esri

All rights reserved under the copyright laws of the United States and applicable international laws, treaties, and conventions.
You may freely redistribute and use this sample code, with or without modification, provided you include the original copyright notice and use restrictions.

Disclaimer: THE SAMPLE CODE IS PROVIDED "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ESRI OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
SUSTAINED BY YOU OR A THIRD PARTY, HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT ARISING IN ANY WAY OUT OF
THE USE OF THIS SAMPLE CODE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

For additional information, contact:

Esri
Attn: Contracts and Legal Services Department
380 New York Street
Redlands, California, 92373-8100
USA
email: contracts@esri.com

In [None]:
'''Parameters'''
# FAA endpoints -- leave both values as they are.
BASEURL = 'https://tfr.faa.gov'  # root of the FAA site, prefixed to relative links
URL = 'https://tfr.faa.gov/tfr2/list.jsp'  # page that lists the US NOTAMs

# Scratch folder inside the ArcGIS Notebooks kernel; reserved for downloaded
# shapefiles and emptied by the script while it runs.
NOTAM_DIR = r'/arcgis/home/notams'

# Item ID of the NOTAM Feature Service whose first layer receives the features.
NOTAM_LAYER_ID = ''

In [None]:
import requests, os, zipfile, arcpy, json, shutil
from datetime import datetime as dt
from arcgis.features import Feature
from arcgis.gis import GIS
from bs4 import BeautifulSoup

# Shared GIS connection used later to look up the NOTAM Feature Service.
# NOTE(review): 'home' presumably connects to the portal hosting this notebook with the
# signed-in user's session -- confirm against the ArcGIS API for Python documentation.
gis = GIS('home')

In [None]:
class NOTAM():
    '''
    Container for the data scraped for one NOTAM, convertible to a Feature.

    The instance attribute names are also the attribute names of the Feature
    built by toFeature(), because the attributes are copied from the instance
    dictionary (everything except geom).
    '''

    # Layout handed to strptime for the three date fields.
    # NOTE(review): presumably matches FAA text such as
    # "February 10, 2023 at 1500 UTC" -- confirm against a live page.
    _TIME_FORMAT = '%B %d, %Y at %H%M %Z'

    # Attributes parsed by convertTimeFields().
    _TIME_FIELDS = ('issue_date', 'begin_dtg', 'end_dtg')

    # Longest free text sent to the feature service.
    # NOTE(review): presumably the length of the target text field -- confirm.
    _MAX_TEXT_LENGTH = 2000

    # attribute name -> key read from the scraped dictionary in fromDict()
    _SOURCE_KEYS = {
        'notam_number': 'NOTAMNumber',
        'location': 'Location',
        'issue_date': 'IssueDate',
        'begin_dtg': 'BeginningDateandTime',
        'end_dtg': 'EndingDateandTime',
        'reason': 'ReasonforNOTAM',
        'type': 'Type',
        'supercedes': 'ReplacedNOTAMs',
        'contact': 'PilotsMayContact',
        'airspace_definition': 'AirspaceDefinition',
        'altitude': 'Altitude',
        'text': 'text',
        'url': 'url',
        'geom': 'geom',
    }

    def __init__(self):
        '''Create an empty NOTAM; every field starts as None.'''
        self.notam_number = None
        self.location = None
        self.issue_date = None
        self.begin_dtg = None
        self.end_dtg = None
        self.reason = None
        self.type = None
        self.supercedes = None
        self.contact = None
        self.airspace_definition = None
        self.altitude = None
        self.text = None
        self.url = None
        self.geom = None

    @classmethod
    def fromDict(cls,data):
        '''
        Build a NOTAM from a dictionary of scraped values.

        data: mapping keyed by the scraped labels listed in _SOURCE_KEYS;
              missing keys leave the matching attribute as None.
        Returns the new NOTAM with its date fields already converted.
        '''
        notam = cls()
        for attribute, key in cls._SOURCE_KEYS.items():
            setattr(notam, attribute, data.get(key))
        notam.convertTimeFields()
        return notam

    def toDict(self):
        '''Return the instance dictionary itself (not a copy) of field name -> value.'''
        return self.__dict__

    def convertTimeFields(self):
        '''
        Parse issue_date, begin_dtg and end_dtg into datetime objects in place.

        A value that is missing or does not match _TIME_FORMAT becomes None.
        Values that are already datetime objects are left alone, so calling
        this method more than once does not erase previously parsed dates.
        '''
        for field in self._TIME_FIELDS:
            value = getattr(self, field)
            if isinstance(value, dt):
                continue
            try:
                parsed = dt.strptime(value, self._TIME_FORMAT)
            except (TypeError, ValueError):
                # TypeError: value is not a string (e.g. None);
                # ValueError: the text does not match the expected layout
                parsed = None
            setattr(self, field, parsed)

    def toFeature(self):
        '''
        Build a Feature from this NOTAM.

        The geometry is decoded from the JSON string held in geom; the
        attributes are a copy of every other field, with text cut to
        _MAX_TEXT_LENGTH characters. The NOTAM itself is not modified.

        Raises TypeError (from json.loads) when geom is None.
        '''
        attr = self.toDict().copy()
        attr.pop('geom', None)
        # text stays None when no free text was scraped; slicing None would raise
        if attr.get('text') is not None:
            attr['text'] = attr['text'][:self._MAX_TEXT_LENGTH]
        geom = json.loads(self.geom)
        return Feature(geom,attr)

In [None]:
def cleanText(text):
    '''
    Normalise a label scraped from HTML.

    Surrounding whitespace is trimmed, then every space, colon, opening or
    closing parenthesis and newline is removed from what remains.
    '''
    unwanted = str.maketrans('', '', ' :()\n')
    trimmed = text.strip()
    return trimmed.translate(unwanted)

def searchFolder(folder_path,file_types=()):
    '''
    Crawls a directory and all sub directories looking for files of specified extensions.

    folder_path: directory to start from.
    file_types:  one extension (e.g. '.shp') or an iterable of extensions;
                 a file matches when its name ends with any of them.
                 With no extensions nothing matches.
    Returns a list of full paths; files directly inside folder_path are
    listed before files found in sub directories.
    Raises OSError (e.g. FileNotFoundError) when a directory cannot be listed.
    '''
    # a bare string would otherwise be split into single characters by tuple()
    if isinstance(file_types, str):
        file_types = (file_types,)
    suffixes = tuple(file_types)

    def _reraise(error):
        # os.walk skips unreadable directories silently unless told otherwise
        raise error

    matches = []
    for root, _dirs, files in os.walk(folder_path, onerror=_reraise):
        matches.extend(os.path.join(root, f) for f in files if f.endswith(suffixes))
    return matches

def cleanNOTAMDir(folder=None):
    '''
    Empty a processing directory without removing the directory itself.

    folder: directory to empty; defaults to NOTAM_DIR when omitted.

    Files and links are unlinked and sub directories are removed with their
    contents. A failure on one entry is printed and the remaining entries are
    still processed. A directory that does not exist is treated as already
    empty.
    '''
    target = NOTAM_DIR if folder is None else folder
    if not os.path.isdir(target):
        return

    for filename in os.listdir(target):
        file_path = os.path.join(target, filename)
        try:
            if os.path.isfile(file_path) or os.path.islink(file_path):
                os.unlink(file_path)
            elif os.path.isdir(file_path):
                shutil.rmtree(file_path)
        except OSError as e:
            # best effort: report the entry and keep going
            print('Failed to delete %s. Reason: %s' % (file_path, e))

In [None]:
'''Request main FAA page and use BeautifulSoup to parse the HTML'''
# The timeout stops the notebook from hanging on an unresponsive server, and
# raise_for_status() fails here on an HTTP error instead of parsing an error page.
response = requests.get(URL, timeout=60)
response.raise_for_status()
soup = BeautifulSoup(response.content,'html.parser')

# The third table on the page is the one searched for NOTAM links afterwards;
# fail with a readable message if the page no longer has that many tables.
page_tables = soup.find_all('table')
if len(page_tables) < 3:
    raise RuntimeError(f'Expected at least 3 tables on {URL} but found {len(page_tables)}; '
                       'the page layout may have changed')
table = page_tables[2]

In [None]:
'''Find href to all NOTAM specific pages'''
notam_urls = []
for a in table.find_all('a', href=True):
    href = a['href']
    if href.startswith('../save_pages'):
        # Swap only the leading '..' for the site root; str.replace would also
        # rewrite any later '..' in the link.
        notam_urls.append(BASEURL + href[2:])

before = len(notam_urls)
#deduplicate URLs, webscraping can be messy so the href may exist multiple times;
#dict.fromkeys keeps the first-seen order so every run processes pages in the same order
notam_urls = list(dict.fromkeys(notam_urls))
f'REMOVED {before-len(notam_urls)} DUPLICATE URLS'

In [None]:
'''Process the NOTAM specific page for data'''

#textual keys to search for
keys = ['NOTAMNumber','IssueDate','Location','BeginningDateandTime','EndingDateandTime',
        'ReasonforNOTAM','Type','ReplacedNOTAMs','PilotsMayContact','AirspaceDefinition',
        'Altitude']

#seconds to wait on the FAA server before giving up on a request
notam_request_timeout = 60

#the shapefile download below writes into the processing folder, so make sure it exists
os.makedirs(NOTAM_DIR, exist_ok=True)

notams = []
for u in notam_urls:
    try:
        #start from an empty processing folder so a shapefile left behind by an
        #earlier failure cannot be picked up as this NOTAM's geometry
        cleanNOTAMDir()

        data = {'url':u}
        geom = None
        notam_site = requests.get(u, timeout=notam_request_timeout)
        notam_site.raise_for_status()
        notam_soup = BeautifulSoup(notam_site.content,'html.parser')
        message = notam_soup.find('form',{'name':'meas'})
        if message is None:
            raise ValueError("no form named 'meas' found on the page")
        message_tr = message.find_all('tr')
        for mes_tr in message_tr:
            shp_zip_path = None
            tables = mes_tr.find_all("table")

            for tbl in tables:
                tds = tbl.find_all('td')

                #process shapefile for geometry if it exists
                #the link is looked for in the second cell; tables with fewer cells have none
                anchors = tds[1].find_all('a', href=True) if len(tds) > 1 else []
                for a in anchors:
                    if a['href'].endswith('.shp.zip'):
                        shp_url = BASEURL + '/save_pages/' + a['href']
                        shp = requests.get(shp_url, timeout=notam_request_timeout)
                        shp.raise_for_status()
                        shp_zip_path = os.path.join(NOTAM_DIR,'shapefile.shp.zip')
                        with open(shp_zip_path, 'wb') as outfile:
                            outfile.write(shp.content)

                #extract once per table, after all links were inspected
                if shp_zip_path is not None and os.path.isfile(shp_zip_path):
                    with zipfile.ZipFile(shp_zip_path,'r') as z:
                        z.extractall(NOTAM_DIR)

                    shp_path = searchFolder(NOTAM_DIR,['.shp'])[0]
                    with arcpy.da.SearchCursor(shp_path,['SHAPE@']) as cursor:
                        #NOTE(review): when the shapefile has several rows only the
                        #last row's geometry is kept -- confirm this is intended
                        for row in cursor:
                            geom = row[0].projectAs(arcpy.SpatialReference(4326)).JSON

                cleanNOTAMDir()

                #process NOTAM text
                for i, td in enumerate(tds):
                    text = cleanText(td.text)

                    if text in keys:
                        #the value sits in the following cell; a label in the last cell has none
                        if i + 1 < len(tds):
                            value = tds[i+1].text.strip()
                            value = value.split('\n')[0]
                            data.update({text:value})
                    elif text.startswith('OperatingRestrictionsandRequirements'):
                        #free text is every cell from two after the heading to the end of the table
                        apmn = [free_td.text.strip() for free_td in tds[i+2:]]
                        free_text = ''.join(apmn)
                        data.update({'text':free_text})

        if geom is not None: #don't store NOTAMs without Geometry
            #attach the geometry here so it is stored no matter where on the page
            #the shapefile link appeared relative to the free text
            data['geom'] = geom
            n = NOTAM.fromDict(data)
            notams.append(n)
    except Exception as e:
        #a failing page must not stop the remaining NOTAMs; report which one failed
        print(f'Failed to process {u}: {e}')

f'FOUND {len(notams)} NOTAMS'

In [None]:
# Title: NOTAMs | Type: Feature Service |
# Look the item up first so a missing or inaccessible item gives a readable error
# rather than an AttributeError on None.
notam_item = gis.content.get(NOTAM_LAYER_ID)
if notam_item is None:
    raise ValueError(f'No item found for NOTAM_LAYER_ID {NOTAM_LAYER_ID!r}; '
                     'set it to the item ID of the NOTAM Feature Service')
# the first layer of the service is the one the NOTAM features are written to
notam_lyr = notam_item.layers[0]
notam_lyr

In [None]:
'''Try to add each NOTAM to the Feature Service'''
# Build every Feature before touching the service: if a conversion fails the
# existing features are still in place, and one bad NOTAM does not block the rest.
prepared = []
for notm in notams:
    try:
        prepared.append((notm, notm.toFeature()))
    except Exception as e:
        print(f'Skipping NOTAM {notm.notam_number}: {e}')
feats = [feature for _, feature in prepared]

if feats:
    # replace the previous contents of the layer with the freshly scraped NOTAMs
    notam_lyr.delete_features(where='1=1')

    # features are added one at a time so a rejected feature cannot affect the others
    for notm, f in prepared:
        try:
            result = notam_lyr.edit_features(adds=[f])
        except Exception as e:
            print(str(e))
            continue

        # a rejected edit is reported in the returned result rather than raised
        add_results = result.get('addResults', []) if isinstance(result, dict) else []
        for add_result in add_results:
            if isinstance(add_result, dict) and not add_result.get('success', True):
                print(f"Failed to add NOTAM {notm.notam_number}: {add_result.get('error')}")