### Full solution, receiving text blocks as array, outputting JSON

In [55]:
import os
import re
import unicodedata

import pdftotext

# Example of converting a PDF file into an array of cleaned page texts
# (one string per page), which is later turned into the JSON response.
pdf_docs_path = os.path.join("PDF")
one_pdf_path = os.path.join(pdf_docs_path, "Protect_Your_Home_From_Flooding_Brochure.pdf")

with open(one_pdf_path, "rb") as f:
    pdf = pdftotext.PDF(f)

textArray = []
for page in pdf:
    # NFKC folds compatibility characters into their plain equivalents before
    # the ASCII whitelist below is applied. In particular the typographic
    # ligature code points (U+FB00-U+FB04: ff, fi, fl, ffi, ffl) become ordinary
    # letters; without this step the regex deletes them and words lose letters
    # ("flooding" -> "fooding", "find" -> "fnd"), which hides keywords such as
    # "flood" from the downstream classifier.
    # NOTE(review): assumes the extracted text carries ligature code points;
    # confirm on the sample PDF that the truncated words disappear.
    normalizedPage = unicodedata.normalize("NFKC", page)

    # Collapse every run of characters outside the whitelist into one space
    docText = re.sub(r"[^a-zA-Z0-9:.,!?%$@]+", ' ', normalizedPage).strip()
    textArray.append(docText)

`textArray` now holds one cleaned text string per PDF page. It is the payload to be posted to a single call that returns a JSON array with one item per text block, each item containing the block number, the detected disaster type(s) and the extracted actions.

### All NLP and other dependencies for Lambda Functions

In [69]:
import spacy
from spacy.lang.en.stop_words import STOP_WORDS
from collections import Counter
import string
import json
# Load the "en_core_web_sm" spaCy pipeline; the extractor functions defined
# below call this shared module-level `nlp` object.
nlp = spacy.load("en_core_web_sm")

### Action extraction function

In [57]:
# Imperative-sentence detector used for action extraction
def impSentenceExtractor(someText):
    """Return the sentences of `someText` that start with an action verb.

    The text is parsed with the module-level `nlp` pipeline. A sentence is kept
    when its first token has `pos_ == 'VERB'`, its `tag_` is "VB" or "VBG", and
    the sentence is longer than two tokens.

    Args:
        someText: block of text to analyse.

    Returns:
        List of the matching sentences' text, in document order.
    """
    actionTags = ("VB", "VBG")

    def startsWithAction(sent):
        # Only the first token decides; very short sentences are rejected
        first = sent[0]
        return first.pos_ == 'VERB' and first.tag_ in actionTags and len(sent) > 2

    return [sent.text for sent in nlp(someText).sents if startsWithAction(sent)]

### Frequent words extractor function

In [58]:
# Frequent-word extractor: returns the matched climate terms as a string and as a list
def frequentClimateWordsExtractor(text):
    """Find the frequent nouns of `text` that belong to the climate vocabulary.

    The text is parsed with the module-level `nlp` pipeline. Non-stop-word
    tokens whose `pos_` is 'NOUN' or 'PROPN' are lower-cased and counted; the
    300 most common ones are then filtered against a fixed vocabulary.

    Args:
        text: block of text to analyse.

    Returns:
        A `(listOfWords, filteredList)` tuple: `filteredList` is the list of
        matched terms, most frequent first, and `listOfWords` is the same terms
        concatenated into one string with ", " appended after every term
        (including the last one).
    """
    # Vocabulary of relevant words
    climateTerms = [
        "snow", "change", "climate", "heatwave", "adaptation", "tornado", "water",
        "icestorm", "risk", "impact", "level", "community", "land", "management",
        "planning", "development", "http", "plan", "infrastructure", "sea", "event",
        "action", "vulnerability", "flood", "assessment", "storm", "temperature",
        "low", "rise", "resource", "weather", "strategy", "damage", "effect",
        "precipitation", "hazard", "ice", "protection", "home", "flooding",
        "erosion", "environment", "emission", "al", "winter", "heat", "forest",
        "wind", "mitigation", "emergency", "coast", "shoreline", "greenhouse",
        "elevation", "carbon", "wave", "dike", "wetland", "disaster",
        "conservation", "reduction", "fire", "rain", "drainage", "ground", "power",
        "stormwater", "roof", "rainfall", "extreme", "wildfire", "reference",
        "vegetation", "threat", "drought", "disease", "coastline", "sewer",
        "nature", "neutral", "neutrality",
    ]

    parsed = nlp(text)

    # Lower-cased text of every non-stop-word noun / proper noun, sentence by sentence
    candidateNouns = [
        token.text.lower()
        for sent in parsed.sents
        for token in sent
        if not token.is_stop and token.pos_ in ('NOUN', 'PROPN')
    ]

    # Rank by frequency and keep only vocabulary terms among the top 300
    rankedNouns = Counter(candidateNouns).most_common(300)
    filteredList = [noun for noun, _count in rankedNouns if noun in climateTerms]

    # String form: every term is followed by ", "
    listOfWords = "".join(term + ", " for term in filteredList)
    return listOfWords, filteredList

### Disaster Classifier (rules-based)

In [65]:
# Simple rule-based disaster classification driven by an ordered rule table
def disasterType(key_arr):
    """Map a collection of keywords to a list of disaster class labels.

    Each rule below is checked in order against `key_arr` (membership tests
    only), and the label of every matching rule is collected.

    Args:
        key_arr: collection of keyword strings supporting the `in` operator.

    Returns:
        List of labels in rule order; `["Undefined"]` when no rule matches; or
        `["References"]` alone whenever "http", "al" or "reference" is present,
        regardless of any other match.
    """
    def has(term):
        return term in key_arr

    def any_of(*terms):
        return any(has(term) for term in terms)

    def all_of(*terms):
        return all(has(term) for term in terms)

    # (label, predicate) pairs; order defines the order of the returned labels
    rules = (
        ("Carbon Neutrality", lambda: has("carbon") and any_of("neutral", "neutrality")),
        ("Climate Change Adaptation", lambda: has("adaptation") and any_of("change", "plan")),
        ("Drought", lambda: has("drought")),
        ("Flooding", lambda: any_of("flood", "flooding", "rainfall", "stormwater")
                             or all_of("sea", "level", "rise")),
        ("Heatwave", lambda: all_of("heat", "extreme") or has("heatwave")),
        ("Mitigation", lambda: has("mitigation")),
        ("Severe Wind", lambda: any_of("wind", "tornado")),
        ("Snowstorm", lambda: any_of("snow", "snowstorm")),
        ("Low Temperatures", lambda: all_of("temperature", "low")),
        ("Wildfire", lambda: any_of("fire", "wildfire")),
    )

    detected = [label for label, matches in rules if matches()]

    # Fall back to a placeholder label when nothing was recognised
    if not detected:
        detected = ["Undefined"]

    # Reference-like keywords override every other classification
    if any_of("http", "al", "reference"):
        return ["References"]

    return detected

### Core function execution, processing textArray

In [66]:
# Process every text block of the array and collect the ones that contain actions
def mainProcessor(arrayOfText):
    """Build the response list for a sequence of text blocks.

    For each block, the imperative sentences are extracted, the climate
    keywords are collected and the block is classified from those keywords.
    Blocks without any extracted action are left out of the result.

    Args:
        arrayOfText: iterable of text blocks (strings).

    Returns:
        List of dicts with keys 'block' (1-based position of the block in
        `arrayOfText`), 'class' (list of disaster labels) and 'actions' (list
        of extracted sentences).
    """
    returnBody = []

    for blockNumber, textBlock in enumerate(arrayOfText, start=1):
        actions = impSentenceExtractor(textBlock)
        _keywordString, matchedTerms = frequentClimateWordsExtractor(textBlock)
        labels = disasterType(matchedTerms)

        # Blocks with no actions are skipped, but still consume a block number
        if not actions:
            continue

        returnBody.append({'block': blockNumber, 'class': labels, 'actions': actions})

    return returnBody

In [68]:
# Run the full pipeline over the cleaned page texts; the cell displays the returned list
mainProcessor(textArray)

[{'block': 1,
  'class': ['Undefined'],
  'actions': ['Protect Your Home from Flooding LOW COST PROJECTS YOU CAN DO YOURSELF']},
 {'block': 2,
  'class': ['Flooding', 'Mitigation'],
  'actions': ['Know Your Risk.',
   'Learn more details about your home s level of food risk, including the type of food zone it is in',
   'Talk to them, your neighbors, and others about any past food events and how high the water has risen in the past at or near your home.',
   'Insure Your Property.',
   'Reduce Your Risk.',
   'Decide how to prepare your family and protect your home from fooding.',
   'Consider which of the methods included in this guide are most appropriate and practical, based on your home s food risk, and create a plan to mitigate the risk to your property.']},
 {'block': 3,
  'class': ['Flooding', 'Mitigation'],
  'actions': ['Depending on the project, you may need to consult local architects, engineers, contractors, landscapers, or other experts in design and construction.',
   'Co

In [53]:
# Display the cleaned page texts (one string per PDF page)
textArray

['Protect Your Home from Flooding LOW COST PROJECTS YOU CAN DO YOURSELF',
 'FLOODING IS THE MOST COMMON AND COSTLY DISASTER IN THE UNITED STATES AND CAN HAPPEN ANYWHERE. However, there are many ways to reduce your home s risk of fooding, and not all of them are diffcult or expensive. This guide briefy describes some of the smaller, lower cost actions you can take yourself or with minor assistance from others. It also suggests places you can go to fnd more information about food mitigation techniques, including some of these. You have many food mitigation options as a homeowner. As you begin to think about which might be the best for you and your home, consider following these three key steps: 1. Know Your Risk. Anywhere it can rain, it can food so we all live in a food zone, and we all live with the risk of food damage to our property. Learn more details about your home s level of food risk, including the type of food zone it is in and, if available, the potential food elevation referr