In [1]:
import PyPDF2
from tika import parser
from lxml import etree
import spacy
from spacy.lang.en import English
import bs4
import os
import pickle

In [2]:
# Tags to extract as paragraphs from the HTML text
tags = ['h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'h7', 'p', 'li']

In [3]:
from nltk.corpus.reader.plaintext import CorpusReader
nlp = spacy.load("en_core_web_sm")

DOC_PATTERN = r'(?!\.)[\w\s\d\-]+\.pdf'
CAT_PATTERN = r'([\w_\s]+)/.*'
root = '/Users/nebo333/Desktop/Sample_text/FFIEC'

corpus = CorpusReader(root, DOC_PATTERN)

In [4]:
class PDFCorpusReader(CorpusReader):
    """
    A corpus reader for raw PDF documents to enable preprocessing.
    """

    def __init__(self, root, fileids=DOC_PATTERN, encoding='utf8', **kwargs):
        """
        Initialize the corpus reader. 
        Arguments are passed to the ``CorpusReader`` constructor.
        """
        
        # Initialize the NLTK corpus reader objects
        CorpusReader.__init__(self, root, fileids, encoding)
        
    def resolve(self, fileids):
        """
        Returns a list of fileids.
        """
        if fileids is not None:
            
            return self.fileids()
            
    def sizes(self, fileids=None):
        """
        Returns a list of tuples, the fileid and size on disk of the file.
        This function is used to detect oddly large files in the corpus.
        """
        # Resolve the fileids and the categories
        fileids = self.resolve(fileids)

        # Create a generator, getting every path and computing filesize
        for path in self.abspaths(fileids):
            print(os.path.getsize(path))


In [5]:
import os

class Preprocessor(object):
    """
    The preprocessor wraps an `PDFCorpusReader` and performs tokenization
    and part-of-speech tagging.
    """
    def __init__(self, root, corpus, target=None, **kwargs):
        self.corpus = corpus
        self.target = target

    def fileids(self, fileids=None):
        fileids = self.corpus.resolve(fileids)
        if fileids:
            return fileids
        return self.corpus.fileids()
    
    def abspath(self, fileid):
        # Find the directory, relative to the corpus root.
        parent = os.path.relpath(
            os.path.dirname(self.corpus.abspath(fileid)), self.corpus.root
        )

        # Compute the name parts to reconstruct
        basename  = os.path.basename(fileid)
        name, ext = os.path.splitext(basename)

        # Create the pickle file extension
        basename  = name + '.pickle'

        # Return the path to the file relative to the target.
        return os.path.normpath(os.path.join(self.target, parent, basename))
    
    def create_doc_struc(self, root, fileid):
        
        file_id = root + '/' + fileid
        file_data = parser.from_file(file_id)
        # Get files text content
        content_text = file_data['content']
        # Create doc object
        doc = nlp(content_text)
        
        doc_ls = []
        for i in doc.sents:
            sent_ls=[]
            for j in i:
                if j.is_stop == False:
                    if j.is_alpha == True:
                        token = j.text
                        pos = j.pos_
                        tup = (token,pos)
                        sent_ls.append(tup)
            doc_ls.append(sent_ls)
        yield doc_ls
            
    def process(self, fileid):
        """
        For a single file, checks the location on disk to ensure no errors,
        uses +tokenize()+ to perform the preprocessing, and writes transformed
        document as a pickle to target location.
        """
        # Compute the outpath to write the file to.
        target = self.abspath(fileid)
        parent = os.path.dirname(target)

        # Make sure the directory exists
        if not os.path.exists(parent):
            os.makedirs(parent)

        # Make sure that the parent is a directory and not a file
        if not os.path.isdir(parent):
            raise ValueError(
                "Please supply a directory to write preprocessed data to."
            )

        # Create a data structure for the pickle
        document = list(self.create_doc_struc(root,fileid))

        # Open and serialize the pickle to disk
        with open(target, 'wb') as f:
            pickle.dump(document, f, pickle.HIGHEST_PROTOCOL)

        # Clean up the document
        del document

        # Return the target fileid
        return target
    
    def transform(self, fileids=None):
        # Make the target directory if it doesn't already exist
        if not os.path.exists(self.target):
            os.makedirs(self.target)

        # Resolve the fileids to start processing
        for fileid in self.fileids(fileids):
            yield self.process(fileid)

In [6]:
myreader = PDFCorpusReader(root)

In [7]:
target = '/Users/nebo333/Desktop/Sample_text/pickle_files'

In [8]:
preproc = Preprocessor(root, myreader, target)

In [9]:
ids = myreader.fileids()

In [10]:
for i in preproc.create_doc_struc(root,ids[0]):
    print(i)

[[('Table', 'NOUN'), ('ContentsTable', 'PROPN'), ('ContentsTable', 'PROPN'), ('ContentsTable', 'PROPN'), ('Contents', 'NOUN')], [('IntroductionIntroductionIntroductionIntroduction', 'NOUN')], [('Audit', 'PROPN'), ('Roles', 'PROPN'), ('ResponsibilitiesIT', 'ADJ'), ('Audit', 'PROPN'), ('Roles', 'PROPN'), ('ResponsibilitiesIT', 'ADJ'), ('Audit', 'PROPN'), ('Roles', 'PROPN'), ('ResponsibilitiesIT', 'ADJ'), ('Audit', 'PROPN'), ('Roles', 'PROPN'), ('Responsibilities', 'PROPN')], [('Board', 'PROPN'), ('Directors', 'PROPN'), ('Senior', 'PROPN'), ('Management', 'PROPN')], [('Audit', 'PROPN'), ('Management', 'PROPN')], [('Internal', 'PROPN'), ('Audit', 'PROPN'), ('Staff', 'PROPN')], [('Operating', 'VERB'), ('Management', 'NOUN')], [('External', 'PROPN'), ('Auditors', 'NOUN'), ('Independence', 'NOUN'), ('Staffing', 'NOUN'), ('Internal', 'PROPN'), ('AuditIndependence', 'PROPN'), ('Staffing', 'PROPN'), ('Internal', 'PROPN'), ('AuditIndependence', 'PROPN'), ('Staffing', 'PROPN'), ('Internal', 'PROPN

In [11]:
for i in preproc.transform(ids):
    print(i)

/Users/nebo333/Desktop/Sample_text/pickle_files/ffiec_itbooklet_audit.pickle
/Users/nebo333/Desktop/Sample_text/pickle_files/ffiec_itbooklet_businesscontinuitymanagement.pickle
/Users/nebo333/Desktop/Sample_text/pickle_files/ffiec_itbooklet_developmentandacquisition.pickle


In [12]:
PKL_PATTERN = r'(?!\.)[\w\s\d\-]+\.pickle'

class PickledCorpusReader(PDFCorpusReader):

    def __init__(self, root, fileids=PKL_PATTERN, **kwargs):
        CorpusReader.__init__(self, root, fileids)

    def docs(self, fileids=None):
        fileids = self.resolve(fileids)
        # Load one pickled document into memory at a time.
        for path in self.abspaths(fileids):
            with open(path, 'rb') as f:
                yield pickle.load(f)[0]
    
    def paras(self, fileids=None):
        for doc in self.docs(fileids):
            for para in doc:
                yield para
                
    def sents(self, fileids=None):
        for para in self.paras(fileids):
            for sent in para:
                yield sent
                
    def tagged(self, fileids=None):
        for sent in self.sents(fileids):
            for tagged_token in sent:
                yield tagged_token

    def words(self, fileids=None):
        for tagged in self.tagged(fileids):
            yield tagged[0]
    
    ## Need to add in sentence, tuple and token yield functions


In [13]:
pic_reader = PickledCorpusReader(target)

In [14]:
ids

['ffiec_itbooklet_audit.pdf',
 'ffiec_itbooklet_businesscontinuitymanagement.pdf',
 'ffiec_itbooklet_developmentandacquisition.pdf']

In [19]:
doc_ls = []
for i in pic_reader.paras('ffiec_itbooklet_businesscontinuitymanagement.pdf'):
    para_ls=[]
    for j in i:
        wordj[0])

Table
ContentsTable
ContentsTable
ContentsTable
Contents
IntroductionIntroductionIntroductionIntroduction
Audit
Roles
ResponsibilitiesIT
Audit
Roles
ResponsibilitiesIT
Audit
Roles
ResponsibilitiesIT
Audit
Roles
Responsibilities
Board
Directors
Senior
Management
Audit
Management
Internal
Audit
Staff
Operating
Management
External
Auditors
Independence
Staffing
Internal
AuditIndependence
Staffing
Internal
AuditIndependence
Staffing
Internal
AuditIndependence
Staffing
Internal
Audit
Independence
Staffing
Internal
Audit
ProgramInternal
Audit
ProgramInternal
Audit
ProgramInternal
Audit
Program
Risk
Assessment
Risk
Based
AuditingRisk
Assessment
Risk
Based
AuditingRisk
Assessment
Risk
Based
AuditingRisk
Assessment
Risk
Based
Auditing
Program
Elements
Risk
Scoring
System
Audit
Participation
Application
Development
Acquisition
Conversions
TestingAudit
Participation
Application
Development
Acquisition
Conversions
TestingAudit
Participation
Application
Development
Acquisition
Conversions
TestingAu

institutions
complex
operations
benefits
derived
time
manager
internal
audit
audit
staff
likely
outweigh
cost
small
institutions
employees
simple
operations
costs
outweigh
benefits
institution
internal
auditor
ensure
maintains
objective
independent
internal
function
implementing
comprehensive
internal
reviews
significant
internal
controls
key
characteristic
reviews
directing
performing
review
responsible
managing
operating
controls
StaffingStaffingStaffingStaffing
Personnel
performing
audits
information
systems
knowledge
commensurate
scope
sophistication
institution
environment
possess
sufficient
analytical
skills
determine
report
root
cause
deficiencies
internal
expertise
inadequate
board
consider
qualified
external
sources
management
consultants
independent
auditors
professionals
supplement
perform
institution
internal
audit
function
institutions
person
group
responsibilities
outside
audit
function
performs
audits
Generally
institutions
approach
centralize
audit
coverage
assign
audit

data
impact
risk
assessment
scoring
Accordingly
operating
management
required
auditors
date
major
changes
departments
functions
introduction
new
product
implementation
new
system
application
conversions
significant
changes
organization
staff
Audit
Participation
Application
Development
Audit
Participation
Application
Development
Audit
Participation
Application
Development
Audit
Participation
Application
Development
Acquisition
Conversions
TestingAcquisition
Conversions
TestingAcquisition
Conversions
TestingAcquisition
Conversions
Testing
Action
SummaryAction
SummaryAction
SummaryAction
Summary
Senior
management
involve
audit
major
application
development
acquisition
conversion
testing
development
acquisition
conversion
automated
application
lengthy
complex
process
requiring
significant
degree
interaction
programming
staff
user
departments
internal
audit
process
known
system
development
life
cycle
system
development
methodology
requires
detailed
developmental
stages
ensure
applications
m

verify
internal
control
environment
TSP
use
additional
verification
monitoring
procedures
discussed
fully
Outsourcing
Technology
Services
Booklet
FFIEC
Examination
Handbook
Refer
booklet
additional
information
vendor
management
supplement
examination
coverage
booklet
Audit
Booklet
Page
EndnotesEndnotesEndnotesEndnotes
booklet
uses
terms
institution
financial
institution
describe
insured
banks
thrifts
credit
unions
technology
service
providers
provide
services
entities
Board
Governors
Federal
Reserve
System
Federal
Reserve
Board
Federal
Deposit
Insurance
Corporation
FDIC
National
Credit
Union
Administration
NCUA
Office
Comptroller
Currency
OCC
Office
Thrift
Supervision
OTS
include
Interagency
Policy
Statement
Internal
Audit
Function
Outsourcing
March
Interagency
Policy
Statement
External
Auditing
Programs
Banks
Savings
Associations
September
Interagency
Policy
Statement
Coordination
Communication
External
Auditors
Examiners
July
federal
credit
union
board
directors
required
establish
su

auditor
qualifications
Determine
outsourcing
arrangement
maintains
improves
quality
internal
audit
function
institution
internal
controls
examiner
Review
performance
contractual
criteria
audit
vendor
internal
evaluations
audit
vendor
Review
outsourced
internal
audit
reports
sample
audit
work
papers
Determine
adequate
prepared
accordance
audit
program
outsourcing
agreement
Determine
work
papers
disclose
specific
program
steps
calculations
evidence
support
procedures
conclusions
set
forth
outsourced
reports
Determine
scope
outsourced
internal
audit
procedures
adequate
Determine
key
employees
institution
audit
vendor
clearly
understand
lines
communication
internal
control
problems
matters
noted
audit
vendor
internal
audits
addressed
Determine
management
audit
vendor
revises
scope
outsourced
audit
work
appropriately
institution
environment
activities
risk
exposures
systems
change
significantly
Audit
Booklet
Page
Determine
directors
ensure
institution
effectively
manages
outsourced
internal

systems
Management
assessed
impact
external
internal
trends
factors
ability
vendor
support
continued
servicing
client
financial
institutions
vendor
provide
maintain
service
level
performance
meets
requirements
client
Management
monitors
quality
vendor
software
releases
documentation
training
provided
clients
Audit
Booklet
Page
Appendix
B
GlossaryAppendix
B
GlossaryAppendix
B
GlossaryAppendix
B
Glossary
Application
controlsApplication
controlsApplication
controlsApplication
controls
Controls
related
transactions
data
application
systems
Application
controls
ensure
completeness
accuracy
records
validity
entries
resulting
programmed
processing
manual
data
entry
Examples
application
controls
include
data
input
validation
agreement
batch
totals
encryption
data
transmitted
Application
systemApplication
systemApplication
systemApplication
system
integrated
set
computer
programs
designed
serve
defined
function
having
specific
input
processing
output
activities
general
ledger
manufacturing
reso

handbooks
FFIEC
Examination
Handbook
Business
Continuity
Management
November
BCM
governance
include
Aligning
BCM
practices
risk
appetite
Identifying
continuity
level
needed
consistent
operation
criticality
Establishing
business
continuity
policy
plans
Allocating
resources
BCM
activities
Providing
competent
management
implement
program
Monitoring
assessing
business
continuity
performance
relative
goals
Figure
depicts
typical
BCM
cycle
entities
follow
manage
business
continuity
risks
ongoing
basis
manage
risks
entity
develop
single
encompassing
BCM
policy
individual
policies
plans
different
functions
depending
size
complexity
entity
operations
effective
practice
business
continuity
related
policies
address
minimum
following
areas
scope
responsibilities
BCM
accountability
authority
guidance
develop
maintain
effective
BCM
Board
Senior
Management
Responsibilities
Action
Summary
board
senior
management
govern
business
continuity
defining
responsibilities
accountability
allocating
adequate
re

unique
plans
type
hazard
threat
Planning
threats
hazards
ensures
addressing
emergency
functions
planners
identify
common
tasks
personnel
responsible
accomplishing
tasks
Management
evaluate
potential
risks
entity
geographic
area
example
entities
located
flood
prone
areas
earthquake
zones
terrorist
targets
areas
affected
tornados
hurricanes
addition
geographic
areas
management
assess
geopolitical
risk
potential
retaliatory
cyber
attacks
example
sanctions
nation
state
increase
risk
cyber
attacks
critical
Management
coordinate
business
continuity
risk
identification
efforts
entity
Individual
business
units
larger
entities
coordinate
risk
identification
activities
identify
systemic
threats
overall
entity
Management
identify
inventory
entity
internal
external
assets
types
threats
hazards
existing
controls
important
effective
risk
identification
Refer
Handbook
Management
booklet
additional
information
Furthermore
management
identify
cyber
security
risks
refer
Handbook
Information
Security
boo

enable
restoration
critical
services
institution
critical
service
providers
fall
victim
destructive
cyber
attack
similar
event
BCM
include
ability
protect
offline
data
backups
destructive
malware
threats
corrupt
production
online
backup
versions
data
example
industry
initiative
assist
addressing
resilience
customer
account
information
Sheltered
Personnel
Resilience
dependent
personnel
availability
maintain
critical
business
processes
Personnel
unavailable
distracted
events
natural
disasters
severe
weather
events
employee
role
designated
mission
critical
management
plan
mass
absenteeism
event
disruption
Previous
catastrophic
events
Hurricane
demonstrate
personnel
availability
affects
timely
recovery
Management
plan
events
personnel
able
access
facilities
critical
personnel
available
immediately
disruption
Public
infrastructure
transportation
systems
operating
telecommunication
systems
overburdened
unavailable
management
consider
Staffing
skills
needed
operate
critical
functions
related


segments
landline
networks
greatly
increasing
probability
completion
intended
emergency
crisis
situation
landline
network
congested
probability
completing
normal
reduced
Management
request
GETS
cards
submitting
application
entity
primary
federal
regulator
Refer
DHS
Wireless
Priority
Service
webpage
FFIEC
Examination
Handbook
Business
Continuity
Management
November
troubleshooting
analysis
Change
tickets
corresponding
activity
reviewed
appropriateness
event
resolved
events
changes
properly
authorized
monitored
documented
Poorly
administered
emergency
changes
result
disruption
Additionally
interrelated
nature
systems
compound
disruptions
previously
unaffected
systems
emergency
event
systems
documentation
updated
changes
Change
management
elements
addressed
detail
Handbook
Development
Acquisition
Operations
booklets
Communications
Management
consider
plan
prepare
multiple
mechanisms
communicate
example
traditional
voice
communications
telecommunications
impaired
inoperable
management
cons

additional
branch
Refer
Notice
Branch
Closure
Fed
Reg
Policy
Statement
Office
Comptroller
Currency
Board
Governors
Federal
Reserve
System
Federal
Deposit
Insurance
Corporation
Office
Thrift
Supervision
Concerning
Branch
Closing
CFR
Subpart
C
Establishment
Relocation
Domestic
Branches
Offices
FDIC
CFR
Establishment
Maintenance
Branches
FRB
CFR
Establishment
Acquisition
Relocation
Branch
National
Bank
FFIEC
Examination
Handbook
Business
Continuity
Management
November
Payment
Systems
BCP
address
alternate
arrangements
payment
systems
fail
automated
teller
machines
ATM
funds
transfers
electronic
banking
remote
deposit
capture
mobile
capabilities
Alternate
solutions
include
manual
procedures
calling
faxing
wire
automated
clearing
house
requests
correspondent
financial
institutions
addition
web
based
systems
party
software
perform
transactions
Management
verify
redundant
electronic
payment
systems
equipment
tokens
routers
included
recovery
sites
activation
documentation
maintained
timely
pos

critical
party
service
providers
confirm
entity
personnel
understand
integration
recovery
processes
Exercise
Test
Strategies
Management
develop
exercise
testing
strategies
demonstrate
entity
ability
support
connectivity
functionality
volume
capacity
alternate
facilities
strategies
include
expectations
individual
business
lines
use
exercise
testing
methodologies
scenarios
Testing
strategies
encompass
internal
external
dependencies
including
activities
outsourced
domestic
foreign
based
party
service
providers
Management
test
aspects
entity
BCP
Strategies
include
multi
year
plan
execute
specific
depth
breadth
exercises
tests
identify
gaps
program
different
methodologies
scenarios
time
Expectations
testing
internal
external
recovery
dependencies
Assumptions
methodologies
exercises
develop
test
strategies
Lessons
learned
natural
disasters
events
critical
business
functions
testing
strategies
include
transaction
processing
functional
testing
assess
recoverability
infrastructure
capacity
data

Management
document
issues
identified
exercises
tests
create
action
plans
target
dates
resolving
issues
Exercise
test
results
analyzed
compared
objectives
success
criteria
exercise
test
plans
reported
appropriate
levels
management
items
remediated
management
document
decisions
accept
risks
identified
exercises
Additionally
management
test
corrective
actions
implemented
result
failed
recovery
objective
address
major
issues
encountered
Management
choose
retest
regularly
scheduled
exercise
depending
issue
severity
Business
line
management
update
BCP
based
test
results
adjust
BCM
process
including
exercise
testing
program
Finally
management
submit
regular
reports
board
exercise
testing
activities
BCP
meets
entity
recovery
resilience
objectives
Exercise
test
results
include
following
documentation
Industry
cross
market
tests
conducted
associations
Securities
Industry
Association
Bond
Market
Association
Futures
Industry
Association
associations
mentioned
illustrative
purposes
note
endorsemen

reports
board
provides
assessment
management
ability
manage
control
risks
related
continuity
resilience
Determine
audit
leverages
SOC
reports
external
artifacts
party
service
providers
appropriate
Determine
board
management
validates
auditor
qualified
carry
review
independent
business
continuity
related
functions
Evaluate
audit
coverage
business
continuity
general
controls
audit
audits
business
lines
stand
business
continuity
audit
Audit
coverage
include
following
reasonableness
comprehensiveness
BIA
business
continuity
risk
reliability
adequacy
effectiveness
continuity
resilience
controls
FFIEC
Examination
Handbook
Business
Continuity
Management
November
effectiveness
risk
mitigation
efforts
test
plans
achieve
stated
objectives
based
reasonable
assumptions
Audit
monitoring
exercises
tests
reviewing
test
plans
results
verifying
issues
identified
appropriately
escalated
Assessment
business
continuity
program
effectiveness
Objective
Determine
management
developed
appropriate
repeatable
B

Examination
Handbook
Business
Continuity
Management
November
Verify
business
line
management
retains
ownership
testing
specific
business
processes
coordinates
personnel
involved
enterprise
wide
BCM
process
support
areas
Verify
exercises
tests
occur
appropriate
intervals
significant
changes
affect
entity
operating
environment
Verify
management
developed
process
sufficiently
robust
confirm
effectiveness
entity
business
continuity
program
exercise
program
incorporate
following
policy
includes
strategies
expectations
exercise
test
planning
Roles
responsibilities
implementation
Sufficient
personnel
perform
exercise
test
provide
oversight
document
results
Precautions
safeguard
production
data
performing
backup
performing
test
test
environment
testing
non
peak
hours
Provisions
emergency
stops
concluding
exercises
tests
Verification
continuity
resilience
process
assumptions
ability
process
sufficient
volume
work
adverse
operating
conditions
Activities
commensurate
importance
business
process
E

backup
sites
core
firm
backup
sites
significant
firm
participates
industry
Department
Treasury
Hamilton
Series
FS
ISAC
CAPS
exercises
cross
market
tests
sponsored
core
firms
markets
trade
associations
Tests
incorporate
verifying
connectivity
alternate
sites
include
transaction
settlement
payment
processes
extent
practical
Determine
exercise
test
program
sufficient
demonstrate
entity
ability
meet
continuity
objectives
results
demonstrate
readiness
personnel
achieve
entity
recovery
resumption
objectives
Determine
management
accomplishes
following
Coordinate
execution
exercise
test
program
fully
exercise
business
continuity
planning
process
Analyze
compare
results
stated
objectives
Raise
issues
appropriate
personnel
assign
responsibility
resolution
FFIEC
Examination
Handbook
Business
Continuity
Management
November
Escalate
issues
resolved
timely
manner
appropriate
level
management
Prioritize
track
issues
final
resolution
Analyze
results
issues
determine
problems
traced
common
source
Docum

Incident
management
process
identifying
analyzing
correcting
disruptions
operations
preventing
future
recurrences
goal
incident
management
limit
disruption
restore
operations
quickly
possible
FFIEC
Developed
Supervisory
Purposes
Incident
response
response
organization
disaster
significant
event
significantly
impact
organization
people
ability
function
productively
incident
response
include
evacuation
facility
initiating
disaster
recovery
plan
performing
damage
assessment
measures
necessary
bring
organization
stable
status
Business
Continuity
Institute
Disaster
Recovery
Journal
Glossary
Infrastructure
System
facilities
equipment
services
needed
operation
organization
ISO
Integrated
exercise
simulation
test
effectiveness
continuity
plans
business
line
major
function
incorporates
component
module
including
external
dependencies
FFIEC
Developed
Supervisory
Purposes
Interdependencies
departments
processes
functions
party
providers
interact
successfully
complete
task
business
function
proces

March
Guidance
Response
Programs
Unauthorized
Access
Customer
Information
Customer
Notice
April
Financial
Banking
Information
Infrastructure
Committee
Interim
Policy
Sponsorship
Private
Sector
Financial
Institutions
GETS
Card
Program
August
Bank
Technology
Bulletin
Outsourcing
June
National
Credit
Union
Administration
Regulations
CFR
Appendix
Guidelines
Safeguarding
Member
Information
CFR
Appendix
Record
Preservation
Program
Record
Retention
CFR
Appendix
B
Catastrophic
Act
Preparedness
Guidelines
Guidance
NCUA
Letter
Credit
Unions
Guidance
Pandemic
January
NCUA
Letter
Credit
Unions
Evaluating
Party
Relationships
December
NCUA
Risk
Alert
Disaster
Planning
Response
April
NCUA
Letter
Credit
Unions
Influenza
Pandemic
Preparedness
March
NCUA
Letter
Credit
Unions
e
Commerce
Guide
Credit
Unions
December
NCUA
Letter
Credit
Unions
Disaster
Recovery
Business
Resumption
Contingency
Plans
December
NCUA
Letter
Credit
Unions
Diligence
Party
Service
Providers
November
NCUA
Letter
Credit
Unions
Busine

maintain
Typical
software
development
projects
include
initiation
planning
design
development
testing
implementation
maintenance
phases
organizations
include
final
disposal
phase
project
life
cycles
activities
completed
project
phase
based
project
type
project
management
methodology
projects
follow
structured
plans
clearly
define
requirements
project
phase
Alternative
Development
MethodologiesAlternative
Development
MethodologiesAlternative
Development
MethodologiesAlternative
Development
Methodologies
SDLC
provides
logical
approach
managing
sequential
series
tasks
drawback
traditional
SDLC
project
risks
adequately
controlled
tasks
completed
strictly
sequential
manner
example
traditional
SDLC
methodology
users
define
functional
requirements
pass
system
designers
Designers
complete
designs
pass
programmers
programmers
subsequently
discover
improved
ways
provide
functional
requirements
designers
redo
work
programmers
involved
planning
design
phases
able
identify
improvements
earlier
proc

meet
obligations
Development
Acquisition
Booklet
Page
Testing
StandardsTesting
StandardsTesting
StandardsTesting
Standards
Management
establish
testing
standards
require
use
predefined
comprehensive
test
plans
end
user
involvement
documented
test
results
Additionally
testing
standards
prohibit
testing
production
environments
live
data
copies
live
customer
data
tests
management
ensure
appropriate
standards
exist
protect
confidentiality
data
Management
use
test
data
generators
software
applications
generate
representative
testing
data
based
predefined
parameters
develop
appropriate
testing
data
Numerous
automated
applications
available
test
program
logic
functional
operability
network
interoperability
Documentation
StandardsDocumentation
StandardsDocumentation
StandardsDocumentation
Standards
Organizations
establish
appropriate
documentation
standards
Documentation
consists
detailed
descriptions
explanations
technology
applications
systems
procedures
Documentation
enhances
user
ability
u

compiling
feasibility
study
support
documentation
include
Business
Considerations
Strategic
business
technology
goals
objectives
Expected
benefits
measured
value
current
technology
Potential
organizational
changes
facilities
addition
reduction
end
users
technicians
managers
Budget
scheduling
personnel
constraints
Potential
business
regulatory
legal
issues
impact
feasibility
project
Functional
Requirements
End
user
functional
requirements
Internal
control
information
security
requirements
Operating
database
backup
system
requirements
type
capacity
performance
Development
Acquisition
Booklet
Page
Connectivity
requirements
stand
Local
Area
Network
Wide
Area
Network
external
Network
support
requirements
number
potential
users
type
volume
frequency
data
transfers
Interface
requirements
internal
external
applications
Project
Factors
Project
management
methodology
Risk
management
methodology
Estimated
completion
dates
projects
major
project
phases
Estimated
costs
projects
major
project
phases

Information
Security
Booklet
information
disposal
media
Development
PhaseDevelopment
PhaseDevelopment
PhaseDevelopment
Phase
development
phase
involves
converting
design
specifications
executable
programs
Effective
development
standards
include
requirements
programmers
project
participants
discuss
design
specifications
programming
begins
procedures
help
ensure
programmers
clearly
understand
program
designs
functional
requirements
Programmers
use
techniques
develop
computer
programs
large
transaction
oriented
programs
associated
financial
institutions
traditionally
developed
procedural
programming
techniques
Procedural
programming
involves
line
line
scripting
logical
instructions
combined
form
program
Primary
procedural
programming
activities
include
creation
testing
source
code
refinement
finalization
test
plans
Typically
individual
programmers
write
review
desk
test
program
modules
components
small
routines
perform
particular
task
application
Completed
components
integrated
components

EVALUATIONPROJECT
EVALUATION
Management
conduct
post
implementation
reviews
end
project
validate
completion
project
objectives
assess
project
management
activities
Management
interview
personnel
actively
involved
operational
use
product
document
address
identified
problems
Management
analyze
effectiveness
project
management
activities
comparing
things
planned
actual
costs
benefits
development
times
document
results
present
senior
management
Senior
management
informed
operational
project
management
deficiencies
Maintenance
PhaseMaintenance
PhaseMaintenance
PhaseMaintenance
Phase
maintenance
phase
involves
making
changes
hardware
software
documentation
support
operational
effectiveness
includes
making
changes
improve
system
performance
correct
problems
enhance
security
address
user
requirements
ensure
modifications
disrupt
operations
degrade
system
performance
security
organizations
establish
appropriate
change
management
standards
procedures
Change
management
referred
configuration
mana

tools
store
descriptions
structure
format
data
data
tables
Advanced
data
dictionaries
store
source
code
copies
field
record
code
descriptions
use
software
design
development
activities
Primary
issues
consider
reviewing
design
configuration
database
management
systems
include
access
controls
auditing
features
Management
restrict
direct
privileged
access
database
opposed
accessing
information
application
authorized
personnel
DBMS
journaling
feature
allows
organizations
track
data
changes
Journaling
provides
audit
trails
data
changes
facilitates
safe
recovery
data
errors
occur
available
organizations
employ
automated
auditing
tools
journaling
identify
accessed
attempted
access
database
data
changed
DBMS
validate
users
record
row
levels
log
activities
detailed
validation
levels
provide
strong
security
controls
Examiners
consider
validation
levels
assessing
adequacy
DBMS
controls
Strong
DBMS
controls
include
data
change
logs
input
validity
checks
locking
rollback
mechanisms
ability
recover


unauthorized
software
use
copyright
violations
measures
help
prevent
copyright
violations
best
control
mechanism
strict
corporate
policy
management
auditors
communicate
enforce
Management
uncompromising
attitude
copyright
violations
organization
security
administrator
responsible
monitoring
enforcing
policy
Software
Development
Specifications
Performance
StandardsSoftware
Development
Specifications
Performance
StandardsSoftware
Development
Specifications
Performance
StandardsSoftware
Development
Specifications
Performance
Standards
Contracts
development
custom
software
describe
define
expected
performance
attributes
functionality
software
contract
describe
equipment
required
operate
software
ensure
appropriate
compatibility
Vendors
required
meet
exceed
institution
internal
development
policies
standards
opening
negotiations
issuing
request
proposal
custom
software
development
organizations
clear
idea
essential
business
needs
addressed
software
adequate
understanding
organization
presen

tested
programs
Management
establish
program
approval
standards
include
procedures
verifying
test
results
inspecting
modified
code
confirming
source
object
codes
match
Emergency
ModificationsEmergency
ModificationsEmergency
ModificationsEmergency
Modifications
Development
Acquisition
Booklet
Page
Emergency
modifications
periodically
needed
correct
software
problems
restore
processing
operations
quickly
changes
completed
quickly
implemented
controlled
manner
Emergency
change
standards
include
procedures
similar
routine
change
controls
standards
include
abbreviated
change
request
evaluation
approval
procedures
ensure
changes
quickly
standards
designed
ensure
management
completes
detailed
evaluations
documentation
emergency
changes
soon
possible
implementation
possible
emergency
changes
tested
prior
implementation
management
unable
thoroughly
test
emergency
modifications
installation
critical
appropriately
backup
files
programs
established
procedures
place
Appropriate
backups
established


responsibilities
concerning
technology
systems
initiatives
Evaluate
organizational
responsibilities
ensure
board
management
Development
Acquisition
Booklet
Page
Clearly
define
appropriately
assign
responsibilities
Appropriately
assign
security
audit
quality
assurance
personnel
related
projects
Establish
appropriate
segregation
duty
compensating
controls
Establish
appropriate
project
technology
committee
board
reporting
requirements
Objective
Assess
level
characteristics
risks
associated
development
Objective
Assess
level
characteristics
risks
associated
development
Objective
Assess
level
characteristics
risks
associated
development
Objective
Assess
level
characteristics
risks
associated
development
acquisition
maintenance
activities
materially
impact
maintenance
activities
materially
impact
maintenance
activities
materially
impact
maintenance
activities
materially
impact
organization
Assess
risks
identified
objectives
evaluate
adequacy
risk
management
programs
Risk
identification
asses

availability
FlowchartsFlowchartsFlowchartsFlowcharts
Traditional
flowcharts
involve
use
geometric
symbols
diamonds
ovals
rectangles
represent
sequencing
program
logic
Software
packages
available
automatically
chart
programs
enable
programmer
chart
program
need
draw
manually
Functional
requirementsFunctional
requirementsFunctional
requirementsFunctional
requirements
business
operational
security
features
organization
wants
included
program
IterativeIterativeIterativeIterative
Repetitive
cyclical
Iterative
software
development
involves
completion
project
tasks
phases
repetitive
cycles
Tasks
phase
activities
repeated
desired
result
achieved
LAN
Acronym
Local
Area
Network
MetricMetricMetricMetric
quantitative
measurement
MilestoneMilestoneMilestoneMilestone
major
project
event
NetworkNetworkNetworkNetwork
computer
systems
grouped
share
information
Development
Acquisition
Booklet
Page
software
hardware
Object
codeObject
codeObject
codeObject
code
Software
program
instructions
compiled
tran

In [20]:
for i in pic_reader.paras('ffiec_itbooklet_businesscontinuitymanagement.pdf'):
    print(i)

[('Table', 'NOUN'), ('ContentsTable', 'PROPN'), ('ContentsTable', 'PROPN'), ('ContentsTable', 'PROPN'), ('Contents', 'NOUN')]
[('IntroductionIntroductionIntroductionIntroduction', 'NOUN')]
[('Audit', 'PROPN'), ('Roles', 'PROPN'), ('ResponsibilitiesIT', 'ADJ'), ('Audit', 'PROPN'), ('Roles', 'PROPN'), ('ResponsibilitiesIT', 'ADJ'), ('Audit', 'PROPN'), ('Roles', 'PROPN'), ('ResponsibilitiesIT', 'ADJ'), ('Audit', 'PROPN'), ('Roles', 'PROPN'), ('Responsibilities', 'PROPN')]
[('Board', 'PROPN'), ('Directors', 'PROPN'), ('Senior', 'PROPN'), ('Management', 'PROPN')]
[('Audit', 'PROPN'), ('Management', 'PROPN')]
[('Internal', 'PROPN'), ('Audit', 'PROPN'), ('Staff', 'PROPN')]
[('Operating', 'VERB'), ('Management', 'NOUN')]
[('External', 'PROPN'), ('Auditors', 'NOUN'), ('Independence', 'NOUN'), ('Staffing', 'NOUN'), ('Internal', 'PROPN'), ('AuditIndependence', 'PROPN'), ('Staffing', 'PROPN'), ('Internal', 'PROPN'), ('AuditIndependence', 'PROPN'), ('Staffing', 'PROPN'), ('Internal', 'PROPN'), ('Au

[('level', 'NOUN'), ('formality', 'NOUN'), ('BCM', 'PROPN'), ('ERM', 'PROPN'), ('integration', 'NOUN'), ('commensurate', 'VERB'), ('entity', 'NOUN'), ('complexity', 'NOUN'), ('risk', 'NOUN'), ('profile', 'NOUN')]
[('Figure', 'NOUN'), ('Business', 'PROPN'), ('Continuity', 'PROPN'), ('Management', 'PROPN'), ('Elements', 'PROPN'), ('Relative', 'ADJ'), ('Enterprise', 'PROPN'), ('Risk', 'PROPN'), ('Management', 'PROPN')]
[('Refer', 'VERB'), ('Department', 'PROPN'), ('Treasury', 'PROPN'), ('Department', 'PROPN'), ('Homeland', 'PROPN'), ('Security', 'PROPN')]
[('DHS', 'PROPN')]
[('Financial', 'PROPN'), ('Services', 'PROPN'), ('Sector', 'PROPN'), ('Specific', 'PROPN'), ('Plan', 'PROPN')]
[('ERM', 'PROPN'), ('process', 'NOUN'), ('effected', 'VERB'), ('entity', 'NOUN'), ('board', 'NOUN'), ('directors', 'NOUN'), ('management', 'NOUN'), ('personnel', 'NOUN'), ('applied', 'VERB'), ('strategy', 'NOUN'), ('setting', 'VERB'), ('enterprise', 'NOUN'), ('designed', 'VERB'), ('identify', 'VERB'), ('potent

[('Data', 'NOUN'), ('repository', 'NOUN'), ('maintenance', 'NOUN')]
[('Protect', 'PROPN'), ('offline', 'ADJ'), ('data', 'NOUN'), ('backups', 'NOUN'), ('destructive', 'ADJ'), ('malware', 'NOUN'), ('corrupt', 'VERB'), ('production', 'NOUN'), ('online', 'ADJ'), ('backup', 'ADJ'), ('versions', 'NOUN'), ('data', 'NOUN')]
[('FFIEC', 'PROPN'), ('Examination', 'PROPN'), ('Handbook', 'PROPN')]
[('Business', 'PROPN'), ('Continuity', 'PROPN'), ('Management', 'PROPN')]
[('November', 'PROPN')]
[('Determine', 'VERB'), ('management', 'NOUN'), ('documented', 'VERB'), ('implemented', 'VERB'), ('appropriate', 'ADJ'), ('following', 'VERB'), ('resilience', 'NOUN'), ('measures', 'NOUN'), ('personnel', 'NOUN')]
[('Staffing', 'NOUN'), ('skills', 'NOUN'), ('needed', 'VERB'), ('operate', 'VERB'), ('critical', 'ADJ'), ('functions', 'NOUN'), ('related', 'VERB'), ('business', 'NOUN'), ('continuity', 'NOUN')]
[('Lodging', 'PROPN'), ('arrangements', 'NOUN'), ('displaced', 'ADJ'), ('employees', 'NOUN'), ('families',

[('Repeatable', 'ADJ'), ('Organizations', 'NOUN'), ('Repeatable', 'ADJ'), ('capabilities', 'NOUN'), ('use', 'VERB'), ('fundamental', 'ADJ'), ('project', 'NOUN')]
[('planning', 'NOUN'), ('scheduling', 'NOUN'), ('monitoring', 'NOUN'), ('procedures', 'NOUN')]
[('Defined', 'VERB'), ('Organizations', 'NOUN'), ('Defined', 'VERB'), ('capabilities', 'NOUN'), ('use', 'VERB'), ('approved', 'VERB'), ('formalized', 'VERB'), ('management', 'NOUN'), ('development', 'NOUN'), ('standards', 'NOUN'), ('procedures', 'NOUN'), ('customized', 'VERB'), ('meet', 'VERB'), ('specific', 'ADJ'), ('project', 'NOUN'), ('requirements', 'NOUN'), ('projects', 'NOUN')]
[('Managed', 'VERB'), ('Organizations', 'NOUN'), ('Managed', 'VERB'), ('capabilities', 'NOUN'), ('measure', 'NOUN'), ('understand', 'VERB'), ('control', 'NOUN'), ('development', 'NOUN'), ('procedures', 'NOUN'), ('product', 'NOUN'), ('quality', 'NOUN')]
[('Optimizing', 'VERB'), ('Organizations', 'NOUN'), ('Optimized', 'ADJ'), ('capabilities', 'NOUN'), ('u