# PMI - Data Preparation

### Import libraries

In [1]:
import PyPDF2
import string
import nltk
from nltk.stem.porter import PorterStemmer
from nltk.stem import WordNetLemmatizer
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
import spacy
import re
import matplotlib.pyplot as plt
from collections import Counter
import pandas as pd
import nltk
from nltk.tokenize import sent_tokenize

## Extract  Figures

### Extract the list of figures

In [2]:
def extract_pdf(file_path,from_page_number,to_page_number):
    extracted_data = ""
    pdf = PyPDF2.PdfReader(file_path)
    for i in range(from_page_number,to_page_number):
        page = pdf.pages[i]
        extracted_data += page.extract_text()
    return extracted_data

In [3]:
figures_list=extract_pdf("../input/practice-standard-project-risk-management.pdf",10,12)

In [4]:
print(figures_list)

©2009 Project Management Institute. Practice Standard for Project Risk Management  LIST OF FIGURES  
 Figure 1-1. Hierarchy of PMI Project Risk Management Resources ....................................................2 
 Figure 1-2. Critical Success Factors for Project Risk Management .....................................................6  Figure 3-1. Project Risk Management Process Flow Diagram ...........................................................17  Figure 4-1. Key Areas of Focus for the Plan Risk Management Process ...........................................23  Figure 5-1. Three Perspectives of Risk Identification .........................................................................27  Figure 5-2. Cause, Risk, and Effect ......................................................................................................29  Figure 6-1. Building Risk Analysis Credibility .....................................................................................32 
 Figure 6-2. T

### Cleaning the list of figures

In [5]:
def cleaning_list_of_figures_PMI(list_of_figures):
    # Define a list of patterns and substitutions
    patterns_substitutions = [
        (r'ﬁ\s\s', 'fi'),  # Replace 'fi' with 'fi'
        (r'\d©2009 Project Management Institute\. Practice Standard for Project Risk Management\d', ''),  # Remove footer
        (r'©2009 Project Management Institute. Practice Standard for Project Risk Management', ''),  # Remove the copyright notice
        (r'\.{2,}', ' '),  # Remove points
        (r'[^A-Za-z0-9\s\,\\-]+', ''),  # Remove special characters (keep digits, punctuation, and spaces)
        (r'\s+', ' '),  # Remove extra spaces
        (r'\S*LIST OF FIGURES\S*', ''),  # Remove text and characters attached to it
        (r'\s[0-9]+\s', ' '), #Remove page numbers     
        
    ]

    # Apply all patterns and substitutions in sequence
    for pattern, substitution in patterns_substitutions:
        list_of_figures = re.sub(pattern, substitution, list_of_figures)
    return list_of_figures

In [6]:
clean_lof=cleaning_list_of_figures_PMI(figures_list)
clean_lof

'  Figure 1-1 Hierarchy of PMI Project Risk Management Resources Figure 1-2 Critical Success Factors for Project Risk Management Figure 3-1 Project Risk Management Process Flow Diagram Figure 4-1 Key Areas of Focus for the Plan Risk Management Process Figure 5-1 Three Perspectives of Risk Identification Figure 5-2 Cause, Risk, and Effect Figure 6-1 Building Risk Analysis Credibility Figure 6-2 The Perform Qualitative Risk Analysis Process Figure 7-1 Comparison of Qualitative and Quantitative Approaches Figure 7-2 Structure of a Quantitative Risk Analysis Figure 8-1 Critical Success Factors for Risk Response Planning Figure 8-2 The Steps Involved in Planning Risk Responses Figure 9-1 Schematic Representation of the Monitor and Control Risks Process xi'

In [7]:
def create_figures_list(text):
    # Define a regular expression pattern to match figures
    figure_pattern = re.compile(r'Figure\s')

    # Replace figure patterns with a delimiter
    text_with_delimiter = re.sub(figure_pattern, r'\\\\\g<0>', text)

    # Split the text based on the delimiter
    figures = text_with_delimiter.split('\\')

    # Remove empty strings from the list
    figures = [figure.strip() for figure in figures if figure.strip()]

    return figures

In [8]:
figures = create_figures_list(clean_lof)
figures

['Figure 1-1 Hierarchy of PMI Project Risk Management Resources',
 'Figure 1-2 Critical Success Factors for Project Risk Management',
 'Figure 3-1 Project Risk Management Process Flow Diagram',
 'Figure 4-1 Key Areas of Focus for the Plan Risk Management Process',
 'Figure 5-1 Three Perspectives of Risk Identification',
 'Figure 5-2 Cause, Risk, and Effect',
 'Figure 6-1 Building Risk Analysis Credibility',
 'Figure 6-2 The Perform Qualitative Risk Analysis Process',
 'Figure 7-1 Comparison of Qualitative and Quantitative Approaches',
 'Figure 7-2 Structure of a Quantitative Risk Analysis',
 'Figure 8-1 Critical Success Factors for Risk Response Planning',
 'Figure 8-2 The Steps Involved in Planning Risk Responses',
 'Figure 9-1 Schematic Representation of the Monitor and Control Risks Process xi']

#### Extract the figures and save them in a folder

In [9]:
pip install PyMuPDF


Note: you may need to restart the kernel to use updated packages.


In [10]:
# import libraries
import fitz
import io
import os
from PIL import Image

# file path you want to extract images from
def extract_images_from_pdf(file_path, figures):
    # Create a folder named "figures" if it doesn't exist
    output_folder = "figures"
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)

    # open the file
    pdf_file = fitz.open(file_path)
    i = 0
    j = 1
    # iterate over PDF pages
    for page_index in range(len(pdf_file)):
        # get the page itself
        page = pdf_file[page_index]
        image_list = page.get_images(full=True)

        # printing the number of images found in this page
        if image_list:
            print(f"[+] Found a total of {len(image_list)} images in page {page_index}")

        for image_index, img in enumerate(image_list, start=1):
            # get the XREF of the image
            xref = img[0]

            # extract the image bytes
            base_image = pdf_file.extract_image(xref)
            image_bytes = base_image["image"]

            # get the image extension
            image_ext = base_image["ext"]

            # create a PIL Image from the image bytes
            pil_image = Image.open(io.BytesIO(image_bytes))

            # use the figures list for naming if provided, otherwise use default naming
            if i < len(figures):
                image_filename = os.path.join(output_folder, f"{figures[i]}.{image_ext}")
                i = i + 1
            else:
                image_filename = os.path.join(output_folder, f"Figure D{j}.{image_ext}")
                j = j + 1

            # save the image to a file
            pil_image.save(image_filename)

            print(f"    Image {image_index} saved as {image_filename}")

    # close the PDF file
    pdf_file.close()
    return


In [11]:
file_path='../input/practice-standard-project-risk-management.pdf'
extract_images_from_pdf(file_path,figures)

[+] Found a total of 1 images in page 13
    Image 1 saved as figures\Figure 1-1 Hierarchy of PMI Project Risk Management Resources.png
[+] Found a total of 1 images in page 17
    Image 1 saved as figures\Figure 1-2 Critical Success Factors for Project Risk Management.png
[+] Found a total of 1 images in page 28
    Image 1 saved as figures\Figure 3-1 Project Risk Management Process Flow Diagram.png
[+] Found a total of 1 images in page 34
    Image 1 saved as figures\Figure 4-1 Key Areas of Focus for the Plan Risk Management Process.png
[+] Found a total of 1 images in page 38
    Image 1 saved as figures\Figure 5-1 Three Perspectives of Risk Identification.png
[+] Found a total of 1 images in page 40
    Image 1 saved as figures\Figure 5-2 Cause, Risk, and Effect.png
[+] Found a total of 1 images in page 43
    Image 1 saved as figures\Figure 6-1 Building Risk Analysis Credibility.png
[+] Found a total of 1 images in page 44
    Image 1 saved as figures\Figure 6-2 The Perform Qualit

## Loading  the Corpus

### Extracting the Data

In [12]:
extracted_data_PMI = extract_pdf("../input/practice-standard-project-risk-management.pdf",12,123)

In [13]:
print(extracted_data_PMI)

1©2009 Project Management Institute. Practice Standard for Project Risk Management1
  CHAPTER 1 
 INTRODUCTION 
 Project Management Institute (PMI) practice standards are guides to the use of a tool, technique, or process 
identiﬁ  ed in  A Guide to the Project Management Body of Knowledge   ( PMBOK   ®  Guide  – Fourth Edition) or 
other PMI standards. Practice standards are targeted at audiences who participate in the management of 
projects. This includes project managers, project personnel, contract personnel, supervisors, and other project stakeholders. 
 A PMI practice standard describes processes, activities, inputs, and outputs for a speciﬁ  c Knowledge Area. 
It provides information on what the signiﬁ  cant process, tool, or technique is, what it does, why it is signiﬁ  cant, 
when it should be performed or executed, and, if necessary for further clariﬁ  cation, who should perform the process. A practice standard does not prescribe how the process is to be implemented, leaving

# PMI - Data cleaning

In [14]:
def cleaning_pmi(pmi_corpus):
    # Define a list of patterns and substitutions
    patterns_substitutions = [
        (r'ﬁ\s\s', 'fi'),  # Replace 'fi' with 'fi'
        (r'\d©2009 Project Management Institute\. Practice Standard for Project Risk Management\d', ''),  # Remove footer
        (r'©2009 Project Management Institute. Practice Standard for Project Risk Management', ''),  # Remove the copyright notice
        (r'[^A-Za-z0-9\s\.,]+', ''),  # Remove special characters (keep digits, punctuation, and spaces)
        (r'\s+', ' '),  # Remove extra spaces       
    ]

    # Apply all patterns and substitutions in sequence
    for pattern, substitution in patterns_substitutions:
        pmi_corpus = re.sub(pattern, substitution, pmi_corpus)

    return pmi_corpus

In [15]:
clean_pmi = cleaning_pmi(extracted_data_PMI)
clean_pmi



In [16]:
def remove_text(text1, clean_pmi):
    result = clean_pmi.replace(text1, '')
    return result

text1 = "This chapter includes the following sections 1.1 Purpose of the Practice Standard for Project Risk Management 1.2 Project Risk Management Definition 1.3 Role of Project Risk Management in Project Management 1.4 Good Risk Management Practice 1.5 Critical Success Factors for Project Risk Management"
clean_pmi = remove_text(text1, clean_pmi)
print(clean_pmi)



### Extract table of contents from PMI

In [17]:
table_of_contents_PMI=extract_pdf("../input/practice-standard-project-risk-management.pdf",4,9)

In [18]:
print(table_of_contents_PMI)

©2009 Project Management Institute. Practice Standard for Project Risk ManagementTABLE OF CONTENTS
 CHAPTER 1  -  INTRODUCTION  .................................................................................................. .1
 1.1 Purpose of the Practice Standard for Project Risk Management  .......................... 2
 1.2 Project Risk Management Deﬁ  nition  ...................................................................... 4
 1.3 Role of Project Risk Management in Project Management  ................................... 4
 1.4 Good Risk Management Practice  ............................................................................ 5 1.5 Critical Success Factors for Project Risk Management  ......................................... 6
 1.6 Conclusion  .............................................................................................................. .7
 CHAPTER 2  -  PRINCIPLES AND CONCEPTS ..........................................................................

### Cleaning the table of contents

In [19]:
def cleaning_table_of_contents_PMI(table_of_contents_PMI):
    # Define a list of patterns and substitutions
    patterns_substitutions = [
        (r'ﬁ\s\s', 'fi'),  # Replace 'fi' with 'fi'
        (r'\d©2009 Project Management Institute\. Practice Standard for Project Risk Management\d', ''),  # Remove footer
        (r'©2009 Project Management Institute. Practice Standard for Project Risk Management', ''),  # Remove the copyright notice
        (r'\.{2,}', ' '),  # Remove points
        (r'\s.\d\s', ' \ '),  # Remove remaining points
        (r'60B.3', r' \\ B.3'),
        (r'[^A-Za-z0-9\s\.,\\]+', ''),  # Remove special characters (keep digits, punctuation, and spaces)
        (r'(?<=\s)([0-9]+)(?=\s)', r'\1 \\ '),  # Insert backslash after digits between spaces
        (r'\s+', ' '),  # Remove extra spaces
        (r'\S*TABLE OF CONTENTS\S*', ''),  # Remove text and characters attached to it
        (r'(?<!CHAPTER) (\d+) \\ ', '  '),  # Remove page numbers
        (r'CHAPTER (\d+) \\ ', r'CHAPTER \1 '),  
        
    ]

    # Apply all patterns and substitutions in sequence
    for pattern, substitution in patterns_substitutions:
        table_of_contents_PMI = re.sub(pattern, substitution, table_of_contents_PMI)
    # Define the patterns to match
    patterns_to_remove = [re.compile(r'^\d+\.\d+\.\d+'), re.compile(r'^[A-Z]\.\d+\.\d+')]
    return table_of_contents_PMI


In [20]:
clean_toc_PMI=cleaning_table_of_contents_PMI(table_of_contents_PMI)
clean_toc_PMI

' CHAPTER 1 INTRODUCTION \\ 1.1 Purpose of the Practice Standard for Project Risk Management \\ 1.2 Project Risk Management Definition \\ 1.3 Role of Project Risk Management in Project Management \\ 1.4 Good Risk Management Practice \\ 1.5 Critical Success Factors for Project Risk Management \\ 1.6 Conclusion \\ CHAPTER 2 PRINCIPLES AND CONCEPTS \\ 2.1 Introduction \\ 2.2 Definition of Project Risk \\ 2.3 Individual Risks and Overall Project Risk \\ 2.4 Stakeholder Risk Attitudes \\ 2.5 Iterative Process \\ 2.6 Communication \\ 2.7 Responsibility for Project Risk Management \\ 2.8 Project Managers Role for Project Risk Management \\ CHAPTER 3 INTRODUCTION TO PROJECT RISK MANAGEMENT PROCESSES \\ 3.1 Project Risk Management and Project Management \\ 3.2 Project Risk Management Processes \\ CHAPTER 4 PLAN RISK MANAGEMENT \\ 4.1 Purpose and Objectives of the Plan Risk Management Process \\ 4.2 Critical Success Factors for the Plan Risk Management Process \\ 4.2.1 Identify and Address Barri

### Segment PMI into chapters

In [21]:
def segment_book_into_chapters(book_text):
    # Define the pattern for chapter headings (assuming they start with "CHAPTER X - TITLE")
    pattern = re.compile(r'CHAPTER \d [A-Z ]+ |APPENDIX [A-Z ] ')

    # Find all occurrences of the pattern in the text
    chapter_headings = re.findall(pattern, book_text)

    # Split the book into chapters using the found headings
    chapter_segments = re.split(pattern, book_text)

    # Initialize an empty DataFrame
    columns = ['Chapter', 'Content']
    df = pd.DataFrame(columns=columns)

    for heading, content in zip(chapter_headings, chapter_segments[1:]):
        heading = heading.strip().upper()  # Convert heading to uppercase for consistency
        content = content.strip()

        # Check if the heading already exists in the DataFrame
        if heading in df['Chapter'].values:
            # If it exists, find the index and concatenate the content
            idx = df.index[df['Chapter'] == heading].tolist()[0]
            df.at[idx, 'Content'] += ' ' + content
        else:
            # If it's a new heading, add it to the DataFrame
            df = df.append({'Chapter': heading, 'Content': content}, ignore_index=True)

    return df

In [22]:
#Segment PMI into chapters
chapters_data = segment_book_into_chapters(clean_pmi)
chapters_data

Unnamed: 0,Chapter,Content
0,CHAPTER 1 INTRODUCTION,Project Management Institute PMI practice stan...
1,CHAPTER 2 PRINCIPLES AND CONCEPTS,2.1 Introduction This chapter introduces the k...
2,CHAPTER 3 INTRODUCTION TO PROJECT RISK MANAGEM...,3.1 Project Risk Management and Project Manage...
3,CHAPTER 4 PLAN RISK MANAGEMENT,4.1 Purpose and Objectives of the Plan Risk Ma...
4,CHAPTER 5 IDENTIFY RISKS,5.1 Purpose and Objectives of the Identify Ris...
5,CHAPTER 6 PERFORM QUALITATIVE RISK ANALYSIS,6.1 Purpose and Objectives of the Perform Qual...
6,CHAPTER 7 PERFORM QUANTITATIVE RISK ANALYSIS,7.1 Purpose and Objectives of the Perform Quan...
7,CHAPTER 8 PLAN RISK RESPONSES,The Plan Risk Responses process determines eff...
8,CHAPTER 9 MONITOR AND CONTROL RISKS,The effectiveness of Project Risk Management d...
9,APPENDIX A,GUIDELINES FOR A PMI PRACTICE STANDARD A.1 Int...


In [23]:
def segment_toc_into_chapters(book_text):
    # Define the pattern for chapter headings (assuming they start with "CHAPTER X - TITLE")
    pattern = re.compile(r'CHAPTER \d [A-Z ]+ |APPENDIX [A-Z ] ')

    # Find all occurrences of the pattern in the text
    chapter_headings = re.findall(pattern, book_text)

    # Split the book into chapters using the found headings
    chapter_segments = re.split(pattern, book_text)

    # Combine the headings and content into a dictionary
    chapters_data = {}
    for heading, content in zip(chapter_headings, chapter_segments[1:]):
        heading = heading.strip()
        content = content.strip()
        key = heading.upper()  # Convert heading to uppercase for consistency
        if key in chapters_data:
            # If the heading already exists, combine the content with a single space
            chapters_data[key] += ' ' + content
        else:
            # If it's a new heading, add it to the dictionary
            chapters_data[key] = content

    return chapters_data

In [24]:
#Segment the table of contents into chapters
toc_data = segment_toc_into_chapters(clean_toc_PMI)
toc_data

{'CHAPTER 1 INTRODUCTION': '\\ 1.1 Purpose of the Practice Standard for Project Risk Management \\ 1.2 Project Risk Management Definition \\ 1.3 Role of Project Risk Management in Project Management \\ 1.4 Good Risk Management Practice \\ 1.5 Critical Success Factors for Project Risk Management \\ 1.6 Conclusion \\',
 'CHAPTER 2 PRINCIPLES AND CONCEPTS': '\\ 2.1 Introduction \\ 2.2 Definition of Project Risk \\ 2.3 Individual Risks and Overall Project Risk \\ 2.4 Stakeholder Risk Attitudes \\ 2.5 Iterative Process \\ 2.6 Communication \\ 2.7 Responsibility for Project Risk Management \\ 2.8 Project Managers Role for Project Risk Management \\',
 'CHAPTER 3 INTRODUCTION TO PROJECT RISK MANAGEMENT PROCESSES': '\\ 3.1 Project Risk Management and Project Management \\ 3.2 Project Risk Management Processes \\',
 'CHAPTER 4 PLAN RISK MANAGEMENT': '\\ 4.1 Purpose and Objectives of the Plan Risk Management Process \\ 4.2 Critical Success Factors for the Plan Risk Management Process \\ 4.2.1 Id

In [25]:
def Extract_title_and_subtitles_toc(chapters_data):
    updated_chapters_data = {}
    
    for chapter, content in chapters_data.items():
        # Split the chapter into sections based on backslashes
        sections = content.split(' \\ ')        
        # Remove empty sections and remaining \\ 
        sections = [section.strip() for section in sections if section.strip()]
        sections = [re.sub(r'\\', '', section) for section in sections if section.strip()]
        # Update the chapters data with sections
        updated_chapters_data[chapter] = sections
    
    return updated_chapters_data

In [26]:
#Extract titles and subtitles from the table of contents
updated_toc_data = Extract_title_and_subtitles_toc(toc_data)
updated_toc_data

{'CHAPTER 1 INTRODUCTION': [' 1.1 Purpose of the Practice Standard for Project Risk Management',
  '1.2 Project Risk Management Definition',
  '1.3 Role of Project Risk Management in Project Management',
  '1.4 Good Risk Management Practice',
  '1.5 Critical Success Factors for Project Risk Management',
  '1.6 Conclusion '],
 'CHAPTER 2 PRINCIPLES AND CONCEPTS': [' 2.1 Introduction',
  '2.2 Definition of Project Risk',
  '2.3 Individual Risks and Overall Project Risk',
  '2.4 Stakeholder Risk Attitudes',
  '2.5 Iterative Process',
  '2.6 Communication',
  '2.7 Responsibility for Project Risk Management',
  '2.8 Project Managers Role for Project Risk Management '],
 'CHAPTER 3 INTRODUCTION TO PROJECT RISK MANAGEMENT PROCESSES': [' 3.1 Project Risk Management and Project Management',
  '3.2 Project Risk Management Processes '],
 'CHAPTER 4 PLAN RISK MANAGEMENT': [' 4.1 Purpose and Objectives of the Plan Risk Management Process',
  '4.2 Critical Success Factors for the Plan Risk Managemen

In [27]:
#Extract titles from the table of contents

# Define a pattern to match section titles like '1.1', '2.2', etc.
section_pattern = re.compile(r'\s*([0-9]\.[0-9]|[A-Z]\.[0-9])\s')

# Create a new dictionary to store chapter-wise section titles
chapter_sections = {}

# Iterate through each chapter in the updated_toc_data dictionary
for chapter, sections in updated_toc_data.items():
    # Filter sections that match the section title pattern
    section_titles = [section.strip() for section in sections if section_pattern.match(section)]
    
    # Add the chapter and its corresponding section titles to the new dictionary
    chapter_sections[chapter] = section_titles
chapter_sections

{'CHAPTER 1 INTRODUCTION': ['1.1 Purpose of the Practice Standard for Project Risk Management',
  '1.2 Project Risk Management Definition',
  '1.3 Role of Project Risk Management in Project Management',
  '1.4 Good Risk Management Practice',
  '1.5 Critical Success Factors for Project Risk Management',
  '1.6 Conclusion'],
 'CHAPTER 2 PRINCIPLES AND CONCEPTS': ['2.1 Introduction',
  '2.2 Definition of Project Risk',
  '2.3 Individual Risks and Overall Project Risk',
  '2.4 Stakeholder Risk Attitudes',
  '2.5 Iterative Process',
  '2.6 Communication',
  '2.7 Responsibility for Project Risk Management',
  '2.8 Project Managers Role for Project Risk Management'],
 'CHAPTER 3 INTRODUCTION TO PROJECT RISK MANAGEMENT PROCESSES': ['3.1 Project Risk Management and Project Management',
  '3.2 Project Risk Management Processes'],
 'CHAPTER 4 PLAN RISK MANAGEMENT': ['4.1 Purpose and Objectives of the Plan Risk Management Process',
  '4.2 Critical Success Factors for the Plan Risk Management Proce

### Segment each chapter into sections

In [28]:
def segment_chapter_into_sections(chapters_data):
    # Initialize an empty dictionary to store updated chapters data
    updated_chapters_data = {}

    # Iterate through each row in the DataFrame
    for index, row in chapters_data.iterrows():
        chapter = row['Chapter']
        content = row['Content']

        # Add backslashes to numbers
        updated_content = re.sub(r'\s([0-9]\.[0-9]|[A-Z]\.[0-9])\s', r' \\ \1 ', content)
        
        # Split the chapter into sections based on backslashes
        sections = updated_content.split(' \\ ')
        
        # Remove empty sections
        sections = [section.strip() for section in sections if section.strip()]
        
        # Update the chapters data with sections
        updated_chapters_data[chapter] = sections

    # Convert the dictionary to a list of dictionaries for DataFrame creation
    data_for_df = [{'Chapter': chapter, 'Section': section} for chapter, sections in updated_chapters_data.items() for section in sections]

    # Create a DataFrame from the list of dictionaries
    df = pd.DataFrame(data_for_df)

    return df



In [29]:
updated_chapters_data = segment_chapter_into_sections(chapters_data)
updated_chapters_data

Unnamed: 0,Chapter,Section
0,CHAPTER 1 INTRODUCTION,Project Management Institute PMI practice stan...
1,CHAPTER 1 INTRODUCTION,1.1 Purpose of the Practice Standard for Proje...
2,CHAPTER 1 INTRODUCTION,1.2 Project Risk Management Definition The def...
3,CHAPTER 1 INTRODUCTION,1.3 Role of Project Risk Management in Project...
4,CHAPTER 1 INTRODUCTION,1.4 Good Risk Management Practice Project Risk...
...,...,...
61,APPENDIX D,"D.4 Techniques, Examples and Templates for Per..."
62,APPENDIX D,8.0 Primavera Pertmaster Figure D15. Example o...
63,APPENDIX D,"D.5 Techniques, Examples, and Templates for Pl..."
64,APPENDIX D,"D.6 Techniques, Examples and Templates for Mon..."


In [30]:
# Create a new column 'section_title' in the DataFrame
updated_chapters_data['Section_title'] = None

# Iterate through each row in the DataFrame
for index, row in updated_chapters_data.iterrows():
    section_title = ''
    # Iterate through each chapter in the dictionary
    for chapter, section_titles in chapter_sections.items():
        # Check if the row's section matches any title in the current chapter
        for title in section_titles:
            if title.strip().lower() in row['Section'].strip().lower():
                section_title = title.strip()
                # Remove the matched title from the 'Section' column
                updated_chapters_data.at[index, 'Section'] = row['Section'].replace(title, '').strip()
                break
        if section_title:
            break
    
    # Assign the matched section title to the 'section_title' column
    updated_chapters_data.at[index, 'Section_title'] = section_title

# Reorder the columns
updated_chapters_data = updated_chapters_data[['Chapter', 'Section_title', 'Section']]

# Print the updated DataFrame
updated_chapters_data


Unnamed: 0,Chapter,Section_title,Section
0,CHAPTER 1 INTRODUCTION,,Project Management Institute PMI practice stan...
1,CHAPTER 1 INTRODUCTION,1.1 Purpose of the Practice Standard for Proje...,The purpose of the Practice Standard for Proje...
2,CHAPTER 1 INTRODUCTION,1.2 Project Risk Management Definition,"The definition of Project Risk Management, as ..."
3,CHAPTER 1 INTRODUCTION,1.3 Role of Project Risk Management in Project...,Project Risk Management is not an optional act...
4,CHAPTER 1 INTRODUCTION,1.4 Good Risk Management Practice,Project Risk Management is a valuable componen...
...,...,...,...
61,APPENDIX D,,"D.4 Techniques, Examples and Templates for Per..."
62,APPENDIX D,,8.0 Primavera Pertmaster Figure D15. Example o...
63,APPENDIX D,"D.5 Techniques, Examples, and Templates for Pl...",8 Plan Risk Responses develops the set of acti...
64,APPENDIX D,,"D.6 Techniques, Examples and Templates for Mon..."


In [31]:
#Extract sub-titles from the table of contents

# Define a pattern to match section titles like '1.1', '2.2', etc.
subsection_pattern = re.compile(r'\s*([0-9]\.[0-9]\.[0-9]+|[A-Z]\.[0-9]\.[0-9]+)\s')

# Create a new dictionary to store chapter-wise section titles
chapter_subsections = {}

# Iterate through each chapter in the updated_toc_data dictionary
for chapter, subsections in updated_toc_data.items():
    # Filter sections that match the section title pattern
    subsection_titles = [subsection.strip() for subsection in subsections if subsection_pattern.match(subsection)]
    
    # Add the chapter and its corresponding section titles to the new dictionary
    chapter_subsections[chapter] = subsection_titles
chapter_subsections

{'CHAPTER 1 INTRODUCTION': [],
 'CHAPTER 2 PRINCIPLES AND CONCEPTS': [],
 'CHAPTER 3 INTRODUCTION TO PROJECT RISK MANAGEMENT PROCESSES': [],
 'CHAPTER 4 PLAN RISK MANAGEMENT': ['4.2.1 Identify and Address Barriers to Successful Project Risk Management',
  '4.2.2 Involve Project Stakeholders in Project Risk Management',
  '4.2.3 Comply with the Organizations Objectives, Policies, and Practices',
  '4.3.1 Planning Sessions',
  '4.3.2 Templates'],
 'CHAPTER 5 IDENTIFY RISKS': ['5.2.1 Early Identification',
  '5.2.2 Iterative Identification',
  '5.2.3 Emergent Identification',
  '5.2.4 Comprehensive Identification',
  '5.2.5 Explicit Identification of Opportunities',
  '5.2.6 Multiple Perspectives',
  '5.2.7 Risks Linked to Project Objectives',
  '5.2.8 Complete Risk Statement',
  '5.2.9 Ownership and Level of Detail',
  '5.2.10 Objectivity',
  '5.3.1 Historical Review',
  '5.3.2 Current Assessments',
  '5.3.3 Creativity Techniques'],
 'CHAPTER 6 PERFORM QUALITATIVE RISK ANALYSIS': ['6.2.1

### Segment section into subsections

In [32]:
import pandas as pd
import re

def segment_sections_into_subsections(updated_clean_pmi):
    # Initialize an empty list to store data for the new DataFrame
    data_for_df = []

    # Initialize variables to keep track of current chapter and section title
    current_chapter = None
    current_section_title = None
    current_section = None

    # Iterate through each row in the DataFrame
    for index, row in updated_clean_pmi.iterrows():
        chapter = row['Chapter']
        section_title = row['Section_title']
        section = row['Section']

        # Check if the section contains the specified pattern
        matches = list(re.finditer(r'\s([0-9]\.[0-9]\.[0-9]|[A-Z]\.[0-9]\.[0-9])\s', section))

        # Check if there are any matches
        if matches:
            # Iterate through all matches in the section
            for match in matches:
                # Find the index of the current title matching the pattern
                title_index = match.start()

                # Check if the title is preceded by the word "sections"
                if title_index > 8 and (section[title_index - 8:title_index].lower() == 'sections' or section[title_index - 8:title_index].lower() == 'through'):
                    continue

                # Extract the part of the section after the current title
                section_after_title = section[title_index:]

                # Add backslashes to subsection numbers
                updated_section = re.sub(r'\s([0-9]\.[0-9]\.[0-9]+|[A-Z]\.[0-9]\.[0-9]+)\s', r' \\ \1 ', section_after_title)

                # Split the section into subsections based on backslashes
                subsections = updated_section.split(' \\ ')

                # Remove empty subsections
                subsections = [subsection.strip() for subsection in subsections if subsection.strip()]

                # If there are subsections, update current_chapter and current_section_title
                if subsections:
                    current_chapter = chapter
                    current_section_title = section_title

                # Append data for the new DataFrame for each subsection
                for subsection in subsections:
                    data_for_df.append({'Chapter': current_chapter, 'Section_title': current_section_title, 'Section': section, 'Subsection': subsection})
        else:
            # If there are no matches, add the original section with an empty Subsection
            data_for_df.append({'Chapter': chapter, 'Section_title': section_title, 'Section': section, 'Subsection': ''})

    # Create a DataFrame from the list of dictionaries
    df = pd.DataFrame(data_for_df)

    return df


In [33]:
data_for_df = segment_sections_into_subsections(updated_chapters_data)
data_for_df

Unnamed: 0,Chapter,Section_title,Section,Subsection
0,CHAPTER 1 INTRODUCTION,,Project Management Institute PMI practice stan...,
1,CHAPTER 1 INTRODUCTION,1.1 Purpose of the Practice Standard for Proje...,The purpose of the Practice Standard for Proje...,
2,CHAPTER 1 INTRODUCTION,1.2 Project Risk Management Definition,"The definition of Project Risk Management, as ...",
3,CHAPTER 1 INTRODUCTION,1.3 Role of Project Risk Management in Project...,Project Risk Management is not an optional act...,
4,CHAPTER 1 INTRODUCTION,1.4 Good Risk Management Practice,Project Risk Management is a valuable componen...,
...,...,...,...,...
292,APPENDIX D,,"D.4 Techniques, Examples and Templates for Per...",D.4.1 Techniques for Perform Quantitative Risk...
293,APPENDIX D,,8.0 Primavera Pertmaster Figure D15. Example o...,7.3.8 from Oracle Hyperion Decisioneering
294,APPENDIX D,"D.5 Techniques, Examples, and Templates for Pl...",8 Plan Risk Responses develops the set of acti...,D.5.1 Techniques for Plan Risk Response Techni...
295,APPENDIX D,,"D.6 Techniques, Examples and Templates for Mon...",D.6.1 Techniques for Monitor and Control Risks...


In [34]:
# Create a new column 'section_title' in the DataFrame
data_for_df['Subsection_title'] = None

# Iterate through each row in the DataFrame
for index, row in data_for_df.iterrows():
    subsection_title = ''
    # Iterate through each chapter in the dictionary
    for chapter, subsection_titles in chapter_subsections.items():
        # Check if the row's section matches any title in the current chapter
        for subtitle in subsection_titles:
            if subtitle.strip().lower() in row['Subsection'].strip().lower():
                subsection_title = subtitle.strip()
                # Remove the matched title from the 'Section' column
                data_for_df.at[index, 'Subsection'] = row['Subsection'].replace(subtitle, '').strip()
                break
        if section_title:
            break
    
    # Assign the matched section title to the 'section_title' column
    data_for_df.at[index, 'Subsection_title'] = subsection_title

# Reorder the columns
data_for_df = data_for_df[['Chapter', 'Section_title', 'Section','Subsection_title','Subsection']]

# Print the updated DataFrame
data_for_df

Unnamed: 0,Chapter,Section_title,Section,Subsection_title,Subsection
0,CHAPTER 1 INTRODUCTION,,Project Management Institute PMI practice stan...,,
1,CHAPTER 1 INTRODUCTION,1.1 Purpose of the Practice Standard for Proje...,The purpose of the Practice Standard for Proje...,,
2,CHAPTER 1 INTRODUCTION,1.2 Project Risk Management Definition,"The definition of Project Risk Management, as ...",,
3,CHAPTER 1 INTRODUCTION,1.3 Role of Project Risk Management in Project...,Project Risk Management is not an optional act...,,
4,CHAPTER 1 INTRODUCTION,1.4 Good Risk Management Practice,Project Risk Management is a valuable componen...,,
...,...,...,...,...,...
292,APPENDIX D,,"D.4 Techniques, Examples and Templates for Per...",D.4.1 Techniques for Perform Quantitative Risk...,Examples of techniques for Perform Quantitativ...
293,APPENDIX D,,8.0 Primavera Pertmaster Figure D15. Example o...,,7.3.8 from Oracle Hyperion Decisioneering
294,APPENDIX D,"D.5 Techniques, Examples, and Templates for Pl...",8 Plan Risk Responses develops the set of acti...,D.5.1 Techniques for Plan Risk Response,Techniques for Plan Risk Responses are given i...
295,APPENDIX D,,"D.6 Techniques, Examples and Templates for Mon...",D.6.1 Techniques for Monitor and Control Risks...,Techniques for Monitor and Control Risks are g...


### Segment subsections into subsubsections

In [35]:
def segment_subsections_into_subsubsections(updated_clean_pmi):
    # Initialize an empty list to store data for the new DataFrame
    data_for_df = []

    # Iterate through each row in the DataFrame
    for index, row in updated_clean_pmi.iterrows():
        chapter = row['Chapter']
        section_title = row['Section_title']
        section = row['Section']
        subsection_title = row['Subsection_title']
        subsection = row['Subsection']

        # Check if the subsection contains the specified pattern
        matches = list(re.finditer(r'\s([0-9]\.[0-9]\.[0-9]\.[0-9]|[A-Z]\.[0-9]\.[0-9]\.[0-9])\s', subsection))

        # Check if there are any matches
        if matches:
            # Iterate through all matches in the subsection
            for match in matches:
                # Find the index of the current title matching the pattern
                title_index = match.start()
                
                # Check if the title is preceded by certainn
                if title_index > 2 and (subsection[title_index - 2:title_index].lower() == 'and'
                                        or subsection[title_index - 2:title_index].lower() == 'in'  
                                        or subsection[title_index - 2:title_index].lower() == 'through' 
                                        or subsection[title_index - 2:title_index].lower() == 'sections'):
                    continue


                # Extract the part of the subsection after the current title
                subsection_after_title = subsection[title_index:]

                # Add backslashes to subsubsection numbers
                updated_subsection = re.sub(r'\s([0-9]\.[0-9]\.[0-9]\.[0-9]|[A-Z]\.[0-9]\.[0-9]\.[0-9])\s', r' \\ \1 ', subsection_after_title)

                # Split the subsection into subsubsections based on backslashes
                subsubsections = updated_subsection.split(' \\ ')

                # Remove empty subsubsections
                subsubsections = [subsubsection.strip() for subsubsection in subsubsections if subsubsection.strip()]
                # Remove the extracted subsections from the original section
                subsection = subsection.replace(subsection_after_title, '')

                # Append data for the new DataFrame for each subsubsection
                if subsubsections:
                    for subsubsection in subsubsections:
                        data_for_df.append({'Chapter': chapter, 'Section_title': section_title, 'Section': section,'Subsection_title': subsection_title, 'Subsection': subsection, 'Subsubsection':subsubsection})
                else:
                    # If there are no subsubsections, add the original subsection with an empty Subsubsection
                    data_for_df.append({'Chapter': chapter, 'Section_title': section_title, 'Section': section,'Subsection_title': subsection_title, 'Subsection': subsection, 'Subsubsection':''})
        else:
            # If there are no matches, add the original subsection with an empty Subsubsection
            data_for_df.append({'Chapter': chapter, 'Section_title': section_title, 'Section': section,'Subsection_title': subsection_title, 'Subsection': subsection, 'Subsubsection':''})

    # Create a DataFrame from the list of dictionaries
    df = pd.DataFrame(data_for_df)

    return df


In [36]:
updated_data_for_df = segment_subsections_into_subsubsections(data_for_df)
updated_data_for_df.head(2)

Unnamed: 0,Chapter,Section_title,Section,Subsection_title,Subsection,Subsubsection
0,CHAPTER 1 INTRODUCTION,,Project Management Institute PMI practice stan...,,,
1,CHAPTER 1 INTRODUCTION,1.1 Purpose of the Practice Standard for Proje...,The purpose of the Practice Standard for Proje...,,,


In [37]:
def process_dataframe(input_df):
    
    # Remove duplicate rows
    
    new_df = pd.DataFrame(columns=['Title','Description'])
    input_df = input_df.drop_duplicates()
    
    # Create a dictionary to store the mapping of chapters to titles
    chapter_to_title = {}
    section_title_seen = set()
    subsection_title_seen = set()

    # Iterate through the DataFrame
    for index, row in input_df.iterrows():
        chapter = row['Chapter']
        section = row['Section']
        subsection = row['Subsection']
        subsubsection = row['Subsubsection']
        section_title = row['Section_title']
        subsection_title = row['Subsection_title']
        subsubsection_title = row['Subsubsection']
        

        # Check if the chapter has already been encountered
        if chapter in chapter_to_title:
            
            if section_title in section_title_seen:
               
                if subsection_title in subsection_title_seen:
                    title = subsubsection_title
                    description=subsubsection
                    
                elif subsection_title not in subsection_title_seen:
                    title = subsection_title
                    subsection_title_seen.add(subsection_title)
                    description=subsection
                    
            elif section_title not in section_title_seen:
                title = section_title
                section_title_seen.add(section_title)
                description=section
        else:
            # If not, use the Chapter as the Title and update the mapping
            title = chapter
            chapter_to_title[chapter] = title
            description=section

        # Add the Title to the new DataFrame
        new_df = new_df.append({'Title': title,'Description':description}, ignore_index=True)

    return new_df


In [38]:
df = process_dataframe(updated_data_for_df)
df.head(3)

Unnamed: 0,Title,Description
0,CHAPTER 1 INTRODUCTION,Project Management Institute PMI practice stan...
1,1.1 Purpose of the Practice Standard for Proje...,The purpose of the Practice Standard for Proje...
2,1.2 Project Risk Management Definition,"The definition of Project Risk Management, as ..."


### Extract definitions

In [39]:
def extract_definitions(df):
    for index, row in df.iterrows():
        if 'Definition' in row['Title']:
            df.at[index, 'Definition'] = row['Description']
        else:
            df.at[index, 'Definition'] = '' 
    return df

In [40]:
df_after_def_extraction=extract_definitions(df)
df_after_def_extraction.head(3)

Unnamed: 0,Title,Description,Definition
0,CHAPTER 1 INTRODUCTION,Project Management Institute PMI practice stan...,
1,1.1 Purpose of the Practice Standard for Proje...,The purpose of the Practice Standard for Proje...,
2,1.2 Project Risk Management Definition,"The definition of Project Risk Management, as ...","The definition of Project Risk Management, as ..."


### Extracting Concepts with Part-of-Speech Tagging

In [41]:
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\Admin\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     C:\Users\Admin\AppData\Roaming\nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!


True

In [42]:
import pandas as pd
import nltk
from nltk.tokenize import sent_tokenize, word_tokenize
from nltk import pos_tag
import re

# Function to perform POS tagging on a sentence
def pos_tagging(sentence):
    # Tokenize the words in the sentence
    words = word_tokenize(sentence)
    
    # Apply POS tagging to the tokenized words
    tagged_words = pos_tag(words)
    
    return tagged_words

# Apply sent_tokenize to the "Description" column to get sentences
df_after_def_extraction['Words_Tags'] = df_after_def_extraction['Description'].apply(lambda x: sent_tokenize(x))

# Apply POS tagging to each sentence in the "Words_Tags" column
df_after_def_extraction['Words_Tags'] = df_after_def_extraction['Words_Tags'].apply(lambda x: [pos_tagging(sentence) for sentence in x])

def filter_verbs_nouns(tagged_words):
    verbs_nouns = [word for word, tag in tagged_words if tag in ['VB','VBD','VBZ','VBP','NNS','NNP','NNPS','NN']]
    return ' '.join(verbs_nouns)

# Filter verbs (VB) and nouns (NN) in each sentence
df_after_def_extraction['Sentences'] = df_after_def_extraction['Words_Tags'].apply(lambda x: [filter_verbs_nouns(tagged_words) for tagged_words in x])

# Function to extract nouns from a list of words and POS tags
def extract_nouns(words_tags):
    nouns = [word for word, tag in words_tags if tag.startswith('N')]
    return nouns

# Apply the function to each element in the 'Words_Tags' column and store the results in a new column 'Concept'
df_after_def_extraction['Concepts'] = df_after_def_extraction['Words_Tags'].apply(lambda x: [extract_nouns(sent) for sent in x])

pd.set_option('display.max_colwidth', None)
# Display the resulting DataFrame
df_after_def_extraction.head(3)


Unnamed: 0,Title,Description,Definition,Words_Tags,Sentences,Concepts
0,CHAPTER 1 INTRODUCTION,"Project Management Institute PMI practice standards are guides to the use of a tool, technique, or process identified in A Guide to the Project Management Body of Knowledge PMBOK Guide Fourth Edition or other PMI standards. Practice standards are targeted at audiences who participate in the management of projects. This includes project managers, project personnel, contract personnel, supervisors, and other project stakeholders. A PMI practice standard describes processes, activities, inputs, and outputs for a specific Knowledge Area. It provides information on what the significant process, tool, or technique is, what it does, why it is significant, when it should be performed or executed, and, if necessary for further clarification, who should perform the process. A practice standard does not prescribe how the process is to be implemented, leaving that subject for other forums such as handbooks, manuals, and courses.",,"[[(Project, NNP), (Management, NNP), (Institute, NNP), (PMI, NNP), (practice, NN), (standards, NNS), (are, VBP), (guides, NNS), (to, TO), (the, DT), (use, NN), (of, IN), (a, DT), (tool, NN), (,, ,), (technique, NN), (,, ,), (or, CC), (process, NN), (identified, VBN), (in, IN), (A, NNP), (Guide, NNP), (to, TO), (the, DT), (Project, NNP), (Management, NNP), (Body, NNP), (of, IN), (Knowledge, NNP), (PMBOK, NNP), (Guide, NNP), (Fourth, NNP), (Edition, NNP), (or, CC), (other, JJ), (PMI, NNP), (standards, NNS), (., .)], [(Practice, NN), (standards, NNS), (are, VBP), (targeted, VBN), (at, IN), (audiences, NNS), (who, WP), (participate, VBP), (in, IN), (the, DT), (management, NN), (of, IN), (projects, NNS), (., .)], [(This, DT), (includes, VBZ), (project, NN), (managers, NNS), (,, ,), (project, NN), (personnel, NNS), (,, ,), (contract, NN), (personnel, NNS), (,, ,), (supervisors, NNS), (,, ,), (and, CC), (other, JJ), (project, NN), (stakeholders, NNS), (., .)], [(A, DT), (PMI, NNP), (practice, NN), (standard, NN), (describes, VBZ), (processes, NNS), (,, ,), (activities, NNS), (,, ,), (inputs, NNS), (,, ,), (and, CC), (outputs, NNS), (for, IN), (a, DT), (specific, JJ), (Knowledge, NNP), (Area, NNP), (., .)], [(It, PRP), (provides, VBZ), (information, NN), (on, IN), (what, WP), (the, DT), (significant, JJ), (process, NN), (,, ,), (tool, NN), (,, ,), (or, CC), (technique, NN), (is, VBZ), (,, ,), (what, WP), (it, PRP), (does, VBZ), (,, ,), (why, WRB), (it, PRP), (is, VBZ), (significant, JJ), (,, ,), (when, WRB), (it, PRP), (should, MD), (be, VB), (performed, VBN), (or, CC), (executed, VBN), (,, ,), (and, CC), (,, ,), (if, IN), (necessary, JJ), (for, IN), (further, JJ), (clarification, NN), (,, ,), (who, WP), (should, MD), (perform, VB), (the, DT), (process, NN), (., .)], [(A, DT), (practice, NN), (standard, NN), (does, VBZ), (not, RB), (prescribe, VB), (how, WRB), (the, DT), (process, NN), (is, VBZ), (to, TO), (be, VB), (implemented, VBN), (,, ,), (leaving, VBG), (that, IN), (subject, NN), (for, IN), (other, JJ), (forums, NNS), (such, JJ), (as, IN), (handbooks, NNS), (,, ,), (manuals, NNS), (,, ,), (and, CC), (courses, NNS), (., .)]]","[Project Management Institute PMI practice standards are guides use tool technique process A Guide Project Management Body Knowledge PMBOK Guide Fourth Edition PMI standards, Practice standards are audiences participate management projects, includes project managers project personnel contract personnel supervisors project stakeholders, PMI practice standard describes processes activities inputs outputs Knowledge Area, provides information process tool technique is does is be clarification perform process, practice standard does prescribe process is be subject forums handbooks manuals courses]","[[Project, Management, Institute, PMI, practice, standards, guides, use, tool, technique, process, A, Guide, Project, Management, Body, Knowledge, PMBOK, Guide, Fourth, Edition, PMI, standards], [Practice, standards, audiences, management, projects], [project, managers, project, personnel, contract, personnel, supervisors, project, stakeholders], [PMI, practice, standard, processes, activities, inputs, outputs, Knowledge, Area], [information, process, tool, technique, clarification, process], [practice, standard, process, subject, forums, handbooks, manuals, courses]]"
1,1.1 Purpose of the Practice Standard for Project Risk Management,"The purpose of the Practice Standard for Project Risk Management is to a provide a standard for project management practitioners and other stakeholders that defines the aspects of Project Risk Management that are recognized as good practice on most projects most of the time and b provide a standard that is globally applicable and consistently applied. This practice standard has a descriptive purpose rather than one used for training or educational purposes. The Practice Standard for Project Risk Management covers risk management as it is applied to single projects only. Like the PMBOK Guide Fourth Edition, this practice standard does not cover risk in programs or portfolios of projects. Chapter 11 of the PMBOK Guide Fourth Edition, is the basis for the Practice Standard for Project Risk Management . This practice standard is consistent with that chapter, emphasizing the concepts and principles relating to Project Risk Management. It is aligned with other PMI practice standards. Figure 11 compares the purposes of this practice standard to those of the PMBOK Guide Fourth Edition and textbooks, handbooks, and courses. Figure 11. Hierarchy of PMI Project Risk Management Resources This practice standard is organized in three main sections 1. Introductory material including the framework, purpose, principles, context of, and introduction to Project Risk Management processes as defined in the PMBOK Guide Fourth Edition. 2. Principles underlying the six Project Risk Management processes in the PMBOK Guide Fourth Edition. The six processes are as follows Plan Risk Management, Identify Risks, Perform Qualitative Risk Analysis, Perform Quantitative Risk Analysis, Plan Risk Responses, and Monitor and Control Risks. Each of these six processes is described in a chapter that addresses the following four topics a purpose and objectives of the process b critical success factors for the process c tools and techniques for the process and d documenting the results of the process. 3. A glossary of terms which are used in this practice standard. This practice standard emphasizes those principles that are fundamental to effective, comprehensive, and successful Project Risk Management. These principles can and should be stated at a general level for several reasons 1. Principles are expected to be agreed upon now and to be valid in the future. While tools and techniques are constantly evolving, the principles have more stability and persistence. 2. Different projects, organizations, and situations will require different approaches to Project Risk Management. In particular, risk management is a discipline that contains a series of processes to apply to both large and small projects. Risk management will be more effective if its practice is tailored to the project and congruent with the organizational culture, processes and assets. There are many different ways of conducting risk management that may comply with the principles of Project Risk Management as presented in this practice standard. 3. The principles are applicable to projects carried out in a global context, re ecting the many business and organizational arrangements between participants, for example, joint ventures between commercial and national companies, government and nongovernment organizations, and the crosscultural environment often found on these project teams. The principles described herein can be used as a check for an organizations processes. Practitioners can establish processes specific to their particular situation, project, or organization and then compare them with these principles, thus validating them against good Project Risk Management practice.",,"[[(The, DT), (purpose, NN), (of, IN), (the, DT), (Practice, NNP), (Standard, NNP), (for, IN), (Project, NNP), (Risk, NNP), (Management, NNP), (is, VBZ), (to, TO), (a, DT), (provide, NN), (a, DT), (standard, NN), (for, IN), (project, NN), (management, NN), (practitioners, NNS), (and, CC), (other, JJ), (stakeholders, NNS), (that, WDT), (defines, VBZ), (the, DT), (aspects, NNS), (of, IN), (Project, NNP), (Risk, NNP), (Management, NNP), (that, WDT), (are, VBP), (recognized, VBN), (as, IN), (good, JJ), (practice, NN), (on, IN), (most, JJS), (projects, NNS), (most, RBS), (of, IN), (the, DT), (time, NN), (and, CC), (b, VB), (provide, VB), (a, DT), (standard, NN), (that, WDT), (is, VBZ), (globally, RB), (applicable, JJ), (and, CC), (consistently, RB), (applied, VBN), (., .)], [(This, DT), (practice, NN), (standard, NN), (has, VBZ), (a, DT), (descriptive, JJ), (purpose, NN), (rather, RB), (than, IN), (one, CD), (used, VBN), (for, IN), (training, NN), (or, CC), (educational, JJ), (purposes, NNS), (., .)], [(The, DT), (Practice, NNP), (Standard, NNP), (for, IN), (Project, NNP), (Risk, NNP), (Management, NNP), (covers, VBZ), (risk, NN), (management, NN), (as, IN), (it, PRP), (is, VBZ), (applied, VBN), (to, TO), (single, JJ), (projects, NNS), (only, RB), (., .)], [(Like, IN), (the, DT), (PMBOK, NNP), (Guide, NNP), (Fourth, NNP), (Edition, NNP), (,, ,), (this, DT), (practice, NN), (standard, NN), (does, VBZ), (not, RB), (cover, VB), (risk, NN), (in, IN), (programs, NNS), (or, CC), (portfolios, NNS), (of, IN), (projects, NNS), (., .)], [(Chapter, NN), (11, CD), (of, IN), (the, DT), (PMBOK, NNP), (Guide, NNP), (Fourth, NNP), (Edition, NNP), (,, ,), (is, VBZ), (the, DT), (basis, NN), (for, IN), (the, DT), (Practice, NNP), (Standard, NNP), (for, IN), (Project, NNP), (Risk, NNP), (Management, NNP), (., .)], [(This, DT), (practice, NN), (standard, NN), (is, VBZ), (consistent, JJ), (with, IN), (that, DT), (chapter, NN), (,, ,), (emphasizing, VBG), (the, DT), (concepts, NNS), (and, CC), (principles, NNS), (relating, VBG), (to, TO), (Project, NNP), (Risk, NNP), (Management, NNP), (., .)], [(It, PRP), (is, VBZ), (aligned, VBN), (with, IN), (other, JJ), (PMI, NNP), (practice, NN), (standards, NNS), (., .)], [(Figure, NN), (11, CD), (compares, VBZ), (the, DT), (purposes, NNS), (of, IN), (this, DT), (practice, NN), (standard, NN), (to, TO), (those, DT), (of, IN), (the, DT), (PMBOK, NNP), (Guide, NNP), (Fourth, NNP), (Edition, NNP), (and, CC), (textbooks, NNS), (,, ,), (handbooks, NNS), (,, ,), (and, CC), (courses, NNS), (., .)], [(Figure, NN), (11, CD), (., .)], [(Hierarchy, NNP), (of, IN), (PMI, NNP), (Project, NNP), (Risk, NNP), (Management, NNP), (Resources, NNPS), (This, DT), (practice, NN), (standard, NN), (is, VBZ), (organized, VBN), (in, IN), (three, CD), (main, JJ), (sections, NNS), (1, CD), (., .)], [(Introductory, NNP), (material, NN), (including, VBG), (the, DT), (framework, NN), (,, ,), (purpose, NN), (,, ,), (principles, NNS), (,, ,), (context, NN), (of, IN), (,, ,), (and, CC), (introduction, NN), (to, TO), (Project, VB), (Risk, NNP), (Management, NNP), (processes, VBZ), (as, RB), (defined, VBN), (in, IN), (the, DT), (PMBOK, NNP), (Guide, NNP), (Fourth, NNP), (Edition, NNP), (., .)], [(2, CD), (., .)], [(Principles, NNS), (underlying, VBG), (the, DT), (six, CD), (Project, NNP), (Risk, NNP), (Management, NNP), (processes, VBZ), (in, IN), (the, DT), (PMBOK, NNP), (Guide, NNP), (Fourth, NNP), (Edition, NNP), (., .)], [(The, DT), (six, CD), (processes, NNS), (are, VBP), (as, IN), (follows, VBZ), (Plan, NNP), (Risk, NNP), (Management, NNP), (,, ,), (Identify, NNP), (Risks, NNP), (,, ,), (Perform, NNP), (Qualitative, NNP), (Risk, NNP), (Analysis, NNP), (,, ,), (Perform, NNP), (Quantitative, NNP), (Risk, NNP), (Analysis, NNP), (,, ,), (Plan, NNP), (Risk, NNP), (Responses, NNP), (,, ,), (and, CC), (Monitor, NNP), (and, CC), (Control, NNP), (Risks, NNP), (., .)], [(Each, DT), (of, IN), (these, DT), (six, CD), (processes, NNS), (is, VBZ), (described, VBN), (in, IN), (a, DT), (chapter, NN), (that, WDT), (addresses, VBZ), (the, DT), (following, JJ), (four, CD), (topics, NNS), (a, DT), (purpose, NN), (and, CC), (objectives, NNS), (of, IN), (the, DT), (process, NN), (b, SYM), (critical, JJ), (success, NN), (factors, NNS), (for, IN), (the, DT), (process, NN), (c, NN), (tools, NNS), (and, CC), (techniques, NNS), (for, IN), (the, DT), (process, NN), (and, CC), (d, NN), (documenting, VBG), (the, DT), (results, NNS), (of, IN), (the, DT), (process, NN), (., .)], [(3, CD), (., .)], [(A, DT), (glossary, NN), (of, IN), (terms, NNS), (which, WDT), (are, VBP), (used, VBN), (in, IN), (this, DT), (practice, NN), (standard, NN), (., .)], [(This, DT), (practice, NN), (standard, NN), (emphasizes, VBZ), (those, DT), (principles, NNS), (that, WDT), (are, VBP), (fundamental, JJ), (to, TO), (effective, JJ), (,, ,), (comprehensive, JJ), (,, ,), (and, CC), (successful, JJ), (Project, NNP), (Risk, NNP), (Management, NNP), (., .)], [(These, DT), (principles, NNS), (can, MD), (and, CC), (should, MD), (be, VB), (stated, VBN), (at, IN), (a, DT), (general, JJ), (level, NN), (for, IN), (several, JJ), (reasons, NNS), (1, CD), (., .)], [(Principles, NNS), (are, VBP), (expected, VBN), (to, TO), (be, VB), (agreed, VBN), (upon, IN), (now, RB), (and, CC), (to, TO), (be, VB), (valid, JJ), (in, IN), (the, DT), (future, NN), (., .)], [(While, IN), (tools, NNS), (and, CC), (techniques, NNS), (are, VBP), (constantly, RB), (evolving, VBG), (,, ,), (the, DT), (principles, NNS), (have, VBP), (more, JJR), (stability, NN), (and, CC), (persistence, NN), (., .)], [(2, CD), (., .)], [(Different, JJ), (projects, NNS), (,, ,), (organizations, NNS), (,, ,), (and, CC), (situations, NNS), (will, MD), (require, VB), (different, JJ), (approaches, NNS), (to, TO), (Project, VB), (Risk, NNP), (Management, NNP), (., .)], [(In, IN), (particular, JJ), (,, ,), (risk, JJ), (management, NN), (is, VBZ), (a, DT), (discipline, NN), (that, WDT), (contains, VBZ), (a, DT), (series, NN), (of, IN), (processes, NNS), (to, TO), (apply, VB), (to, TO), (both, DT), (large, JJ), (and, CC), (small, JJ), (projects, NNS), (., .)], [(Risk, NNP), (management, NN), (will, MD), (be, VB), (more, RBR), (effective, JJ), (if, IN), (its, PRP$), (practice, NN), (is, VBZ), (tailored, VBN), (to, TO), (the, DT), (project, NN), (and, CC), (congruent, NN), (with, IN), (the, DT), (organizational, JJ), (culture, NN), (,, ,), (processes, NNS), (and, CC), (assets, NNS), (., .)], [(There, EX), (are, VBP), (many, JJ), (different, JJ), (ways, NNS), (of, IN), (conducting, VBG), (risk, NN), (management, NN), (that, WDT), (may, MD), (comply, VB), (with, IN), (the, DT), (principles, NNS), (of, IN), (Project, NNP), (Risk, NNP), (Management, NNP), (as, IN), (presented, VBN), (in, IN), (this, DT), (practice, NN), (standard, NN), (., .)], [(3, CD), (., .)], [(The, DT), (principles, NNS), (are, VBP), (applicable, JJ), (to, TO), (projects, NNS), (carried, VBN), (out, RP), (in, IN), (a, DT), (global, JJ), (context, NN), (,, ,), (re, NN), (ecting, VBG), (the, DT), (many, JJ), (business, NN), (and, CC), (organizational, JJ), (arrangements, NNS), (between, IN), (participants, NNS), (,, ,), (for, IN), (example, NN), (,, ,), (joint, JJ), (ventures, NNS), (between, IN), (commercial, JJ), (and, CC), (national, JJ), (companies, NNS), (,, ,), (government, NN), (and, CC), (nongovernment, JJ), (organizations, NNS), (,, ,), (and, CC), (the, DT), (crosscultural, JJ), (environment, NN), (often, RB), (found, VBN), (on, IN), (these, DT), (project, NN), (teams, NNS), (., .)], [(The, DT), (principles, NNS), (described, VBD), (herein, NNS), (can, MD), (be, VB), (used, VBN), (as, IN), (a, DT), (check, NN), (for, IN), (an, DT), (organizations, NNS), (processes, NNS), (., .)], [(Practitioners, NNS), (can, MD), (establish, VB), (processes, NNS), (specific, JJ), (to, TO), (their, PRP$), (particular, JJ), (situation, NN), (,, ,), (project, NN), (,, ,), (or, CC), (organization, NN), (and, CC), (then, RB), (compare, VB), (them, PRP), (with, IN), (these, DT), (principles, NNS), (,, ,), (thus, RB), (validating, VBG), (them, PRP), (against, IN), (good, JJ), (Project, NN), (Risk, NNP), (Management, NNP), (practice, NN), (., .)]]","[purpose Practice Standard Project Risk Management is provide standard project management practitioners stakeholders defines aspects Project Risk Management are practice projects time b provide standard is, practice standard has purpose training purposes, Practice Standard Project Risk Management covers risk management is projects, PMBOK Guide Fourth Edition practice standard does cover risk programs portfolios projects, Chapter PMBOK Guide Fourth Edition is basis Practice Standard Project Risk Management, practice standard is chapter concepts principles Project Risk Management, is PMI practice standards, Figure compares purposes practice standard PMBOK Guide Fourth Edition textbooks handbooks courses, Figure, Hierarchy PMI Project Risk Management Resources practice standard is sections, Introductory material framework purpose principles context introduction Project Risk Management processes PMBOK Guide Fourth Edition, , Principles Project Risk Management processes PMBOK Guide Fourth Edition, processes are follows Plan Risk Management Identify Risks Perform Qualitative Risk Analysis Perform Quantitative Risk Analysis Plan Risk Responses Monitor Control Risks, processes is chapter addresses topics purpose objectives process success factors process c tools techniques process d results process, , glossary terms are practice standard, practice standard emphasizes principles are Project Risk Management, principles be level reasons, Principles are be be future, tools techniques are principles have stability persistence, , projects organizations situations require approaches Project Risk Management, management is discipline contains series processes apply projects, Risk management be practice is project congruent culture processes assets, are ways risk management comply principles Project Risk Management practice standard, , principles are projects context re business arrangements participants example ventures companies government organizations environment project teams, principles described herein be check organizations processes, Practitioners establish processes situation project organization compare principles Project Risk Management practice]","[[purpose, Practice, Standard, Project, Risk, Management, provide, standard, project, management, practitioners, stakeholders, aspects, Project, Risk, Management, practice, projects, time, standard], [practice, standard, purpose, training, purposes], [Practice, Standard, Project, Risk, Management, risk, management, projects], [PMBOK, Guide, Fourth, Edition, practice, standard, risk, programs, portfolios, projects], [Chapter, PMBOK, Guide, Fourth, Edition, basis, Practice, Standard, Project, Risk, Management], [practice, standard, chapter, concepts, principles, Project, Risk, Management], [PMI, practice, standards], [Figure, purposes, practice, standard, PMBOK, Guide, Fourth, Edition, textbooks, handbooks, courses], [Figure], [Hierarchy, PMI, Project, Risk, Management, Resources, practice, standard, sections], [Introductory, material, framework, purpose, principles, context, introduction, Risk, Management, PMBOK, Guide, Fourth, Edition], [], [Principles, Project, Risk, Management, PMBOK, Guide, Fourth, Edition], [processes, Plan, Risk, Management, Identify, Risks, Perform, Qualitative, Risk, Analysis, Perform, Quantitative, Risk, Analysis, Plan, Risk, Responses, Monitor, Control, Risks], [processes, chapter, topics, purpose, objectives, process, success, factors, process, c, tools, techniques, process, d, results, process], [], [glossary, terms, practice, standard], [practice, standard, principles, Project, Risk, Management], [principles, level, reasons], [Principles, future], [tools, techniques, principles, stability, persistence], [], [projects, organizations, situations, approaches, Risk, Management], [management, discipline, series, processes, projects], [Risk, management, practice, project, congruent, culture, processes, assets], [ways, risk, management, principles, Project, Risk, Management, practice, standard], [], [principles, projects, context, re, business, arrangements, participants, example, ventures, companies, government, organizations, environment, project, teams], [principles, herein, check, organizations, processes], [Practitioners, processes, situation, project, organization, principles, Project, Risk, Management, practice]]"
2,1.2 Project Risk Management Definition,"The definition of Project Risk Management, as defined in the PMBOK Guide Fourth Edition, is the basis for this practice standard Project Risk Management includes the processes concerned with conducting risk management planning, identification, analysis, responses, and monitoring and control on a project. The PMBOK Guide Fourth Edition also states The objectives of Project Risk Management are to increase the probability and impact of positive events, and decrease the probability and impact of negative events in the project. In the PMBOK Guide Fourth Edition, project risk is an uncertain event or condition that, if it occurs, has a positive or negative effect on a projects objectives. Project objectives include scope, schedule, cost, and quality. Project Risk Management aims to identify and prioritize risks in advance of their occurrence, and provide actionoriented information to project managers. This orientation requires consideration of events that may or may not occur and are therefore described in terms of likelihood or probability of occurrence in addition to other dimensions such as their impact on objectives.","The definition of Project Risk Management, as defined in the PMBOK Guide Fourth Edition, is the basis for this practice standard Project Risk Management includes the processes concerned with conducting risk management planning, identification, analysis, responses, and monitoring and control on a project. The PMBOK Guide Fourth Edition also states The objectives of Project Risk Management are to increase the probability and impact of positive events, and decrease the probability and impact of negative events in the project. In the PMBOK Guide Fourth Edition, project risk is an uncertain event or condition that, if it occurs, has a positive or negative effect on a projects objectives. Project objectives include scope, schedule, cost, and quality. Project Risk Management aims to identify and prioritize risks in advance of their occurrence, and provide actionoriented information to project managers. This orientation requires consideration of events that may or may not occur and are therefore described in terms of likelihood or probability of occurrence in addition to other dimensions such as their impact on objectives.","[[(The, DT), (definition, NN), (of, IN), (Project, NNP), (Risk, NNP), (Management, NNP), (,, ,), (as, IN), (defined, VBN), (in, IN), (the, DT), (PMBOK, NNP), (Guide, NNP), (Fourth, NNP), (Edition, NNP), (,, ,), (is, VBZ), (the, DT), (basis, NN), (for, IN), (this, DT), (practice, NN), (standard, NN), (Project, NNP), (Risk, NNP), (Management, NNP), (includes, VBZ), (the, DT), (processes, NNS), (concerned, VBN), (with, IN), (conducting, VBG), (risk, NN), (management, NN), (planning, NN), (,, ,), (identification, NN), (,, ,), (analysis, NN), (,, ,), (responses, NNS), (,, ,), (and, CC), (monitoring, NN), (and, CC), (control, NN), (on, IN), (a, DT), (project, NN), (., .)], [(The, DT), (PMBOK, NNP), (Guide, NNP), (Fourth, NNP), (Edition, NNP), (also, RB), (states, VBZ), (The, DT), (objectives, NNS), (of, IN), (Project, NNP), (Risk, NNP), (Management, NNP), (are, VBP), (to, TO), (increase, VB), (the, DT), (probability, NN), (and, CC), (impact, NN), (of, IN), (positive, JJ), (events, NNS), (,, ,), (and, CC), (decrease, VB), (the, DT), (probability, NN), (and, CC), (impact, NN), (of, IN), (negative, JJ), (events, NNS), (in, IN), (the, DT), (project, NN), (., .)], [(In, IN), (the, DT), (PMBOK, NNP), (Guide, NNP), (Fourth, NNP), (Edition, NNP), (,, ,), (project, NN), (risk, NN), (is, VBZ), (an, DT), (uncertain, JJ), (event, NN), (or, CC), (condition, NN), (that, IN), (,, ,), (if, IN), (it, PRP), (occurs, VBZ), (,, ,), (has, VBZ), (a, DT), (positive, JJ), (or, CC), (negative, JJ), (effect, NN), (on, IN), (a, DT), (projects, NNS), (objectives, NNS), (., .)], [(Project, NN), (objectives, NNS), (include, VBP), (scope, NN), (,, ,), (schedule, NN), (,, ,), (cost, NN), (,, ,), (and, CC), (quality, NN), (., .)], [(Project, NN), (Risk, NNP), (Management, NNP), (aims, VBZ), (to, TO), (identify, VB), (and, CC), (prioritize, VB), (risks, NNS), (in, IN), (advance, NN), (of, IN), (their, PRP$), (occurrence, NN), (,, ,), (and, CC), (provide, RB), (actionoriented, VBN), (information, NN), (to, TO), (project, NN), (managers, NNS), (., .)], [(This, DT), (orientation, NN), (requires, VBZ), (consideration, NN), (of, IN), (events, NNS), (that, WDT), (may, MD), (or, CC), (may, MD), (not, RB), (occur, VB), (and, CC), (are, VBP), (therefore, RB), (described, VBN), (in, IN), (terms, NNS), (of, IN), (likelihood, NN), (or, CC), (probability, NN), (of, IN), (occurrence, NN), (in, IN), (addition, NN), (to, TO), (other, JJ), (dimensions, NNS), (such, JJ), (as, IN), (their, PRP$), (impact, NN), (on, IN), (objectives, NNS), (., .)]]","[definition Project Risk Management PMBOK Guide Fourth Edition is basis practice standard Project Risk Management includes processes risk management planning identification analysis responses monitoring control project, PMBOK Guide Fourth Edition states objectives Project Risk Management are increase probability impact events decrease probability impact events project, PMBOK Guide Fourth Edition project risk is event condition occurs has effect projects objectives, Project objectives include scope schedule cost quality, Project Risk Management aims identify prioritize risks advance occurrence information project managers, orientation requires consideration events occur are terms likelihood probability occurrence addition dimensions impact objectives]","[[definition, Project, Risk, Management, PMBOK, Guide, Fourth, Edition, basis, practice, standard, Project, Risk, Management, processes, risk, management, planning, identification, analysis, responses, monitoring, control, project], [PMBOK, Guide, Fourth, Edition, objectives, Project, Risk, Management, probability, impact, events, probability, impact, events, project], [PMBOK, Guide, Fourth, Edition, project, risk, event, condition, effect, projects, objectives], [Project, objectives, scope, schedule, cost, quality], [Project, Risk, Management, risks, advance, occurrence, information, project, managers], [orientation, consideration, events, terms, likelihood, probability, occurrence, addition, dimensions, impact, objectives]]"


Enhanced POS Tagging Method for Compound Words

In [43]:
df = process_dataframe(updated_data_for_df)
df_after_def_extraction=extract_definitions(df)


In [44]:
import nltk
from nltk import sent_tokenize, word_tokenize, pos_tag
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')

def pos_tag_with_compound_words(sentence):
    # Tokenize the sentence into words
    words = word_tokenize(sentence)
    
    # Perform POS tagging using the pos_tag method from NLTK
    tagged_words = pos_tag(words)
    
    # Initialize a list to store words with their meanings
    words_with_meaning = []
    
    # Iterate through the tagged words
    i = 0
    while i < len(tagged_words):
        word, tag = tagged_words[i]
        
        # If the word is a noun (N) and the next one is also, treat them as a compound word
        if tag.startswith('N'):
            compound_word = word
            while i < len(tagged_words) - 1 and tagged_words[i + 1][1].startswith('N'):
                i += 1
                compound_word += " " + tagged_words[i][0]
            words_with_meaning.append((compound_word, 'NN'))
        else:
            words_with_meaning.append((word, tag))
        i += 1
    
    return words_with_meaning

# Apply the function to each sentence in the 'Section' column and store the results in a new column 'Words_Tags'
df_after_def_extraction['Words_Tags'] = df_after_def_extraction['Description'].apply(lambda x: [pos_tag_with_compound_words(sent) for sent in sent_tokenize(x)])

# Function to extract nouns (N) and verbs (V) from a list of words and POS tags
def extract_nouns_and_verbs(words_tags):
    nouns_and_verbs = [word for word, tag in words_tags if tag.startswith('N') or tag.startswith('V')]
    return ' '.join(nouns_and_verbs)

# Apply the function to each element in the 'Words_Tags' column and store the results in a new column 'Nouns_Verbs'
df_after_def_extraction['Sentences'] = df_after_def_extraction['Words_Tags'].apply(lambda x: '. '.join([extract_nouns_and_verbs(sent) for sent in x]))

# Function to extract nouns from a list of words and POS tags
def extract_nouns(words_tags):
    nouns = [word for word, tag in words_tags if tag.startswith('N')]
    return nouns

# Apply the function to each element in the 'Words_Tags' column and store the results in a new column 'Concept'
df_after_def_extraction['Concepts'] = df_after_def_extraction['Words_Tags'].apply(lambda x: [extract_nouns(sent) for sent in x])

df_after_def_extraction.head(3)


[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\Admin\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     C:\Users\Admin\AppData\Roaming\nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!


Unnamed: 0,Title,Description,Definition,Words_Tags,Sentences,Concepts
0,CHAPTER 1 INTRODUCTION,"Project Management Institute PMI practice standards are guides to the use of a tool, technique, or process identified in A Guide to the Project Management Body of Knowledge PMBOK Guide Fourth Edition or other PMI standards. Practice standards are targeted at audiences who participate in the management of projects. This includes project managers, project personnel, contract personnel, supervisors, and other project stakeholders. A PMI practice standard describes processes, activities, inputs, and outputs for a specific Knowledge Area. It provides information on what the significant process, tool, or technique is, what it does, why it is significant, when it should be performed or executed, and, if necessary for further clarification, who should perform the process. A practice standard does not prescribe how the process is to be implemented, leaving that subject for other forums such as handbooks, manuals, and courses.",,"[[(Project Management Institute PMI practice standards, NN), (are, VBP), (guides, NN), (to, TO), (the, DT), (use, NN), (of, IN), (a, DT), (tool, NN), (,, ,), (technique, NN), (,, ,), (or, CC), (process, NN), (identified, VBN), (in, IN), (A Guide, NN), (to, TO), (the, DT), (Project Management Body, NN), (of, IN), (Knowledge PMBOK Guide Fourth Edition, NN), (or, CC), (other, JJ), (PMI standards, NN), (., .)], [(Practice standards, NN), (are, VBP), (targeted, VBN), (at, IN), (audiences, NN), (who, WP), (participate, VBP), (in, IN), (the, DT), (management, NN), (of, IN), (projects, NN), (., .)], [(This, DT), (includes, VBZ), (project managers, NN), (,, ,), (project personnel, NN), (,, ,), (contract personnel, NN), (,, ,), (supervisors, NN), (,, ,), (and, CC), (other, JJ), (project stakeholders, NN), (., .)], [(A, DT), (PMI practice standard, NN), (describes, VBZ), (processes, NN), (,, ,), (activities, NN), (,, ,), (inputs, NN), (,, ,), (and, CC), (outputs, NN), (for, IN), (a, DT), (specific, JJ), (Knowledge Area, NN), (., .)], [(It, PRP), (provides, VBZ), (information, NN), (on, IN), (what, WP), (the, DT), (significant, JJ), (process, NN), (,, ,), (tool, NN), (,, ,), (or, CC), (technique, NN), (is, VBZ), (,, ,), (what, WP), (it, PRP), (does, VBZ), (,, ,), (why, WRB), (it, PRP), (is, VBZ), (significant, JJ), (,, ,), (when, WRB), (it, PRP), (should, MD), (be, VB), (performed, VBN), (or, CC), (executed, VBN), (,, ,), (and, CC), (,, ,), (if, IN), (necessary, JJ), (for, IN), (further, JJ), (clarification, NN), (,, ,), (who, WP), (should, MD), (perform, VB), (the, DT), (process, NN), (., .)], [(A, DT), (practice standard, NN), (does, VBZ), (not, RB), (prescribe, VB), (how, WRB), (the, DT), (process, NN), (is, VBZ), (to, TO), (be, VB), (implemented, VBN), (,, ,), (leaving, VBG), (that, IN), (subject, NN), (for, IN), (other, JJ), (forums, NN), (such, JJ), (as, IN), (handbooks, NN), (,, ,), (manuals, NN), (,, ,), (and, CC), (courses, NN), (., .)]]",Project Management Institute PMI practice standards are guides use tool technique process identified A Guide Project Management Body Knowledge PMBOK Guide Fourth Edition PMI standards. Practice standards are targeted audiences participate management projects. includes project managers project personnel contract personnel supervisors project stakeholders. PMI practice standard describes processes activities inputs outputs Knowledge Area. provides information process tool technique is does is be performed executed clarification perform process. practice standard does prescribe process is be implemented leaving subject forums handbooks manuals courses,"[[Project Management Institute PMI practice standards, guides, use, tool, technique, process, A Guide, Project Management Body, Knowledge PMBOK Guide Fourth Edition, PMI standards], [Practice standards, audiences, management, projects], [project managers, project personnel, contract personnel, supervisors, project stakeholders], [PMI practice standard, processes, activities, inputs, outputs, Knowledge Area], [information, process, tool, technique, clarification, process], [practice standard, process, subject, forums, handbooks, manuals, courses]]"
1,1.1 Purpose of the Practice Standard for Project Risk Management,"The purpose of the Practice Standard for Project Risk Management is to a provide a standard for project management practitioners and other stakeholders that defines the aspects of Project Risk Management that are recognized as good practice on most projects most of the time and b provide a standard that is globally applicable and consistently applied. This practice standard has a descriptive purpose rather than one used for training or educational purposes. The Practice Standard for Project Risk Management covers risk management as it is applied to single projects only. Like the PMBOK Guide Fourth Edition, this practice standard does not cover risk in programs or portfolios of projects. Chapter 11 of the PMBOK Guide Fourth Edition, is the basis for the Practice Standard for Project Risk Management . This practice standard is consistent with that chapter, emphasizing the concepts and principles relating to Project Risk Management. It is aligned with other PMI practice standards. Figure 11 compares the purposes of this practice standard to those of the PMBOK Guide Fourth Edition and textbooks, handbooks, and courses. Figure 11. Hierarchy of PMI Project Risk Management Resources This practice standard is organized in three main sections 1. Introductory material including the framework, purpose, principles, context of, and introduction to Project Risk Management processes as defined in the PMBOK Guide Fourth Edition. 2. Principles underlying the six Project Risk Management processes in the PMBOK Guide Fourth Edition. The six processes are as follows Plan Risk Management, Identify Risks, Perform Qualitative Risk Analysis, Perform Quantitative Risk Analysis, Plan Risk Responses, and Monitor and Control Risks. Each of these six processes is described in a chapter that addresses the following four topics a purpose and objectives of the process b critical success factors for the process c tools and techniques for the process and d documenting the results of the process. 3. A glossary of terms which are used in this practice standard. This practice standard emphasizes those principles that are fundamental to effective, comprehensive, and successful Project Risk Management. These principles can and should be stated at a general level for several reasons 1. Principles are expected to be agreed upon now and to be valid in the future. While tools and techniques are constantly evolving, the principles have more stability and persistence. 2. Different projects, organizations, and situations will require different approaches to Project Risk Management. In particular, risk management is a discipline that contains a series of processes to apply to both large and small projects. Risk management will be more effective if its practice is tailored to the project and congruent with the organizational culture, processes and assets. There are many different ways of conducting risk management that may comply with the principles of Project Risk Management as presented in this practice standard. 3. The principles are applicable to projects carried out in a global context, re ecting the many business and organizational arrangements between participants, for example, joint ventures between commercial and national companies, government and nongovernment organizations, and the crosscultural environment often found on these project teams. The principles described herein can be used as a check for an organizations processes. Practitioners can establish processes specific to their particular situation, project, or organization and then compare them with these principles, thus validating them against good Project Risk Management practice.",,"[[(The, DT), (purpose, NN), (of, IN), (the, DT), (Practice Standard, NN), (for, IN), (Project Risk Management, NN), (is, VBZ), (to, TO), (a, DT), (provide, NN), (a, DT), (standard, NN), (for, IN), (project management practitioners, NN), (and, CC), (other, JJ), (stakeholders, NN), (that, WDT), (defines, VBZ), (the, DT), (aspects, NN), (of, IN), (Project Risk Management, NN), (that, WDT), (are, VBP), (recognized, VBN), (as, IN), (good, JJ), (practice, NN), (on, IN), (most, JJS), (projects, NN), (most, RBS), (of, IN), (the, DT), (time, NN), (and, CC), (b, VB), (provide, VB), (a, DT), (standard, NN), (that, WDT), (is, VBZ), (globally, RB), (applicable, JJ), (and, CC), (consistently, RB), (applied, VBN), (., .)], [(This, DT), (practice standard, NN), (has, VBZ), (a, DT), (descriptive, JJ), (purpose, NN), (rather, RB), (than, IN), (one, CD), (used, VBN), (for, IN), (training, NN), (or, CC), (educational, JJ), (purposes, NN), (., .)], [(The, DT), (Practice Standard, NN), (for, IN), (Project Risk Management, NN), (covers, VBZ), (risk management, NN), (as, IN), (it, PRP), (is, VBZ), (applied, VBN), (to, TO), (single, JJ), (projects, NN), (only, RB), (., .)], [(Like, IN), (the, DT), (PMBOK Guide Fourth Edition, NN), (,, ,), (this, DT), (practice standard, NN), (does, VBZ), (not, RB), (cover, VB), (risk, NN), (in, IN), (programs, NN), (or, CC), (portfolios, NN), (of, IN), (projects, NN), (., .)], [(Chapter, NN), (11, CD), (of, IN), (the, DT), (PMBOK Guide Fourth Edition, NN), (,, ,), (is, VBZ), (the, DT), (basis, NN), (for, IN), (the, DT), (Practice Standard, NN), (for, IN), (Project Risk Management, NN), (., .)], [(This, DT), (practice standard, NN), (is, VBZ), (consistent, JJ), (with, IN), (that, DT), (chapter, NN), (,, ,), (emphasizing, VBG), (the, DT), (concepts, NN), (and, CC), (principles, NN), (relating, VBG), (to, TO), (Project Risk Management, NN), (., .)], [(It, PRP), (is, VBZ), (aligned, VBN), (with, IN), (other, JJ), (PMI practice standards, NN), (., .)], [(Figure, NN), (11, CD), (compares, VBZ), (the, DT), (purposes, NN), (of, IN), (this, DT), (practice standard, NN), (to, TO), (those, DT), (of, IN), (the, DT), (PMBOK Guide Fourth Edition, NN), (and, CC), (textbooks, NN), (,, ,), (handbooks, NN), (,, ,), (and, CC), (courses, NN), (., .)], [(Figure, NN), (11, CD), (., .)], [(Hierarchy, NN), (of, IN), (PMI Project Risk Management Resources, NN), (This, DT), (practice standard, NN), (is, VBZ), (organized, VBN), (in, IN), (three, CD), (main, JJ), (sections, NN), (1, CD), (., .)], [(Introductory material, NN), (including, VBG), (the, DT), (framework, NN), (,, ,), (purpose, NN), (,, ,), (principles, NN), (,, ,), (context, NN), (of, IN), (,, ,), (and, CC), (introduction, NN), (to, TO), (Project, VB), (Risk Management, NN), (processes, VBZ), (as, RB), (defined, VBN), (in, IN), (the, DT), (PMBOK Guide Fourth Edition, NN), (., .)], [(2, CD), (., .)], [(Principles, NN), (underlying, VBG), (the, DT), (six, CD), (Project Risk Management, NN), (processes, VBZ), (in, IN), (the, DT), (PMBOK Guide Fourth Edition, NN), (., .)], [(The, DT), (six, CD), (processes, NN), (are, VBP), (as, IN), (follows, VBZ), (Plan Risk Management, NN), (,, ,), (Identify Risks, NN), (,, ,), (Perform Qualitative Risk Analysis, NN), (,, ,), (Perform Quantitative Risk Analysis, NN), (,, ,), (Plan Risk Responses, NN), (,, ,), (and, CC), (Monitor, NN), (and, CC), (Control Risks, NN), (., .)], [(Each, DT), (of, IN), (these, DT), (six, CD), (processes, NN), (is, VBZ), (described, VBN), (in, IN), (a, DT), (chapter, NN), (that, WDT), (addresses, VBZ), (the, DT), (following, JJ), (four, CD), (topics, NN), (a, DT), (purpose, NN), (and, CC), (objectives, NN), (of, IN), (the, DT), (process, NN), (b, SYM), (critical, JJ), (success factors, NN), (for, IN), (the, DT), (process c tools, NN), (and, CC), (techniques, NN), (for, IN), (the, DT), (process, NN), (and, CC), (d, NN), (documenting, VBG), (the, DT), (results, NN), (of, IN), (the, DT), (process, NN), (., .)], [(3, CD), (., .)], [(A, DT), (glossary, NN), (of, IN), (terms, NN), (which, WDT), (are, VBP), (used, VBN), (in, IN), (this, DT), (practice standard, NN), (., .)], [(This, DT), (practice standard, NN), (emphasizes, VBZ), (those, DT), (principles, NN), (that, WDT), (are, VBP), (fundamental, JJ), (to, TO), (effective, JJ), (,, ,), (comprehensive, JJ), (,, ,), (and, CC), (successful, JJ), (Project Risk Management, NN), (., .)], [(These, DT), (principles, NN), (can, MD), (and, CC), (should, MD), (be, VB), (stated, VBN), (at, IN), (a, DT), (general, JJ), (level, NN), (for, IN), (several, JJ), (reasons, NN), (1, CD), (., .)], [(Principles, NN), (are, VBP), (expected, VBN), (to, TO), (be, VB), (agreed, VBN), (upon, IN), (now, RB), (and, CC), (to, TO), (be, VB), (valid, JJ), (in, IN), (the, DT), (future, NN), (., .)], [(While, IN), (tools, NN), (and, CC), (techniques, NN), (are, VBP), (constantly, RB), (evolving, VBG), (,, ,), (the, DT), (principles, NN), (have, VBP), (more, JJR), (stability, NN), (and, CC), (persistence, NN), (., .)], [(2, CD), (., .)], [(Different, JJ), (projects, NN), (,, ,), (organizations, NN), (,, ,), (and, CC), (situations, NN), (will, MD), (require, VB), (different, JJ), (approaches, NN), (to, TO), (Project, VB), (Risk Management, NN), (., .)], [(In, IN), (particular, JJ), (,, ,), (risk, JJ), (management, NN), (is, VBZ), (a, DT), (discipline, NN), (that, WDT), (contains, VBZ), (a, DT), (series, NN), (of, IN), (processes, NN), (to, TO), (apply, VB), (to, TO), (both, DT), (large, JJ), (and, CC), (small, JJ), (projects, NN), (., .)], [(Risk management, NN), (will, MD), (be, VB), (more, RBR), (effective, JJ), (if, IN), (its, PRP$), (practice, NN), (is, VBZ), (tailored, VBN), (to, TO), (the, DT), (project, NN), (and, CC), (congruent, NN), (with, IN), (the, DT), (organizational, JJ), (culture, NN), (,, ,), (processes, NN), (and, CC), (assets, NN), (., .)], [(There, EX), (are, VBP), (many, JJ), (different, JJ), (ways, NN), (of, IN), (conducting, VBG), (risk management, NN), (that, WDT), (may, MD), (comply, VB), (with, IN), (the, DT), (principles, NN), (of, IN), (Project Risk Management, NN), (as, IN), (presented, VBN), (in, IN), (this, DT), (practice standard, NN), (., .)], [(3, CD), (., .)], [(The, DT), (principles, NN), (are, VBP), (applicable, JJ), (to, TO), (projects, NN), (carried, VBN), (out, RP), (in, IN), (a, DT), (global, JJ), (context, NN), (,, ,), (re, NN), (ecting, VBG), (the, DT), (many, JJ), (business, NN), (and, CC), (organizational, JJ), (arrangements, NN), (between, IN), (participants, NN), (,, ,), (for, IN), (example, NN), (,, ,), (joint, JJ), (ventures, NN), (between, IN), (commercial, JJ), (and, CC), (national, JJ), (companies, NN), (,, ,), (government, NN), (and, CC), (nongovernment, JJ), (organizations, NN), (,, ,), (and, CC), (the, DT), (crosscultural, JJ), (environment, NN), (often, RB), (found, VBN), (on, IN), (these, DT), (project teams, NN), (., .)], [(The, DT), (principles, NN), (described, VBD), (herein, NN), (can, MD), (be, VB), (used, VBN), (as, IN), (a, DT), (check, NN), (for, IN), (an, DT), (organizations processes, NN), (., .)], [(Practitioners, NN), (can, MD), (establish, VB), (processes, NN), (specific, JJ), (to, TO), (their, PRP$), (particular, JJ), (situation, NN), (,, ,), (project, NN), (,, ,), (or, CC), (organization, NN), (and, CC), (then, RB), (compare, VB), (them, PRP), (with, IN), (these, DT), (principles, NN), (,, ,), (thus, RB), (validating, VBG), (them, PRP), (against, IN), (good, JJ), (Project Risk Management practice, NN), (., .)]]",purpose Practice Standard Project Risk Management is provide standard project management practitioners stakeholders defines aspects Project Risk Management are recognized practice projects time b provide standard is applied. practice standard has purpose used training purposes. Practice Standard Project Risk Management covers risk management is applied projects. PMBOK Guide Fourth Edition practice standard does cover risk programs portfolios projects. Chapter PMBOK Guide Fourth Edition is basis Practice Standard Project Risk Management. practice standard is chapter emphasizing concepts principles relating Project Risk Management. is aligned PMI practice standards. Figure compares purposes practice standard PMBOK Guide Fourth Edition textbooks handbooks courses. Figure. Hierarchy PMI Project Risk Management Resources practice standard is organized sections. Introductory material including framework purpose principles context introduction Project Risk Management processes defined PMBOK Guide Fourth Edition. . Principles underlying Project Risk Management processes PMBOK Guide Fourth Edition. processes are follows Plan Risk Management Identify Risks Perform Qualitative Risk Analysis Perform Quantitative Risk Analysis Plan Risk Responses Monitor Control Risks. processes is described chapter addresses topics purpose objectives process success factors process c tools techniques process d documenting results process. . glossary terms are used practice standard. practice standard emphasizes principles are Project Risk Management. principles be stated level reasons. Principles are expected be agreed be future. tools techniques are evolving principles have stability persistence. . projects organizations situations require approaches Project Risk Management. management is discipline contains series processes apply projects. Risk management be practice is tailored project congruent culture processes assets. are ways conducting risk management comply principles Project Risk Management presented practice standard. . principles are projects carried context re ecting business arrangements participants example ventures companies government organizations environment found project teams. principles described herein be used check organizations processes. Practitioners establish processes situation project organization compare principles validating Project Risk Management practice,"[[purpose, Practice Standard, Project Risk Management, provide, standard, project management practitioners, stakeholders, aspects, Project Risk Management, practice, projects, time, standard], [practice standard, purpose, training, purposes], [Practice Standard, Project Risk Management, risk management, projects], [PMBOK Guide Fourth Edition, practice standard, risk, programs, portfolios, projects], [Chapter, PMBOK Guide Fourth Edition, basis, Practice Standard, Project Risk Management], [practice standard, chapter, concepts, principles, Project Risk Management], [PMI practice standards], [Figure, purposes, practice standard, PMBOK Guide Fourth Edition, textbooks, handbooks, courses], [Figure], [Hierarchy, PMI Project Risk Management Resources, practice standard, sections], [Introductory material, framework, purpose, principles, context, introduction, Risk Management, PMBOK Guide Fourth Edition], [], [Principles, Project Risk Management, PMBOK Guide Fourth Edition], [processes, Plan Risk Management, Identify Risks, Perform Qualitative Risk Analysis, Perform Quantitative Risk Analysis, Plan Risk Responses, Monitor, Control Risks], [processes, chapter, topics, purpose, objectives, process, success factors, process c tools, techniques, process, d, results, process], [], [glossary, terms, practice standard], [practice standard, principles, Project Risk Management], [principles, level, reasons], [Principles, future], [tools, techniques, principles, stability, persistence], [], [projects, organizations, situations, approaches, Risk Management], [management, discipline, series, processes, projects], [Risk management, practice, project, congruent, culture, processes, assets], [ways, risk management, principles, Project Risk Management, practice standard], [], [principles, projects, context, re, business, arrangements, participants, example, ventures, companies, government, organizations, environment, project teams], [principles, herein, check, organizations processes], [Practitioners, processes, situation, project, organization, principles, Project Risk Management practice]]"
2,1.2 Project Risk Management Definition,"The definition of Project Risk Management, as defined in the PMBOK Guide Fourth Edition, is the basis for this practice standard Project Risk Management includes the processes concerned with conducting risk management planning, identification, analysis, responses, and monitoring and control on a project. The PMBOK Guide Fourth Edition also states The objectives of Project Risk Management are to increase the probability and impact of positive events, and decrease the probability and impact of negative events in the project. In the PMBOK Guide Fourth Edition, project risk is an uncertain event or condition that, if it occurs, has a positive or negative effect on a projects objectives. Project objectives include scope, schedule, cost, and quality. Project Risk Management aims to identify and prioritize risks in advance of their occurrence, and provide actionoriented information to project managers. This orientation requires consideration of events that may or may not occur and are therefore described in terms of likelihood or probability of occurrence in addition to other dimensions such as their impact on objectives.","The definition of Project Risk Management, as defined in the PMBOK Guide Fourth Edition, is the basis for this practice standard Project Risk Management includes the processes concerned with conducting risk management planning, identification, analysis, responses, and monitoring and control on a project. The PMBOK Guide Fourth Edition also states The objectives of Project Risk Management are to increase the probability and impact of positive events, and decrease the probability and impact of negative events in the project. In the PMBOK Guide Fourth Edition, project risk is an uncertain event or condition that, if it occurs, has a positive or negative effect on a projects objectives. Project objectives include scope, schedule, cost, and quality. Project Risk Management aims to identify and prioritize risks in advance of their occurrence, and provide actionoriented information to project managers. This orientation requires consideration of events that may or may not occur and are therefore described in terms of likelihood or probability of occurrence in addition to other dimensions such as their impact on objectives.","[[(The, DT), (definition, NN), (of, IN), (Project Risk Management, NN), (,, ,), (as, IN), (defined, VBN), (in, IN), (the, DT), (PMBOK Guide Fourth Edition, NN), (,, ,), (is, VBZ), (the, DT), (basis, NN), (for, IN), (this, DT), (practice standard Project Risk Management, NN), (includes, VBZ), (the, DT), (processes, NN), (concerned, VBN), (with, IN), (conducting, VBG), (risk management planning, NN), (,, ,), (identification, NN), (,, ,), (analysis, NN), (,, ,), (responses, NN), (,, ,), (and, CC), (monitoring, NN), (and, CC), (control, NN), (on, IN), (a, DT), (project, NN), (., .)], [(The, DT), (PMBOK Guide Fourth Edition, NN), (also, RB), (states, VBZ), (The, DT), (objectives, NN), (of, IN), (Project Risk Management, NN), (are, VBP), (to, TO), (increase, VB), (the, DT), (probability, NN), (and, CC), (impact, NN), (of, IN), (positive, JJ), (events, NN), (,, ,), (and, CC), (decrease, VB), (the, DT), (probability, NN), (and, CC), (impact, NN), (of, IN), (negative, JJ), (events, NN), (in, IN), (the, DT), (project, NN), (., .)], [(In, IN), (the, DT), (PMBOK Guide Fourth Edition, NN), (,, ,), (project risk, NN), (is, VBZ), (an, DT), (uncertain, JJ), (event, NN), (or, CC), (condition, NN), (that, IN), (,, ,), (if, IN), (it, PRP), (occurs, VBZ), (,, ,), (has, VBZ), (a, DT), (positive, JJ), (or, CC), (negative, JJ), (effect, NN), (on, IN), (a, DT), (projects objectives, NN), (., .)], [(Project objectives, NN), (include, VBP), (scope, NN), (,, ,), (schedule, NN), (,, ,), (cost, NN), (,, ,), (and, CC), (quality, NN), (., .)], [(Project Risk Management, NN), (aims, VBZ), (to, TO), (identify, VB), (and, CC), (prioritize, VB), (risks, NN), (in, IN), (advance, NN), (of, IN), (their, PRP$), (occurrence, NN), (,, ,), (and, CC), (provide, RB), (actionoriented, VBN), (information, NN), (to, TO), (project managers, NN), (., .)], [(This, DT), (orientation, NN), (requires, VBZ), (consideration, NN), (of, IN), (events, NN), (that, WDT), (may, MD), (or, CC), (may, MD), (not, RB), (occur, VB), (and, CC), (are, VBP), (therefore, RB), (described, VBN), (in, IN), (terms, NN), (of, IN), (likelihood, NN), (or, CC), (probability, NN), (of, IN), (occurrence, NN), (in, IN), (addition, NN), (to, TO), (other, JJ), (dimensions, NN), (such, JJ), (as, IN), (their, PRP$), (impact, NN), (on, IN), (objectives, NN), (., .)]]",definition Project Risk Management defined PMBOK Guide Fourth Edition is basis practice standard Project Risk Management includes processes concerned conducting risk management planning identification analysis responses monitoring control project. PMBOK Guide Fourth Edition states objectives Project Risk Management are increase probability impact events decrease probability impact events project. PMBOK Guide Fourth Edition project risk is event condition occurs has effect projects objectives. Project objectives include scope schedule cost quality. Project Risk Management aims identify prioritize risks advance occurrence actionoriented information project managers. orientation requires consideration events occur are described terms likelihood probability occurrence addition dimensions impact objectives,"[[definition, Project Risk Management, PMBOK Guide Fourth Edition, basis, practice standard Project Risk Management, processes, risk management planning, identification, analysis, responses, monitoring, control, project], [PMBOK Guide Fourth Edition, objectives, Project Risk Management, probability, impact, events, probability, impact, events, project], [PMBOK Guide Fourth Edition, project risk, event, condition, effect, projects objectives], [Project objectives, scope, schedule, cost, quality], [Project Risk Management, risks, advance, occurrence, information, project managers], [orientation, consideration, events, terms, likelihood, probability, occurrence, addition, dimensions, impact, objectives]]"


Extracting Concepts with Named Entity Recognition Method in SpaCy

In [45]:
df = process_dataframe(updated_data_for_df)
df_after_def_extraction=extract_definitions(df)

In [46]:
# Named Entity Recognition
import pandas as pd
import spacy

# Load the spaCy model
nlp = spacy.load("en_core_web_sm")

# Split each paragraph into sentences
df_after_def_extraction['Sentences'] = df_after_def_extraction['Description'].apply(lambda x: [sent.text for sent in nlp(x).sents])

# Apply spaCy NER to each sentence and store the results in a new column
"""
This line applies Named Entity Recognition (NER) to each sentence in every paragraph.
For each sentence (sent) in the list of sentences (sentences), it extracts the text of each named entity (ent.text) and stores them in the 'Concepts' column.
"""
df_after_def_extraction['Concepts'] = df_after_def_extraction['Sentences'].apply(lambda sentences: [ent.text for sent in sentences for ent in nlp(sent).ents])

# Display the resulting DataFrame
pd.set_option('display.max_colwidth', None)
df_after_def_extraction.head(3)


Unnamed: 0,Title,Description,Definition,Sentences,Concepts
0,CHAPTER 1 INTRODUCTION,"Project Management Institute PMI practice standards are guides to the use of a tool, technique, or process identified in A Guide to the Project Management Body of Knowledge PMBOK Guide Fourth Edition or other PMI standards. Practice standards are targeted at audiences who participate in the management of projects. This includes project managers, project personnel, contract personnel, supervisors, and other project stakeholders. A PMI practice standard describes processes, activities, inputs, and outputs for a specific Knowledge Area. It provides information on what the significant process, tool, or technique is, what it does, why it is significant, when it should be performed or executed, and, if necessary for further clarification, who should perform the process. A practice standard does not prescribe how the process is to be implemented, leaving that subject for other forums such as handbooks, manuals, and courses.",,"[Project Management Institute PMI practice standards are guides to the use of a tool, technique, or process identified in A Guide to the Project Management Body of Knowledge PMBOK Guide Fourth Edition or other PMI standards., Practice standards are targeted at audiences who participate in the management of projects., This includes project managers, project personnel, contract personnel, supervisors, and other project stakeholders., A PMI practice standard describes processes, activities, inputs, and outputs for a specific Knowledge Area., It provides information on what the significant process, tool, or technique is, what it does, why it is significant, when it should be performed or executed, and, if necessary for further clarification, who should perform the process., A practice standard does not prescribe how the process is to be implemented, leaving that subject for other forums such as handbooks, manuals, and courses.]","[Project Management Institute PMI, the Project Management Body of Knowledge PMBOK Guide Fourth Edition, PMI, PMI, Knowledge Area]"
1,1.1 Purpose of the Practice Standard for Project Risk Management,"The purpose of the Practice Standard for Project Risk Management is to a provide a standard for project management practitioners and other stakeholders that defines the aspects of Project Risk Management that are recognized as good practice on most projects most of the time and b provide a standard that is globally applicable and consistently applied. This practice standard has a descriptive purpose rather than one used for training or educational purposes. The Practice Standard for Project Risk Management covers risk management as it is applied to single projects only. Like the PMBOK Guide Fourth Edition, this practice standard does not cover risk in programs or portfolios of projects. Chapter 11 of the PMBOK Guide Fourth Edition, is the basis for the Practice Standard for Project Risk Management . This practice standard is consistent with that chapter, emphasizing the concepts and principles relating to Project Risk Management. It is aligned with other PMI practice standards. Figure 11 compares the purposes of this practice standard to those of the PMBOK Guide Fourth Edition and textbooks, handbooks, and courses. Figure 11. Hierarchy of PMI Project Risk Management Resources This practice standard is organized in three main sections 1. Introductory material including the framework, purpose, principles, context of, and introduction to Project Risk Management processes as defined in the PMBOK Guide Fourth Edition. 2. Principles underlying the six Project Risk Management processes in the PMBOK Guide Fourth Edition. The six processes are as follows Plan Risk Management, Identify Risks, Perform Qualitative Risk Analysis, Perform Quantitative Risk Analysis, Plan Risk Responses, and Monitor and Control Risks. Each of these six processes is described in a chapter that addresses the following four topics a purpose and objectives of the process b critical success factors for the process c tools and techniques for the process and d documenting the results of the process. 3. A glossary of terms which are used in this practice standard. This practice standard emphasizes those principles that are fundamental to effective, comprehensive, and successful Project Risk Management. These principles can and should be stated at a general level for several reasons 1. Principles are expected to be agreed upon now and to be valid in the future. While tools and techniques are constantly evolving, the principles have more stability and persistence. 2. Different projects, organizations, and situations will require different approaches to Project Risk Management. In particular, risk management is a discipline that contains a series of processes to apply to both large and small projects. Risk management will be more effective if its practice is tailored to the project and congruent with the organizational culture, processes and assets. There are many different ways of conducting risk management that may comply with the principles of Project Risk Management as presented in this practice standard. 3. The principles are applicable to projects carried out in a global context, re ecting the many business and organizational arrangements between participants, for example, joint ventures between commercial and national companies, government and nongovernment organizations, and the crosscultural environment often found on these project teams. The principles described herein can be used as a check for an organizations processes. Practitioners can establish processes specific to their particular situation, project, or organization and then compare them with these principles, thus validating them against good Project Risk Management practice.",,"[The purpose of the Practice Standard for Project Risk Management is to a provide a standard for project management practitioners and other stakeholders that defines the aspects of Project Risk Management that are recognized as good practice on most projects most of the time and b provide a standard that is globally applicable and consistently applied., This practice standard has a descriptive purpose rather than one used for training or educational purposes., The Practice Standard for Project Risk Management covers risk management as it is applied to single projects only., Like the PMBOK Guide Fourth Edition, this practice standard does not cover risk in programs or portfolios of projects., Chapter 11 of the PMBOK Guide Fourth Edition, is the basis for the Practice Standard for Project Risk Management ., This practice standard is consistent with that chapter, emphasizing the concepts and principles relating to Project Risk Management., It is aligned with other PMI practice standards., Figure 11 compares the purposes of this practice standard to those of the PMBOK Guide Fourth Edition and textbooks, handbooks, and courses., Figure 11., Hierarchy of PMI Project Risk Management Resources, This practice standard is organized in three main sections 1., Introductory material including the framework, purpose, principles, context of, and introduction to Project Risk Management processes as defined in the PMBOK Guide Fourth Edition., 2., Principles underlying the six Project Risk Management processes in the PMBOK Guide Fourth Edition., The six processes are as follows Plan Risk Management, Identify Risks, Perform Qualitative Risk Analysis, Perform Quantitative Risk Analysis, Plan Risk Responses, and Monitor and Control Risks., Each of these six processes is described in a chapter that addresses the following four topics a purpose and objectives of the process b critical success factors for the process c tools and techniques for the process and d documenting the results of the process., 3., A glossary of terms which are used in this practice standard., This practice standard emphasizes those principles that are fundamental to effective, comprehensive, and successful Project Risk Management., These principles can and should be stated at a general level for several reasons 1., Principles are expected to be agreed upon now and to be valid in the future., While tools and techniques are constantly evolving, the principles have more stability and persistence., 2. Different projects, organizations, and situations will require different approaches to Project Risk Management., In particular, risk management is a discipline that contains a series of processes to apply to both large and small projects., Risk management will be more effective if its practice is tailored to the project and congruent with the organizational culture, processes and assets., There are many different ways of conducting risk management that may comply with the principles of Project Risk Management as presented in this practice standard., 3., The principles are applicable to projects carried out in a global context, re ecting the many business and organizational arrangements between participants, for example, joint ventures between commercial and national companies, government and nongovernment organizations, and the crosscultural environment often found on these project teams., The principles described herein can be used as a check for an organizations processes., Practitioners can establish processes specific to their particular situation, project, or organization and then compare them with these principles, thus validating them against good Project Risk Management practice.]","[the Practice Standard for Project Risk Management, Project Risk Management, The Practice Standard for Project Risk Management, the PMBOK Guide Fourth Edition, Chapter 11, the Practice Standard for Project Risk Management, Project Risk Management, PMI, 11, 11, PMI Project Risk Management Resources, three, 1, Project Risk Management, 2, the six Project Risk Management, six, Plan Risk Management, Identify Risks, Perform Quantitative Risk Analysis, Plan Risk Responses, Monitor, Control Risks, six, four, 3, Project Risk Management, 1, 2, Project Risk Management, Project Risk Management, 3, Project Risk Management]"
2,1.2 Project Risk Management Definition,"The definition of Project Risk Management, as defined in the PMBOK Guide Fourth Edition, is the basis for this practice standard Project Risk Management includes the processes concerned with conducting risk management planning, identification, analysis, responses, and monitoring and control on a project. The PMBOK Guide Fourth Edition also states The objectives of Project Risk Management are to increase the probability and impact of positive events, and decrease the probability and impact of negative events in the project. In the PMBOK Guide Fourth Edition, project risk is an uncertain event or condition that, if it occurs, has a positive or negative effect on a projects objectives. Project objectives include scope, schedule, cost, and quality. Project Risk Management aims to identify and prioritize risks in advance of their occurrence, and provide actionoriented information to project managers. This orientation requires consideration of events that may or may not occur and are therefore described in terms of likelihood or probability of occurrence in addition to other dimensions such as their impact on objectives.","The definition of Project Risk Management, as defined in the PMBOK Guide Fourth Edition, is the basis for this practice standard Project Risk Management includes the processes concerned with conducting risk management planning, identification, analysis, responses, and monitoring and control on a project. The PMBOK Guide Fourth Edition also states The objectives of Project Risk Management are to increase the probability and impact of positive events, and decrease the probability and impact of negative events in the project. In the PMBOK Guide Fourth Edition, project risk is an uncertain event or condition that, if it occurs, has a positive or negative effect on a projects objectives. Project objectives include scope, schedule, cost, and quality. Project Risk Management aims to identify and prioritize risks in advance of their occurrence, and provide actionoriented information to project managers. This orientation requires consideration of events that may or may not occur and are therefore described in terms of likelihood or probability of occurrence in addition to other dimensions such as their impact on objectives.","[The definition of Project Risk Management, as defined in the PMBOK Guide Fourth Edition, is the basis for this practice standard Project Risk Management includes the processes concerned with conducting risk management planning, identification, analysis, responses, and monitoring and control on a project., The PMBOK Guide Fourth Edition also states The objectives of Project Risk Management are to increase the probability and impact of positive events, and decrease the probability and impact of negative events in the project., In the PMBOK Guide Fourth Edition, project risk is an uncertain event or condition that, if it occurs, has a positive or negative effect on a projects objectives., Project objectives include scope, schedule, cost, and quality., Project Risk Management aims to identify and prioritize risks in advance of their occurrence, and provide actionoriented information to project managers., This orientation requires consideration of events that may or may not occur and are therefore described in terms of likelihood or probability of occurrence in addition to other dimensions such as their impact on objectives.]","[Project Risk Management, Project Risk Management, Project Risk Management, Project Risk Management]"


### Concepts Extraction with Topic Modeling

Latent Dirichlet Allocation (LDA) with CountVectorizer

In [47]:
df = process_dataframe(updated_data_for_df)
df_after_def_extraction=extract_definitions(df)

In [48]:
import pandas as pd
import nltk
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.decomposition import LatentDirichletAllocation
from nltk.util import ngrams


# Function to tokenize a paragraph into sentences
def tokenize_paragraph(paragraph):
    sentences = nltk.sent_tokenize(paragraph)
    return sentences

# Function to extract n-grams (from 1 to 3 words) from tokenized sentences
def extract_ngrams(tokens):
    ngram_list = []
    for sentence_tokens in tokens:
        for n in range(1, 4):  # Extract n-grams from 1 to 3 words
            ngram_list.extend(list(ngrams(sentence_tokens, n)))
    return ngram_list

# Apply paragraph tokenization
df_after_def_extraction['Tokenized_Description'] = df_after_def_extraction['Description'].apply(tokenize_paragraph)

# Join all sentences of each paragraph into a single text
df_after_def_extraction['Section_text'] = df_after_def_extraction['Tokenized_Description'].apply(lambda x: ' '.join(x))

# Create a Count Vectorizer with n-grams
vectorizer = CountVectorizer(max_df=0.85, max_features=1000, stop_words='english', ngram_range=(1, 3))
X = vectorizer.fit_transform(df_after_def_extraction['Section_text'])

# Apply LDA for each section
num_topics = 10  # You can adjust the number of topics based on your needs

for index, row in df_after_def_extraction.iterrows():
    section_text = ' '.join(row['Tokenized_Description'])
    X_section = vectorizer.transform([section_text])
    
    lda = LatentDirichletAllocation(n_components=num_topics, random_state=42)
    lda.fit(X_section)
    
    feature_names = vectorizer.get_feature_names_out()
    top_keywords_idx = lda.components_[0].argsort()[:-10 - 1:-1]  # Top 10 keywords for this section
    top_keywords = [feature_names[i] for i in top_keywords_idx]
    
    # Check if 'Concepts' column exists, if not, create it
    if 'Concepts' not in df_after_def_extraction.columns:
        df_after_def_extraction['Concepts'] = ''
    
    df_after_def_extraction.at[index, 'Concepts'] = ', '.join(top_keywords)

# Display the DataFrame with the new 'Concepts' column
df_after_def_extraction.head(3)


  perword_bound = bound / word_cnt


Unnamed: 0,Title,Description,Definition,Tokenized_Description,Section_text,Concepts
0,CHAPTER 1 INTRODUCTION,"Project Management Institute PMI practice standards are guides to the use of a tool, technique, or process identified in A Guide to the Project Management Body of Knowledge PMBOK Guide Fourth Edition or other PMI standards. Practice standards are targeted at audiences who participate in the management of projects. This includes project managers, project personnel, contract personnel, supervisors, and other project stakeholders. A PMI practice standard describes processes, activities, inputs, and outputs for a specific Knowledge Area. It provides information on what the significant process, tool, or technique is, what it does, why it is significant, when it should be performed or executed, and, if necessary for further clarification, who should perform the process. A practice standard does not prescribe how the process is to be implemented, leaving that subject for other forums such as handbooks, manuals, and courses.",,"[Project Management Institute PMI practice standards are guides to the use of a tool, technique, or process identified in A Guide to the Project Management Body of Knowledge PMBOK Guide Fourth Edition or other PMI standards., Practice standards are targeted at audiences who participate in the management of projects., This includes project managers, project personnel, contract personnel, supervisors, and other project stakeholders., A PMI practice standard describes processes, activities, inputs, and outputs for a specific Knowledge Area., It provides information on what the significant process, tool, or technique is, what it does, why it is significant, when it should be performed or executed, and, if necessary for further clarification, who should perform the process., A practice standard does not prescribe how the process is to be implemented, leaving that subject for other forums such as handbooks, manuals, and courses.]","Project Management Institute PMI practice standards are guides to the use of a tool, technique, or process identified in A Guide to the Project Management Body of Knowledge PMBOK Guide Fourth Edition or other PMI standards. Practice standards are targeted at audiences who participate in the management of projects. This includes project managers, project personnel, contract personnel, supervisors, and other project stakeholders. A PMI practice standard describes processes, activities, inputs, and outputs for a specific Knowledge Area. It provides information on what the significant process, tool, or technique is, what it does, why it is significant, when it should be performed or executed, and, if necessary for further clarification, who should perform the process. A practice standard does not prescribe how the process is to be implemented, leaving that subject for other forums such as handbooks, manuals, and courses.","project stakeholders, area, pmbok, use, stakeholders, guide fourth, guide fourth edition, leaving, executed, projects"
1,1.1 Purpose of the Practice Standard for Project Risk Management,"The purpose of the Practice Standard for Project Risk Management is to a provide a standard for project management practitioners and other stakeholders that defines the aspects of Project Risk Management that are recognized as good practice on most projects most of the time and b provide a standard that is globally applicable and consistently applied. This practice standard has a descriptive purpose rather than one used for training or educational purposes. The Practice Standard for Project Risk Management covers risk management as it is applied to single projects only. Like the PMBOK Guide Fourth Edition, this practice standard does not cover risk in programs or portfolios of projects. Chapter 11 of the PMBOK Guide Fourth Edition, is the basis for the Practice Standard for Project Risk Management . This practice standard is consistent with that chapter, emphasizing the concepts and principles relating to Project Risk Management. It is aligned with other PMI practice standards. Figure 11 compares the purposes of this practice standard to those of the PMBOK Guide Fourth Edition and textbooks, handbooks, and courses. Figure 11. Hierarchy of PMI Project Risk Management Resources This practice standard is organized in three main sections 1. Introductory material including the framework, purpose, principles, context of, and introduction to Project Risk Management processes as defined in the PMBOK Guide Fourth Edition. 2. Principles underlying the six Project Risk Management processes in the PMBOK Guide Fourth Edition. The six processes are as follows Plan Risk Management, Identify Risks, Perform Qualitative Risk Analysis, Perform Quantitative Risk Analysis, Plan Risk Responses, and Monitor and Control Risks. Each of these six processes is described in a chapter that addresses the following four topics a purpose and objectives of the process b critical success factors for the process c tools and techniques for the process and d documenting the results of the process. 3. A glossary of terms which are used in this practice standard. This practice standard emphasizes those principles that are fundamental to effective, comprehensive, and successful Project Risk Management. These principles can and should be stated at a general level for several reasons 1. Principles are expected to be agreed upon now and to be valid in the future. While tools and techniques are constantly evolving, the principles have more stability and persistence. 2. Different projects, organizations, and situations will require different approaches to Project Risk Management. In particular, risk management is a discipline that contains a series of processes to apply to both large and small projects. Risk management will be more effective if its practice is tailored to the project and congruent with the organizational culture, processes and assets. There are many different ways of conducting risk management that may comply with the principles of Project Risk Management as presented in this practice standard. 3. The principles are applicable to projects carried out in a global context, re ecting the many business and organizational arrangements between participants, for example, joint ventures between commercial and national companies, government and nongovernment organizations, and the crosscultural environment often found on these project teams. The principles described herein can be used as a check for an organizations processes. Practitioners can establish processes specific to their particular situation, project, or organization and then compare them with these principles, thus validating them against good Project Risk Management practice.",,"[The purpose of the Practice Standard for Project Risk Management is to a provide a standard for project management practitioners and other stakeholders that defines the aspects of Project Risk Management that are recognized as good practice on most projects most of the time and b provide a standard that is globally applicable and consistently applied., This practice standard has a descriptive purpose rather than one used for training or educational purposes., The Practice Standard for Project Risk Management covers risk management as it is applied to single projects only., Like the PMBOK Guide Fourth Edition, this practice standard does not cover risk in programs or portfolios of projects., Chapter 11 of the PMBOK Guide Fourth Edition, is the basis for the Practice Standard for Project Risk Management ., This practice standard is consistent with that chapter, emphasizing the concepts and principles relating to Project Risk Management., It is aligned with other PMI practice standards., Figure 11 compares the purposes of this practice standard to those of the PMBOK Guide Fourth Edition and textbooks, handbooks, and courses., Figure 11., Hierarchy of PMI Project Risk Management Resources This practice standard is organized in three main sections 1., Introductory material including the framework, purpose, principles, context of, and introduction to Project Risk Management processes as defined in the PMBOK Guide Fourth Edition., 2., Principles underlying the six Project Risk Management processes in the PMBOK Guide Fourth Edition., The six processes are as follows Plan Risk Management, Identify Risks, Perform Qualitative Risk Analysis, Perform Quantitative Risk Analysis, Plan Risk Responses, and Monitor and Control Risks., Each of these six processes is described in a chapter that addresses the following four topics a purpose and objectives of the process b critical success factors for the process c tools and techniques for the process and d documenting the results of the process., 3., A glossary of terms which are used in this practice standard., This practice standard emphasizes those principles that are fundamental to effective, comprehensive, and successful Project Risk Management., These principles can and should be stated at a general level for several reasons 1., Principles are expected to be agreed upon now and to be valid in the future., While tools and techniques are constantly evolving, the principles have more stability and persistence., 2., Different projects, organizations, and situations will require different approaches to Project Risk Management., In particular, risk management is a discipline that contains a series of processes to apply to both large and small projects., Risk management will be more effective if its practice is tailored to the project and congruent with the organizational culture, processes and assets., There are many different ways of conducting risk management that may comply with the principles of Project Risk Management as presented in this practice standard., 3., The principles are applicable to projects carried out in a global context, re ecting the many business and organizational arrangements between participants, for example, joint ventures between commercial and national companies, government and nongovernment organizations, and the crosscultural environment often found on these project teams., The principles described herein can be used as a check for an organizations processes., Practitioners can establish processes specific to their particular situation, project, or organization and then compare them with these principles, thus validating them against good Project Risk Management practice.]","The purpose of the Practice Standard for Project Risk Management is to a provide a standard for project management practitioners and other stakeholders that defines the aspects of Project Risk Management that are recognized as good practice on most projects most of the time and b provide a standard that is globally applicable and consistently applied. This practice standard has a descriptive purpose rather than one used for training or educational purposes. The Practice Standard for Project Risk Management covers risk management as it is applied to single projects only. Like the PMBOK Guide Fourth Edition, this practice standard does not cover risk in programs or portfolios of projects. Chapter 11 of the PMBOK Guide Fourth Edition, is the basis for the Practice Standard for Project Risk Management . This practice standard is consistent with that chapter, emphasizing the concepts and principles relating to Project Risk Management. It is aligned with other PMI practice standards. Figure 11 compares the purposes of this practice standard to those of the PMBOK Guide Fourth Edition and textbooks, handbooks, and courses. Figure 11. Hierarchy of PMI Project Risk Management Resources This practice standard is organized in three main sections 1. Introductory material including the framework, purpose, principles, context of, and introduction to Project Risk Management processes as defined in the PMBOK Guide Fourth Edition. 2. Principles underlying the six Project Risk Management processes in the PMBOK Guide Fourth Edition. The six processes are as follows Plan Risk Management, Identify Risks, Perform Qualitative Risk Analysis, Perform Quantitative Risk Analysis, Plan Risk Responses, and Monitor and Control Risks. Each of these six processes is described in a chapter that addresses the following four topics a purpose and objectives of the process b critical success factors for the process c tools and techniques for the process and d documenting the results of the process. 3. A glossary of terms which are used in this practice standard. This practice standard emphasizes those principles that are fundamental to effective, comprehensive, and successful Project Risk Management. These principles can and should be stated at a general level for several reasons 1. Principles are expected to be agreed upon now and to be valid in the future. While tools and techniques are constantly evolving, the principles have more stability and persistence. 2. Different projects, organizations, and situations will require different approaches to Project Risk Management. In particular, risk management is a discipline that contains a series of processes to apply to both large and small projects. Risk management will be more effective if its practice is tailored to the project and congruent with the organizational culture, processes and assets. There are many different ways of conducting risk management that may comply with the principles of Project Risk Management as presented in this practice standard. 3. The principles are applicable to projects carried out in a global context, re ecting the many business and organizational arrangements between participants, for example, joint ventures between commercial and national companies, government and nongovernment organizations, and the crosscultural environment often found on these project teams. The principles described herein can be used as a check for an organizations processes. Practitioners can establish processes specific to their particular situation, project, or organization and then compare them with these principles, thus validating them against good Project Risk Management practice.","level, require, culture, results, responses, approaches, terms, defined, defines, apply"
2,1.2 Project Risk Management Definition,"The definition of Project Risk Management, as defined in the PMBOK Guide Fourth Edition, is the basis for this practice standard Project Risk Management includes the processes concerned with conducting risk management planning, identification, analysis, responses, and monitoring and control on a project. The PMBOK Guide Fourth Edition also states The objectives of Project Risk Management are to increase the probability and impact of positive events, and decrease the probability and impact of negative events in the project. In the PMBOK Guide Fourth Edition, project risk is an uncertain event or condition that, if it occurs, has a positive or negative effect on a projects objectives. Project objectives include scope, schedule, cost, and quality. Project Risk Management aims to identify and prioritize risks in advance of their occurrence, and provide actionoriented information to project managers. This orientation requires consideration of events that may or may not occur and are therefore described in terms of likelihood or probability of occurrence in addition to other dimensions such as their impact on objectives.","The definition of Project Risk Management, as defined in the PMBOK Guide Fourth Edition, is the basis for this practice standard Project Risk Management includes the processes concerned with conducting risk management planning, identification, analysis, responses, and monitoring and control on a project. The PMBOK Guide Fourth Edition also states The objectives of Project Risk Management are to increase the probability and impact of positive events, and decrease the probability and impact of negative events in the project. In the PMBOK Guide Fourth Edition, project risk is an uncertain event or condition that, if it occurs, has a positive or negative effect on a projects objectives. Project objectives include scope, schedule, cost, and quality. Project Risk Management aims to identify and prioritize risks in advance of their occurrence, and provide actionoriented information to project managers. This orientation requires consideration of events that may or may not occur and are therefore described in terms of likelihood or probability of occurrence in addition to other dimensions such as their impact on objectives.","[The definition of Project Risk Management, as defined in the PMBOK Guide Fourth Edition, is the basis for this practice standard Project Risk Management includes the processes concerned with conducting risk management planning, identification, analysis, responses, and monitoring and control on a project., The PMBOK Guide Fourth Edition also states The objectives of Project Risk Management are to increase the probability and impact of positive events, and decrease the probability and impact of negative events in the project., In the PMBOK Guide Fourth Edition, project risk is an uncertain event or condition that, if it occurs, has a positive or negative effect on a projects objectives., Project objectives include scope, schedule, cost, and quality., Project Risk Management aims to identify and prioritize risks in advance of their occurrence, and provide actionoriented information to project managers., This orientation requires consideration of events that may or may not occur and are therefore described in terms of likelihood or probability of occurrence in addition to other dimensions such as their impact on objectives.]","The definition of Project Risk Management, as defined in the PMBOK Guide Fourth Edition, is the basis for this practice standard Project Risk Management includes the processes concerned with conducting risk management planning, identification, analysis, responses, and monitoring and control on a project. The PMBOK Guide Fourth Edition also states The objectives of Project Risk Management are to increase the probability and impact of positive events, and decrease the probability and impact of negative events in the project. In the PMBOK Guide Fourth Edition, project risk is an uncertain event or condition that, if it occurs, has a positive or negative effect on a projects objectives. Project objectives include scope, schedule, cost, and quality. Project Risk Management aims to identify and prioritize risks in advance of their occurrence, and provide actionoriented information to project managers. This orientation requires consideration of events that may or may not occur and are therefore described in terms of likelihood or probability of occurrence in addition to other dimensions such as their impact on objectives.","event condition occurs, analysis, identify, cost, basis, project objectives, occurs positive negative, occurs positive, occurs, practice"


Latent Dirichlet Allocation (LDA) with TF-IDF

In [49]:
df = process_dataframe(updated_data_for_df)
df_after_def_extraction=extract_definitions(df)

In [50]:
import pandas as pd
import nltk
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.decomposition import LatentDirichletAllocation



# Function to tokenize a paragraph into sentences
def tokenize_paragraph(paragraph):
    sentences = nltk.sent_tokenize(paragraph)
    return sentences

# Function to extract n-grams (from 1 to 3 words) from tokenized sentences
def extract_ngrams(tokens):
    ngram_list = []
    for sentence_tokens in tokens:
        for n in range(1, 4):  # Extract n-grams from 1 to 3 words
            ngram_list.extend(list(ngrams(sentence_tokens, n)))
    return ngram_list

# Apply paragraph tokenization
df_after_def_extraction['Tokenized_Description'] = df_after_def_extraction['Description'].apply(tokenize_paragraph)

# Join all sentences of each paragraph into a single text
df_after_def_extraction['Description_text'] = df_after_def_extraction['Tokenized_Description'].apply(lambda x: ' '.join(x))

# Create a TF-IDF Vectorizer (TfidfVectorizer) with n-grams
vectorizer = TfidfVectorizer(max_df=0.85, max_features=1000, stop_words='english', ngram_range=(1, 3))
X = vectorizer.fit_transform(df_after_def_extraction['Description_text'])

# Apply LDA for each section
num_topics = 10  # You can adjust the number of topics based on your needs

concepts_list = []  

for index, row in df_after_def_extraction.iterrows():
    section_text = ' '.join(row['Tokenized_Description'])
    X_section = vectorizer.transform([section_text])
    
    lda = LatentDirichletAllocation(n_components=num_topics, random_state=42)
    lda.fit(X_section)
    
    feature_names = vectorizer.get_feature_names_out()
    top_keywords_idx = lda.components_[0].argsort()[:-10 - 1:-1]  # Top 10 keywords for this section
    top_keywords = [feature_names[i] for i in top_keywords_idx]
    
    concepts_list.append(top_keywords)

# Add the concepts list to your DataFrame
df_after_def_extraction['Concepts'] = concepts_list

# Display the DataFrame with the new 'Concepts' column
df_after_def_extraction.head(3)


  perword_bound = bound / word_cnt


Unnamed: 0,Title,Description,Definition,Tokenized_Description,Description_text,Concepts
0,CHAPTER 1 INTRODUCTION,"Project Management Institute PMI practice standards are guides to the use of a tool, technique, or process identified in A Guide to the Project Management Body of Knowledge PMBOK Guide Fourth Edition or other PMI standards. Practice standards are targeted at audiences who participate in the management of projects. This includes project managers, project personnel, contract personnel, supervisors, and other project stakeholders. A PMI practice standard describes processes, activities, inputs, and outputs for a specific Knowledge Area. It provides information on what the significant process, tool, or technique is, what it does, why it is significant, when it should be performed or executed, and, if necessary for further clarification, who should perform the process. A practice standard does not prescribe how the process is to be implemented, leaving that subject for other forums such as handbooks, manuals, and courses.",,"[Project Management Institute PMI practice standards are guides to the use of a tool, technique, or process identified in A Guide to the Project Management Body of Knowledge PMBOK Guide Fourth Edition or other PMI standards., Practice standards are targeted at audiences who participate in the management of projects., This includes project managers, project personnel, contract personnel, supervisors, and other project stakeholders., A PMI practice standard describes processes, activities, inputs, and outputs for a specific Knowledge Area., It provides information on what the significant process, tool, or technique is, what it does, why it is significant, when it should be performed or executed, and, if necessary for further clarification, who should perform the process., A practice standard does not prescribe how the process is to be implemented, leaving that subject for other forums such as handbooks, manuals, and courses.]","Project Management Institute PMI practice standards are guides to the use of a tool, technique, or process identified in A Guide to the Project Management Body of Knowledge PMBOK Guide Fourth Edition or other PMI standards. Practice standards are targeted at audiences who participate in the management of projects. This includes project managers, project personnel, contract personnel, supervisors, and other project stakeholders. A PMI practice standard describes processes, activities, inputs, and outputs for a specific Knowledge Area. It provides information on what the significant process, tool, or technique is, what it does, why it is significant, when it should be performed or executed, and, if necessary for further clarification, who should perform the process. A practice standard does not prescribe how the process is to be implemented, leaving that subject for other forums such as handbooks, manuals, and courses.","[stakeholders, identified, projects, information, use, processes, specific, perform, activities, project stakeholders]"
1,1.1 Purpose of the Practice Standard for Project Risk Management,"The purpose of the Practice Standard for Project Risk Management is to a provide a standard for project management practitioners and other stakeholders that defines the aspects of Project Risk Management that are recognized as good practice on most projects most of the time and b provide a standard that is globally applicable and consistently applied. This practice standard has a descriptive purpose rather than one used for training or educational purposes. The Practice Standard for Project Risk Management covers risk management as it is applied to single projects only. Like the PMBOK Guide Fourth Edition, this practice standard does not cover risk in programs or portfolios of projects. Chapter 11 of the PMBOK Guide Fourth Edition, is the basis for the Practice Standard for Project Risk Management . This practice standard is consistent with that chapter, emphasizing the concepts and principles relating to Project Risk Management. It is aligned with other PMI practice standards. Figure 11 compares the purposes of this practice standard to those of the PMBOK Guide Fourth Edition and textbooks, handbooks, and courses. Figure 11. Hierarchy of PMI Project Risk Management Resources This practice standard is organized in three main sections 1. Introductory material including the framework, purpose, principles, context of, and introduction to Project Risk Management processes as defined in the PMBOK Guide Fourth Edition. 2. Principles underlying the six Project Risk Management processes in the PMBOK Guide Fourth Edition. The six processes are as follows Plan Risk Management, Identify Risks, Perform Qualitative Risk Analysis, Perform Quantitative Risk Analysis, Plan Risk Responses, and Monitor and Control Risks. Each of these six processes is described in a chapter that addresses the following four topics a purpose and objectives of the process b critical success factors for the process c tools and techniques for the process and d documenting the results of the process. 3. A glossary of terms which are used in this practice standard. This practice standard emphasizes those principles that are fundamental to effective, comprehensive, and successful Project Risk Management. These principles can and should be stated at a general level for several reasons 1. Principles are expected to be agreed upon now and to be valid in the future. While tools and techniques are constantly evolving, the principles have more stability and persistence. 2. Different projects, organizations, and situations will require different approaches to Project Risk Management. In particular, risk management is a discipline that contains a series of processes to apply to both large and small projects. Risk management will be more effective if its practice is tailored to the project and congruent with the organizational culture, processes and assets. There are many different ways of conducting risk management that may comply with the principles of Project Risk Management as presented in this practice standard. 3. The principles are applicable to projects carried out in a global context, re ecting the many business and organizational arrangements between participants, for example, joint ventures between commercial and national companies, government and nongovernment organizations, and the crosscultural environment often found on these project teams. The principles described herein can be used as a check for an organizations processes. Practitioners can establish processes specific to their particular situation, project, or organization and then compare them with these principles, thus validating them against good Project Risk Management practice.",,"[The purpose of the Practice Standard for Project Risk Management is to a provide a standard for project management practitioners and other stakeholders that defines the aspects of Project Risk Management that are recognized as good practice on most projects most of the time and b provide a standard that is globally applicable and consistently applied., This practice standard has a descriptive purpose rather than one used for training or educational purposes., The Practice Standard for Project Risk Management covers risk management as it is applied to single projects only., Like the PMBOK Guide Fourth Edition, this practice standard does not cover risk in programs or portfolios of projects., Chapter 11 of the PMBOK Guide Fourth Edition, is the basis for the Practice Standard for Project Risk Management ., This practice standard is consistent with that chapter, emphasizing the concepts and principles relating to Project Risk Management., It is aligned with other PMI practice standards., Figure 11 compares the purposes of this practice standard to those of the PMBOK Guide Fourth Edition and textbooks, handbooks, and courses., Figure 11., Hierarchy of PMI Project Risk Management Resources This practice standard is organized in three main sections 1., Introductory material including the framework, purpose, principles, context of, and introduction to Project Risk Management processes as defined in the PMBOK Guide Fourth Edition., 2., Principles underlying the six Project Risk Management processes in the PMBOK Guide Fourth Edition., The six processes are as follows Plan Risk Management, Identify Risks, Perform Qualitative Risk Analysis, Perform Quantitative Risk Analysis, Plan Risk Responses, and Monitor and Control Risks., Each of these six processes is described in a chapter that addresses the following four topics a purpose and objectives of the process b critical success factors for the process c tools and techniques for the process and d documenting the results of the process., 3., A glossary of terms which are used in this practice standard., This practice standard emphasizes those principles that are fundamental to effective, comprehensive, and successful Project Risk Management., These principles can and should be stated at a general level for several reasons 1., Principles are expected to be agreed upon now and to be valid in the future., While tools and techniques are constantly evolving, the principles have more stability and persistence., 2., Different projects, organizations, and situations will require different approaches to Project Risk Management., In particular, risk management is a discipline that contains a series of processes to apply to both large and small projects., Risk management will be more effective if its practice is tailored to the project and congruent with the organizational culture, processes and assets., There are many different ways of conducting risk management that may comply with the principles of Project Risk Management as presented in this practice standard., 3., The principles are applicable to projects carried out in a global context, re ecting the many business and organizational arrangements between participants, for example, joint ventures between commercial and national companies, government and nongovernment organizations, and the crosscultural environment often found on these project teams., The principles described herein can be used as a check for an organizations processes., Practitioners can establish processes specific to their particular situation, project, or organization and then compare them with these principles, thus validating them against good Project Risk Management practice.]","The purpose of the Practice Standard for Project Risk Management is to a provide a standard for project management practitioners and other stakeholders that defines the aspects of Project Risk Management that are recognized as good practice on most projects most of the time and b provide a standard that is globally applicable and consistently applied. This practice standard has a descriptive purpose rather than one used for training or educational purposes. The Practice Standard for Project Risk Management covers risk management as it is applied to single projects only. Like the PMBOK Guide Fourth Edition, this practice standard does not cover risk in programs or portfolios of projects. Chapter 11 of the PMBOK Guide Fourth Edition, is the basis for the Practice Standard for Project Risk Management . This practice standard is consistent with that chapter, emphasizing the concepts and principles relating to Project Risk Management. It is aligned with other PMI practice standards. Figure 11 compares the purposes of this practice standard to those of the PMBOK Guide Fourth Edition and textbooks, handbooks, and courses. Figure 11. Hierarchy of PMI Project Risk Management Resources This practice standard is organized in three main sections 1. Introductory material including the framework, purpose, principles, context of, and introduction to Project Risk Management processes as defined in the PMBOK Guide Fourth Edition. 2. Principles underlying the six Project Risk Management processes in the PMBOK Guide Fourth Edition. The six processes are as follows Plan Risk Management, Identify Risks, Perform Qualitative Risk Analysis, Perform Quantitative Risk Analysis, Plan Risk Responses, and Monitor and Control Risks. Each of these six processes is described in a chapter that addresses the following four topics a purpose and objectives of the process b critical success factors for the process c tools and techniques for the process and d documenting the results of the process. 3. A glossary of terms which are used in this practice standard. This practice standard emphasizes those principles that are fundamental to effective, comprehensive, and successful Project Risk Management. These principles can and should be stated at a general level for several reasons 1. Principles are expected to be agreed upon now and to be valid in the future. While tools and techniques are constantly evolving, the principles have more stability and persistence. 2. Different projects, organizations, and situations will require different approaches to Project Risk Management. In particular, risk management is a discipline that contains a series of processes to apply to both large and small projects. Risk management will be more effective if its practice is tailored to the project and congruent with the organizational culture, processes and assets. There are many different ways of conducting risk management that may comply with the principles of Project Risk Management as presented in this practice standard. 3. The principles are applicable to projects carried out in a global context, re ecting the many business and organizational arrangements between participants, for example, joint ventures between commercial and national companies, government and nongovernment organizations, and the crosscultural environment often found on these project teams. The principles described herein can be used as a check for an organizations processes. Practitioners can establish processes specific to their particular situation, project, or organization and then compare them with these principles, thus validating them against good Project Risk Management practice.","[responses, time, objectives, stakeholders, identify, example, project management, level, identify risks, success]"
2,1.2 Project Risk Management Definition,"The definition of Project Risk Management, as defined in the PMBOK Guide Fourth Edition, is the basis for this practice standard Project Risk Management includes the processes concerned with conducting risk management planning, identification, analysis, responses, and monitoring and control on a project. The PMBOK Guide Fourth Edition also states The objectives of Project Risk Management are to increase the probability and impact of positive events, and decrease the probability and impact of negative events in the project. In the PMBOK Guide Fourth Edition, project risk is an uncertain event or condition that, if it occurs, has a positive or negative effect on a projects objectives. Project objectives include scope, schedule, cost, and quality. Project Risk Management aims to identify and prioritize risks in advance of their occurrence, and provide actionoriented information to project managers. This orientation requires consideration of events that may or may not occur and are therefore described in terms of likelihood or probability of occurrence in addition to other dimensions such as their impact on objectives.","The definition of Project Risk Management, as defined in the PMBOK Guide Fourth Edition, is the basis for this practice standard Project Risk Management includes the processes concerned with conducting risk management planning, identification, analysis, responses, and monitoring and control on a project. The PMBOK Guide Fourth Edition also states The objectives of Project Risk Management are to increase the probability and impact of positive events, and decrease the probability and impact of negative events in the project. In the PMBOK Guide Fourth Edition, project risk is an uncertain event or condition that, if it occurs, has a positive or negative effect on a projects objectives. Project objectives include scope, schedule, cost, and quality. Project Risk Management aims to identify and prioritize risks in advance of their occurrence, and provide actionoriented information to project managers. This orientation requires consideration of events that may or may not occur and are therefore described in terms of likelihood or probability of occurrence in addition to other dimensions such as their impact on objectives.","[The definition of Project Risk Management, as defined in the PMBOK Guide Fourth Edition, is the basis for this practice standard Project Risk Management includes the processes concerned with conducting risk management planning, identification, analysis, responses, and monitoring and control on a project., The PMBOK Guide Fourth Edition also states The objectives of Project Risk Management are to increase the probability and impact of positive events, and decrease the probability and impact of negative events in the project., In the PMBOK Guide Fourth Edition, project risk is an uncertain event or condition that, if it occurs, has a positive or negative effect on a projects objectives., Project objectives include scope, schedule, cost, and quality., Project Risk Management aims to identify and prioritize risks in advance of their occurrence, and provide actionoriented information to project managers., This orientation requires consideration of events that may or may not occur and are therefore described in terms of likelihood or probability of occurrence in addition to other dimensions such as their impact on objectives.]","The definition of Project Risk Management, as defined in the PMBOK Guide Fourth Edition, is the basis for this practice standard Project Risk Management includes the processes concerned with conducting risk management planning, identification, analysis, responses, and monitoring and control on a project. The PMBOK Guide Fourth Edition also states The objectives of Project Risk Management are to increase the probability and impact of positive events, and decrease the probability and impact of negative events in the project. In the PMBOK Guide Fourth Edition, project risk is an uncertain event or condition that, if it occurs, has a positive or negative effect on a projects objectives. Project objectives include scope, schedule, cost, and quality. Project Risk Management aims to identify and prioritize risks in advance of their occurrence, and provide actionoriented information to project managers. This orientation requires consideration of events that may or may not occur and are therefore described in terms of likelihood or probability of occurrence in addition to other dimensions such as their impact on objectives.","[risks, analysis, responses, identify, cost, projects, information, planning, schedule, include]"
