In [None]:
# %pip install textacy
# %pip install spacy
# !python -m spacy download en_core_web_sm

In [1]:
import torch
from transformers import BertTokenizer, BertForTokenClassification, pipeline, logging

# Suppress warnings from the transformers library
logging.set_verbosity_error()

# Check if GPU is available and set the device
device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")

# Load the pre-trained BERT model for POS tagging
model_name = 'vblagoje/bert-english-uncased-finetuned-pos'
tokenizer = BertTokenizer.from_pretrained(model_name)
model = BertForTokenClassification.from_pretrained(model_name).to(device)

# Define a function to find all action verbs in the text using the BERT model
def find_action_verbs(text):
    # Use the BERT model for POS tagging
    nlp = pipeline("token-classification", model=model, tokenizer=tokenizer, device=0 if torch.cuda.is_available() else -1)
    pos_results = nlp(text)
    
    # Find all action verbs and handle sub-word tokens
    action_verbs = []
    current_word = ""
    for result in pos_results:
        if result['entity'] == 'VERB':  # Check if the entity is a verb
            if result['word'].startswith('##'):
                current_word += result['word'][2:]
            else:
                if current_word:
                    action_verbs.append(current_word)
                current_word = result['word']
    if current_word:
        action_verbs.append(current_word)
    
    return action_verbs

# Example text
text = """
PERFORMED PM PER SCOPE HVAC TASKS REPLACED AIR FILTERS VISUALLY INSPECTED FOR REFRIGERANT LEAKS LUBRICATED ALL BEARINGS AS NEEDED SHUT OFF WATER SUPPLY  
FALL VISIT AS NEEDED  INSPECTED BELTS AND ADJUSTED AS NEEDED CLEANED CONDENSATE LINES CHECKED CONTACTORS VISUALLY INSPECTED EVAPORATOR AND CONDENSER COILS 
INSPECTED AND TESTED HEATING CIRCUIT FOR OPERATION INSPECTED AND TESTED EACH CONDENSATE PUMP IF APPLICABLE REFRIGERATIONS TASKS INSPECTED ALL HINGES AND 
GASKETS CHECKED ELECTRICAL CONTACTS  CONTROLS AND COMPONENTS INSPECTED RELAYS AND CONTACTORS VISUALLY INSPECTED FOR LEAKS CLEANED ALL PRE FILTER MEDIA AND 
REPLACED IF APPLICABLE BRUSHED COIL FINS AS NEEDED INSPECTED BLOWER WHEELS AND FANS LUBRICATED MOTORS AND BEARINGS AS NEEDED CHECKED CONTROLS CALIBRATION AND 
OPERATION WALK IN TASKS INSPECFTED ALL HINGES AND GASKETS CHECKED ELECTRICAL CONTACTS CONTROLS AND COMPONENTS INSPECTED RELAYS AND CONTACTORS VISUALLY INSPECTED 
FOR LEAKS CLEANE DALL PRE FILTER MEDIA AND REPLACED IF APPLICABLE BRUSHED COIL FINS AS NEEDED INSPECTED BLOWER WHEELS AND FANS LUBRICATED MOTORS AND BEARINGS 
INSPECTED SIGHT GLASS CHECKED CONTROL CALIBRATION AND OPERATION ICE MACHINE TASKS CHECKED ICE PRODUCTION THICKNESS AND SENSING PROBES INSPECTED DOOR AND GASKET 
VISUALLY INSPECTED BIN AND BIN TSTAT INSPECTED WATER PUMPS AND DISTRIBUTION TUBES VISUALLY INSPECTED WATER FILTERS INSPECTED FOR ANY LEAKS CLEANED CONDENSER COILS 
INSPECTED ALL ELECTRICAL COMPONENTS INSPECTED FAN MOTOR  BLADES   BEARINGS   LUBRICATED AS NEEDED DESCALED ICE MACHINE EXHAUST FAN TASKS INSPECTED ROOF FAN BELTS AND 
SHEAVES ADJUSTED AS NEEDED REPLACED BELTS AS NEEDED  20X25X2 PLEATED FILTER HIGH EFFICIENCY
"""

# Find all action verbs in the text
action_verbs = find_action_verbs(text)

# Print the results
print("Action Verbs:", action_verbs)

Action Verbs: ['performed', 'replaced', 'inspected', 'lubricated', 'needed', 'shut', 'needed', 'inspected', 'adjusted', 'needed', 'cleaned', 'checked', 'inspected', 'inspected', 'tested', 'inspected', 'tested', 'inspected', 'checked', 'inspected', 'inspected', 'cleaned', 'replaced', 'brushed', 'needed', 'inspected', 'lubricated', 'needed', 'checked', 'walk', 'insfted', 'checked', 'inspected', 'inspected', 'replaced', 'brushed', 'needed', 'inspectedcated', 'inspected', 'checked', 'checked', 'inspected', 'inspected', 'inspected', 'inspected', 'inspected', 'cleaned', 'inspected', 'inspected', 'lubricated', 'needed', 'descaled', 'inspected', 'adjusted', 'needed', 'replaced', 'neededted']


In [12]:
import torch
from transformers import BertTokenizer, BertForTokenClassification, pipeline, logging

# Suppress warnings from the transformers library
logging.set_verbosity_error()

# Check if GPU is available and set the device
device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")

# Load the pre-trained BERT model for POS tagging
model_name = 'vblagoje/bert-english-uncased-finetuned-pos'
tokenizer = BertTokenizer.from_pretrained(model_name)
model = BertForTokenClassification.from_pretrained(model_name).to(device)

# Define a function to find all action verbs in the text using the BERT model
def find_action_verbs(text, nlp_pipeline):
    # Use the BERT model for POS tagging
    pos_results = nlp_pipeline(text)
    
    # Find all action verbs and handle sub-word tokens
    action_verbs = []
    current_word = ""
    for result in pos_results:
        if result['entity'] == 'VERB':  # Check if the entity is a verb
            if result['word'].startswith('##'):
                current_word += result['word'][2:]
            else:
                if current_word:
                    action_verbs.append(current_word)
                current_word = result['word']
    if current_word:
        action_verbs.append(current_word)
    
    return action_verbs

# Define a function to find objects for each action verb
def find_objects_for_verbs(text, action_verbs, nlp_pipeline):
    verb_object_pairs = []
    for i, verb in enumerate(action_verbs):
        start_index = text.find(verb)
        end_index = text.find(action_verbs[i + 1]) if i + 1 < len(action_verbs) else len(text)
        substring = text[start_index:end_index]
        
        # Use the BERT model for POS tagging to find objects in the substring
        pos_results = nlp_pipeline(substring)
        objects = []
        current_word = ""
        for result in pos_results:
            if result['entity'] in ('B-NP', 'I-NP'):  # Check if the entity is a noun phrase (object)
                if result['word'].startswith('##'):
                    current_word += result['word'][2:]
                else:
                    if current_word:
                        objects.append(current_word)
                    current_word = result['word']
        if current_word:
            objects.append(current_word)
        
        # Pair the verb with the first object found in the substring
        if objects:
            verb_object_pairs.append((verb, ' '.join(objects)))
    
    return verb_object_pairs

# Initialize the pipeline
nlp_pipeline = pipeline("token-classification", model=model, tokenizer=tokenizer, device=0 if torch.backends.mps.is_available() else -1)

# Example text
text = """
PERFORMED PM PER SCOPE HVAC TASKS REPLACED AIR FILTERS VISUALLY INSPECTED FOR REFRIGERANT LEAKS LUBRICATED ALL BEARINGS AS NEEDED SHUT OFF WATER SUPPLY  
FALL VISIT AS NEEDED  INSPECTED BELTS AND ADJUSTED AS NEEDED CLEANED CONDENSATE LINES CHECKED CONTACTORS VISUALLY INSPECTED EVAPORATOR AND CONDENSER COILS 
INSPECTED AND TESTED HEATING CIRCUIT FOR OPERATION INSPECTED AND TESTED EACH CONDENSATE PUMP IF APPLICABLE REFRIGERATIONS TASKS INSPECTED ALL HINGES AND 
GASKETS CHECKED ELECTRICAL CONTACTS  CONTROLS AND COMPONENTS INSPECTED RELAYS AND CONTACTORS VISUALLY INSPECTED FOR LEAKS CLEANED ALL PRE FILTER MEDIA AND 
REPLACED IF APPLICABLE BRUSHED COIL FINS AS NEEDED INSPECTED BLOWER WHEELS AND FANS LUBRICATED MOTORS AND BEARINGS AS NEEDED CHECKED CONTROLS CALIBRATION AND 
OPERATION WALK IN TASKS INSPECFTED ALL HINGES AND GASKETS CHECKED ELECTRICAL CONTACTS CONTROLS AND COMPONENTS INSPECTED RELAYS AND CONTACTORS VISUALLY INSPECTED 
FOR LEAKS CLEANE DALL PRE FILTER MEDIA AND REPLACED IF APPLICABLE BRUSHED COIL FINS AS NEEDED INSPECTED BLOWER WHEELS AND FANS LUBRICATED MOTORS AND BEARINGS 
INSPECTED SIGHT GLASS CHECKED CONTROL CALIBRATION AND OPERATION ICE MACHINE TASKS CHECKED ICE PRODUCTION THICKNESS AND SENSING PROBES INSPECTED DOOR AND GASKET 
VISUALLY INSPECTED BIN AND BIN TSTAT INSPECTED WATER PUMPS AND DISTRIBUTION TUBES VISUALLY INSPECTED WATER FILTERS INSPECTED FOR ANY LEAKS CLEANED CONDENSER COILS 
INSPECTED ALL ELECTRICAL COMPONENTS INSPECTED FAN MOTOR  BLADES   BEARINGS   LUBRICATED AS NEEDED DESCALED ICE MACHINE EXHAUST FAN TASKS INSPECTED ROOF FAN BELTS AND 
SHEAVES ADJUSTED AS NEEDED REPLACED BELTS AS NEEDED  20X25X2 PLEATED FILTER HIGH EFFICIENCY
"""

# Find all action verbs in the text
action_verbs = find_action_verbs(text, nlp_pipeline)
print("Action Verbs:", action_verbs)

# Find objects for each action verb
verb_object_pairs = find_objects_for_verbs(text, action_verbs, nlp_pipeline)

# Print the results
print("Verb-Object Pairs:", verb_object_pairs)

Action Verbs: ['performed', 'replaced', 'inspected', 'lubricated', 'needed', 'shut', 'needed', 'inspected', 'adjusted', 'needed', 'cleaned', 'checked', 'inspected', 'inspected', 'tested', 'inspected', 'tested', 'inspected', 'checked', 'inspected', 'inspected', 'cleaned', 'replaced', 'brushed', 'needed', 'inspected', 'lubricated', 'needed', 'checked', 'walk', 'insfted', 'checked', 'inspected', 'inspected', 'replaced', 'brushed', 'needed', 'inspectedcated', 'inspected', 'checked', 'checked', 'inspected', 'inspected', 'inspected', 'inspected', 'inspected', 'cleaned', 'inspected', 'inspected', 'lubricated', 'needed', 'descaled', 'inspected', 'adjusted', 'needed', 'replaced', 'neededted']
Verb-Object Pairs: []


In [43]:
import re
import torch
from transformers import BertTokenizer, BertForTokenClassification, pipeline, logging

# Suppress warnings from the transformers library
logging.set_verbosity_error()

# Check if GPU is available and set the device
device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")

# Load the pre-trained BERT model for POS tagging
model_name = 'vblagoje/bert-english-uncased-finetuned-pos'
tokenizer = BertTokenizer.from_pretrained(model_name)
model = BertForTokenClassification.from_pretrained(model_name).to(device)

# Define a function to find all action verbs in the text using the BERT model
def find_action_verbs(text, nlp_pipeline):
    # Use the BERT model for POS tagging
    pos_results = nlp_pipeline(text)
    
    # Find all action verbs and handle sub-word tokens
    action_verbs = []
    current_word = ""
    for result in pos_results:
        if result['entity'] == 'VERB':  # Check if the entity is a verb
            if result['word'].startswith('##'):
                current_word += result['word'][2:]
            else:
                if current_word:
                    action_verbs.append(current_word)
                current_word = result['word']
    if current_word:
        action_verbs.append(current_word)
    
    return action_verbs


# Initialize the pipeline
nlp_pipeline = pipeline("token-classification", model=model, tokenizer=tokenizer, device=0 if torch.backends.mps.is_available() else -1)

# Example text
text = """
PERFORMED PM PER SCOPE HVAC TASKS REPLACED AIR FILTERS VISUALLY INSPECTED FOR REFRIGERANT LEAKS LUBRICATED ALL BEARINGS AS NEEDED SHUT OFF WATER SUPPLY  
FALL VISIT AS NEEDED  INSPECTED BELTS AND ADJUSTED AS NEEDED CLEANED CONDENSATE LINES CHECKED CONTACTORS VISUALLY INSPECTED EVAPORATOR AND CONDENSER COILS 
INSPECTED AND TESTED HEATING CIRCUIT FOR OPERATION INSPECTED AND TESTED EACH CONDENSATE PUMP IF APPLICABLE REFRIGERATIONS TASKS INSPECTED ALL HINGES AND 
GASKETS CHECKED ELECTRICAL CONTACTS  CONTROLS AND COMPONENTS INSPECTED RELAYS AND CONTACTORS VISUALLY INSPECTED FOR LEAKS CLEANED ALL PRE FILTER MEDIA AND 
REPLACED IF APPLICABLE BRUSHED COIL FINS AS NEEDED INSPECTED BLOWER WHEELS AND FANS LUBRICATED MOTORS AND BEARINGS AS NEEDED CHECKED CONTROLS CALIBRATION AND 
OPERATION WALK IN TASKS INSPECFTED ALL HINGES AND GASKETS CHECKED ELECTRICAL CONTACTS CONTROLS AND COMPONENTS INSPECTED RELAYS AND CONTACTORS VISUALLY INSPECTED 
FOR LEAKS CLEANE DALL PRE FILTER MEDIA AND REPLACED IF APPLICABLE BRUSHED COIL FINS AS NEEDED INSPECTED BLOWER WHEELS AND FANS LUBRICATED MOTORS AND BEARINGS 
INSPECTED SIGHT GLASS CHECKED CONTROL CALIBRATION AND OPERATION ICE MACHINE TASKS CHECKED ICE PRODUCTION THICKNESS AND SENSING PROBES INSPECTED DOOR AND GASKET 
VISUALLY INSPECTED BIN AND BIN TSTAT INSPECTED WATER PUMPS AND DISTRIBUTION TUBES VISUALLY INSPECTED WATER FILTERS INSPECTED FOR ANY LEAKS CLEANED CONDENSER COILS 
INSPECTED ALL ELECTRICAL COMPONENTS INSPECTED FAN MOTOR  BLADES   BEARINGS   LUBRICATED AS NEEDED DESCALED ICE MACHINE EXHAUST FAN TASKS INSPECTED ROOF FAN BELTS AND 
SHEAVES ADJUSTED AS NEEDED REPLACED BELTS AS NEEDED  20X25X2 PLEATED FILTER HIGH EFFICIENCY
"""

# remove special characters from text
text = re.sub(r'[^A-Za-z0-9\s]+', '', text)

# Find all action verbs in the text
action_verbs = find_action_verbs(text, nlp_pipeline)
print("Extracted Action Verbs:", action_verbs)

# Normalize case
text = text.lower()
action_verbs = [verb.lower() for verb in action_verbs]

# Define a function to find objects for each action verb
def find_objects_for_verbs(text, action_verbs, nlp_pipeline):
    verb_object_pairs = []
    for i, verb in enumerate(action_verbs):
        start_index = text.find(verb)
        end_index = text.find(action_verbs[i + 1]) if i + 1 < len(action_verbs) else len(text)
        substring = text[start_index:end_index]
        
        # Use the BERT model for POS tagging to find objects in the substring
        pos_results = nlp_pipeline(substring)
        objects = []
        current_word = ""
        for result in pos_results:
            if result['entity'].startswith('B-') or result['entity'].startswith('I-'):  # Check if the entity is a noun phrase (object)
                if result['word'].startswith('##'):
                    current_word += result['word'][2:]
                else:
                    if current_word:
                        objects.append(current_word)
                    current_word = result['word']
        if current_word:
            objects.append(current_word)
        
        # Pair the verb with the first object found in the substring
        if objects:
            verb_object_pairs.append((verb, ' '.join(objects)))
    
    return verb_object_pairs



# Find objects for each action verb
verb_object_pairs = find_objects_for_verbs(text, action_verbs, nlp_pipeline)

# Print the results
print("Verb-Object Pairs:", verb_object_pairs)

Extracted Action Verbs: ['performed', 'replaced', 'inspected', 'lubricated', 'needed', 'shut', 'needed', 'inspected', 'adjusted', 'needed', 'cleaned', 'checked', 'inspected', 'inspected', 'tested', 'inspected', 'tested', 'inspected', 'checked', 'inspected', 'inspected', 'cleaned', 'replaced', 'brushed', 'needed', 'inspected', 'lubricated', 'needed', 'checked', 'walk', 'insfted', 'checked', 'inspected', 'inspected', 'replaced', 'brushed', 'needed', 'inspectedcated', 'inspected', 'checked', 'checked', 'inspected', 'inspected', 'inspected', 'inspected', 'inspected', 'cleaned', 'inspected', 'inspected', 'lubricated', 'needed', 'descaled', 'inspected', 'adjusted', 'needed', 'replaced', 'neededted']
Verb-Object Pairs: []


In [37]:
print(len(action_verbs))
print(start_index,end_index)

57
8 -1


In [44]:
import re
import torch
from transformers import BertTokenizer, BertForTokenClassification, pipeline, logging

# Suppress warnings from the transformers library
logging.set_verbosity_error()

# Check if GPU is available and set the device
device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")

# Load the pre-trained BERT model for POS tagging
model_name = 'vblagoje/bert-english-uncased-finetuned-pos'
tokenizer = BertTokenizer.from_pretrained(model_name)
model = BertForTokenClassification.from_pretrained(model_name).to(device)

# Define a function to find all action verbs in the text using the BERT model
def find_action_verbs(text, nlp_pipeline):
    # Use the BERT model for POS tagging
    pos_results = nlp_pipeline(text)
    
    # Find all action verbs and handle sub-word tokens
    action_verbs = []
    current_word = ""
    for result in pos_results:
        if result['entity'] == 'VERB':  # Check if the entity is a verb
            if result['word'].startswith('##'):
                current_word += result['word'][2:]
            else:
                if current_word:
                    action_verbs.append(current_word)
                current_word = result['word']
    if current_word:
        action_verbs.append(current_word)
    
    return action_verbs

# Initialize the pipeline
nlp_pipeline = pipeline("token-classification", model=model, tokenizer=tokenizer, device=0 if torch.backends.mps.is_available() else -1)

# Example text
text = """
PERFORMED PM PER SCOPE HVAC TASKS REPLACED AIR FILTERS VISUALLY INSPECTED FOR REFRIGERANT LEAKS LUBRICATED ALL BEARINGS AS NEEDED SHUT OFF WATER SUPPLY  
FALL VISIT AS NEEDED  INSPECTED BELTS AND ADJUSTED AS NEEDED CLEANED CONDENSATE LINES CHECKED CONTACTORS VISUALLY INSPECTED EVAPORATOR AND CONDENSER COILS 
INSPECTED AND TESTED HEATING CIRCUIT FOR OPERATION INSPECTED AND TESTED EACH CONDENSATE PUMP IF APPLICABLE REFRIGERATIONS TASKS INSPECTED ALL HINGES AND 
GASKETS CHECKED ELECTRICAL CONTACTS  CONTROLS AND COMPONENTS INSPECTED RELAYS AND CONTACTORS VISUALLY INSPECTED FOR LEAKS CLEANED ALL PRE FILTER MEDIA AND 
REPLACED IF APPLICABLE BRUSHED COIL FINS AS NEEDED INSPECTED BLOWER WHEELS AND FANS LUBRICATED MOTORS AND BEARINGS AS NEEDED CHECKED CONTROLS CALIBRATION AND 
OPERATION WALK IN TASKS INSPECFTED ALL HINGES AND GASKETS CHECKED ELECTRICAL CONTACTS CONTROLS AND COMPONENTS INSPECTED RELAYS AND CONTACTORS VISUALLY INSPECTED 
FOR LEAKS CLEANE DALL PRE FILTER MEDIA AND REPLACED IF APPLICABLE BRUSHED COIL FINS AS NEEDED INSPECTED BLOWER WHEELS AND FANS LUBRICATED MOTORS AND BEARINGS 
INSPECTED SIGHT GLASS CHECKED CONTROL CALIBRATION AND OPERATION ICE MACHINE TASKS CHECKED ICE PRODUCTION THICKNESS AND SENSING PROBES INSPECTED DOOR AND GASKET 
VISUALLY INSPECTED BIN AND BIN TSTAT INSPECTED WATER PUMPS AND DISTRIBUTION TUBES VISUALLY INSPECTED WATER FILTERS INSPECTED FOR ANY LEAKS CLEANED CONDENSER COILS 
INSPECTED ALL ELECTRICAL COMPONENTS INSPECTED FAN MOTOR  BLADES   BEARINGS   LUBRICATED AS NEEDED DESCALED ICE MACHINE EXHAUST FAN TASKS INSPECTED ROOF FAN BELTS AND 
SHEAVES ADJUSTED AS NEEDED REPLACED BELTS AS NEEDED  20X25X2 PLEATED FILTER HIGH EFFICIENCY
"""

# Remove special characters from the text
text = re.sub(r'[^A-Za-z0-9\s]', '', text)

# Find all action verbs in the text
action_verbs = find_action_verbs(text, nlp_pipeline)
print("Extracted Action Verbs:", action_verbs)

# Debugging: Print the length and corresponding substring in the text
for verb in action_verbs:
    print(f"Verb: '{verb}', Length: {len(verb)}")
    print(f"Text Substring: '{text[text.find(verb):text.find(verb)+len(verb)]}'")

# Normalize case
text = text.lower()
action_verbs = [verb.lower() for verb in action_verbs]

# Find the start and end indices
start_index = text.find(action_verbs[0])
end_index = text.find(action_verbs[1]) if len(action_verbs) > 1 else len(text)

print("Start Index:", start_index)
print("End Index:", end_index)

# Extract the substring between the first and second verbs
if start_index != -1 and end_index != -1:
    substring = text[start_index + len(action_verbs[0]):end_index]
    print("Substring between action_verbs[0] and action_verbs[1]:")
    print(substring)
else:
    print("Not enough action verbs found to extract a substring between them.")

Extracted Action Verbs: ['performed', 'replaced', 'inspected', 'lubricated', 'needed', 'shut', 'needed', 'inspected', 'adjusted', 'needed', 'cleaned', 'checked', 'inspected', 'inspected', 'tested', 'inspected', 'tested', 'inspected', 'checked', 'inspected', 'inspected', 'cleaned', 'replaced', 'brushed', 'needed', 'inspected', 'lubricated', 'needed', 'checked', 'walk', 'insfted', 'checked', 'inspected', 'inspected', 'replaced', 'brushed', 'needed', 'inspectedcated', 'inspected', 'checked', 'checked', 'inspected', 'inspected', 'inspected', 'inspected', 'inspected', 'cleaned', 'inspected', 'inspected', 'lubricated', 'needed', 'descaled', 'inspected', 'adjusted', 'needed', 'replaced', 'neededted']
Verb: 'performed', Length: 9
Text Substring: ''
Verb: 'replaced', Length: 8
Text Substring: ''
Verb: 'inspected', Length: 9
Text Substring: ''
Verb: 'lubricated', Length: 10
Text Substring: ''
Verb: 'needed', Length: 6
Text Substring: ''
Verb: 'shut', Length: 4
Text Substring: ''
Verb: 'needed', 