# Maintenance Report NLP Analysis

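This notebook implements an NLP pipeline for analyzing maintenance reports. It extracts verb–object task pairs from free-text reports with NLTK (tokenization, part-of-speech tagging, lemmatization) and groups them into task categories. A pre-trained BERT POS-tagging model is also loaded, but the current extraction step does not use it.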

In [1]:
import os
import re
import torch
from transformers import BertTokenizer, BertForTokenClassification, pipeline, logging
from spellchecker import SpellChecker
import nltk
from nltk.tokenize import word_tokenize
from nltk.tag import pos_tag
from nltk.corpus import wordnet
from nltk.stem import WordNetLemmatizer

# Suppress warnings from the transformers library
logging.set_verbosity_error()

# NLTK data required by word_tokenize, pos_tag and WordNetLemmatizer.
# Each entry is (lookup path for nltk.data.find, package id for nltk.download).
# Newer NLTK releases (3.9+) load the "punkt_tab" and
# "averaged_perceptron_tagger_eng" packages instead of the older pickled
# "punkt" / "averaged_perceptron_tagger" ones, so both generations are
# requested to keep the notebook runnable across NLTK versions.
NLTK_RESOURCES = [
    ('tokenizers/punkt', 'punkt'),
    ('tokenizers/punkt_tab', 'punkt_tab'),
    ('corpora/wordnet', 'wordnet'),
    ('taggers/averaged_perceptron_tagger', 'averaged_perceptron_tagger'),
    ('taggers/averaged_perceptron_tagger_eng', 'averaged_perceptron_tagger_eng'),
]

# Download only what is missing so a re-run of this cell is cheap.
for resource_path, package_id in NLTK_RESOURCES:
    try:
        nltk.data.find(resource_path)
    except LookupError:
        nltk.download(package_id, quiet=True)

In [2]:
def initialize_model():
    """Load the pre-trained BERT POS-tagging model and choose a compute device.

    Device preference is CUDA, then Apple MPS, then CPU.

    Returns:
        tuple: ``(tokenizer, model, device)`` on success. If anything fails
        (for example the weights cannot be loaded), the error is printed and
        ``(None, None, None)`` is returned instead of raising.
    """
    try:
        # Prefer a CUDA GPU, then MPS, then fall back to CPU.
        # getattr guards torch builds that have no `torch.backends.mps`, which
        # would otherwise abort the whole initialization with AttributeError.
        mps_backend = getattr(torch.backends, "mps", None)
        if torch.cuda.is_available():
            device = torch.device("cuda")
        elif mps_backend is not None and mps_backend.is_available():
            device = torch.device("mps")
        else:
            device = torch.device("cpu")

        # Load the pre-trained BERT model for POS tagging
        model_name = 'vblagoje/bert-english-uncased-finetuned-pos'
        tokenizer = BertTokenizer.from_pretrained(model_name)
        model = BertForTokenClassification.from_pretrained(model_name).to(device)

        return tokenizer, model, device
    except Exception as e:
        # Best-effort: report the problem and let the caller check for None.
        print(f"Error initializing model: {str(e)}")
        return None, None, None

# Initialize the model once at notebook scope. initialize_model() prints the error and
# returns (None, None, None) on failure, so all three names may be None after this line.
tokenizer, model, device = initialize_model()

In [3]:
def process_text(text, tokenizer=None, model=None, device=None):
    """Extract (verb, object phrase) task pairs from a maintenance report.

    The text is lower-cased, the filler phrase "as needed" is removed, and the
    result is tokenized and POS-tagged with NLTK. Each verb-like token opens a
    new task; runs of nouns, adjectives, prepositions and gerunds that follow
    it are collected as that task's object phrase. A comma closes the current
    task.

    Args:
        text (str): Raw report text.
        tokenizer, model, device: Accepted for call compatibility only; this
            function does not use them (tagging is done by NLTK's ``pos_tag``).

    Returns:
        list[tuple[str, str]]: ``(lemmatized_verb, object_phrase)`` pairs in
        order of appearance. Verbs that never receive an object are dropped.
        If any step raises, the error is printed and ``[]`` is returned.
    """

    def is_potential_verb(word, tag):
        # Any VB* tag, plus plain adjectives with participle endings
        # ("-ed" / "-en"), which are treated as verbs here.
        return tag.startswith('VB') or (tag == 'JJ' and word.endswith(('ed', 'en')))

    def is_object_token(word, tag):
        # Nouns, adjectives not ending in "-ed", prepositions and gerunds may
        # be part of an object phrase. A gerund only reaches this test in the
        # look-ahead below: at the head of a phrase it is taken as a verb.
        return (tag.startswith('NN')
                or (tag.startswith('JJ') and not word.endswith('ed'))
                or tag == 'IN'
                or tag.startswith('VBG'))

    try:
        # Text preprocessing
        text = re.sub(r'\bas needed\b', '', text.lower())
        text = re.sub(r'\s*,\s*and\s+', ', ', text)  # Normalize conjunctions
        text = re.sub(r'\s+', ' ', text).strip()

        # Initialize the spell checker and lemmatizer
        spell = SpellChecker()
        lemmatizer = WordNetLemmatizer()

        tagged = pos_tag(word_tokenize(text))

        verb_object_pairs = []

        def emit(verb, objects):
            # Record a task only when it has both a verb and an object phrase.
            if verb and objects:
                verb_object_pairs.append(
                    (lemmatizer.lemmatize(verb, 'v'), ' '.join(objects))
                )

        current_verb = None
        current_objects = []

        i = 0
        while i < len(tagged):
            word, tag = tagged[i]

            if is_potential_verb(word, tag):
                # A new verb closes the previous task and starts the next one.
                emit(current_verb, current_objects)
                # correction() may return None when it has no candidate for an
                # unknown word; keep the original token so the task is not lost.
                current_verb = spell.correction(word) or word
                current_objects = []

            elif is_object_token(word, tag):
                if current_verb:
                    # Look ahead for the rest of the compound object phrase.
                    j = i + 1
                    while j < len(tagged) and is_object_token(*tagged[j]):
                        j += 1

                    phrase = tagged[i:j]
                    # Only keep the phrase if it contains at least one noun
                    # (drops dangling prepositions/adjectives).
                    if any(t.startswith('NN') for _, t in phrase):
                        current_objects.extend(w for w, _ in phrase)
                    i = j - 1  # Skip the tokens consumed by the look-ahead

            elif word == ',':
                # Commas act as phrase separators, but only once a complete
                # verb/object pair has been seen.
                if current_verb and current_objects:
                    emit(current_verb, current_objects)
                    current_objects = []
                    current_verb = None

            i += 1

        # Add the last pair if it exists
        emit(current_verb, current_objects)

        return verb_object_pairs

    except Exception as e:
        # Best-effort: report the problem and return no tasks.
        print(f"Error processing text: {str(e)}")
        return []

In [4]:
# Example maintenance report text: one unpunctuated, upper-case run of technician notes,
# so task boundaries have to be inferred from part-of-speech tags rather than sentence breaks.
text = """
PERFORMED PM PER SCOPE HVAC TASKS REPLACED AIR FILTERS VISUALLY INSPECTED FOR REFRIGERANT LEAKS LUBRICATED ALL BEARINGS AS NEEDED SHUT OFF WATER SUPPLY 
FALL VISIT AS NEEDED  INSPECTED BELTS AND ADJUSTED AS NEEDED CLEANED CONDENSATE LINES BRUSHED COIL FINS INSPECTED BLOWER WHEELS AND FANS LUBRICATED MOTORS AND BEARINGS 
CHECKED CONTROLS CALIBRATION AND OPERATION WALK IN TASKS INSPECTED ALL HINGES AND GASKETS CHECKED ELECTRICAL CONTACTS CONTROLS AND COMPONENTS INSPECTED RELAYS AND CONTACTORS 
VISUALLY INSPECTED FOR LEAKS CLEANED ALL PRE FILTER MEDIA AND REPLACED IF APPLICABLE BRUSHED COIL FINS INSPECTED BLOWER WHEELS AND FANS LUBRICATED MOTORS AND BEARINGS 
INSPECTED SIGHT GLASS CHECKED CONTROL CALIBRATION AND OPERATION ICE MACHINE TASKS CHECKED ICE PRODUCTION THICKNESS AND SENSING PROBES INSPECTED DOOR AND GASKET VISUALLY 
INSPECTED BIN AND BIN TSTAT INSPECTED WATER PUMPS AND DISTRIBUTION TUBES VISUALLY INSPECTED WATER FILTERS INSPECTED FOR ANY LEAKS CLEANED CONDENSER COILS 
INSPECTED ALL ELECTRICAL COMPONENTS INSPECTED FAN MOTOR BLADES BEARINGS LUBRICATED DESCALED ICE MACHINE EXHAUST FAN TASKS INSPECTED ROOF FAN BELTS AND SHEAVES ADJUSTED 
REPLACED BELTS 20X25X2 PLEATED FILTER HIGH EFFICIENCY
"""

# Extract (verb, object) pairs from the report. tokenizer/model/device are passed to match
# process_text's signature; its body does not reference them.
results = process_text(text, tokenizer, model, device)

# Map an extracted task verb onto a coarse task category
def categorize_task(verb, obj):
    """Return the category label for a task verb.

    ``obj`` is accepted but not consulted; only ``verb`` decides the label.
    The match is exact (case-sensitive); any verb outside the known
    vocabularies is labelled "Other".
    """
    vocabulary = {
        "Inspection": ('inspect', 'check', 'examine', 'monitor', 'observe', 'verify'),
        "Cleaning": ('clean', 'brush', 'wash', 'wipe', 'descale'),
        "Maintenance": ('lubricate', 'adjust', 'replace', 'repair', 'calibrate'),
    }
    # Invert to verb -> label; the vocabularies do not overlap.
    label_for_verb = {
        known_verb: label
        for label, known_verbs in vocabulary.items()
        for known_verb in known_verbs
    }
    return label_for_verb.get(verb, "Other")

# Group tasks by category. Sets are used so a task that appears several times
# in the report is listed (and counted) once.
tasks_by_category = {}
for verb, obj in results:
    tasks_by_category.setdefault(categorize_task(verb, obj), set()).add((verb, obj))

# Fixed display order, followed by any category not listed here so that
# nothing is silently left out of the report.
category_order = ["Other", "Maintenance", "Inspection", "Cleaning"]
category_order += sorted(c for c in tasks_by_category if c not in category_order)

# Print tasks by category (once, sorted so the output is the same on every run)
for category in category_order:
    if category in tasks_by_category:
        print(f"\n{category} Tasks ({len(tasks_by_category[category])}):")
        print("----------------------------------------")
        for verb, obj in sorted(tasks_by_category[category]):
            print(f"• {verb.capitalize()}: {obj}")

# Print statistics. The counts are unique tasks, so the percentage is taken
# over the total number of unique tasks (not len(results), which still
# contains duplicates and would make the shares sum to less than 100%).
total_unique_tasks = sum(len(tasks) for tasks in tasks_by_category.values())
print("\nTask Distribution:")
for category in category_order:
    if category in tasks_by_category:
        tasks = tasks_by_category[category]
        percentage = (len(tasks) / total_unique_tasks) * 100
        print(f"{category}: {len(tasks)} tasks ({percentage:.1f}%)")


Other Tasks (6):
----------------------------------------
• Exhaust: fan tasks
• Perform: pm per scope hvac tasks
• Pleat: high efficiency
• Sense: probes
• Shut: water supply fall visit
• Walk: in tasks

Maintenance Tasks (4):
----------------------------------------
• Lubricate: bearings
• Lubricate: motors bearings
• Replace: air filters
• Replace: belts

Inspection Tasks (19):
----------------------------------------
• Check: control calibration operation ice machine tasks
• Check: controls calibration operation
• Check: electrical contacts controls components
• Check: ice production thickness
• Inspect: belts
• Inspect: bin bin tstat
• Inspect: blower wheels fans
• Inspect: door gasket
• Inspect: electrical components
• Inspect: fan motor blades bearings
• Inspect: for leaks
• Inspect: for refrigerant leaks
• Inspect: hinges gaskets
• Inspect: leaks
• Inspect: relays contactors
• Inspect: roof fan belts sheaves
• Inspect: sight glass
• Inspect: water filters
• Inspect: water pump