In [1]:
from transformers import pipeline

# Load a pre-trained NER model from Hugging Face
nlp_ner = pipeline("ner", model="xlm-roberta-large")

def process_inspection(text):
    """
    Processes the inspection text, categorizing details based on the template,
    and triggering prompts based on specific keywords.
    
    Args:
        text (str): The input text from the inspection.
    
    Returns:
        dict: Categorized inspection report.
    """
    # Define section-specific keywords and triggers
    section_keywords = {
        "tires": ["left front tire", "right front tire", "left rear tire", "right rear tire", "tire pressure", "tire condition"],
        "battery": ["battery make", "battery replacement date", "battery voltage", "battery water level", "battery condition", "battery leak", "battery rust"],
        "exterior": ["rust", "dent", "damage", "oil leak", "suspension"],
        "brakes": ["brake fluid level", "brake condition", "emergency brake"],
        "engine": ["engine oil condition", "engine oil color", "brake fluid condition", "brake fluid color", "oil leak", "engine rust", "engine damage"]
    }
    
    prompt_triggers = {
        "header": ["truck serial number", "truck model", "inspection id", "inspector name", "inspection employee id", "date & time of inspection", "location of inspection", "geo coordinates", "service meter hours", "inspector signature", "customer name", "cat customer id"],
        "tires": ["tire pressure", "tire condition"],
        "battery": ["battery make", "battery replacement date", "battery voltage", "battery water level", "battery condition", "battery leak", "battery rust"],
        "exterior": ["rust", "dent", "damage", "oil leak"],
        "brakes": ["brake fluid level", "brake condition", "emergency brake"],
        "engine": ["engine oil condition", "engine oil color", "brake fluid condition", "brake fluid color", "oil leak", "engine rust", "engine damage"]
    }
    
    image_triggers = {
        "tires": ["tire"],
        "battery": ["battery", "leak", "rust"],
        "exterior": ["rust", "damage", "oil leak"],
        "brakes": ["brake fluid"],
        "engine": ["engine", "rust", "damage", "oil leak"]
    }
    
    # Tokenize the text
    tokens = text.lower().split()  # Lowercasing all tokens for consistency
    
    # Initialize report structure
    report = {
        "header": [],
        "tires": [],
        "battery": [],
        "exterior": [],
        "brakes": [],
        "engine": [],
        "voice_of_customer": [],
        "summary": []
    }
    
    current_section = None

    # Iterate through tokens to find phrases and triggers
    for i in range(len(tokens)):
        token = tokens[i]
        
        # Identify section
        for section, keywords in section_keywords.items():
            if any(keyword in token for keyword in keywords):
                current_section = section
                break
        
        # Process tokens under the current section
        if current_section:
            report[current_section].append(token)
        
        # Check for multi-word triggers
        if any(trigger in token for trigger in prompt_triggers.get(current_section, [])):
            print(f"Triggering next prompt after detecting: '{token}'")
            # Add logic to play the next prompt here
        
        if any(trigger in token for trigger in image_triggers.get(current_section, [])):
            print(f"Prompt to capture an image of the specific part after detecting: '{token}'")
        
        if token in prompt_triggers.get("header", []):
            print(f"Processing header information: '{token}'")

    # Create a formatted summary of the inspection report
    for section, details in report.items():
        if details:
            report["summary"].append(f"{section.capitalize()} details: {' '.join(details)}")
    
    return report

# Example usage
input_text = "The engine is OK but the brake fluid is low. There is rust on the suspension, and the tire is broken. Truck serial number 7301234, model 735, inspection ID 001."
inspection_report = process_inspection(input_text)
print("Inspection Report:", inspection_report)


Some weights of XLMRobertaForTokenClassification were not initialized from the model checkpoint at xlm-roberta-large and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Triggering next prompt after detecting: 'rust'
Prompt to capture an image of the specific part after detecting: 'rust'
Inspection Report: {'header': [], 'tires': [], 'battery': [], 'exterior': ['rust', 'on', 'the', 'suspension,', 'and', 'the', 'tire', 'is', 'broken.', 'truck', 'serial', 'number', '7301234,', 'model', '735,', 'inspection', 'id', '001.'], 'brakes': [], 'engine': [], 'voice_of_customer': [], 'summary': ['Exterior details: rust on the suspension, and the tire is broken. truck serial number 7301234, model 735, inspection id 001.', 'Summary details: Exterior details: rust on the suspension, and the tire is broken. truck serial number 7301234, model 735, inspection id 001.']}
