In [35]:
from transformers import pipeline

# Load a pre-trained NER model from Hugging Face
nlp_ner = pipeline("ner", model="xlm-roberta-large")

def process_inspection(text):
    """
    Processes the inspection text, categorizing details based on the template,
    and triggering prompts based on specific keywords.
    
    Args:
        text (str): The input text from the inspection.
    
    Returns:
        dict: Categorized inspection report.
    """
    # Define section-specific keywords and triggers
    section_keywords = {
        "tires": ["left front tire", "right front tire", "left rear tire", "right rear tire", "tire pressure", "tire condition"],
        "battery": ["battery make", "battery replacement date", "battery voltage", "battery water level", "battery condition", "battery leak", "battery rust"],
        "exterior": ["rust", "dent", "damage", "oil leak", "suspension"],
        "brakes": ["brake fluid level", "brake condition", "emergency brake"],
        "engine": ["engine oil condition", "engine oil color", "brake fluid condition", "brake fluid color", "oil leak", "engine rust", "engine damage"]
    }
    
    prompt_triggers = {
        "header": ["truck serial number", "truck model", "inspection id", "inspector name", "inspection employee id", "date & time of inspection", "location of inspection", "geo coordinates", "service meter hours", "inspector signature", "customer name", "cat customer id"],
        "tires": ["tire pressure", "tire condition"],
        "battery": ["battery make", "battery replacement date", "battery voltage", "battery water level", "battery condition", "battery leak", "battery rust"],
        "exterior": ["rust", "dent", "damage", "oil leak"],
        "brakes": ["brake fluid level", "brake condition", "emergency brake"],
        "engine": ["engine oil condition", "engine oil color", "brake fluid condition", "brake fluid color", "oil leak", "engine rust", "engine damage"]
    }
    
    # Tokenize the text
    tokens = text.lower().split()  # Lowercasing all tokens for consistency
    
    # Initialize report structure
    report = {
        "header": [],
        "tires": [],
        "battery": [],
        "exterior": [],
        "brakes": [],
        "engine": [],
        "voice_of_customer": [],
        "summary": []
    }
    
    current_section = None
    header_info = {
        "truck_serial_number": None,
        "model": None,
        "inspection_id": None
    }

    # Iterate through tokens to find phrases and triggers
    for i in range(len(tokens)):
        token = tokens[i]
        
        # Identify section
        for section, keywords in section_keywords.items():
            if any(keyword in token for keyword in keywords):
                current_section = section
                break
        
        # Process tokens under the current section
        if current_section and token not in ["truck", "serial", "number", "model", "inspection", "id"]:
            report[current_section].append(token)
        
        # Check and extract header information
        if token == "serial" and tokens[i+1] == "number":
            header_info["truck_serial_number"] = tokens[i+2].strip(',')
        elif token == "model":
            header_info["model"] = tokens[i+1].strip(',')
        elif token == "id" and tokens[i-1] == "inspection":
            header_info["inspection_id"] = tokens[i+1].strip(',')
        
    # Create a formatted summary of the inspection report
    if report["exterior"]:
        report["summary"].append(f"Exterior details: {' '.join(report['exterior'])}")
    if header_info["truck_serial_number"]:
        report["summary"].append(f"Truck serial number: {header_info['truck_serial_number']}")
    if header_info["model"]:
        report["summary"].append(f"Model: {header_info['model']}")
    if header_info["inspection_id"]:
        report["summary"].append(f"Inspection ID: {header_info['inspection_id']}")
    
    return report

# Example usage
input_text = "The engine is OK but the brake fluid is low. There is rust on the suspension, and the tire is broken. Truck serial number 7301234, model 735, inspection ID 001."
inspection_report = process_inspection(input_text)

# Print the inspection report summary, with each sentence on a new line
print("\nInspection Report:")
for summary in inspection_report["summary"]:
    print(summary)



All PyTorch model weights were used when initializing TFXLMRobertaForTokenClassification.

Some weights or buffers of the TF 2.0 model TFXLMRobertaForTokenClassification were not initialized from the PyTorch model and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.



Inspection Report:
Exterior details: rust on the suspension, and the tire is broken. 7301234, 735, 001.
Truck serial number: 7301234
Model: 735
Inspection ID: 001.


In [36]:
from reportlab.lib.pagesizes import letter
from reportlab.pdfgen import canvas

def create_pdf_report(inspection_report, filename="Inspection_Report.pdf"):
    c = canvas.Canvas(filename, pagesize=letter)
    width, height = letter
    
    # Title
    c.setFont("Helvetica", 12)
    c.drawString(200, height - 40, "Inspection Report")

    # Add summary details
    y_position = height - 80
    for summary in inspection_report["summary"]:
        c.drawString(40, y_position, summary)
        y_position -= 20  # Move down for the next line
    
    c.save()
    print(f"PDF report saved as {filename}")

# Example usage
def process_inspection(text):
    # Dummy implementation for demonstration
    report = {
        "summary": [
            "Exterior details: rust on the suspension, and the tire is broken.",
            "Truck serial number: 7301234",
            "Model: 735",
            "Inspection ID: 001"
        ]
    }
    return report

input_text = "The engine is OK but the brake fluid is low. There is rust on the suspension, and the tire is broken. Truck serial number 7301234, model 735, inspection ID 001."
inspection_report = process_inspection(input_text)

# Create and download the PDF report
create_pdf_report(inspection_report)


ModuleNotFoundError: No module named 'reportlab'

In [32]:
from reportlab.lib.pagesizes import letter
from reportlab.pdfgen import canvas

def create_pdf_report(inspection_report, filename="Inspection_Report.pdf"):
    c = canvas.Canvas(filename, pagesize=letter)
    width, height = letter
    
    # Title
    c.setFont("Helvetica", 12)
    c.drawString(200, height - 40, "Inspection Report")

    # Add summary details
    y_position = height - 80
    for summary in inspection_report["summary"]:
        c.drawString(40, y_position, summary)
        y_position -= 20  # Move down for the next line
    
    c.save()
    print(f"PDF report saved as {filename}")

# Example usage
input_text = "The engine is OK but the brake fluid is low. There is rust on the suspension, and the tire is broken. Truck serial number 7301234, model 735, inspection ID 001."
inspection_report = process_inspection(input_text)

# Create and download the PDF report
create_pdf_report(inspection_report)


ModuleNotFoundError: No module named 'reportlab'