# Label a new inspection report

Generate a labeling report from a new inspection report.

In [1]:
import sys
from pathlib import Path

import pandas as pd
from langchain_core.messages.ai import AIMessage

sys.path.append("src")

import psclabeler as psc

## Generate Response

In [2]:
# Path of the inspection report PDF that the rest of the notebook parses and labels.
NEW_INSPECTION = Path("./data/New Inspection Report.pdf")
# Alternative input, currently disabled:
# SAMPLE_INSPECTION = Path("./data/Sample Inspection Report.pdf")

In [3]:
# Read the PDF into one string, then break that string into chunks.
# The chunks come back as a mapping; only its values are used further down.
ingest = psc.data_query.data_ingest
report_string = ingest.parse_pdf_to_string(NEW_INSPECTION)
report_dict = ingest.split_report_to_chunk(report_string)

In [14]:
# Ask the inspector model to rate every report chunk, keeping the order in
# which the chunks appear in `report_dict`.
model = psc.model.labeler.FewShotLLMPSCInspector()
response_results = [model.rate_risk(chunk) for chunk in report_dict.values()]

### Capture the response for each deficiency and parse it accordingly
- gather the output
- and, potentially, the metadata

In [15]:
# Show the raw model responses (content plus metadata) collected above.
response_results

[AIMessage(content="Deficiency: Location of emergency installations. Not as required.\n\nCorrective Action: New weathertight IP67 rating electrical emergency stop switch boxes, along with newly fabricated outer protection steel boxes, have been installed at the port and starboard bunker stations, meeting the approval of the attending Class surveyor.\n\nPreventive Action: Ship Staff advised to fortify their inspection regime and diligently carry out the checks as per CLE 08 - Inactive Function tests, and rectify faults if any immediately.\n\nReason: The deficiency is due to a design or engineering defect, which is a weakness in the organization's processes. The non-compliance of emergency stop switches with requirements for electrical switch boxes installed on weather decks exposed to marine environment is a potential threat to human life and can cause accidents.\n\nClassification: High Risk.", response_metadata={'token_usage': {'completion_tokens': 163, 'prompt_tokens': 1466, 'total_to

In [16]:
def get_content_array(reponse_list: list[AIMessage]) -> list[str]:
    """Collect the ``content`` attribute of every AI response, in input order."""
    contents = []
    for message in reponse_list:
        contents.append(message.content)
    return contents


def get_token_use_array(reponse_list: list[AIMessage]) -> list[dict]:
    """Retrieve the token-usage record from each AI response.

    Args:
        reponse_list: Responses whose ``response_metadata`` mapping is read.

    Returns:
        One entry per response, in input order: the value stored under
        ``response_metadata["token_usage"]``. An empty dict is returned for a
        response whose metadata has no (or an empty) ``token_usage`` entry, so
        one missing record does not abort the whole collection.
    """
    return [r.response_metadata.get("token_usage") or {} for r in reponse_list]


def parse_single_deficiency_response_to_dict(response: list[str]) -> dict[str, str]:
    """Convert the paragraphs of one deficiency response into a dictionary.

    Each paragraph is expected to look like ``"Key: value"``. It is split on
    the first colon only, so colons inside the value are preserved.

    Args:
        response: Paragraphs of a single response, e.g. the result of
            splitting the response text on blank lines.

    Returns:
        Mapping of the lower-cased, stripped key to the stripped value. If a
        key occurs more than once, the last occurrence wins.

    Notes:
        Blank paragraphs are skipped. A paragraph without a colon is treated
        as a continuation of the previous field and appended to its value,
        separated by a blank line. If there is no previous field, it is
        ignored.
    """
    parsed: dict[str, str] = {}
    last_key = None
    for chunk in response:
        if not chunk.strip():
            # Empty paragraph (e.g. produced by a trailing separator).
            continue
        key, sep, value = chunk.partition(":")
        if not sep:
            # No "Key:" prefix: keep the text by attaching it to the field
            # that is currently open instead of failing on a missing value.
            if last_key is not None:
                parsed[last_key] = f"{parsed[last_key]}\n\n{chunk.strip()}"
            continue
        last_key = key.strip().lower()
        parsed[last_key] = value.strip()
    return parsed

### Response content as output for the user

In [17]:
# Keep only the text content of each response; this is what gets parsed into report fields.
response_content = get_content_array(response_results)
response_content

["Deficiency: Location of emergency installations. Not as required.\n\nCorrective Action: New weathertight IP67 rating electrical emergency stop switch boxes, along with newly fabricated outer protection steel boxes, have been installed at the port and starboard bunker stations, meeting the approval of the attending Class surveyor.\n\nPreventive Action: Ship Staff advised to fortify their inspection regime and diligently carry out the checks as per CLE 08 - Inactive Function tests, and rectify faults if any immediately.\n\nReason: The deficiency is due to a design or engineering defect, which is a weakness in the organization's processes. The non-compliance of emergency stop switches with requirements for electrical switch boxes installed on weather decks exposed to marine environment is a potential threat to human life and can cause accidents.\n\nClassification: High Risk.",
 "Deficiency: The loading computer used for Stability Calculation was not approved by the RO.\n\nCorrective Act

### Additional response metadata for internal analysis

In [18]:
# Token-usage record of each response (one dict per response, same order as `response_results`).
response_token = get_token_use_array(response_results)
response_token

[{'completion_tokens': 163, 'prompt_tokens': 1466, 'total_tokens': 1629},
 {'completion_tokens': 158, 'prompt_tokens': 1493, 'total_tokens': 1651},
 {'completion_tokens': 135, 'prompt_tokens': 1435, 'total_tokens': 1570}]

In [19]:
# Split each response on blank lines into its paragraphs and turn every
# "Key: value" paragraph into an entry of a per-deficiency dictionary.
results = [
    parse_single_deficiency_response_to_dict(answer.split("\n\n"))
    for answer in response_content
]

## Note for the future
What happens if the LLM response does not contain the `deficiency`, `reason` and `classification` fields?

In [23]:
# Columns of the user-facing report, in display order.
REPORT_COLUMNS = [
    "deficiency",
    "corrective action",
    "preventive action",
    "reason",
    "classification",
]

# `reindex` selects and orders the columns like `df[REPORT_COLUMNS]` would, but
# a field missing from every LLM response becomes an all-NaN column instead of
# raising KeyError, so an incomplete answer is visible in the report rather
# than aborting the notebook.
df = pd.DataFrame(results).reindex(columns=REPORT_COLUMNS)

# One row of token-usage figures per response, same row order as `df`.
meta_df = pd.DataFrame(response_token)

Sample of `df` output

In [24]:
# Display the labeled deficiencies table.
df

Unnamed: 0,deficiency,corrective action,preventive action,reason,classification
0,Location of emergency installations. Not as re...,New weathertight IP67 rating electrical emerge...,Ship Staff advised to fortify their inspection...,The deficiency is due to a design or engineeri...,High Risk.
1,The loading computer used for Stability Calcul...,1. Master explain the use rule for the loading...,"1. Master enhance supervision and training, su...",The loading computer used for stability calcul...,High Risk.
2,"Alarms/Emergency Signal - At the engine room, ...",Replaced all busted bulbs and rechecked all th...,1. Educate the crew to test the lights alarm s...,This deficiency is a potential threat to human...,Medium Risk.


Sample of `meta_df` output

In [25]:
# Build the token-usage frame from `response_token` on every run instead of
# concatenating onto the previous `meta_df`: the self-referential form appended
# the report columns again each time this cell was re-run.
# Reusing `df.index` keeps the rows aligned with `df` whatever its current
# index labels are.
token_usage_df = pd.DataFrame(response_token, index=df.index)
meta_df = pd.concat([df, token_usage_df], axis=1)
meta_df

Unnamed: 0,deficiency,corrective action,preventive action,reason,classification,completion_tokens,prompt_tokens,total_tokens
0,Location of emergency installations. Not as re...,New weathertight IP67 rating electrical emerge...,Ship Staff advised to fortify their inspection...,The deficiency is due to a design or engineeri...,High Risk.,163,1466,1629
1,The loading computer used for Stability Calcul...,1. Master explain the use rule for the loading...,"1. Master enhance supervision and training, su...",The loading computer used for stability calcul...,High Risk.,158,1493,1651
2,"Alarms/Emergency Signal - At the engine room, ...",Replaced all busted bulbs and rechecked all th...,1. Educate the crew to test the lights alarm s...,This deficiency is a potential threat to human...,Medium Risk.,135,1435,1570


Realign the deficiency numbers so that they start at 1

In [26]:
# Number the deficiencies from 1. Assigning an explicit range (rather than
# `index + 1`) makes this cell safe to re-run: the labels stay 1..n instead of
# shifting by one on every execution.
meta_df.index = pd.RangeIndex(1, len(meta_df) + 1)
df.index = pd.RangeIndex(1, len(df) + 1)

Save as Excel

In [27]:
# Write one workbook with two sheets: the labeled deficiencies, and the same
# table extended with the token-usage columns.
sheets = {'label_deficiency': df, 'token_output': meta_df}
with pd.ExcelWriter('./data/results.xlsx') as writer:
    for sheet_name, frame in sheets.items():
        frame.to_excel(writer, sheet_name=sheet_name)