# Label new inspection report.

Generate a labeling report from a new inspection report.

In [43]:
import sys
from pathlib import Path

import pandas as pd
from langchain_core.messages.ai import AIMessage

sys.path.append("src")

import psclabeler as psc

## Generate Response

In [44]:
# Path of the inspection report PDF that the cells below parse and label.
NEW_INSPECTION = Path("./data/New Inspection Report.pdf")
# Alternative input, currently disabled (not referenced by any visible cell):
# SAMPLE_INSPECTION = Path("./data/Sample Inspection Report.pdf")

In [45]:
# Turn the PDF into one string, then split that string into chunks.
# NOTE(review): presumably one chunk per deficiency -- confirm in psclabeler.
ingest = psc.data_query.data_ingest
report_string = ingest.parse_pdf_to_string(NEW_INSPECTION)
report_dict = ingest.split_report_to_chunk(report_string)

In [46]:
# Rate every report chunk with the zero-shot labeler, keeping the raw
# responses in the same order as the chunks.
model = psc.model.labeler.ZeroShotLLMPSCInspector()
response_results = [model.rate_risk(chunk) for chunk in report_dict.values()]

### Capture the response for each deficiency and parse it accordingly
- gather the output
- and, potentially, the metadata

In [47]:
# Raw model responses: each item carries the answer text plus response metadata.
response_results

[AIMessage(content='Deficiency: Location of emergency installations. Not as required.\n\nReason: The emergency stop switches installed on weather decks were not compliant with requirements for marine environment. This poses a potential threat to personnel and the ship in case of an emergency.\n\nClassification: Medium', response_metadata={'token_usage': {'completion_tokens': 50, 'prompt_tokens': 401, 'total_tokens': 451}, 'model_name': 'gpt-35-turbo', 'system_fingerprint': None, 'prompt_filter_results': [{'prompt_index': 0, 'content_filter_results': {'hate': {'filtered': False, 'severity': 'safe'}, 'jailbreak': {'filtered': False, 'detected': False}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual': {'filtered': False, 'severity': 'safe'}, 'violence': {'filtered': False, 'severity': 'safe'}}}], 'finish_reason': 'stop', 'logprobs': None, 'content_filter_results': {'hate': {'filtered': False, 'severity': 'safe'}, 'self_harm': {'filtered': False, 'severity': 'safe'}, 'sexual

In [48]:
def get_content_array(reponse_list: list[AIMessage]) -> list[str]:
    """Return the `content` attribute of every response, in input order."""
    contents = []
    for message in reponse_list:
        contents.append(message.content)
    return contents


def get_token_use_array(reponse_list: list[AIMessage]) -> list[dict[str, int]]:
    """Retrieve the token-usage statistics from each AI response.

    Args:
        reponse_list: Responses whose `response_metadata` mapping may hold a
            "token_usage" entry.

    Returns:
        One token-usage dict per response, in input order. A response whose
        metadata has no (or an empty) "token_usage" entry contributes an empty
        dict instead of raising `KeyError`, so missing counters cannot abort
        the labeling run.
    """
    return [r.response_metadata.get("token_usage") or {} for r in reponse_list]


def parse_single_deficiency_response_to_dict(response: list[str]) -> dict[str, str]:
    """Convert the "Label: text" segments of one LLM answer into a dictionary.

    Args:
        response: The segments of a single answer, e.g. the answer text split
            on blank lines. Each segment is expected to look like
            "Label: text"; the number of segments is not fixed.

    Returns:
        A dict mapping each lower-cased, stripped label to its stripped text.
        Only the first ":" of a segment separates label from text. Blank
        segments are ignored. A segment without ":" is treated as a
        continuation of the previous field and appended to it; if there is no
        previous field it is dropped. A repeated label keeps the last value.
    """
    parsed: dict[str, str] = {}
    last_key = None
    for segment in response:
        text = segment.strip()
        if not text:
            continue
        label, separator, value = text.partition(":")
        if separator:
            last_key = label.strip().lower()
            parsed[last_key] = value.strip()
        elif last_key is not None:
            # No label on this segment: keep the text by attaching it to the
            # field it follows instead of failing on a missing value.
            parsed[last_key] = "\n\n".join(filter(None, [parsed[last_key], text]))
    return parsed

### Content response as output for the user

In [49]:
# Keep only the answer text of each response; this is the user-facing output.
response_content = get_content_array(response_results)
response_content

['Deficiency: Location of emergency installations. Not as required.\n\nReason: The emergency stop switches installed on weather decks were not compliant with requirements for marine environment. This poses a potential threat to personnel and the ship in case of an emergency.\n\nClassification: Medium',
 "Deficiency: The loading computer used for Stability Calculation was not approved by the RO.\n\nReason: The chief mate installed the loading computer device software to another computer in addition to the specified loading computer, indicating work negligence.\n\nAnalysis: This deficiency exposes a weakness in the organization's processes and has the potential to cause medium economic and reputational harm.\n\nClassification: Medium",
 "Deficiency: Alarms/Emergency Signal - One light not working in each of the four signal columns in the engine room.\n\nReason:\n- Step 1: The deficiency is related to the alarms and emergency signal system in the engine room.\n- Step 2: The root cause is 

### Additional response metadata for internal analysis

In [50]:
# Token counts per response, kept for internal analysis.
response_token = get_token_use_array(response_results)
response_token

[{'completion_tokens': 50, 'prompt_tokens': 401, 'total_tokens': 451},
 {'completion_tokens': 71, 'prompt_tokens': 428, 'total_tokens': 499},
 {'completion_tokens': 131, 'prompt_tokens': 370, 'total_tokens': 501}]

In [51]:
# Split each answer on blank lines and turn its segments into one record.
results = [
    parse_single_deficiency_response_to_dict(content.split("\n\n"))
    for content in response_content
]

## Note for the future
What happens if the LLM response does not contain the `deficiency`, `reason` and `classification` fields?

In [60]:
# Columns shown to the user, in display order.
expected_columns = ["deficiency", "reason", "classification"]
# `reindex` keeps this column order and, unlike `df[[...]]`, fills a field the
# LLM omitted with NaN instead of raising KeyError. Any extra field in the
# answers (e.g. "analysis") is dropped, exactly as the column selection did.
df = pd.DataFrame(results).reindex(columns=expected_columns)
# Token usage per response, one row per deficiency.
meta_df = pd.DataFrame(response_token)

Sample of `df` output

In [61]:
# One row per deficiency with its reason and risk classification.
df

Unnamed: 0,deficiency,reason,classification
0,Location of emergency installations. Not as re...,The emergency stop switches installed on weath...,Medium
1,The loading computer used for Stability Calcul...,The chief mate installed the loading computer ...,Medium
2,Alarms/Emergency Signal - One light not workin...,- Step 1: The deficiency is related to the ala...,Medium


Sample of `meta_df` output (labels combined with token usage)

In [62]:
# Build the combined table from its inputs rather than from the previous
# `meta_df`, so re-running this cell does not append the label columns again.
# `reset_index(drop=True)` lines the rows up by position even if `df` has
# already been renumbered.
meta_df = pd.concat(
    [df.reset_index(drop=True), pd.DataFrame(response_token)],
    axis=1,
)
meta_df

Unnamed: 0,deficiency,reason,classification,completion_tokens,prompt_tokens,total_tokens
0,Location of emergency installations. Not as re...,The emergency stop switches installed on weath...,Medium,50,401,451
1,The loading computer used for Stability Calcul...,The chief mate installed the loading computer ...,Medium,71,428,499
2,Alarms/Emergency Signal - One light not workin...,- Step 1: The deficiency is related to the ala...,Medium,131,370,501


Realign the deficiency numbering so that it starts at 1

In [57]:
# Number deficiencies from 1. Assigning an explicit range (rather than adding 1
# to the current index) keeps this cell idempotent when it is re-run.
meta_df.index = pd.RangeIndex(start=1, stop=len(meta_df) + 1)
df.index = pd.RangeIndex(start=1, stop=len(df) + 1)

Save the results as an Excel workbook

In [58]:
# Write both tables into one workbook, one sheet per table.
sheets = (('label_deficiency', df), ('token_output', meta_df))
with pd.ExcelWriter('./data/results.xlsx') as excel_writer:
    for sheet_name, frame in sheets:
        frame.to_excel(excel_writer, sheet_name=sheet_name)