In [None]:
# Fetch the project repository; git creates an 'NJIA' directory under the current working directory
!git clone https://github.com/k-ganda/NJIA.git

Cloning into 'NJIA'...
remote: Enumerating objects: 40, done.[K
remote: Counting objects: 100% (40/40), done.[K
remote: Compressing objects: 100% (35/35), done.[K
remote: Total 40 (delta 6), reused 21 (delta 1), pack-reused 0 (from 0)[K
Receiving objects: 100% (40/40), 5.19 MiB | 11.47 MiB/s, done.
Resolving deltas: 100% (6/6), done.


In [None]:
# Change the current working directory to the 'NJIA' repository.
# NOTE(review): the path is relative, so re-running this cell after it has
# already succeeded would look for a nested NJIA/NJIA directory — confirm
# the working directory before re-running.
%cd NJIA

# Verify the current working directory
!pwd

# List the contents of the repository to confirm you are in the right place
!ls

/content/NJIA
/content/NJIA
2_preprocessing   4_clinical_extractions.json  new_colab_file.txt
3_medASR_outputs  input_audio		       README.md


In [None]:
import json
from datetime import datetime
from pprint import pprint


In [None]:
# Load the extraction results stored in 4_clinical_extractions.json.
# The encoding is stated explicitly: JSON text is UTF-8, while the default
# encoding used by open() depends on the platform/locale.
with open("4_clinical_extractions.json", "r", encoding="utf-8") as f:
    clinical_data = json.load(f)

# Number of loaded entries (shown as the cell result)
len(clinical_data)

4

In [None]:
def empty_p3_record():
    """Build a blank P3 pre-fill record.

    Every call assembles a brand-new nested dictionary (including new list
    objects), so callers can fill it in without affecting other records.

    Returns:
        dict: the record template. Scalar fields start as ``None``, list
        fields start empty, ``consistency_with_history`` carries the
        placeholder text for the clinician, and
        ``clinician_review_required`` is ``True``.
    """
    # Sections whose fields all start out unset
    facility_details = dict.fromkeys(
        ("facility_name", "examiner_name", "exam_date")
    )
    history_of_assault = dict.fromkeys(
        ("timing", "mechanism", "repeated_assault", "drug_facilitated_suspected")
    )

    # Findings are collected in lists; the age estimate is a single value
    physical_examination = {
        "injuries_observed": list(),
        "injury_locations": list(),
        "injury_age_estimate": None,
    }

    # The opinion is never pre-filled; only a placeholder is stored
    clinical_opinion = {
        "consistency_with_history": "To be determined by clinician",
        "degree_of_force": None,
    }

    record = {}
    record["facility_details"] = facility_details
    record["survivor_statement_summary"] = None
    record["history_of_assault"] = history_of_assault
    record["physical_examination"] = physical_examination
    record["clinical_opinion"] = clinical_opinion
    record["limitations_and_uncertainty"] = list()
    record["clinician_review_required"] = True
    return record


In [None]:
# Text values that mean "no drug-facilitation indicator" once stripped and lower-cased
_NEGATIVE_DRUG_FLAGS = frozenset({"no", "none", "false"})


def _as_list(value):
    """Return ``value`` as a new list: None -> [], list/tuple -> copy, anything else -> [value]."""
    if value is None:
        return []
    if isinstance(value, (list, tuple)):
        return list(value)
    return [value]


def _drug_flag_to_yes_no(flag):
    """Collapse a drug-facilitation indicator value into the string "yes" or "no"."""
    # None, False, "", [] and other empty values carry no positive indicator
    if not flag:
        return "no"
    if isinstance(flag, str):
        return "no" if flag.strip().lower() in _NEGATIVE_DRUG_FLAGS else "yes"
    if isinstance(flag, (list, tuple)):
        # A collection is positive when at least one of its entries is
        return "yes" if any(_drug_flag_to_yes_no(entry) == "yes" for entry in flag) else "no"
    return "yes"


def map_to_p3(audio_id, extracted, exam_date=None):
    """Map one set of extracted clinical facts onto a blank P3 pre-fill record.

    Args:
        audio_id: identifier of the source recording; copied to the result.
        extracted: dict of extracted facts. Keys read here are
            ``timing_of_assault``, ``mechanism_of_injury``,
            ``repeated_assault``, ``drug_facilitated_indicators``,
            ``injury_type``, ``body_location``, ``injury_color_or_stage`` and
            ``survivor_uncertainty_notes``. Missing keys are tolerated, and
            ``None`` is treated like an empty dict.
        exam_date: optional examination date, either a ``"YYYY-MM-DD"`` string
            or an object with ``strftime`` (date/datetime). When omitted, the
            date on which this function runs is used, as before.
            NOTE(review): that default is the processing date, not necessarily
            the date of the examination; the clinician should confirm it.

    Returns:
        dict: ``{"audio_id": audio_id, "p3_pre_fill": <record>}``.
    """
    facts = extracted or {}
    p3 = empty_p3_record()

    if exam_date is None:
        exam_date = datetime.today()
    if hasattr(exam_date, "strftime"):
        exam_date = exam_date.strftime("%Y-%m-%d")
    p3["facility_details"]["exam_date"] = exam_date

    history = p3["history_of_assault"]
    history["timing"] = facts.get("timing_of_assault")
    history["mechanism"] = facts.get("mechanism_of_injury")
    history["repeated_assault"] = facts.get("repeated_assault")
    # Case, whitespace and empty values no longer turn a negative flag into "yes".
    # NOTE(review): a missing indicator is still reported as "no" (original
    # behavior); confirm that "not mentioned" should read as "no" on the form.
    history["drug_facilitated_suspected"] = _drug_flag_to_yes_no(
        facts.get("drug_facilitated_indicators")
    )

    exam = p3["physical_examination"]
    # The template declares these fields as lists, while the extractor may
    # deliver a bare string or None; normalize so the schema stays uniform and
    # the record never shares a list object with the input.
    exam["injuries_observed"] = _as_list(facts.get("injury_type"))
    exam["injury_locations"] = _as_list(facts.get("body_location"))
    exam["injury_age_estimate"] = facts.get("injury_color_or_stage")

    # The note text itself is kept unchanged; it is only wrapped in a list when needed
    p3["limitations_and_uncertainty"] = _as_list(facts.get("survivor_uncertainty_notes"))

    return {
        "audio_id": audio_id,
        "p3_pre_fill": p3
    }


In [None]:
# Map every extraction entry to its P3 pre-fill wrapper, keeping the input order
p3_records = [
    map_to_p3(audio_id=entry["audio_id"], extracted=entry["clinical_facts"])
    for entry in clinical_data
]


In [None]:
# Destination of the pre-filled records, relative to the current working directory
OUTPUT_FILE = "5_p3_pre_filled_records.json"

# Serialize before opening the file: opening with "w" truncates it, so a
# serialization error would otherwise destroy the previous output.
serialized_records = json.dumps(p3_records, indent=2)

# Explicit encoding keeps the written file independent of the platform default
with open(OUTPUT_FILE, "w", encoding="utf-8") as f:
    f.write(serialized_records)

# Show the output path as the cell result
OUTPUT_FILE


'5_p3_pre_filled_records.json'

In [None]:
# Pretty-print one mapped record as a visual sanity check.
# NOTE(review): index 3 (the fourth record) is hard-coded; with fewer than
# four records this line raises IndexError.
print("Example P3 Pre-filled Record:\n")
pprint(p3_records[3])


Example P3 Pre-filled Record:

{'audio_id': 'case2.wav',
 'p3_pre_fill': {'clinical_opinion': {'consistency_with_history': 'To be '
                                                                  'determined '
                                                                  'by '
                                                                  'clinician',
                                      'degree_of_force': None},
                 'clinician_review_required': True,
                 'facility_details': {'exam_date': '2026-01-21',
                                      'examiner_name': None,
                                      'facility_name': None},
                 'history_of_assault': {'drug_facilitated_suspected': 'no',
                                        'mechanism': 'Grab',
                                        'repeated_assault': None,
                                        'timing': 'Sudden'},
                 'limitations_and_uncertainty': "I couldn't breathe."

In [None]:
import os

OUTPUT_DIR = "."  # Define OUTPUT_DIR as the current directory
readme_path = os.path.join(OUTPUT_DIR, "README.md")

# Heading of this stage's section; also used to detect whether the section
# has already been written to the README.
SECTION_HEADING = "## Forensic Clinical Formatting (P3 Mapping)"

# Section documenting the P3 mapping stage
readme_content = f"""
{SECTION_HEADING}

This stage maps extracted clinical facts into a structured, medico-legal format aligned with the Kenya Police P3 medical examination form.

### Objective
To reduce clinician documentation burden while improving forensic rigor by pre-filling standardized sections of the P3 form using AI-extracted clinical facts.

---

### Design Approach
- Extracted facts are mapped to P3-equivalent sections:
  - History of assault
  - Physical examination findings
  - Injury timing and mechanism
- The system does not make diagnoses or legal conclusions
- All outputs are explicitly marked as **pre-filled drafts** requiring clinician review

---

### Human-in-the-Loop Safeguards
- Survivor uncertainty is preserved verbatim
- Clinical opinion fields are never inferred by the system (left empty or marked "To be determined by clinician")
- Final validation is performed by a licensed clinician

---

### Output Artifact
- `5_p3_pre_filled_records.json`: one entry per audio file, each holding the `audio_id` and its `p3_pre_fill` draft record
"""

# Read what is already there: the cloned repository ships its own README.md,
# and opening it with mode "w" would discard that content.
existing_readme = ""
if os.path.exists(readme_path):
    with open(readme_path, "r", encoding="utf-8") as f:
        existing_readme = f.read()

if SECTION_HEADING in existing_readme:
    # Re-running the cell must not duplicate the section
    print(f"README.md already contains the P3 mapping section: {readme_path}")
else:
    # Append mode creates the file when it is missing and preserves it otherwise
    with open(readme_path, "a", encoding="utf-8") as f:
        f.write(readme_content)
    print(f"README.md created/updated at: {readme_path}")

README.md created/updated at: ./README.md
