In [48]:
import requests
import os
import urllib3
import json
from enum import Enum
from pydantic import BaseModel, Field

In [49]:
def get_text(file_path):
    """Extract the text of a PDF by uploading it to the PDF-to-text service.

    Args:
        file_path: Path of the PDF file to upload.

    Returns:
        The value of the ``content`` field of the service's JSON response, or
        ``None`` when the file does not exist, the request fails, the service
        answers with a non-200 status, or the response has no ``content`` field.
        Errors are reported with ``print`` rather than raised.
    """
    # Read the token from the environment when available so it does not have to
    # be pasted into the notebook; the placeholder is kept as the fallback.
    token = os.environ.get('GRUPMEDAI_API_TOKEN', 'YOUR_TOKEN_HERE')
    headers = {
        'Authorization': f'Bearer {token}',
    }

    try:
        with open(file_path, 'rb') as file:
            files = {
                'file': (os.path.basename(file_path), file, 'application/pdf')
            }

            # NOTE(review): verify=False disables TLS certificate checks for this
            # call; the warning is silenced only once we actually send the request.
            urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

            # (connect, read) timeout so a stalled server cannot hang the kernel.
            response = requests.post(
                'https://grupmedai-api-des.itcomb.cat/pdf/text',
                headers=headers,
                files=files,
                verify=False,
                timeout=(10, 120),
            )

        if response.status_code != 200:
            print(f"Error: Received status code {response.status_code}")
            # Error bodies are not guaranteed to be JSON, so show the raw text.
            print(f"Response: {response.text}")
            return None

        response_data = response.json()
        if 'content' not in response_data:
            print("Error: 'content' key not found in the response.")
            # Return None like every other failure path so callers never
            # mistake the raw response payload for document text.
            return None

        return response_data['content']

    except FileNotFoundError:
        print(f"Error: File {file_path} not found.")
        return None
    except Exception as e:
        print(f"An error occurred: {e}")
        return None

In [50]:
# get_text(r"C:\Users\Sophie\Tagging code\documents\HISTORIAL_MÈDIC.pdf")

In [51]:
def get_doc_tags_1(doc_path,model):
    """Assign a top-level tag (Medical, Legal or Email) to a PDF document.

    The document text is obtained with ``get_text`` and sent to the Ollama chat
    endpoint together with the JSON schema of the expected answer.

    Args:
        doc_path: Path of the PDF document to classify.
        model: Name of the Ollama model to query (e.g. "qwen2.5:32b").

    Returns:
        A ``DocTags`` instance with ``tag`` (a ``TagsEnum`` member) and
        ``confidence`` (float between 0.0 and 1.0).

    Raises:
        ValueError: If no text could be extracted from ``doc_path``.
        requests.HTTPError: If the Ollama endpoint answers with an error status.
    """
    text = get_text(doc_path)
    if not isinstance(text, str):
        # get_text reports failures by returning a non-text value; classifying
        # the literal string "None" would yield a meaningless tag.
        raise ValueError(f"Could not extract text from {doc_path!r}")

    class TagsEnum(str, Enum):
        medical = 'Medical'
        legal = 'Legal'
        email = 'Email'

    class DocTags(BaseModel):
        tag: TagsEnum = Field(..., description="Tag of the document")
        confidence: float = Field(...,ge=0.0,le=1.0,description="Confidence in tag assignment")

    # Define API endpoint
    api_url = "http://ollama-api-des.itcomb.cat/api/chat"  # Default Ollama API endpoint

    # The example is a complete JSON object; the constraints on each field are
    # listed after it instead of being embedded inside the braces.
    test_prompt =  f"""
    You are classifying the type of document based on its overall purpose and context, not just the words used.

    Available categories:
    - Medical: primarily about diagnoses, prescriptions, patient care, or insurance.
    - Legal: primarily about rights, obligations, regulations, or formal agreements.
    - Email: informal or formal communication, regardless of content, especially with greeting lines, sender/receiver info, and signatures.

    Document:\n
    {text}\n\n

    Instructions:
    - Respond with a JSON object in this format:
    {{
     "tag": "Medical",
     "confidence": 0.92
    }}
    - "tag" must be one of: Medical, Legal, Email.
    - "confidence" must be a number between 0.0 and 1.0.
    - The confidence should reflect how certain you are, based on clarity, keywords, and structure.
    - Do not include any extra explanation or text.

    """

    # Prepare the payload; "format" carries the JSON schema of the expected answer
    payload = {
        "model": model,
        "messages": [
            {
                "role": "user",
                "content": test_prompt
            }
        ],
        "format": DocTags.model_json_schema(),
        "stream": False
    }

    # Make the API request; the generous read timeout allows for slow generation
    # while still preventing an indefinite hang.
    response = requests.post(api_url, json=payload, verify=False, timeout=(10, 600))
    # Fail with the HTTP error instead of a KeyError on a missing "message".
    response.raise_for_status()
    response_data = response.json()

    # Parse and validate the model's answer against the schema
    return DocTags.model_validate_json(response_data["message"]["content"])

In [52]:
# Smoke test: run only the top-level classification on one sample PDF and show
# the parsed result object, then its tag value and confidence separately.
# NOTE(review): absolute, user-specific Windows path — adjust before running elsewhere.
tag_test = get_doc_tags_1(r"C:\Users\Sophie\Tagging code\documents\HISTORIAL_MÈDIC.pdf",model="qwen2.5:32b")
print(tag_test)
print("Tag:",tag_test.tag.value)
print("Confidence:",tag_test.confidence)

tag=<TagsEnum.medical: 'Medical'> confidence=0.98
Tag: Medical
Confidence: 0.98


In [53]:
def get_doc_tags_2(doc_path,category,model):
    """Assign a second-level sub-tag to a document already tagged with ``category``.

    The set of allowed sub-tags depends on ``category`` and is enforced through
    the JSON schema sent in the request's ``format`` field.

    Args:
        doc_path: Path of the PDF document to classify.
        category: Top-level tag of the document: 'Medical', 'Legal' or 'Email'.
        model: Name of the Ollama model to query.

    Returns:
        A ``MedicalTag``, ``LegalTag`` or ``EmailTag`` instance (depending on
        ``category``) with ``sub_tag`` and ``confidence`` fields.

    Raises:
        ValueError: If ``category`` is not supported, or if no text could be
            extracted from ``doc_path``.
        requests.HTTPError: If the Ollama endpoint answers with an error status.
    """
    # Validate first: an unknown category previously surfaced as an
    # UnboundLocalError on model_class, after the PDF had already been uploaded.
    if category not in ('Medical', 'Legal', 'Email'):
        raise ValueError(
            f"Unsupported category {category!r}; expected one of: Medical, Legal, Email"
        )

    text = get_text(doc_path)
    if not isinstance(text, str):
        # get_text reports failures by returning a non-text value.
        raise ValueError(f"Could not extract text from {doc_path!r}")

    if category == 'Medical':

        class MedicalSubTags(str,Enum):
            reports = 'Reports'
            medical_history = 'Medical History'
            medical_certificate = 'Medical Certificate'
            initial_clinical_diagnosis = 'Initial Clinical Diagnosis'
            sequelae_stabilization = 'Sequelae Stabilization'
            other = 'Other'

        class MedicalTag(BaseModel):
            sub_tag: MedicalSubTags = Field(...,description='Type of medical document')
            confidence: float = Field(...,ge=0.0,le=1.0,description="Confidence in sub-tag assignment")

        model_class = MedicalTag

    elif category == 'Legal':

        class LegalSubTags(str,Enum):
            acts = 'Acts'
            receipt = 'Acknowledgment of Receipt'
            resolutions = 'Resolutions or Closure'
            notices = 'Notices'
            reports = 'Reports'
            deposits = 'Guarantees or Deposits'
            administrative = 'Administrative'
            prelim_proceedings = 'Preliminary Proceedings'
            payments = 'Payments or Compensations'
            appeal = 'Appeal'
            power_attorney = 'Power of Attorney'
            sentence = 'Sentence'
            other = 'Other'

        class LegalTag(BaseModel):
            sub_tag: LegalSubTags = Field(...,description='Type of legal document')
            confidence: float = Field(...,ge=0.0,le=1.0,description="Confidence in sub-tag assignment")

        model_class = LegalTag

    else:  # category == 'Email' (guaranteed by the validation above)

        class EmailSubTags(str,Enum):
            legal = 'Legal'
            medical = 'Medical'

        class EmailTag(BaseModel):
            sub_tag: EmailSubTags = Field(...,description='Type of email')
            confidence: float = Field(...,ge=0.0,le=1.0,description="Confidence in sub-tag assignment")

        model_class = EmailTag

    schema = model_class.model_json_schema()

    api_url = "http://ollama-api-des.itcomb.cat/api/chat"

    # The example is a complete JSON object; field constraints follow it.
    prompt = f'''
    This document was categorized as {category}. What is its specific type?

    Instructions:
    - Respond with a JSON object in this format:
    {{
     "sub_tag": "...",
     "confidence": 0.87
    }}
    - "sub_tag" must be one of the defined sub-tags.
    - "confidence" must be a number between 0.0 and 1.0.
    - The confidence should reflect how certain you are, based on clarity, keywords, and structure.
    - Do not include any extra explanation or text.

    Document:
    \n{text}\n\n

    '''

    payload = {
        "model": model,
        "messages": [
            {
                "role": "user",
                "content": prompt
            }
        ],
        "format": schema,
        "stream": False
    }

    # Generous read timeout for slow generation, but never an indefinite hang.
    response = requests.post(api_url, json=payload, verify=False, timeout=(10, 600))
    # Fail with the HTTP error instead of a KeyError on a missing "message".
    response.raise_for_status()
    response_data = response.json()

    # Parse and validate the model's answer against the category-specific schema
    return model_class.model_validate_json(response_data["message"]["content"])

In [54]:
def get_doc_tags_2_iter2(doc_path,category,model):
    """Assign a second-level sub-tag, using a prompt that describes each sub-tag.

    Variant of ``get_doc_tags_2``: same categories, sub-tags and response
    schema, but the prompt includes a one-line description of every sub-tag.

    Args:
        doc_path: Path of the PDF document to classify.
        category: Top-level tag of the document: 'Medical', 'Legal' or 'Email'.
        model: Name of the Ollama model to query.

    Returns:
        A ``MedicalTag``, ``LegalTag`` or ``EmailTag`` instance (depending on
        ``category``) with ``sub_tag`` and ``confidence`` fields.

    Raises:
        ValueError: If ``category`` is not supported, or if no text could be
            extracted from ``doc_path``.
        requests.HTTPError: If the Ollama endpoint answers with an error status.
    """
    # Validate first: an unknown category previously surfaced as an
    # UnboundLocalError on model_class, after the PDF had already been uploaded.
    if category not in ('Medical', 'Legal', 'Email'):
        raise ValueError(
            f"Unsupported category {category!r}; expected one of: Medical, Legal, Email"
        )

    text = get_text(doc_path)
    if not isinstance(text, str):
        # get_text reports failures by returning a non-text value.
        raise ValueError(f"Could not extract text from {doc_path!r}")

    if category == 'Medical':

        class MedicalSubTags(str,Enum):
            reports = 'Reports'
            medical_history = 'Medical History'
            medical_certificate = 'Medical Certificate'
            initial_clinical_diagnosis = 'Initial Clinical Diagnosis'
            sequelae_stabilization = 'Sequelae Stabilization'
            other = 'Other'

        class MedicalTag(BaseModel):
            sub_tag: MedicalSubTags = Field(...,description='Type of medical document')
            confidence: float = Field(...,ge=0.0,le=1.0,description="Confidence in sub-tag assignment")

        model_class = MedicalTag

        sub_tag_descriptions = [
            "Reports: Summaries or findings from clinical evaluations or diagnostics.",
            "Medical History: Records of a patient’s past health conditions or treatments.",
            "Medical Certificate: Formal statements issued by a doctor for administrative/legal purposes.",
            "Initial Clinical Diagnosis: First clinical assessment of a condition.",
            "Sequelae Stabilization: Documents related to the stabilization of after-effects of prior medical issues.",
            "Other: Any medical document that doesn’t fit the above.",
        ]

    elif category == 'Legal':

        class LegalSubTags(str,Enum):
            acts = 'Acts'
            receipt = 'Acknowledgment of Receipt'
            resolutions = 'Resolutions or Closure'
            notices = 'Notices'
            reports = 'Reports'
            deposits = 'Guarantees or Deposits'
            administrative = 'Administrative'
            prelim_proceedings = 'Preliminary Proceedings'
            payments = 'Payments or Compensations'
            appeal = 'Appeal'
            power_attorney = 'Power of Attorney'
            sentence = 'Sentence'
            other = 'Other'

        class LegalTag(BaseModel):
            sub_tag: LegalSubTags = Field(...,description='Type of legal document')
            confidence: float = Field(...,ge=0.0,le=1.0,description="Confidence in sub-tag assignment")

        model_class = LegalTag

        sub_tag_descriptions = [
            "Acts: Formal legislative or regulatory documents.",
            "Acknowledgment of Receipt: Confirmations that a party has received a document or notice.",
            "Resolutions or Closure: Documents marking the end or resolution of a legal process or case.",
            "Notices: Formal communications informing parties of legal procedures or rights.",
            "Reports: Legal assessments or statements generated during proceedings or investigations.",
            "Guarantees or Deposits: Documents related to financial sureties or collateral.",
            "Administrative: Internal or procedural legal communications.",
            "Preliminary Proceedings: Initial steps taken in a legal case or investigation.",
            "Payments or Compensations: Documents involving settlements or financial reimbursements.",
            "Appeal: Requests for review or reconsideration of a prior legal decision.",
            "Power of Attorney: Documents granting legal authority to act on another’s behalf.",
            "Sentence: Final decisions or rulings issued by a court or authority.",
            "Other: Any legal document that does not clearly fit into the above categories.",
        ]

    else:  # category == 'Email' (guaranteed by the validation above)

        class EmailSubTags(str,Enum):
            legal = 'Legal'
            medical = 'Medical'

        class EmailTag(BaseModel):
            sub_tag: EmailSubTags = Field(...,description='Type of email')
            confidence: float = Field(...,ge=0.0,le=1.0,description="Confidence in sub-tag assignment")

        model_class = EmailTag

        sub_tag_descriptions = [
            "Legal: The email discusses or contains legal matters, contracts, notices, or procedures.",
            "Medical: The email contains or references medical content, such as diagnoses, treatments, or health records.",
        ]

    schema = model_class.model_json_schema()

    api_url = "http://ollama-api-des.itcomb.cat/api/chat"

    # The instruction block is shared by every category, so the prompt is built
    # once; only the sub-tag descriptions differ. The example is a complete
    # JSON object, with the field constraints listed after it.
    description_lines = "\n".join(f"        - {line}" for line in sub_tag_descriptions)
    content = f"""
        This document was categorized as {category}. What is its specific type?

{description_lines}

        Instructions:
        - Respond with a JSON object in this format:
        {{
        "sub_tag": "...",
        "confidence": 0.87
        }}
        - "sub_tag" must be one of the sub-tags listed above.
        - "confidence" must be a number between 0.0 and 1.0.
        - The confidence should reflect how certain you are, based on clarity, keywords, and structure.
        - Do not include any extra explanation or text.

        Document:\n
        {text}\n\n
        """

    payload = {
        "model": model,
        "messages": [
            {
                "role": "user",
                "content": content
            }
        ],
        "format": schema,
        "stream": False
    }

    # Generous read timeout for slow generation, but never an indefinite hang.
    response = requests.post(api_url, json=payload, verify=False, timeout=(10, 600))
    # Fail with the HTTP error instead of a KeyError on a missing "message".
    response.raise_for_status()
    response_data = response.json()

    # Parse and validate the model's answer against the category-specific schema
    return model_class.model_validate_json(response_data["message"]["content"])

In [55]:
def get_doc_tags_3(doc_path, category, sub_tag, model):
    """Assign a third-level ("sub-sub") tag within a given category and sub-tag.

    When ``category``/``sub_tag`` has an entry in the internal map, the model is
    asked to choose among that entry's options. Otherwise no request is made:
    the sub-tag itself is returned as the sub-sub-tag with confidence 1.0.

    Args:
        doc_path: Path of the PDF document to classify.
        category: Top-level tag of the document (e.g. 'Medical', 'Legal').
        sub_tag: Second-level tag value (e.g. 'Reports').
        model: Name of the Ollama model to query.

    Returns:
        A ``SubSubTagResult`` instance with ``sub_sub_tag`` (enum member whose
        value is the chosen option) and ``confidence`` (float in [0.0, 1.0]).

    Raises:
        ValueError: If the model must be queried but no text could be extracted
            from ``doc_path``.
        requests.HTTPError: If the Ollama endpoint answers with an error status.
    """
    sub_subtag_map = {
        "Medical": {
            "Medical History": [
                "Authorization Clinical History", "Clinical History", "Petition Clinical History"
            ],
            "Reports": [
                "Expanded Report", "Forensic Report", "External Expert Report", "Internal Expert Report", "Preliminary Report", "VDC Report"
            ],
            "Other": ["Other"]
        },
        "Legal": {
            "Acknowledgment of Receipt": ["Accusation of Receipt"],
            "Acts": ["Record of previous hearing", "Commission minutes", "Act of conciliation", "Act of preliminary diligences"],
            "Administrative": [
                "Citation", "Response to the demand", "Testimonial statement", "Demand", "Complaint", "Designate",
                "Location", "Doctor’s file", "Minute", "Notification", "Refusal", "Requirement"
            ],
            "Appeal": ["Opposition Appeal", "Appeal"],
            "Guarantees or Deposits": ["Certificate of insurance", "Incident communication", "Bail"],
            "Notices": ["Notice of previous hearing", "Notice of judgment"],
            "Other": ["Other", "Complementary documents"],
            # Key must equal the level-2 sub-tag value 'Payments or Compensations';
            # the previous spelling ("Payments of compensations") never matched,
            # so these options were unreachable.
            "Payments or Compensations": ["Proof of payment", "Receipt", "Collection", "Cost assessment"],
            "Power of Attorney": ["Power of attorney"],
            "Reports": ["Attested", "Medical Assessment report", "Claim report", "Resolution letter", "Lawyer’s report"],
            "Resolutions or Closure": ["Archive/dismissal", "Decree", "Conversation", "Administrative resolution"],
            "Sentence": ["Appeal sentence", "Judgment of cassation", "Judgment of first instance"]
        }
    }

    def _identifier(label):
        # Enum and member names: spaces and slashes replaced by underscores.
        return label.replace(" ", "_").replace("/", "_")

    has_options = category in sub_subtag_map and sub_tag in sub_subtag_map[category]
    # Without a map entry the only possible answer is the sub-tag itself.
    options = sub_subtag_map[category][sub_tag] if has_options else [sub_tag]

    enum_name = _identifier(f"{category}_{sub_tag}_Enum")
    SubSubTagEnum = Enum(enum_name, {_identifier(opt): opt for opt in options})

    class SubSubTagResult(BaseModel):
        sub_sub_tag: SubSubTagEnum = Field(..., description="Specific type of document within this sub-tag")
        confidence: float = Field(..., ge=0.0, le=1.0, description="Confidence in the sub-sub-tag classification")

    # Fallback: nothing to choose from, so skip both the PDF upload and the
    # model call. The confidence of 1.0 is a fixed value, not a model estimate.
    if not has_options:
        return SubSubTagResult(sub_sub_tag=SubSubTagEnum[_identifier(options[0])], confidence=1.0)

    # The text is only needed when the model is actually queried.
    text = get_text(doc_path)
    if not isinstance(text, str):
        # get_text reports failures by returning a non-text value.
        raise ValueError(f"Could not extract text from {doc_path!r}")

    formatted_options = "\n".join([f"- {o}" for o in options])
    prompt = f"""
    This document was categorized as '{category}' → '{sub_tag}'.
    What is the specific sub-sub-type of this document?

    Available options:
    {formatted_options}

    Instructions:
    - Choose the best fitting sub-sub-tag based on the document content.
    - Return a JSON object with fields "sub_sub_tag" and "confidence" (between 0.0 and 1.0).
    - Example:
    {{
        "sub_sub_tag": "Claim report",
        "confidence": 0.91
    }}

    Document:
    {text}
    """

    api_url = "http://ollama-api-des.itcomb.cat/api/chat"
    payload = {
        "model": model,
        "messages": [{"role": "user", "content": prompt}],
        "format": SubSubTagResult.model_json_schema(),
        "stream": False
    }

    # Generous read timeout for slow generation, but never an indefinite hang.
    response = requests.post(api_url, json=payload, verify=False, timeout=(10, 600))
    # Fail with the HTTP error instead of a KeyError on a missing "message".
    response.raise_for_status()
    response_data = response.json()
    return SubSubTagResult.model_validate_json(response_data["message"]["content"])

In [56]:
# Full three-level tagging run on one sample PDF.
# Level 2 is computed twice (get_doc_tags_2 and the described-sub-tag variant
# get_doc_tags_2_iter2), and level 3 is run once per level-2 answer so the two
# prompt variants can be compared side by side.
# NOTE(review): absolute, user-specific Windows path — adjust before running elsewhere.
path = r"C:\Users\Sophie\Tagging code\documents\HISTORIAL_MÈDIC.pdf"
tag_1 = get_doc_tags_1(path,model="qwen2.5:32b")
print(f"Tag: {tag_1.tag.value} (Confidence: {tag_1.confidence})")
tag_2 = get_doc_tags_2(path,category=tag_1.tag.value,model="qwen2.5:32b")
print(f"Sub-tag: {tag_2.sub_tag.value} (Confidence: {tag_2.confidence})")
tag_2_2 = get_doc_tags_2_iter2(path,category=tag_1.tag.value,model="qwen2.5:32b")
print(f'Sub-tag alternative: {tag_2_2.sub_tag.value} (Confidence: {tag_2_2.confidence})')
print('----------------------------------------------------------------------------------------')
# Level 3 based on the original level-2 prompt's answer.
sub_tag = get_doc_tags_3(path,category=tag_1.tag.value, sub_tag = tag_2.sub_tag.value, model="qwen2.5:32b")
print(f"Sub-sub-tag: {sub_tag.sub_sub_tag.value} (Confidence: {sub_tag.confidence})")
# Level 3 based on the alternative level-2 prompt's answer.
sub_tag_2 = get_doc_tags_3(path,category=tag_1.tag.value, sub_tag = tag_2_2.sub_tag.value, model="qwen2.5:32b")
print(f"Sub-sub-tag with alternative: {sub_tag_2.sub_sub_tag.value} (Confidence: {sub_tag_2.confidence})")

Tag: Medical (Confidence: 0.98)
Sub-tag: Medical History (Confidence: 0.95)
Sub-tag alternative: Medical History (Confidence: 0.95)
----------------------------------------------------------------------------------------
Sub-sub-tag: Clinical History (Confidence: 0.97)
Sub-sub-tag with alternative: Clinical History (Confidence: 0.98)


In [57]:
# Same three-level tagging run as the previous pipeline cell, on a different PDF;
# only `path` differs.
# When a level-2 sub-tag has no entry in get_doc_tags_3's option map, level 3
# echoes the sub-tag back with a fixed confidence of 1.0 (no model call).
# NOTE(review): absolute, user-specific Windows path — adjust before running elsewhere.
path = r"C:\Users\Sophie\Tagging code\documents\diagnostic_clinic_inicial.pdf"
tag_1 = get_doc_tags_1(path,model="qwen2.5:32b")
print(f"Tag: {tag_1.tag.value} (Confidence: {tag_1.confidence})")
tag_2 = get_doc_tags_2(path,category=tag_1.tag.value,model="qwen2.5:32b")
print(f"Sub-tag: {tag_2.sub_tag.value} (Confidence: {tag_2.confidence})")
tag_2_2 = get_doc_tags_2_iter2(path,category=tag_1.tag.value,model="qwen2.5:32b")
print(f'Sub-tag alternative: {tag_2_2.sub_tag.value} (Confidence: {tag_2_2.confidence})')
print('----------------------------------------------------------------------------------------')
# Level 3 based on the original level-2 prompt's answer.
sub_tag = get_doc_tags_3(path,category=tag_1.tag.value, sub_tag = tag_2.sub_tag.value, model="qwen2.5:32b")
print(f"Sub-sub-tag: {sub_tag.sub_sub_tag.value} (Confidence: {sub_tag.confidence})")
# Level 3 based on the alternative level-2 prompt's answer.
sub_tag_2 = get_doc_tags_3(path,category=tag_1.tag.value, sub_tag = tag_2_2.sub_tag.value, model="qwen2.5:32b")
print(f"Sub-sub-tag with alternative: {sub_tag_2.sub_sub_tag.value} (Confidence: {sub_tag_2.confidence})")

Tag: Medical (Confidence: 0.98)
Sub-tag: Medical History (Confidence: 0.95)
Sub-tag alternative: Initial Clinical Diagnosis (Confidence: 0.92)
----------------------------------------------------------------------------------------
Sub-sub-tag: Clinical History (Confidence: 0.97)
Sub-sub-tag with alternative: Initial Clinical Diagnosis (Confidence: 1.0)


In [58]:
# Three-level tagging run on a further sample PDF; the statements are the same
# as in the other pipeline cells, only `path` differs.
# The two level-2 prompt variants may disagree, in which case the two level-3
# calls below choose from different option lists.
# NOTE(review): absolute, user-specific Windows path — adjust before running elsewhere.
path = r"C:\Users\Sophie\Tagging code\documents\claim_medical_malpractice.pdf"
tag_1 = get_doc_tags_1(path,model="qwen2.5:32b")
print(f"Tag: {tag_1.tag.value} (Confidence: {tag_1.confidence})")
tag_2 = get_doc_tags_2(path,category=tag_1.tag.value,model="qwen2.5:32b")
print(f"Sub-tag: {tag_2.sub_tag.value} (Confidence: {tag_2.confidence})")
tag_2_2 = get_doc_tags_2_iter2(path,category=tag_1.tag.value,model="qwen2.5:32b")
print(f'Sub-tag alternative: {tag_2_2.sub_tag.value} (Confidence: {tag_2_2.confidence})')
print('----------------------------------------------------------------------------------------')
# Level 3 based on the original level-2 prompt's answer.
sub_tag = get_doc_tags_3(path,category=tag_1.tag.value, sub_tag = tag_2.sub_tag.value, model="qwen2.5:32b")
print(f"Sub-sub-tag: {sub_tag.sub_sub_tag.value} (Confidence: {sub_tag.confidence})")
# Level 3 based on the alternative level-2 prompt's answer.
sub_tag_2 = get_doc_tags_3(path,category=tag_1.tag.value, sub_tag = tag_2_2.sub_tag.value, model="qwen2.5:32b")
print(f"Sub-sub-tag with alternative: {sub_tag_2.sub_sub_tag.value} (Confidence: {sub_tag_2.confidence})")

Tag: Legal (Confidence: 0.95)
Sub-tag: Sentence (Confidence: 0.92)
Sub-tag alternative: Reports (Confidence: 0.89)
----------------------------------------------------------------------------------------
Sub-sub-tag: Judgment of first instance (Confidence: 0.68)
Sub-sub-tag with alternative: Claim report (Confidence: 0.97)


In [59]:
# Three-level tagging run on a sample PDF whose file name suggests an email.
# Same statements as the other pipeline cells; only `path` differs.
# NOTE(review): absolute, user-specific Windows path — adjust before running elsewhere.
path = r"C:\Users\Sophie\Tagging code\documents\email_legal.pdf"

tag_1 = get_doc_tags_1(path,model="qwen2.5:32b")
print(f"Tag: {tag_1.tag.value} (Confidence: {tag_1.confidence})")
tag_2 = get_doc_tags_2(path,category=tag_1.tag.value,model="qwen2.5:32b")
print(f"Sub-tag: {tag_2.sub_tag.value} (Confidence: {tag_2.confidence})")
tag_2_2 = get_doc_tags_2_iter2(path,category=tag_1.tag.value,model="qwen2.5:32b")
print(f'Sub-tag alternative: {tag_2_2.sub_tag.value} (Confidence: {tag_2_2.confidence})')
print('----------------------------------------------------------------------------------------')
# Level 3 based on the original level-2 prompt's answer.
sub_tag = get_doc_tags_3(path,category=tag_1.tag.value, sub_tag = tag_2.sub_tag.value, model="qwen2.5:32b")
print(f"Sub-sub-tag: {sub_tag.sub_sub_tag.value} (Confidence: {sub_tag.confidence})")
# Level 3 based on the alternative level-2 prompt's answer; a sub-tag without an
# entry in get_doc_tags_3's option map is echoed back with confidence 1.0.
sub_tag_2 = get_doc_tags_3(path,category=tag_1.tag.value, sub_tag = tag_2_2.sub_tag.value, model="qwen2.5:32b")
print(f"Sub-sub-tag with alternative: {sub_tag_2.sub_sub_tag.value} (Confidence: {sub_tag_2.confidence})")

Tag: Legal (Confidence: 0.95)
Sub-tag: Notices (Confidence: 0.92)
Sub-tag alternative: Preliminary Proceedings (Confidence: 0.95)
----------------------------------------------------------------------------------------
Sub-sub-tag: Notice of previous hearing (Confidence: 0.75)
Sub-sub-tag with alternative: Preliminary Proceedings (Confidence: 1.0)


In [60]:
# Three-level tagging run on another sample PDF.
# Same statements as the other pipeline cells; only `path` differs.
# NOTE(review): absolute, user-specific Windows path — adjust before running elsewhere.
path = r"C:\Users\Sophie\Tagging code\documents\doctor_file.pdf"

tag_1 = get_doc_tags_1(path,model="qwen2.5:32b")
print(f"Tag: {tag_1.tag.value} (Confidence: {tag_1.confidence})")
tag_2 = get_doc_tags_2(path,category=tag_1.tag.value,model="qwen2.5:32b")
print(f"Sub-tag: {tag_2.sub_tag.value} (Confidence: {tag_2.confidence})")
tag_2_2 = get_doc_tags_2_iter2(path,category=tag_1.tag.value,model="qwen2.5:32b")
print(f'Sub-tag alternative: {tag_2_2.sub_tag.value} (Confidence: {tag_2_2.confidence})')
print('----------------------------------------------------------------------------------------')
# Level 3 based on the original level-2 prompt's answer.
sub_tag = get_doc_tags_3(path,category=tag_1.tag.value, sub_tag = tag_2.sub_tag.value, model="qwen2.5:32b")
print(f"Sub-sub-tag: {sub_tag.sub_sub_tag.value} (Confidence: {sub_tag.confidence})")
# Level 3 based on the alternative level-2 prompt's answer.
sub_tag_2 = get_doc_tags_3(path,category=tag_1.tag.value, sub_tag = tag_2_2.sub_tag.value, model="qwen2.5:32b")
print(f"Sub-sub-tag with alternative: {sub_tag_2.sub_sub_tag.value} (Confidence: {sub_tag_2.confidence})")

Tag: Medical (Confidence: 0.95)
Sub-tag: Medical History (Confidence: 0.87)
Sub-tag alternative: Other (Confidence: 0.95)
----------------------------------------------------------------------------------------
Sub-sub-tag: Clinical History (Confidence: 0.75)
Sub-sub-tag with alternative: Other (Confidence: 0.97)


In [61]:
# Three-level tagging run on another sample PDF.
# Same statements as the other pipeline cells; only `path` differs.
# get_doc_tags_3's option map only has 'Medical' and 'Legal' categories, so for a
# document tagged 'Email' both level-3 calls return the level-2 sub-tag with a
# fixed confidence of 1.0 — that value is not a model estimate.
# NOTE(review): absolute, user-specific Windows path — adjust before running elsewhere.
path = r"C:\Users\Sophie\Tagging code\documents\PublicWaterMassMailing.pdf"

tag_1 = get_doc_tags_1(path,model="qwen2.5:32b")
print(f"Tag: {tag_1.tag.value} (Confidence: {tag_1.confidence})")
tag_2 = get_doc_tags_2(path,category=tag_1.tag.value,model="qwen2.5:32b")
print(f"Sub-tag: {tag_2.sub_tag.value} (Confidence: {tag_2.confidence})")
tag_2_2 = get_doc_tags_2_iter2(path,category=tag_1.tag.value,model="qwen2.5:32b")
print(f'Sub-tag alternative: {tag_2_2.sub_tag.value} (Confidence: {tag_2_2.confidence})')
print('----------------------------------------------------------------------------------------')
# Level 3 based on the original level-2 prompt's answer.
sub_tag = get_doc_tags_3(path,category=tag_1.tag.value, sub_tag = tag_2.sub_tag.value, model="qwen2.5:32b")
print(f"Sub-sub-tag: {sub_tag.sub_sub_tag.value} (Confidence: {sub_tag.confidence})")
# Level 3 based on the alternative level-2 prompt's answer.
sub_tag_2 = get_doc_tags_3(path,category=tag_1.tag.value, sub_tag = tag_2_2.sub_tag.value, model="qwen2.5:32b")
print(f"Sub-sub-tag with alternative: {sub_tag_2.sub_sub_tag.value} (Confidence: {sub_tag_2.confidence})")

Tag: Email (Confidence: 0.75)
Sub-tag: Medical (Confidence: 0.15)
Sub-tag alternative: Legal (Confidence: 0.35)
----------------------------------------------------------------------------------------
Sub-sub-tag: Medical (Confidence: 1.0)
Sub-sub-tag with alternative: Legal (Confidence: 1.0)
