<a href="https://colab.research.google.com/github/nxxk23/AI-Engineer/blob/main/fraud/iclaim.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip -q install langchain langchain_huggingface langchain_community  gradio transformers elasticsearch gradio_calendar

In [2]:
import os
from getpass import getpass
import requests
import os
import json
from huggingface_hub import InferenceClient
from elasticsearch import Elasticsearch
import pandas as pd
from transformers import AutoTokenizer, AutoModel
import torch
import torch.nn.functional as F
from sentence_transformers import SentenceTransformer
import gradio as gr
import json
from gradio_calendar import Calendar

[diag file](https://drive.google.com/file/d/1zQI2pQvS7sK_9d4j9-kNAzyNYFTizmaE/view?usp=drive_link)

[simb file](https://drive.google.com/file/d/1_YPVzunCFWmxieTXr8BcoJqPEcXvLu4A/view?usp=drive_link)



In [3]:
# Directory holding the input CSV files. Defaults to the original Google Drive
# location; set the FRAUD_DATA_DIR environment variable to read another copy
# without editing the notebook.
DATA_DIR = os.environ.get('FRAUD_DATA_DIR', '/content/drive/MyDrive/AIEngineer/fraud')

# OPD discharge records, plus the SIMB and diagnosis lookup lists used by the UI.
data = pd.read_csv(os.path.join(DATA_DIR, 'opd_discharge_August_2024.csv'))
simb = pd.read_csv(os.path.join(DATA_DIR, 'simblist.csv'))
diag = pd.read_csv(os.path.join(DATA_DIR, 'diaglist.csv'))

In [4]:
# Elasticsearch credentials.
# Read from the environment (ES_USER / ES_PASSWORD) and fall back to an
# interactive prompt, so that no secret is ever stored in the notebook itself.
# NOTE(review): the password that used to be hardcoded here has been exposed in
# the notebook history and should be rotated.
user = os.environ.get("ES_USER") or input("Elasticsearch user: ")
passwords = os.environ.get("ES_PASSWORD") or getpass("Elasticsearch password: ")

# Tokenizer and encoder used by rag_encode() to embed query text.
tokenizer = AutoTokenizer.from_pretrained('sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2')
model = AutoModel.from_pretrained('sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2')

# Client for the cluster that hosts the ICD-10 price and order indices queried below.
es = Elasticsearch(
    hosts=[{'host': 'elk.manageai.co.th', 'port': 443, 'scheme': 'https'}],
    basic_auth=(user, passwords)
)

def mean_pooling(model_output, attention_mask):
    """Average token embeddings over the sequence axis, ignoring masked positions.

    Args:
        model_output: Indexable model output whose element 0 holds the
            per-token embeddings.
        attention_mask: Mask tensor; it is broadcast over the embedding axis
            and used as a 0/1 weight for each token.

    Returns:
        Tensor of masked mean embeddings (sum over axis 1 divided by the mask
        count, with the count clamped to at least 1e-9 to avoid division by zero).
    """
    per_token = model_output[0]
    weights = attention_mask.unsqueeze(-1).expand(per_token.size()).float()
    weighted_sum = (per_token * weights).sum(dim=1)
    token_counts = weights.sum(dim=1).clamp(min=1e-9)
    return weighted_sum / token_counts

def rag_encode(text):
    """Embed `text` with the module-level tokenizer/model and return it as a list.

    The text is tokenized (padded and truncated), run through the model without
    gradient tracking, mean-pooled over tokens, L2-normalised along dim 1, and
    finally squeezed and converted to a plain Python list.
    """
    batch = tokenizer(text, padding=True, truncation=True, return_tensors='pt')
    # Inference only: no gradients are needed.
    with torch.no_grad():
        outputs = model(**batch)
    pooled = mean_pooling(outputs, batch['attention_mask'])
    unit_length = F.normalize(pooled, p=2, dim=1)
    return unit_length.squeeze().tolist()

def search_exact_match_score(query_text, index_name):
    """Return the documents in `index_name` whose embedding matches `query_text` exactly.

    The query text is embedded with rag_encode() and every document is scored
    with ``cosineSimilarity + 1.0``; only hits whose score equals 2.0 (cosine
    similarity of 1) among the top 10 are kept.

    Args:
        query_text: Text to embed and search for. None, NaN or a blank string
            is treated as "nothing to search" and yields None without
            contacting the model or Elasticsearch.
        index_name: Name of the Elasticsearch index to query.

    Returns:
        A list of ``{"_source": {...}}`` dicts (fields ``icd10`` and
        ``total_average_price``), or None when there is no exact match or the
        search fails. Failures are printed, not raised.
    """
    # A missing query cannot be embedded; bail out instead of letting the
    # tokenizer raise (callers pass None when no diagnosis is selected).
    query_is_missing = (
        query_text is None
        or query_text != query_text  # NaN coming from pandas
        or (isinstance(query_text, str) and not query_text.strip())
    )
    if query_is_missing:
        print("Error performing search: empty query text.")
        return None

    try:
        # Encoding is inside the try so that embedding errors are reported the
        # same way as search errors instead of crashing the caller.
        query_vector = rag_encode(query_text)
        search_query = {
            "size": 10,
            "_source": ["icd10", "total_average_price"],  # Include only required fields
            "query": {
                "script_score": {
                    "query": {
                        "match_all": {}
                    },
                    "script": {
                        "source": "cosineSimilarity(params.query_vector, 'embedding') + 1.0",
                        "params": {
                            "query_vector": query_vector
                        }
                    }
                }
            }
        }

        response = es.search(index=index_name, body=search_query, timeout='120s')
        exact_score_matches = [
            {
                "_source": hit["_source"]
            }
            for hit in response['hits']['hits'] if hit["_score"] == 2.0
        ]

        if exact_score_matches:
            return exact_score_matches
        else:
            print("No documents found with score 2.0.")
            return None

    except Exception as e:
        print(f"Error performing search: {e}")
        return None

def search_similar_vectors(query_text, index_name):
    """Return the order-item documents whose embedding matches `query_text` exactly.

    The query text is embedded with rag_encode() and up to 1000 documents are
    scored with ``cosineSimilarity + 1.0``; only hits whose score equals 2.0
    (cosine similarity of 1) are kept.

    Args:
        query_text: Text to embed and search for. None, NaN or a blank string
            is treated as "nothing to search" and yields None without
            contacting the model or Elasticsearch.
        index_name: Name of the Elasticsearch index to query.

    Returns:
        A pandas DataFrame built from the ``_source`` of each matching hit, or
        None when nothing matches or the search fails. Failures are printed,
        not raised.
    """
    # A missing query cannot be embedded; bail out instead of letting the
    # tokenizer raise (callers pass None when no diagnosis is selected).
    query_is_missing = (
        query_text is None
        or query_text != query_text  # NaN coming from pandas
        or (isinstance(query_text, str) and not query_text.strip())
    )
    if query_is_missing:
        print("Error performing search: empty query text.")
        return None

    try:
        # Encoding is inside the try so that embedding errors are reported the
        # same way as search errors instead of crashing the caller.
        query_vector = rag_encode(query_text)

        search_query = {
            "size": 1000,
            "_source": [
                "simb_billing_code", "local_billing_name", "item_id", "item_name",
                "average_initial_price", "order_count", "icd10", "range"
            ],
            "query": {
                "script_score": {
                    "query": {
                        "match_all": {}
                    },
                    "script": {
                        "source": "cosineSimilarity(params.query_vector, 'embedding') + 1.0",
                        "params": {
                            "query_vector": query_vector
                        }
                    }
                }
            }
        }

        response = es.search(index=index_name, body=search_query, timeout='120s')

        # Keep only the payload of perfect-score hits; that is all callers use.
        source_data = [
            hit["_source"]
            for hit in response['hits']['hits']
            if hit["_score"] == 2.0
        ]

        if source_data:
            return pd.DataFrame(source_data)
        else:
            print("No results with score 2.0 found.")
            return None

    except Exception as e:
        print(f"Error performing search: {e}")
        return None

def extract_primary_icd10(diagnosis_str):
    """Return the ICD-10 code of the first diagnosis whose ``dx_type`` is "PP".

    Args:
        diagnosis_str: JSON text expected to decode to a list of diagnosis dicts.

    Returns:
        The ``icd10`` value of the first dict entry with ``dx_type == "PP"``
        (which may itself be None if the key is absent), or None when the input
        is not valid JSON, is not a string-like value, is not a list, or holds
        no such entry.
    """
    try:
        parsed = json.loads(diagnosis_str)
    except (json.JSONDecodeError, TypeError):
        # Unparseable or non-text input (e.g. NaN): best-effort, no code.
        return None

    if not isinstance(parsed, list):
        return None

    for entry in parsed:
        if isinstance(entry, dict) and entry.get("dx_type") == "PP":
            return entry.get("icd10")
    return None

# Derive each record's primary ICD-10 code from its JSON `diagnosis` column.
data['icd10'] = data['diagnosis'].apply(extract_primary_icd10)

# Working table for the UI: only records that have order items, restricted to
# the columns the interface reads.
df = data.loc[
    data['order_item'] != '[]',
    [
        'cid', 'patient', 'illnes_type', 'is_follow_up', 'billing', 'order_item',
        'total_bill_amount', 'insurance', 'diagnosis', 'icd10', 'chief_complaint',
    ],
]

### gradio interface

# Display labels for the illness-type codes found in the `illnes_type` column.
illness_type_mapping = dict(
    ILL="เจ็บป่วย",
    ACC="อุบัติเหตุ",
    FU="ผู้ป่วยนอกติดตามอาการหลังจาก Admit เจ็บป่วย",
    ER="อุบัติเหตุฉุกเฉินภายใน 24 ชม.",
    HD="ผู้ป่วยนอกล้างไต",
    DEN="ทันตกรรม",
)

# Display labels for the `is_follow_up` flag.
follow_up_mapping = dict(
    N="เข้ารับการรักษาครั้งแรก",
    Y="เข้ารักษาแบบต่อเนื่อง",
)

# ICD-10 code -> whitespace-trimmed description, taken from the diagnosis list.
# When a code appears more than once, the last row wins.
diagnosis_mapping = {
    code: description.strip()
    for code, description in zip(diag['ICD-10 Code'], diag['Description'])
}

def populate_patient_info(cid):
    """Look up a patient by `cid` and return the values that fill the claim form.

    Args:
        cid: Value compared against the ``cid`` column of `df`.

    Returns:
        A 10-tuple: (title_name, first_name, last_name, hn, dob,
        illness-type label, visit-type label, chief complaint,
        diagnosis description, average price). When no row matches `cid`,
        nine empty strings followed by 0.0 are returned.
    """
    patient_row = df[df["cid"] == cid]
    if patient_row.empty:
        return "", "", "", "", "", "", "", "", "", 0.0

    record = patient_row.iloc[0]
    patient_data = json.loads(record["patient"])  # Parse the string into a dictionary
    icd10_code = record["icd10"]

    # The record may have no primary diagnosis, or a code that is absent from
    # the diagnosis list; fall back to an empty description instead of raising
    # IndexError on an empty selection.
    matching_descriptions = diag.loc[diag['ICD-10 Code'] == icd10_code, 'Description']
    raw_description = matching_descriptions.iloc[0] if not matching_descriptions.empty else ""
    diagnosis_description = raw_description.strip() if isinstance(raw_description, str) else ""

    # Only query the reference-price index when there is a code to search for.
    has_code = isinstance(icd10_code, str) and bool(icd10_code.strip())
    search_result = (
        search_exact_match_score(icd10_code, 'internal-manageai-icd10-average_price')
        if has_code else None
    )
    average_price = search_result[0]['_source']['total_average_price'] if search_result else 0.0

    illness_type_mapped = illness_type_mapping.get(record["illnes_type"], "อุบัติเหตุ")
    visit_type_mapped = follow_up_mapping.get(record["is_follow_up"], "Unknown")

    # A missing complaint is read from the CSV as NaN; show it as an empty field.
    chief_complaint = record["chief_complaint"]
    if pd.isna(chief_complaint):
        chief_complaint = ""

    return (
        patient_data.get("title_name", ""),
        patient_data.get("first_name", ""),
        patient_data.get("last_name", ""),
        patient_data.get("hn", ""),
        patient_data.get("dob", ""),
        illness_type_mapped,
        visit_type_mapped,
        chief_complaint,
        diagnosis_description,
        average_price  # Shown in the UI as the maximum claimable amount
    )

# CIDs offered as quick-pick examples in the UI: up to five randomly chosen
# records flagged is_follow_up == 'N'. The sample size is capped at the number
# of available rows because Series.sample raises ValueError when asked for more
# rows than exist.
sample_cids = (
    df.loc[df['is_follow_up'] == 'N', 'cid']
    .pipe(lambda cids: cids.sample(min(5, len(cids))))
    .tolist()
)

def populate_sample_data(cid):
    """Return `cid` followed by the ten form values from populate_patient_info(cid).

    Used when a sample CID is picked, so the ID-card field and the rest of the
    form are filled in one step.
    """
    return (cid, *populate_patient_info(cid))

def update_diagnosis_choices(cid):
    """Return the diagnosis description for the patient's ICD-10 code.

    Yields an empty string when no row of `df` matches `cid` or when the code
    has no entry in `diagnosis_mapping`.
    """
    matches = df[df["cid"] == cid]
    if matches.empty:
        return ""
    # The first matching record decides the diagnosis shown in the dropdown.
    return diagnosis_mapping.get(matches['icd10'].iloc[0], "")

def get_simb_code(bill_description):
    """Return the SIMB_1 code listed for `bill_description`, or "" if unknown.

    The lookup table is rebuilt from the `simb` frame on every call; for a
    repeated description the last row wins.
    """
    lookup = {
        description: code
        for description, code in zip(simb['Description'], simb['SIMB_1'])
    }
    return lookup.get(bill_description, "")

def get_billing_table_with_order_items(cid):
    """Build the itemised billing table for the patient identified by `cid`.

    For every billing category of the patient's first matching record, one
    category row is emitted, followed by one row per order item sharing the
    category's ``simb_billing_code``. Two summary rows carrying the same
    category totals close the table.

    Args:
        cid: Value compared against the ``cid`` column of `df`.

    Returns:
        ``(table, total_net)`` where `table` is a pandas DataFrame with the
        nine display columns and `total_net` is the sum of
        ``billing_net_amount`` over all categories. An empty table and 0.0 are
        returned when the patient is unknown or has no billing categories.
    """
    columns = ["ลำดับ", "หมวดหมู่ค่าบริการ", "รายการ", "จำนวน", "จำนวนเงิน(ก่อนหักส่วนลด)", "ส่วนลด", "จำนวนเงิน(หลังหักส่วนลด)", "Billing Code", "SIMB"]

    patient_row = df[df["cid"] == cid]
    if patient_row.empty:
        return pd.DataFrame(columns=columns), 0.0

    record = patient_row.iloc[0]
    billing_df = pd.DataFrame(json.loads(record["billing"]))
    order_items_df = pd.DataFrame(json.loads(record["order_item"]))

    # Without billing categories there is nothing to tabulate, and the amount
    # columns summed below would not exist.
    if billing_df.empty:
        return pd.DataFrame(columns=columns), 0.0

    # Order items can only be attached to a category through this column.
    items_have_codes = 'simb_billing_code' in order_items_df.columns

    all_rows = []

    # Number the categories 1, 2, 3, ... independently of how many item rows
    # were appended in between (item rows carry no sequence number).
    for category_no, (_, billing_row) in enumerate(billing_df.iterrows(), start=1):
        simb_code = billing_row['simb_billing_code']

        # Add a row for the billing category
        all_rows.append({
            "ลำดับ": category_no,
            "หมวดหมู่ค่าบริการ": f"{simb_code} {billing_row['local_billing_name']}",
            "รายการ": "",
            "จำนวน": billing_row.get('item_amount', ""),  # Use get to safely access the key
            "จำนวนเงิน(ก่อนหักส่วนลด)": float(billing_row['billing_initial']),
            "ส่วนลด": float(billing_row['billing_discount']),
            "จำนวนเงิน(หลังหักส่วนลด)": float(billing_row['billing_net_amount']),
            "Billing Code": "",
            "SIMB": simb_code
        })

        if not items_have_codes:
            continue

        # Add rows for each item under this category
        matching_items = order_items_df[order_items_df['simb_billing_code'] == simb_code]
        for _, item in matching_items.iterrows():
            all_rows.append({
                "ลำดับ": "",
                "หมวดหมู่ค่าบริการ": item['item_id'],
                "รายการ": item['item_name'],
                "จำนวน": item['item_amount'],  # Access item_amount from order_items_df
                "จำนวนเงิน(ก่อนหักส่วนลด)": float(item['initial']),
                "ส่วนลด": float(item['discount']),
                "จำนวนเงิน(หลังหักส่วนลด)": float(item['net_amount']),
                "Billing Code": billing_row['local_billing_code'],
                "SIMB": simb_code
            })

    # Summation of numeric columns (category level, so items are not double counted)
    total_initial = billing_df['billing_initial'].astype(float).sum()
    total_discount = billing_df['billing_discount'].astype(float).sum()
    total_net = billing_df['billing_net_amount'].astype(float).sum()

    def summary_row(label):
        # One summary line carrying the category totals under the given label.
        return {
            "ลำดับ": "",
            "หมวดหมู่ค่าบริการ": label,
            "รายการ": "",
            "จำนวน": "",
            "จำนวนเงิน(ก่อนหักส่วนลด)": total_initial,
            "ส่วนลด": total_discount,
            "จำนวนเงิน(หลังหักส่วนลด)": total_net,
            "Billing Code": "",
            "SIMB": ""
        }

    all_rows.append(summary_row("รวม"))
    # Grand total row
    all_rows.append(summary_row("รวมค่ารักษาพยาบาลทั้งหมด"))

    return pd.DataFrame(all_rows, columns=columns), total_net  # Return both DataFrame and total_net

def update_billing_table_with_order_items(cid):
    """Return the patient's billing table as a list of rows, plus the net total.

    Thin adapter around get_billing_table_with_order_items() that converts the
    DataFrame into nested lists.
    """
    table, net_total = get_billing_table_with_order_items(cid)
    rows = table.values.tolist()
    return rows, net_total

def calculate_recommended_claim(total_net, remaining_opd_year, total_average_price):
    """Return the claim ceiling minus the amount already billed.

    The ceiling is `total_average_price` when `remaining_opd_year` is at least
    that large, otherwise `remaining_opd_year`. Arguments given as strings are
    converted with float(); other values are used as they are.
    """
    def as_number(value):
        # Only text is converted; numeric inputs pass through untouched.
        return float(value) if isinstance(value, str) else value

    total_net = as_number(total_net)
    remaining_opd_year = as_number(remaining_opd_year)
    total_average_price = as_number(total_average_price)

    ceiling = (
        total_average_price
        if remaining_opd_year >= total_average_price
        else remaining_opd_year
    )
    return ceiling - total_net

def recommend_drugs_for_patient(patient_icd10_code):
    """Return the ten most-ordered items in billing group '1.1.1(3)' for an ICD-10 code.

    Results come from search_similar_vectors() on the
    'internal-manageai-icd10-order' index, are filtered to
    ``simb_billing_code == '1.1.1(3)'``, sorted by ``order_count`` descending,
    and stripped of the ``order_count``, ``icd10`` and ``range`` columns.

    Returns:
        A DataFrame with at most ten rows, or an explanatory string when the
        search returned nothing.
    """
    matches = search_similar_vectors(patient_icd10_code, 'internal-manageai-icd10-order')
    if matches is None:
        return "No results found for the given ICD10 code."

    return (
        matches[matches['simb_billing_code'] == '1.1.1(3)']
        .sort_values(by=['order_count'], ascending=False)
        .drop(columns=['order_count', 'icd10', 'range'])
        .head(10)
    )

def handle_form(
    id_card, title, first_name, last_name, patient_id, birth_date, remaining_opd_year,
    treatment_type, visit_type, chief_complaint, diag_type, diagnosis, total_net
):
    """Compute the claim recommendation and the drug suggestions for the form.

    Only `remaining_opd_year`, `diagnosis` and `total_net` are used; the other
    parameters mirror the form fields wired to this handler and are ignored.

    Args:
        remaining_opd_year: Remaining OPD budget as entered in the form; an
            empty value counts as 0.
        diagnosis: Selected diagnosis text; the part before the first space is
            taken as the ICD-10 code.
        total_net: Net amount of the current bill; an empty value counts as 0.

    Returns:
        ``(recommendation_adjustment, claim_message, recommended_drugs_list)``:
        a signed adjustment string, the recommended claim amount (or a message
        when no reference price is available), and the drug rows as lists.
    """
    # The amount field can be empty before a patient is selected.
    total_net_float = float(total_net) if total_net not in (None, "") else 0.0
    remaining_opd_year_float = float(remaining_opd_year) if remaining_opd_year else 0.0
    icd10_code = diagnosis.split(" ")[0] if diagnosis else None

    # Without a diagnosis there is no code to look up a reference price or
    # drug list for, so report "cannot calculate" instead of searching.
    if not icd10_code:
        return "ไม่สามารถคำนวณได้", "ไม่มีข้อมูลราคากลางสำหรับการรักษานี้", []

    # Calculate recommended claim
    output = search_exact_match_score(icd10_code, 'internal-manageai-icd10-average_price')
    if output:
        average_price = float(output[0]['_source'].get('total_average_price', 0.0))
        # Head-room between the reference price and what is already billed.
        max_claimable = max(0, average_price - total_net_float)
        # The extra amount is further limited by what is left of the OPD budget.
        additional_claim = min(max_claimable, remaining_opd_year_float - total_net_float) if max_claimable > 0 else 0.0
        recommendation_adjustment = f"{'+ ' if additional_claim >= 0 else '- '}{abs(additional_claim):.2f}"
        recommended_claim = total_net_float + additional_claim
        claim_message = f"{recommended_claim:.2f}"
    else:
        recommendation_adjustment = "ไม่สามารถคำนวณได้"
        claim_message = "ไม่มีข้อมูลราคากลางสำหรับการรักษานี้"

    # Search recommended drugs for patient
    recommended_drugs = recommend_drugs_for_patient(icd10_code)
    if isinstance(recommended_drugs, pd.DataFrame):
        recommended_drugs_list = recommended_drugs.values.tolist()
    else:
        recommended_drugs_list = []  # Empty if no results

    return recommendation_adjustment, claim_message, recommended_drugs_list

# Extra CSS passed to gr.Blocks: the `red-border` class is attached (via
# elem_classes) to the fields and rows that should stand out with a red frame.
custom_css = """
.red-border {
    border: 2px solid red !important;
    border-radius: 5px;
    padding: 5px;
}
"""



### Ui

# Gradio application: one tab with the patient / claim form and billing table,
# one tab with the claim recommendation. Event handlers are registered inside
# the second tab because they reference components created there.
with gr.Blocks(css=custom_css) as demo:
    with gr.Tab("Patient and Claim Information"):
        gr.Markdown("## ข้อมูลผู้ป่วยนอก")
        # Entering an ID here (or picking a sample CID below) drives every lookup.
        id_card = gr.Textbox(label="เลขบัตรประชาชน *", placeholder="กรอกเลขบัตรประชาชนของผู้ป่วย", elem_classes="red-border")
        sample_cid_buttons = gr.Radio(label="เลือกตัวอย่าง CID", choices=sample_cids, type="value")
        with gr.Row():
            title = gr.Dropdown(
                label="คำนำหน้า",
                choices=["คุณ", "นาง", "นางสาว", "นาย", "Mr", "Ms", "Miss", "Mrs", "เด็กชาย", "เด็กหญิง"]
            )
            first_name = gr.Textbox(label="ชื่อ")
            last_name = gr.Textbox(label="นามสกุล")
        with gr.Row():
            patient_id = gr.Textbox(label="รหัสผู้ป่วย (HN)")
            birth_date = Calendar(label="วัน/เดือน/ปีเกิด (ค.ศ.)")

        gr.Markdown("### ข้อมูลส่งเคลม")

        with gr.Row():
            # Choices are the display labels of illness_type_mapping / follow_up_mapping,
            # i.e. the same strings populate_patient_info returns.
            treatment_type = gr.Dropdown(
                label="ประเภทการรักษา",
                value="",
                choices=list(illness_type_mapping.values())
            )
            visit_type = gr.Radio(
                label="ประเภทการเข้ารักษา",
                choices=list(follow_up_mapping.values())
            )
        chief_complaint = gr.Textbox(label="อาการสำคัญที่เข้ามาโรงพยาบาล", value="")

        gr.Markdown("#### Diagnosis Type")
        with gr.Row():
            diag_type = gr.Dropdown(
                label="Diagnosis Type",
                choices=["โรคหลักที่ให้การรักษา (PP)", "โรคประจำตัว (UD)", "การวินิจฉัยร่วม (CM)", "โรคแทรก (CP)", "โรคอื่นๆ (OT)"],
                value='โรคหลักที่ให้การรักษา (PP)'
            )
            # handle_form takes the text before the first space of the selected
            # value as the ICD-10 code.
            diagnosis = gr.Dropdown(
                label="Diagnosis (ICD10) *",
                choices=list(diagnosis_mapping.values()),
                value=""
            )

        gr.Markdown("#### รายการค่ารักษาพยาบาล")
        # Filled by update_billing_table_with_order_items when id_card changes.
        billing_table = gr.DataFrame(
            headers=["ลำดับ", "หมวดหมู่ค่าบริการ", "รายการ", "จำนวน", "จำนวนเงิน(ก่อนหักส่วนลด)", "ส่วนลด", "จำนวนเงิน(หลังหักส่วนลด)", "Billing Code", "SIMB"],
            interactive=True
        )

    # NOTE(review): the block below is a disabled "Insurance Policy Information"
    # tab kept as commented-out code; none of its components exist at runtime.
    # with gr.Tab("Insurance Policy Information"):
    #     gr.Markdown("## ข้อมูลกรมธรรม์ที่ใช้")
    #     with gr.Row():
    #         service_type = gr.Dropdown(label="ประเภทบริการ", value="", choices=list(illness_type_mapping.values()))
    #         policy_type = gr.Textbox(label="ประเภทกรมธรรม์", value="ประกันรายบุคคล")
    #     with gr.Row():
    #         transaction_id = gr.Textbox(label="หมายเลขธุรกรรม", value="6bf7e99a-9551-454a-9805-beb8591e110f")
    #         ref_number = gr.Textbox(label="เลขที่อ้างอิงจากบริษัทประกัน", value="9ba6592f-0080-480d-8143-9e5d87a10a39")

    #     gr.Markdown("### ผลประโยชน์การรักษาผู้ป่วย")
    #     with gr.Row():
    #         policy_number = gr.Textbox(label="หมายเลขกรมธรรม์ที่มีผลบังคับ", value="W07776316")
    #         insured_name = gr.Textbox(label="ชื่อผู้เอาประกัน", value="Shirley Johnston")
    #     with gr.Row():
    #         insurance_plan_code = gr.Textbox(label="รหัสแผนประกัน", value="Precious Care")
    #         coverage_type = gr.Textbox(label="ประเภทความคุ้มครอง", value="OPD/IPD")
    #     with gr.Row():
    #         policy_start_date = Calendar(label="วันเวลาที่กรมธรรม์มีผลบังคับ", value="")
    #         policy_end_date = Calendar(label="วันเวลาที่สิ้นสุดความคุ้มครอง", value="")
    #     eligibility_status = gr.Textbox(label="สถานะการใช้สิทธิ์", value="มีสิทธิ์")
    #     with gr.Row():
    #         max_opd_year = gr.Textbox(label="จำนวนเงิน OPD ไม่เกินต่อปี", value=12000)

    with gr.Tab("Recommend Claim"):
        gr.Markdown("#### จำนวนเงินที่สามารถเบิกได้")
        with gr.Row():
            # Read-only figures: net total of the current bill and the reference
            # price returned by populate_patient_info.
            total_net = gr.Number(label="รวมค่ารักษาพยาบาลในการส่งยอดเคลมครั้งนี้ (บาท) ", interactive=False)
            average_price = gr.Number(label="จำนวนเบิกได้สูงสุดสำหรับโรคนี้ (บาท)", interactive=False)

        # Refresh the billing table and its net total whenever the ID-card field changes.
        id_card.change(
            fn=update_billing_table_with_order_items,
            inputs=[id_card],
            outputs=[billing_table, total_net]
        )

        remaining_opd_year = gr.Textbox(label="จำนวนเงิน OPD คงเหลือ *", placeholder="กรอกจำนวนเงิน OPD คงเหลือ", elem_classes="red-border")

        with gr.Row(elem_classes="red-border"):
            recommendation_adjustment = gr.Textbox(label="แนะนำให้เบิกเพิ่ม(+)/ลด(-) (บาท)", interactive=False, visible=True)
            final_result = gr.Textbox(label="สามารถแนะนำให้เบิกได้สูงสุดสำหรับครั้งนี้ (บาท)", interactive=False, visible=True)  # added this field

        # Fill the patient and claim fields from the record matching the ID.
        id_card.change(
            fn=populate_patient_info,
            inputs=[id_card],
            outputs=[title, first_name, last_name, patient_id, birth_date, treatment_type, visit_type, chief_complaint, diagnosis, average_price]
        )

        # NOTE(review): populate_patient_info above already writes `diagnosis`;
        # this second handler on the same event writes it again from
        # diagnosis_mapping. Confirm both are needed.
        id_card.change(
            fn=update_diagnosis_choices,
            inputs=[id_card],
            outputs=[diagnosis]
        )

        # Picking a sample CID copies it into id_card and fills the form in one step.
        sample_cid_buttons.change(
            fn=populate_sample_data,
            inputs=[sample_cid_buttons],
            outputs=[id_card, title, first_name, last_name, patient_id, birth_date, treatment_type, visit_type, chief_complaint, diagnosis, average_price]
        )

        gr.Markdown("##### รายการยาที่มักใช้ในผู้ป่วยโรคเดียวกัน")
        # Receives the rows produced by recommend_drugs_for_patient via handle_form.
        recommended_drugs_table = gr.DataFrame(
            headers=["SIMB", "Billing Name", "Item ID", "Item Name", "Average Price"], interactive=False)

        # The Submit button and pressing Enter in the remaining-OPD field both
        # run handle_form with the same inputs and outputs.
        submit_button = gr.Button("Submit")
        submit_button.click(
            fn=handle_form,
            inputs=[id_card, title, first_name, last_name, patient_id, birth_date, remaining_opd_year,
                    treatment_type, visit_type, chief_complaint, diag_type, diagnosis, total_net],
            outputs=[recommendation_adjustment, final_result, recommended_drugs_table]
        )

        remaining_opd_year.submit(
            fn=handle_form,
            inputs=[id_card, title, first_name, last_name, patient_id, birth_date, remaining_opd_year,
                    treatment_type, visit_type, chief_complaint, diag_type, diagnosis, total_net],
            outputs=[recommendation_adjustment, final_result, recommended_drugs_table]
        )

model.safetensors:   0%|          | 0.00/471M [00:00<?, ?B/s]



In [5]:
# Start the Gradio server. With debug=True the cell keeps running so errors and
# logs stay visible; share=True additionally publishes the app on a public
# *.gradio.live URL.
# NOTE(review): no `auth` is passed, so anyone with the share link can look up
# the patient records held in `df` — confirm this exposure is intended before
# running with share=True.
demo.launch(debug=True, share=True)

Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://123236b7afbbd9e3f1.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


  response = es.search(index=index_name, body=search_query, timeout='120s')
  response = es.search(index=index_name, body=search_query, timeout='120s')


Keyboard interruption in main thread... closing server.
Killing tunnel 127.0.0.1:7860 <> https://123236b7afbbd9e3f1.gradio.live


