In [2]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from peft import PeftModel

base_model_id = "/home/st426/system/global_graph/Biomistral-Calme-Instruct-7b"
adapter_path = "./lora-biomistral-neural-invasion"

# 載 tokenizer
tokenizer = AutoTokenizer.from_pretrained(base_model_id, use_fast=False)

# 載 base model (4bit 量化)
from transformers import BitsAndBytesConfig
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
)

model = AutoModelForCausalLM.from_pretrained(
    base_model_id,
    quantization_config=bnb_config,
    device_map="auto",
    torch_dtype=torch.float16,
)


model = PeftModel.from_pretrained(model, adapter_path)

pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
)  


  warn(
`torch_dtype` is deprecated! Use `dtype` instead!


Loading checkpoint shards:   0%|          | 0/8 [00:00<?, ?it/s]

Device set to use cuda:0


In [None]:
INSTRUCTION = """You are a pathology report coding assistant.
Task: Select one sentence from the pathology report that states perineural invasion.

Output format (JSON only):
{
  "reason": "<short reason, one sentence only>",
  "final_code": "<0/1/7/8/9>"
}

Codes:
0 = Not identified
1 = Present
7 = Not mentioned / not evaluable
8 = Not applicable (CIS / lymphoma / CNS tumor / etc.)
9 = Unknown
""" 

test_input = """Pathologic diagnosis: Breast  right  total mastectomy --- Infiltrating duct carcinoma  grade 2 (Nottingham histologic score: Tubular formation: 3 points  Nuclear pleomorphism: 2 points  Mitotic count: 2 points  total score: 7/9)  with micropapillary features. The nipple  skin  and deep fascia are free of tumor. Prognostic and predictive factor: 1. The largest size of invasive carcinoma: 15 mm. 2. Angiolymphatic permeation: Present. 3. Perineural invasion: Not identified. 4. Tumor focality: Single focus of invasive carcinoma. 5. Margins: Margins uninvolved by invasive carcinoma  distance from closest margin: 21 mm in deep fasia. 6. Microcalcifications: Not identified. 7. Lymph node status (combined the result of frozen section): No lymph node submitted or found. 8. Extranodal involvement: Not applicable. 9. Architectural pattern of DCIS: Not applicable. 10. Nuclear grade of DCIS: Not applicable. 11. Necrosis of DCIS: Not applicable. 12. Extensive intraductal component: Not applicable. 13. ER status: See previous report (S1104360). 14. PR status: See previous report (S1104360). 15. Her-2/neu status: See previous report (S1104360). 16. Ki-67 labeling index: See previous report (S1104360). 17. Treatment effect (response to presurgical/neoadjuvant therapy): Not applicable. 18. Pathological TNM stage: pT1cNx (According to the eighth edition  American Joint Committee on Cancer Staging Guidelines for Tumors). 19. TNM descriptors: Not applicable. Gross description: The specimen consists of a breast  16x15x3 cm and 500 gm. The elliptical skin  15x6 cm  has a nipple at its center  which is not unusual grossly. There is a stitch  labeled the 3 o'clock aspect  and 2 stitches  labeled the 12 o'clock aspect. A region with blue dye  4x1.5 cm  is seen at the skin. On serial cut  part of a grayish firm tumor  2x2x1 cm  is noted  1 cm from the nipple at 1 o'clock direction and 2.3 cm to the deep margin. The tumor is uncircumscribed and unencapsulated  infiltrating into surrounding breast tissue. The remaining breast tissue has no remarkable change. The deep fascia is unremarkable. Representative parts for section: A-B) deeo fascia C-E) tumor F) non tumor part breast parenchyma G) nipple and skin H) axillary soft tissue. Note: 1. The type of specimen fixation and processing: Formalin-fixed paraffin-embedded sections. 2. The antibody clone/vendor and detection system used: ER: Monoclonal/Ventana (clone: SP1  lot: D06729); PR: Monoclonal/Ventana (clone: 1E2  lot: D03286); Her-2/neu: Monoclonal/Ventana (clone: 4B5  lot: D07696). 3. Type of method: LSAB. 4. HER-2/neu testing is fixed in 10% neutral buffered formalin for at least 6 hours and no longer than 72 hours. 5. The IHC staining result of HER-2/neu is according to ASCO/CAP Scoring criteria (2018): a. Positive cases (Score:3+) are those with circumferential membrane staining that is complete  and within more than 10% of tumor cells. b. Equivocal or indeterminate cases (Score:2+) are those with weak to moderate complete membrane staining observed in >10% of tumor cells. c. Negative cases are defined as those with no staining is observed or membrane staining that is incomplete and is faint/barely perceptible and within less than or equal to 10% of tumor cells (Score: 0); incomplete membrane staining that is faint/barely perceptible and within more than 10% of tumor cells (Score: 1). 6. This test was developed and its performance characteristics determined by Department of Pathology of Taichung Veterans General Hospital. It has been approved by the U.S. Food and Drug Administration. 7. This case has been peer reviewed by two doctors. #T-04925 #M-85003_2 2 1120 00000F"""

prompt = f"{INSTRUCTION}\n\nInput:\n{test_input}\n\nOutput:\n"

# 產生推理結果
result = pipe(
    prompt,
    max_new_tokens=32,             
    temperature=0.0,                
    do_sample=False,
    eos_token_id=tokenizer.eos_token_id  
)[0]["generated_text"]


print(result)


You are a pathology report coding assistant.
Task: Select one sentence from the pathology report that states perineural invasion.
If none, return "NONE".

Output format (JSON only):
{
  "reason": "<short reason, one sentence only>",
  "final_code": "<0/1/7/8/9>"
}

Codes:
0 = Not identified
1 = Present
7 = Not mentioned / not evaluable
8 = Not applicable (CIS / lymphoma / CNS tumor / etc.)
9 = Unknown


Input:
Pathologic diagnosis: Breast  right  total mastectomy --- Infiltrating duct carcinoma  grade 2 (Nottingham histologic score: Tubular formation: 3 points  Nuclear pleomorphism: 2 points  Mitotic count: 2 points  total score: 7/9)  with micropapillary features. The nipple  skin  and deep fascia are free of tumor. Prognostic and predictive factor: 1. The largest size of invasive carcinoma: 15 mm. 2. Angiolymphatic permeation: Present. 3. Perineural invasion: Not identified. 4. Tumor focality: Single focus of invasive carcinoma. 5. Margins: Margins uninvolved by invasive carcinoma  d