In [1]:
!pip install langchain
!pip install transformers
!pip install accelerate
!pip install bitsandbytes
!pip install --upgrade pip
!pip install --upgrade langchain
!pip install langchain_community
!pip list | grep langchain
!pip list | grep langchain_community

!pip install -U langchain-huggingface

[0mlangchain                 0.2.12
langchain-community       0.2.11
langchain-core            0.2.29
langchain-huggingface     0.0.3
langchain-text-splitters  0.2.2
[0m

In [2]:
import os
import langchain

### prompts
from langchain import PromptTemplate, LLMChain

### models
# from langchain.llms import HuggingFacePipeline
# from langchain.embeddings import HuggingFaceInstructEmbeddings


import torch
import transformers
from transformers import (
    AutoTokenizer, AutoModelForCausalLM,
    BitsAndBytesConfig,
    pipeline
)


# Checkpoint to evaluate. Previously tried alternatives are kept for reference:
#model = "/kaggle/input/llama-2/pytorch/7b-chat-hf/1"
#model = "/kaggle/input/llama-3/transformers/8b-chat-hf/1"
# model = "Undi95/Meta-Llama-3-8B-hf"
model = "meta-llama/Meta-Llama-3-8B"

# Hugging Face access token, read from the environment instead of being
# committed with the notebook. When HF_TOKEN is unset this is None and the
# `token` argument is effectively left unspecified.
# SECURITY: a literal token used to be hard-coded in this cell; treat it as
# leaked and revoke it in the Hugging Face account settings.
hf_token = os.environ.get("HF_TOKEN")

tokenizer = AutoTokenizer.from_pretrained(model, token=hf_token)

# 4-bit NF4 quantisation with double quantisation; matmuls are computed in fp16.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=True,
)

model_llama = AutoModelForCausalLM.from_pretrained(
    model,
    quantization_config=bnb_config,
    device_map='auto',
    # NOTE(review): kept for backward compatibility. It is not obvious that a
    # `do_sample` passed here reaches the generation config - confirm, and
    # prefer passing `do_sample=False` to `generate()` directly.
    do_sample=False,
    token=hf_token,
)

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

In [3]:
import json
import numpy as np
import torch

def read_question_answer_file(file_path):
    """Read a JSON Lines file and return its records as a list.

    Args:
        file_path: Path to a UTF-8 encoded file holding one JSON document
            per line.

    Returns:
        A list with one parsed object per non-blank line, in file order.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    with open(file_path, 'r', encoding='utf-8') as file:
        # Blank or whitespace-only lines (for example an extra newline at the
        # end of the file) carry no record; skip them rather than letting
        # json.loads fail on an empty document.
        return [json.loads(line) for line in file if line.strip()]

# Location of the JSONL question/answer file; replace with the path to your own copy.
dataset_path = 'med_data/phrases_no_exclude_train.jsonl'
questions_data = read_question_answer_file(dataset_path)

# Peek at the first record: question text, gold answer key, then the options mapping.
first_record = questions_data[0]
for field in ('question', 'answer_idx', 'options'):
    print(first_record[field])

A 23-year-old pregnant woman at 22 weeks gestation presents with burning upon urination. She states it started 1 day ago and has been worsening despite drinking more water and taking cranberry extract. She otherwise feels well and is followed by a doctor for her pregnancy. Her temperature is 97.7°F (36.5°C), blood pressure is 122/77 mmHg, pulse is 80/min, respirations are 19/min, and oxygen saturation is 98% on room air. Physical exam is notable for an absence of costovertebral angle tenderness and a gravid uterus. Which of the following is the best treatment for this patient?
D
{'A': 'Ampicillin', 'B': 'Ceftriaxone', 'C': 'Doxycycline', 'D': 'Nitrofurantoin'}


In [3]:
# Use the first record as a single-example smoke test of the prompt.
first_example = questions_data[0]
question = first_example['question']

# Build the option list from the record itself instead of a hand-typed copy, so
# it cannot drift from the data. One "<key>. <text>" entry per line, wrapped in
# a leading and a trailing newline.
options = "\n" + "\n".join(
    f"{key}. {value}" for key, value in first_example['options'].items()
) + "\n"

In [4]:
# Prompt layout: the question followed by the instruction, a blank line, the
# options, another blank line, and finally the "#Answer:" cue that the model is
# expected to complete.
templateX = (
    "Question: {question}[INST]Select the correct option only. No explanation required[/INST]\n"
    "\n"
    "Options:{options}\n"
    "\n"
    "#Answer:"
)

prompt_template = PromptTemplate(
    template=templateX,
    input_variables=["question", "options"],
)

# Fill the template with the single example prepared earlier.
promptX = prompt_template.format(question=question, options=options)

In [5]:
# Reuse EOS as the pad token and pad on the left, so any padding sits before
# the prompt and the generated token directly follows the "#Answer:" cue.
tokenizer.padding_side = 'left'
tokenizer.pad_token = tokenizer.eos_token

# A single prompt needs no padding at all; padding it out to max_length only
# makes the forward pass process pad tokens. Truncation is kept as a cap.
inputs = tokenizer(promptX, return_tensors='pt', truncation=True, max_length=1024).to(model_llama.device)

outputs = model_llama.generate(
    **inputs,
    max_new_tokens=1,                      # only the option letter is wanted
    do_sample=False,                       # explicit greedy decoding -> reproducible answer
    pad_token_id=tokenizer.pad_token_id,   # avoids the "Setting `pad_token_id`" warning
)

# The decoded text contains the prompt followed by the one generated token.
response = tokenizer.decode(outputs[0], skip_special_tokens=True)
print(response)

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Question: A 23-year-old pregnant woman at 22 weeks gestation presents with burning upon urination. She states it started 1 day ago and has been worsening despite drinking more water and taking cranberry extract. She otherwise feels well and is followed by a doctor for her pregnancy. Her temperature is 97.7°F (36.5°C), blood pressure is 122/77 mmHg, pulse is 80/min, respirations are 19/min, and oxygen saturation is 98% on room air. Physical exam is notable for an absence of costovertebral angle tenderness and a gravid uterus. Which of the following is the best treatment for this patient?[INST]Select the correct option only. No explanation required[/INST]

Options:
A. Ampicillin
B. Ceftriaxone
C. Doxycycline
D. Nitrofurantoin


#Answer: D


In [6]:
# The prediction is whatever follows the last "#Answer:" cue in the decoded text.
answer_marker = '#Answer:'
position = response.rfind(answer_marker)

if position == -1:
    # Cue not present in the decoded text (possibly because the prompt was
    # truncated): report no prediction instead of slicing an unrelated part
    # of the response.
    prediction = ''
else:
    start = position + len(answer_marker)
    # Two characters are enough for an optional leading space plus the letter.
    prediction = response[start:start + 2].strip()

prediction

'D'

In [4]:
import json
import torch
from torch.utils.data import Dataset, DataLoader
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, pipeline
from tqdm import tqdm  # For progress bar

class QuestionAnswerDataset(Dataset):
    """Map-style dataset over multiple-choice question records.

    Each record is indexed with the keys ``'question'``, ``'options'`` (a
    mapping from option key to option text) and ``'answer_idx'``.
    """

    def __init__(self, questions_data):
        # Keep a reference to the records; nothing is copied or pre-processed.
        self.questionData = questions_data

    def __len__(self):
        """Return the number of records."""
        return len(self.questionData)

    def __getitem__(self, idx):
        """Return ``(question, options_str, answer)`` for the record at ``idx``.

        ``options_str`` lists the options one per line as ``"<key>. <text>"``.
        """
        record = self.questionData[idx]
        question_text = record['question']

        option_lines = []
        for label, text in record['options'].items():
            option_lines.append(f"{label}. {text}")

        return question_text, "\n".join(option_lines), record['answer_idx']

# Wrap the parsed records in a Dataset and batch them in their original order.
dataset = QuestionAnswerDataset(questions_data)
dataloader = DataLoader(
    dataset,
    batch_size=8,   # adjust as needed
    shuffle=False,
)

In [5]:
# Prompt used for the batched evaluation: the question followed by the
# instruction, a blank line, the options, another blank line, and the
# "#Answer:" cue that the model is expected to complete.
template = (
    "Question: {question}[INST]Select the correct option only. No explanation required[/INST]\n"
    "\n"
    "Options:{options}\n"
    "\n"
    "#Answer:"
)

prompt = PromptTemplate(
    template=template,
    input_variables=["question", "options"],
)

In [None]:
# Running tallies, re-initialised here so that re-running the cell starts from zero.
correct_predictions = 0
total_predictions = 0
responses = []   # parsed prediction for every example, in dataset order
answers = []     # gold answer keys, aligned with `responses`

# Tokenizer settings do not change between batches, so set them once.
# Left padding keeps every prompt flush against the position where the new
# token is generated.
tokenizer.padding_side = 'left'
tokenizer.pad_token = tokenizer.eos_token

answer_marker = '#Answer:'

for batch in tqdm(dataloader):
    questions, options_strs, answer_idxs = batch
    prompts = [
        prompt.format(question=question, options=options_str)
        for question, options_str in zip(questions, options_strs)
    ]

    inputs = tokenizer(
        prompts,
        return_tensors='pt',
        padding=True,
        truncation=True,
        max_length=1500,
    ).to(model_llama.device)

    with torch.no_grad():
        outputs = model_llama.generate(
            **inputs,
            max_new_tokens=1,                      # only the option letter is wanted
            do_sample=False,                       # explicit greedy decoding -> reproducible scores
            pad_token_id=tokenizer.pad_token_id,   # avoids one warning per batch
        )

    # Each decoded string contains the prompt followed by the generated token.
    decoded_outputs = tokenizer.batch_decode(outputs, skip_special_tokens=True)

    for decoded_output, answer in zip(decoded_outputs, answer_idxs):
        position = decoded_output.rfind(answer_marker)
        if position == -1:
            # Cue missing from the decoded text (possibly a prompt truncated at
            # max_length): record an empty prediction instead of slicing an
            # unrelated part of the output.
            answer_pred = ''
        else:
            start = position + len(answer_marker)
            # Two characters cover an optional leading space plus the letter.
            answer_pred = decoded_output[start:start + 2].strip()

        if answer == answer_pred:
            correct_predictions += 1

        responses.append(answer_pred)
        answers.append(answer)
        total_predictions += 1

  0%|          | 0/1273 [00:00<?, ?it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  0%|          | 1/1273 [00:01<21:59,  1.04s/it]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  0%|          | 2/1273 [00:01<16:15,  1.30it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  0%|          | 3/1273 [00:02<14:25,  1.47it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  0%|          | 4/1273 [00:02<13:00,  1.63it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  0%|          | 5/1273 [00:03<12:52,  1.64it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  0%|          | 6/1273 [00:04<13:39,  1.55it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  1%|          | 7/1273 [00:04<13:07,  1.61it/s]Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
  1%|          | 8/1273 [00:05<12:50,  1

In [10]:
# Fraction of scored examples whose parsed prediction matched the gold answer key.
accuracy = correct_predictions / len(responses)
print(f"Accuracy: {accuracy:.2%}")

# Display the raw number of correct predictions as the cell output.
correct_predictions

Accuracy: 45.04%


4584

In [11]:
# Number of examples scored so far: the evaluation loop appends exactly one
# entry to `responses` per example.
len(responses)

10178