In [2]:
from transformers import AutoTokenizer, AutoModel
from langchain.llms import HuggingFacePipeline
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline, AutoModelForSeq2SeqLM, DistilBertTokenizer, DistilBertModel,BertTokenizer, BertModel
from langchain import PromptTemplate, HuggingFaceHub, LLMChain

  from .autonotebook import tqdm as notebook_tqdm


In [7]:
# Check for GPU availability
if torch.cuda.is_available():
    device = torch.device("cuda")  # Move model and tensors to GPU
    print("Using GPU for computations")
else :
    print("GPU unaviable")

Using GPU for computations


In [8]:
tokenizer = AutoTokenizer.from_pretrained("medicalai/ClinicalBERT")
model = AutoModel.from_pretrained("medicalai/ClinicalBERT").to(device)

In [10]:
pipe = pipeline(
    "text-generation", 
    model=model, 
    tokenizer=tokenizer,
    torch_dtype=torch.bfloat16,
    max_length=100,
    trust_remote_code=True,
    device="cuda" if torch.cuda.is_available() else "cpu"  # Explicit device assignment

)

local_llm = HuggingFacePipeline(pipeline=pipe)

The model 'DistilBertModel' is not supported for text-generation. Supported models are ['BartForCausalLM', 'BertLMHeadModel', 'BertGenerationDecoder', 'BigBirdForCausalLM', 'BigBirdPegasusForCausalLM', 'BioGptForCausalLM', 'BlenderbotForCausalLM', 'BlenderbotSmallForCausalLM', 'BloomForCausalLM', 'CamembertForCausalLM', 'LlamaForCausalLM', 'CodeGenForCausalLM', 'CpmAntForCausalLM', 'CTRLLMHeadModel', 'Data2VecTextForCausalLM', 'ElectraForCausalLM', 'ErnieForCausalLM', 'FalconForCausalLM', 'FuyuForCausalLM', 'GemmaForCausalLM', 'GitForCausalLM', 'GPT2LMHeadModel', 'GPT2LMHeadModel', 'GPTBigCodeForCausalLM', 'GPTNeoForCausalLM', 'GPTNeoXForCausalLM', 'GPTNeoXJapaneseForCausalLM', 'GPTJForCausalLM', 'LlamaForCausalLM', 'MarianForCausalLM', 'MBartForCausalLM', 'MegaForCausalLM', 'MegatronBertForCausalLM', 'MistralForCausalLM', 'MixtralForCausalLM', 'MptForCausalLM', 'MusicgenForCausalLM', 'MvpForCausalLM', 'OpenLlamaForCausalLM', 'OpenAIGPTLMHeadModel', 'OPTForCausalLM', 'PegasusForCausalL

In [9]:
# Input text
text = "i got stomach ache can you help me"

# Tokenize input text
inputs = tokenizer(text, return_tensors='pt').to(device)

# Forward pass through the model
with torch.no_grad():
    outputs = model(**inputs)

# Get the output embeddings
last_hidden_states = outputs.last_hidden_state

In [25]:
# Convert the last hidden states tensor to a numpy array for easier inspection
last_hidden_states_np = last_hidden_states.cpu().numpy()

# Convert the token IDs back to tokens using the tokenizer
tokens = tokenizer.convert_ids_to_tokens(inputs['input_ids'][0])

# Get the indexes of the actual tokens (excluding special tokens like [CLS], [SEP], etc.)
actual_token_indexes = [i for i, token in enumerate(tokens) if token != '[PAD]']

# Extract the actual hidden states for actual tokens
actual_hidden_states = last_hidden_states_np[0, actual_token_indexes]

# Convert hidden states back to text
decoded_text = tokenizer.decode(inputs['input_ids'][0][actual_token_indexes])

# Print the decoded text
print("Decoded text:", decoded_text)

Decoded text: [CLS] i got stomach ache can you help me [SEP]


In [10]:
# Define your medical text
text = "i got stomach ache can you help me"



# Tokenize the text and convert it to tensors (for model input)
encoded_input = tokenizer(text, return_tensors="pt").to(device)

# Get the sentence embedding
with torch.no_grad():
  # Pass the encoded input to the model
  output = model(**encoded_input)

# Access the pooled output (sentence embedding)
sentence_embedding = output[0][:, 0, :]  # [CLS] token embedding for sentence representation

# Print the sentence embedding size
print(sentence_embedding.size())

torch.Size([1, 768])


In [26]:
def simulate_chat(input_question):
    # Tokenize input question
    input_ids = tokenizer.encode(input_question, return_tensors="pt")
    
    # Perform inference
    with torch.no_grad():
        outputs = model(input_ids)
    
    # Get answer span
    start_scores = outputs.start_logits
    end_scores = outputs.end_logits
    
    # Find the answer span with the highest probability
    answer_start = torch.argmax(start_scores)
    answer_end = torch.argmax(end_scores)
    
    # Decode answer tokens
    answer_tokens = input_ids[0][answer_start:answer_end+1]
    answer = tokenizer.decode(answer_tokens)
    
    return answer