# Use this file to run the modified LLM.

In [None]:
# Install the third-party packages this notebook needs: peft (imported in the
# next cell) and bitsandbytes.
pip install peft bitsandbytes

In [None]:
from peft import PeftConfig, PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer

# Hub identifiers: the base causal LM and the PEFT adapter applied on top of it.
base_model_name = "qu-bit/SuperLLM"
adapter_model_name = "ashishu23/model1"

# The tokenizer is taken from the base model repository.
tokenizer = AutoTokenizer.from_pretrained(base_model_name)

# Load the base model and wrap it with the adapter weights in one step.
model = PeftModel.from_pretrained(
    AutoModelForCausalLM.from_pretrained(base_model_name),
    adapter_model_name,
)

In [None]:
def ask_question(model, tokenizer, question):
    """Generate a text answer to ``question`` using ``model``.

    Args:
        model: Object exposing ``generate(**kwargs)``; if it also exposes a
            ``device`` attribute, the encoded inputs are moved there first.
        tokenizer: Callable that encodes ``question`` (called with
            ``return_tensors="pt"``) and provides ``decode``.
        question: The prompt text to send to the model.

    Returns:
        The first generated sequence, decoded with special tokens skipped.
        NOTE(review): for decoder-only models the generated sequence normally
        starts with the prompt tokens, so the prompt is presumably part of
        the returned text -- confirm against the model in use.
    """
    # Tokenize the question into PyTorch tensors
    inputs = tokenizer(question, return_tensors="pt")

    # Move the encoded inputs to the device the model lives on. Without this,
    # a model placed on a GPU receives CPU tensors and generation fails with
    # a device-mismatch error.
    device = getattr(model, "device", None)
    if device is not None and hasattr(inputs, "to"):
        inputs = inputs.to(device)

    # Generate response with adjusted parameters for longer output
    outputs = model.generate(
        **inputs,
        max_length=1500,       # Increase this number for longer responses
        temperature=0.1,       # Adjusts randomness; lower is more focused
        top_p=0.9,             # Nucleus sampling to keep coherent outputs
        top_k=50,              # Limits the number of highest probability tokens
        do_sample=True,        # Enables sampling to get varied outputs
        num_return_sequences=1,  # Number of sequences to generate
    )

    # Decode the first (and only) sequence and skip special tokens
    answer = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return answer

## To ask a different query, change the question in the cell below

In [None]:
# Prompt to send to the model; edit this text to ask something else.
question = (
    "Who is Narendra Modi? "
    "And give all the details of his family,friends and life."
)

# Run generation and show the decoded answer.
response = ask_question(model, tokenizer, question)
print(response)