In [None]:
# Testing the model
from unsloth import FastLanguageModel
import torch

# Load the fine-tuned model and its tokenizer from the local save directory.
print("Loading saved model and tokenizer...")
MODEL_PATH = "university_faq_model"

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = MODEL_PATH,
    max_seq_length = 2048,
    dtype = None,  # None = let Unsloth auto-detect the dtype (per Unsloth docs)
    load_in_4bit = True,
    local_files_only=True  # load from disk only; do not try to download
)

# Switch the model to inference mode.
# for_inference() patches the model in place, and Unsloth's documented usage
# does not assign its result. Rebinding `model` to the return value breaks on
# Unsloth releases where the call returns None, so call it for its side effect.
print("Preparing model for inference...")
FastLanguageModel.for_inference(model)

# Attach the Llama 3.1 chat template so prompts can be built with
# tokenizer.apply_chat_template().
from unsloth.chat_templates import get_chat_template
tokenizer = get_chat_template(
    tokenizer,
    chat_template = "llama-3.1",
)

def test_model(question, model, tokenizer):
    # Format the conversation using the chat template
    conversation = [
        {"role": "user", "content": question}
    ]
    prompt = tokenizer.apply_chat_template(
        conversation,
        tokenize=False,
        add_generation_prompt=True
    )
    
    # Tokenize the formatted prompt
    inputs = tokenizer(
        prompt,
        return_tensors="pt",
        add_special_tokens=True
    ).to(model.device)
    
    outputs = model.generate(
        **inputs,
        max_new_tokens=512,
        temperature=0.7,
        top_p=0.95,
        do_sample=True
    )
    
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return response

# Smoke-test the trained model on a few sample questions.
# test_model() samples (do_sample=True), so seed torch's RNG first to make the
# printed responses reproducible from one run of this cell to the next.
SEED = 3407
torch.manual_seed(SEED)

print("\nTesting the trained model:")
test_questions = [
    "What departments are available?",
    "What is the semester fee for Computer Science?",
    "কোন কোন ডিপার্টমেন্ট আছে?",  # Bengali test
]

# Print each question before generating, so that a failure during generation
# is easy to attribute to the question that caused it.
for question in test_questions:
    print("\nQuestion:", question)
    response = test_model(question, model, tokenizer)
    print("Response:", response)

In [None]:
# Expose the local server on port 8000 through a public ngrok tunnel
from pyngrok import ngrok
import os

# Configure the ngrok auth token without embedding the secret in the notebook.
# The token is read from the NGROK_AUTHTOKEN environment variable; if that is
# not set, it is requested interactively (input is not echoed).
# SECURITY: any token that was ever committed in this notebook should be
# treated as leaked — revoke it in the ngrok dashboard and issue a new one.
from getpass import getpass

ngrok_auth_token = os.environ.get("NGROK_AUTHTOKEN") or getpass("ngrok auth token: ")
if not ngrok_auth_token:
    raise RuntimeError(
        "No ngrok auth token provided; set NGROK_AUTHTOKEN or enter it at the prompt."
    )
ngrok.set_auth_token(ngrok_auth_token)

# Start ngrok tunnel
public_url = ngrok.connect(8000)
# ngrok.connect() may return a tunnel object rather than a plain string;
# print its public_url attribute when present so the bare URL is shown.
print(f"\nPublic URL: {getattr(public_url, 'public_url', public_url)}")
print("You can now use this URL in Postman")
print("- GET /   (for status)")
print("- POST /generate   (for generating responses)")