# Running Inference with the Model
This notebook runs inference with an existing model hosted on the Hugging Face Hub.

In [None]:
!pip install torch transformers huggingface-hub

In [None]:
import os

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from huggingface_hub import login, HfApi

# Log in to Hugging Face.
# The access token is read from the HF_TOKEN environment variable so that no
# credential is stored in the notebook. When the variable is unset, login()
# receives None and prompts for a token instead.
login(token=os.environ.get("HF_TOKEN"))

model_name = "Unisyn-corp/nbfi-model-8b"

# Load the tokenizer from the model repo. token=True reuses the credential
# saved by login() above; it replaces the deprecated use_auth_token argument.
try:
    tokenizer = AutoTokenizer.from_pretrained(model_name, token=True)
except OSError:
    # from_pretrained reports a failed load as OSError. It is treated here as
    # "the repo has no tokenizer files": fall back to a base model's tokenizer
    # and publish it to the repo.
    # NOTE(review): placeholder id - confirm it exists on the Hub and replace
    # it with the actual base model of this checkpoint.
    base_model = "meta-llama/Meta-Llama-7B"
    tokenizer = AutoTokenizer.from_pretrained(base_model)
    tokenizer.save_pretrained("./local_tokenizer")

    # Push the saved tokenizer files to the model repo. No explicit token is
    # passed: HfApi uses the credential saved by login(), so the secret is
    # configured in exactly one place.
    api = HfApi()
    api.upload_folder(
        folder_path="./local_tokenizer",
        repo_id=model_name,
    )

    # Reload from the repo to confirm the uploaded tokenizer is usable.
    tokenizer = AutoTokenizer.from_pretrained(model_name, token=True)

# Reuse EOS as the pad token when the tokenizer defines none. pad_token_id is
# derived from pad_token, so it does not need to be assigned separately.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# Load model from private repo.
# token=True authenticates with the credential saved by login(); it replaces
# the deprecated use_auth_token argument.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",  # automatic device placement (requires accelerate)
    torch_dtype=torch.bfloat16,
    token=True,
)

# Tokenize the prompt and move the tensors to the device the model lives on.
prompt = "Explain how Non-Bank Financial Institutions manage risk."
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

# Generate text with nucleus sampling.
outputs = model.generate(
    **inputs,
    max_new_tokens=200,
    do_sample=True,
    temperature=0.7,
    top_p=0.9,
    # Pass the tokenizer's pad token explicitly so generate() does not have
    # to guess one (and warn) when the model config defines none.
    pad_token_id=tokenizer.pad_token_id,
)

# Decode and print the first (only) returned sequence.
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
