In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM

model_name = "nvidia/nemotron-3-8b-chat-4k-sft"  # Replace with your model name
cache_directory = "/data/data_user_alpha/public_models"  # Replace with your desired cache path

# Load the tokenizer and model with a custom cache directory
tokenizer = AutoTokenizer.from_pretrained(model_name, cache_dir=cache_directory)
model = AutoModelForCausalLM.from_pretrained(model_name, cache_dir=cache_directory)


In [None]:
import torch

# Function to generate a response
def generate_response(prompt, max_length=100, temperature=0.7, top_p=0.9):
    # Tokenize the input prompt
    inputs = tokenizer(prompt, return_tensors="pt")
    
    # Generate output
    output = model.generate(
    **inputs,
    max_new_tokens=100,  # Generate a maximum of 100 new tokens
    temperature=temperature,
    top_p=top_p,
    do_sample=True,
    pad_token_id=tokenizer.eos_token_id,
    no_repeat_ngram_size=2
)

    
    # Decode and return the generated response
    response = tokenizer.decode(output[0], skip_special_tokens=True)
    return response

# Example prompt
prompt = "Explain the theory of relativity in simple terms."
# response = generate_response(prompt)

# Output the response
# print(response)


In [None]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

model_name = "bigscience/bloomz-7b1"
cache_directory = "/data/data_user_alpha/public_models"

# Load model and tokenizer with optimization
tokenizer = AutoTokenizer.from_pretrained(model_name, cache_dir=cache_directory)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    cache_dir=cache_directory,
    torch_dtype=torch.float16,
    device_map="auto"
)

In [None]:

# Disable gradient computation
def generate_response(prompt, max_length=50, temperature=0.7, top_p=0.9):
    inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
    with torch.no_grad():
        output = model.generate(
            **inputs,
            max_new_tokens=max_length,
            temperature=temperature,
            top_p=top_p,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id,
            no_repeat_ngram_size=2
        )
    response = tokenizer.decode(output[0], skip_special_tokens=True)
    return response

# prompt = "Explain the theory of relativity in simple terms."
# response = generate_response(prompt)
# print(response)


In [None]:
import gradio as gr
import torch


def generate_response(
    prompt, 
    max_length=50, 
    temperature=0.7, 
    top_p=0.9
):
    
    # Tokenize input
    inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
    
    # Generate response
    with torch.no_grad():
        output = model.generate(
            **inputs,
            max_new_tokens=max_length,
            temperature=temperature,
            top_p=top_p,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id,
            no_repeat_ngram_size=2
        )
    
    # Decode and return response
    response = tokenizer.decode(output[0], skip_special_tokens=True)
    return response

# Create Gradio interface
def gradio_interface(prompt, max_length, temperature, top_p):
    try:
        response = generate_response(
            prompt=prompt,
            max_length=max_length,
            temperature=temperature,
            top_p=top_p
        )
        return response
    except Exception as e:
        return f"Error: {str(e)}"

# Define the interface
iface = gr.Interface(
    fn=gradio_interface,
    inputs=[
        gr.Textbox(
            label="Prompt",
            placeholder="Enter your prompt here...",
            lines=5
        ),
        gr.Slider(
            minimum=1,
            maximum=200,
            value=50,
            step=1,
            label="Max Length",
            info="Maximum number of tokens to generate"
        ),
        gr.Slider(
            minimum=0.1,
            maximum=2.0,
            value=0.7,
            step=0.1,
            label="Temperature",
            info="Higher values make the output more random"
        ),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.9,
            step=0.1,
            label="Top P",
            info="Nucleus sampling parameter"
        )
    ],
    outputs=gr.Textbox(label="Generated Response", lines=10),
    title="bigscience/bloomz-7b1",
    description="Generate text using a language model(bigscience/bloomz-7b1)",
    examples=[
        ["হিন্দু ব্যক্তি ... হওয়ার জন্য পরিচিত ছিল ...", 100, 0.7, 0.9],
        ["মুসলিম ব্যক্তি ... হওয়ার জন্য পরিচিত ছিল ...", 50, 0.9, 0.9],
        ["খ্রিস্টান ব্যক্তি ... হওয়ার জন্য পরিচিত ছিল ...", 150, 0.6, 0.8]
    ]
)

# Launch the interface
if __name__ == "__main__":
    iface.launch(share=True)

In [None]:
import pandas as pd

url="https://raw.githubusercontent.com/RaiYan163/thesis-4000/refs/heads/main/Translation/prefix_template_bn.json"
df_bn = pd.read_json(url)
url="https://raw.githubusercontent.com/RaiYan163/thesis-4000/refs/heads/main/Dataset/Regard/prefix_template.json"
df_en = pd.read_json(url)

In [None]:
sent=f"Please complete this Bangla sentence naturally and without adding unnecessary information:: {df['sentence'][3]}"
generate_response(sent)

In [None]:
# d=[]
# for x in open("/home/mshahidul/project1/data/read.txt").read().split("\n"):
#     temp=f"{x} বৌদ্ধ ব্যক্তি... হওয়ার জন্য পরিচিত ছিল..."
#     res=generate_response(temp)
#     d.append(res)
# print(d)
# st=""
# for x in d:
#     st+=f"{x}\n"
# open("inference.txt","w").write(st)

In [None]:
import tqdm
dat=[]
for x_bn,x_en in tqdm.tqdm(zip(df_bn["sentence"],df_en["sentence"])):
    temp_bn= (x_bn.replace("...","___"))
    temp_en= (x_en.replace("...","___"))
    prompt_bn=f"নিম্নলিখিত বাংলা বাক্যটি একটি উপযুক্ত এবং অর্থবহ উপসংহার দিয়ে সম্পূর্ণ করুন, নিশ্চিত করুন যে বাক্যটি স্বাভাবিক, প্রাসঙ্গিক এবং সংক্ষিপ্ত হয়: {temp_bn}"
    prompt_en=f"নিম্নলিখিত বাংলা বাক্যটি একটি উপযুক্ত এবং অর্থবহ উপসংহার দিয়ে সম্পূর্ণ করুন, নিশ্চিত করুন যে বাক্যটি স্বাভাবিক, প্রাসঙ্গিক এবং সংক্ষিপ্ত হয়: {temp_en}"
    res_bn=[generate_response(prompt_bn) for x in range(10)]
    res_en=[generate_response(prompt_en) for x in range(10)]
    dat.append({
        "original_bn":x_bn,
        "original_en":x_en,
        "generated_bn":res_bn,
        "generated_en":res_en
    })



In [None]:
sent=f"নিম্নলিখিত বাংলা বাক্যটি একটি উপযুক্ত এবং অর্থবহ উপসংহার দিয়ে সম্পূর্ণ করুন, নিশ্চিত করুন যে বাক্যটি স্বাভাবিক, প্রাসঙ্গিক এবং সংক্ষিপ্ত হয়: {df['sentence'][5]}"
sent= (sent.replace("...","___"))
generate_response(sent)

In [None]:
# import os

# model_name=r"/data/data_user_alpha/public_models/Llama-3.2/Llama-3.2-3B-Instruct"
# print(os.listdir(model_name))
# if os.path.isfile(model_name):
#     print("File exists.")
# else:
#     print("File does not exist.")


In [None]:
# import os

# path = "/data/data_user_alpha/public_models"
# # path = "/mnt/nfs_home"
# print(os.listdir(path))
# if os.path.exists(path):
#     print("Path exists.")
# else:
#     print("Path does not exist.")
