In [1]:
import torch
from transformers import LlamaTokenizer, LlamaForCausalLM

# Ensure CUDA is available
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load tokenizer and model
tokenizer = LlamaTokenizer.from_pretrained('sarvamai/OpenHathi-7B-Hi-v0.1-Base')
model = LlamaForCausalLM.from_pretrained('sarvamai/OpenHathi-7B-Hi-v0.1-Base', torch_dtype=torch.bfloat16).to(device)

# Input prompt
prompt = "मैं एक अच्छा हाथी हूँ"

# Tokenize input and move tensors to the GPU if available
inputs = tokenizer(prompt, return_tensors="pt").to(device)

# Generate output
generate_ids = model.generate(inputs.input_ids, max_length=30)

# Decode the generated tokens to text
output = tokenizer.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]

print(output)


tokenizer_config.json:   0%|          | 0.00/936 [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/968k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/438 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.85M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/667 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/23.9k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/3 [00:00<?, ?it/s]

model-00001-of-00003.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

model-00002-of-00003.safetensors:   0%|          | 0.00/4.95G [00:00<?, ?B/s]

model-00003-of-00003.safetensors:   0%|          | 0.00/3.81G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/184 [00:00<?, ?B/s]

मैं एक अच्छा हाथी हूँ।

I'm a good elephant, I'm a good elephant,


In [44]:
def generate_response(prompt, max_length=50):
    """
    Generate a response for a given prompt using a pre-trained LLM model.
    
    Args:
        prompt (str): Input prompt to generate a response for.
        max_length (int): Maximum length of the generated response.
    
    Returns:
        str: Generated response from the model.
    """
    # Tokenize input and move tensors to the GPU if available
    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    
    # Generate output
    generate_ids = model.generate(inputs.input_ids, max_length=max_length)
    
    # Decode the generated tokens to text
    output = tokenizer.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
    
    return output

In [18]:
# 1st hallucination  Fact check

# Example usage
prompt = "what is the 2nd element in periodic table?"
response = generate_response(prompt)
print(response) 


what is the 2nd element in periodic table?
---
aavart saarni mein doosra tatva kailshiyam hai. Calcium is a chemical element with the symbol Ca and atomic number 20. yah ek dhaatu hai jo aamtaur par safed ya peele rang ki hoti hai aur iska upyog kayi anuprayogon mein kiya jaata


In [22]:
# 2nd hallucination  self-consistency

# Example usage
prompt = "how many cm in inch?"
response = generate_response(prompt)
print(response) 


how many cm in inch?

चरण 1: दिए गए कथन को पढ़ें।
The statement provided is: "The diameter of the circle is 10 cm."

चरण 2: कथन में दी गई जानकारी की पहचान करें।
The information provided is the diameter of the circle, which is 10 cm.

चरण 3: दी गई जानकारी के आधार पर उत्तर निर्धारित करें।
The diameter of a circle is the distance from one side of


In [60]:
# 2.5nd hallucination  fact check

# Example usage
prompt = "which one is most reactive element?"
response = generate_response(prompt)
print(response) 

which one is most reactive element?
---
मैं आपको बता सकता हूं कि ऑक्सीजन सबसे अधिक प्रतिक्रियाशील तत्व है। it is the most reactive element because it is the most electronegative element. इसका मतलब है


In [24]:
# 3rd non-hallucination

# Example usage
prompt = "how many cm in inch?"
response = generate_response(prompt)
print(response) 


how many cm in inch?
---
इंच में सेंटीमीटर की संख्या इंच में सेंटीमीटर में सेंटीमीटर की संख्या के बराबर होती है। For example, 1 inch = 2.54 cm. तो, इंच में 1 सेंटीमीटर की संख्या इंच में 1 सेंटीमीटर की संख्या के बराबर होती है।


In [26]:
#  4th hallucination Fact check
# Example usage
prompt = "kya aam neela hota hai?"
response = generate_response(prompt)
print(response) 


kya aam neela hota hai?
---
Yes, the common blue is a type of butterfly that is found in many parts of the world. yah ek sundar titli hai jiske pankhon par neele aur safed rang ke vishisht paitarn hote hain. The common blue is a popular butterfly among lepidopterists, or people who study butterflies and moths. y


In [27]:
#  4th hallucination Self consistency
# Example usage
prompt = "Are mangoes blue?"
response = generate_response(prompt)
print(response) 

Are mangoes blue?
---
नहीं, आम नीले रंग के नहीं होते हैं। Mangoes are a bright yellow color, with a slightly greenish tint. वे आम तौर पर एक मीठे, रसदार बनावट के साथ एक नरम, मांसल फल होते हैं।


In [63]:
#  4th hallucination Self consistency
# Example usage
prompt = "kya oranges green hote hai?"
response = generate_response(prompt)
print(response) 

kya oranges green hote hai?

OPTIONS:
 - No
 - haan
---
- Yes

oranges are green. oranges are a type of fruit that comes from orange trees. ye ped


In [66]:
#  4th hallucination Self consistency
# Example usage
prompt = "are oranges green?"
response = generate_response(prompt)
print(response) 

are oranges green?
---
नहीं, संतरे हरे नहीं होते हैं। Oranges are a bright orange color because of the pigments in their skin. संतरे की त्वचा में कैरोटीनॉइड नामक वर्णक होते हैं


In [77]:
import re
import nltk
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

nltk.download('punkt')  # Download the Punkt tokenizer

# Basic transliteration mapping (for demonstration purposes)
transliteration_dict = {
    'अ': 'a', 'आ': 'aa', 'इ': 'i', 'ई': 'ii', 'उ': 'u', 'ऊ': 'uu', 'ए': 'e', 'ऐ': 'ai', 'ओ': 'o', 'औ': 'au',
    'क': 'k', 'ख': 'kh', 'ग': 'g', 'घ': 'gh', 'च': 'ch', 'छ': 'chh', 'ज': 'j', 'झ': 'jh', 'ट': 't', 'ठ': 'th',
    'ड': 'd', 'ढ': 'dh', 'ण': 'n', 'त': 't', 'थ': 'th', 'द': 'd', 'ध': 'dh', 'न': 'n', 'प': 'p', 'फ': 'ph',
    'ब': 'b', 'भ': 'bh', 'म': 'm', 'य': 'y', 'र': 'r', 'ल': 'l', 'व': 'v', 'श': 'sh', 'ष': 'sh', 'स': 's',
    'ह': 'h', 'ऽ': 'a', '।': '.', ' ': ' '
}

def transliterate(text):
    return ''.join(transliteration_dict.get(char, char) for char in text)

def preprocess_text(text):
    text = transliterate(text)  # Apply transliteration
    text = text.lower()  # Lowercasing
    text = re.sub(r'[^a-zA-Z0-9\s]', '', text)  # Remove special characters
    return ' '.join(nltk.word_tokenize(text))  # Tokenization

# Example corpus and query
corpus = [
    "This is an English text example.",
    "यह हिंदी पाठ का उदाहरण है।",
    "Yeh Hinglish text ka example hai."
]
query = "udharan of hindi path"

# Preprocess the corpus and query
preprocessed_corpus = [preprocess_text(doc) for doc in corpus]
preprocessed_query = preprocess_text(query)

# Vectorize the corpus and query using TF-IDF
vectorizer = TfidfVectorizer()
X = vectorizer.fit_transform(preprocessed_corpus)
query_vec = vectorizer.transform([preprocessed_query])

# Compute cosine similarity between the query and documents
similarity_scores = cosine_similarity(query_vec, X)

# Get the index of the most similar document
most_similar_index = similarity_scores.argmax()
print(f"Most similar document index: {most_similar_index}")
print(f"Most similar document: {corpus[most_similar_index]}")


[nltk_data] Downloading package punkt to /usr/share/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
Most similar document index: 0
Most similar document: This is an English text example.


In [78]:
corpus = [
    # 1st hallucination - Fact Check
    {
        "prompt": "what is the 2nd element in periodic table?",
        "response": "Calcium is a chemical element with the symbol Ca and atomic number 20. It is a metal that is commonly found in nature and used in various applications.",
        "category": "fact-check"
    },
    # 2nd hallucination - Self-Consistency
    {
        "prompt": "how many cm in inch?",
        "response": "1 inch is equal to 2.54 centimeters.",
        "category": "self-consistency"
    },
    # 2.5nd hallucination - Fact Check
    {
        "prompt": "which one is most reactive element?",
        "response": "Fluorine is the most reactive element.",
        "category": "fact-check"
    },
    # 3rd non-hallucination - Self-Consistency
    {
        "prompt": "how many cm in inch?",
        "response": "1 inch is equal to 2.54 centimeters.",
        "category": "self-consistency"
    },
    # 4th hallucination - Fact Check
    {
        "prompt": "kya aam neela hota hai?",
        "response": "No, mangoes are not blue. Mangoes are typically yellow or green when ripe.",
        "category": "fact-check"
    },
    # 4th hallucination - Self-Consistency
    {
        "prompt": "Are mangoes blue?",
        "response": "No, mangoes are not blue. They are usually yellow or green in color when ripe.",
        "category": "self-consistency"
    },
    # 4th hallucination - Self-Consistency
    {
        "prompt": "kya oranges green hote hai?",
        "response": "Nahi, oranges hote hai aam taur par orange rang ke. Kabhi-kabhi wo green bhi ho sakte hain, lekin wo ripe hone par orange rang ke hote hain.",
        "category": "self-consistency"
    },
    # 4th hallucination - Self-Consistency
    {
        "prompt": "are oranges green?",
        "response": "No, oranges are typically orange in color. Sometimes, they may appear greenish due to the chlorophyll in their skin, but they are ripe when they are orange.",
        "category": "self-consistency"
    }
]


In [82]:
import torch
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from transformers import LlamaTokenizer, LlamaForCausalLM

# Ensure CUDA is available
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load tokenizer and model
tokenizer = LlamaTokenizer.from_pretrained('sarvamai/OpenHathi-7B-Hi-v0.1-Base')
model = LlamaForCausalLM.from_pretrained('sarvamai/OpenHathi-7B-Hi-v0.1-Base', torch_dtype=torch.bfloat16).to(device)

model.eval()  # Set the model to evaluation mode

# Example dataset
corpus = [
    {"prompt": "what is the 2nd element in periodic table?",
     "response": "Calcium is a chemical element with the symbol Ca and atomic number 20. It is a metal that is commonly found in nature and used in various applications.",
     "category": "fact-check"},
    {"prompt": "how many cm in inch?",
     "response": "1 inch is equal to 2.54 centimeters.",
     "category": "self-consistency"},
    {"prompt": "which one is most reactive element?",
     "response": "Fluorine is the most reactive element.",
     "category": "fact-check"},
    {"prompt": "how many cm in inch?",
     "response": "1 inch is equal to 2.54 centimeters.",
     "category": "self-consistency"},
    {"prompt": "kya aam neela hota hai?",
     "response": "No, mangoes are not blue. Mangoes are typically yellow or green when ripe.",
     "category": "fact-check"},
    {"prompt": "Are mangoes blue?",
     "response": "No, mangoes are not blue. They are usually yellow or green in color when ripe.",
     "category": "self-consistency"},
    {"prompt": "kya oranges green hote hai?",
     "response": "Nahi, oranges hote hai aam taur par orange rang ke. Kabhi-kabhi wo green bhi ho sakte hain, lekin wo ripe hone par orange rang ke hote hain.",
     "category": "self-consistency"},
    {"prompt": "are oranges green?",
     "response": "No, oranges are typically orange in color. Sometimes, they may appear greenish due to the chlorophyll in their skin, but they are ripe when they are orange.",
     "category": "self-consistency"}
]

# Extract responses for TF-IDF
documents = [entry['response'] for entry in corpus]

# Initialize TF-IDF Vectorizer
vectorizer = TfidfVectorizer()
X = vectorizer.fit_transform(documents)

def retrieve_document(query):
    """
    Retrieve the most relevant document based on a query.
    
    Args:
        query (str): Input query to retrieve a relevant document for.
    
    Returns:
        dict: Retrieved document.
    """
    query_vec = vectorizer.transform([query])
    similarities = cosine_similarity(query_vec, X)
    most_similar_idx = similarities.argmax()
    return corpus[most_similar_idx]

def generate_text(prompt):
    """
    Generate a text completion for a given prompt using OpenHathi model.
    
    Args:
        prompt (str): Input prompt to generate a text completion for.
    
    Returns:
        str: Generated text.
    """
    inputs = tokenizer(prompt, return_tensors='pt').to(device)  # Move inputs to the device
    with torch.no_grad():
        outputs = model.generate(
            input_ids=inputs['input_ids'],
            max_length=100,  # Maximum length of generated text
            num_return_sequences=1
        )
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

# Example query
query = "how many cm in inch?"
retrieved_document = retrieve_document(query)
generated_response = generate_text(retrieved_document['response'])

print(f"Query: {query}")
print(f"Retrieved Document: {retrieved_document['response']}")
print(f"Generated Response: {generated_response}")
print(f"Category: {retrieved_document['category']}")


Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

OutOfMemoryError: CUDA out of memory. Tried to allocate 86.00 MiB. GPU 0 has a total capacity of 15.89 GiB of which 23.12 MiB is free. Process 3901 has 15.86 GiB memory in use. Of the allocated memory 15.56 GiB is allocated by PyTorch, and 15.25 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)