In [5]:
import asyncio
import os
import tempfile

from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.embeddings import SentenceTransformerEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_groq import ChatGroq
from langchain_ollama.llms import OllamaLLM

In [9]:
def load_and_process_data(file_path, chunk_size=2000, chunk_overlap=100,
                          model_name="all-MiniLM-L6-v2"):
    """Load a PDF, split it into chunks, and index the chunks in a FAISS store.

    Args:
        file_path: Path to the PDF file to index.
        chunk_size: Maximum chunk size handed to RecursiveCharacterTextSplitter.
        chunk_overlap: Overlap between consecutive chunks.
        model_name: Sentence-transformer model used to embed the chunks.

    Returns:
        The FAISS vector store built from the document chunks.

    Raises:
        ValueError: If no chunks could be produced from the PDF.
    """
    # Use load() rather than load_and_split(): the latter already chunks the
    # pages with the loader's default splitter, so splitting its output again
    # chunked the document twice with two different configurations.
    pages = PyPDFLoader(file_path).load()

    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap
    )
    splits = text_splitter.split_documents(pages)

    # Fail early with a clear message instead of an opaque error from the
    # vector store when there is nothing to index.
    if not splits:
        raise ValueError(f"No text chunks could be extracted from {file_path!r}")

    embedding = SentenceTransformerEmbeddings(model_name=model_name)
    return FAISS.from_documents(splits, embedding)

In [24]:
# Build the FAISS index for "ML.pdf" (a path relative to the working directory).
# Later cells reuse this `vectordb` for every query.
vectordb = load_and_process_data("ML.pdf")

In [29]:
import time

def response_generator(vectordb, query, llm):
    """Answer ``query`` with a RetrievalQA chain and time the call.

    Args:
        vectordb: Vector store exposing ``as_retriever()``.
        query: The question to ask.
        llm: Language model passed to ``RetrievalQA.from_chain_type``.

    Returns:
        A ``(answer, elapsed_seconds)`` tuple, where ``answer`` is the
        chain's ``"result"`` entry and ``elapsed_seconds`` covers only the
        chain invocation (not chain construction).
    """
    template = """Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer. Use three sentences maximum. Keep the answer as concise as possible. {context} Question: {question} Helpful Answer:"""

    qa_prompt = PromptTemplate(
        input_variables=["context", "question"],
        template=template
    )

    qa_chain = RetrievalQA.from_chain_type(
        llm,
        retriever=vectordb.as_retriever(),
        return_source_documents=True,
        chain_type_kwargs={"prompt": qa_prompt}
    )

    # perf_counter is monotonic, so the measurement cannot be skewed by
    # system clock adjustments the way time.time() can.
    start = time.perf_counter()
    # invoke() replaces the deprecated direct call `qa_chain({...})`.
    result = qa_chain.invoke({"query": query})
    elapsed_time = time.perf_counter() - start

    return result["result"], elapsed_time


In [None]:
# Smoke test: run a single query through the RAG chain with a ChatGroq model.
llm = ChatGroq(model_name="Llama3-8b-8192")
query = "what is machine learning"
# response_generator returns (answer, elapsed_seconds), so `a` is a tuple
# and is printed as one.
a = response_generator(vectordb, query,llm)
print(a)

In [6]:
# Candidate LLM backends. Each line rebinds `llm`, so after the whole cell
# runs only the last assignment (OllamaLLM "llama3.2") remains bound; the
# two earlier objects are constructed and then discarded.
llm = ChatGroq(model_name="Llama3-8b-8192")
llm = OllamaLLM(model="mistral")
llm = OllamaLLM(model="llama3.2")


In [27]:
import pandas as pd

def process_queries(vectordb, queries, llm):
    """Run every query through ``response_generator`` and tabulate the results.

    Args:
        vectordb: Vector store forwarded to ``response_generator``.
        queries: Iterable of question strings.
        llm: Language model forwarded to ``response_generator``.

    Returns:
        A pandas DataFrame with one row per query and the columns
        ``"Query"``, ``"Result"`` and ``"Time Taken (seconds)"``. The columns
        are present even when ``queries`` is empty, so a CSV written from the
        frame always has the same header.
    """
    columns = ["Query", "Result", "Time Taken (seconds)"]
    records = []

    for query in queries:
        answer, time_taken = response_generator(vectordb, query, llm)
        records.append({
            "Query": query,
            "Result": answer,
            "Time Taken (seconds)": time_taken
        })

    # Pin the columns explicitly: a bare DataFrame([]) would have no columns.
    return pd.DataFrame(records, columns=columns)


In [None]:

# Benchmark the ChatGroq model "Llama3-8b-8192" on a fixed set of queries.
# Requires `vectordb` from the indexing cell; `llm` is created in this cell.
queries = [
    "what is Machine Learning",
    "who is the Prime minister of India",
    "what is ensemble learning technique",
    "what is PCA",
    "who is the founder of Tesla"
]

llm = ChatGroq(model_name="Llama3-8b-8192")

# Process the queries and generate the dataset
groq_res = process_queries(vectordb, queries, llm)

# Print the dataset
print(groq_res)

# Save the dataset as a CSV file. to_csv does not create missing parent
# directories, so make sure the output directory exists first.
os.makedirs("llm_comparison_results", exist_ok=True)
groq_res.to_csv("llm_comparison_results/groq_results.csv", index=False)


In [None]:

# Benchmark the Ollama model "llama3.2" on a fixed set of queries.
# Requires `vectordb` from the indexing cell; `llm` is created in this cell.
queries = [
    "what is Machine Learning",
    "who is the Prime minister of India",
    "what is ensemble learning technique",
    "what is PCA",
    "who is the founder of Tesla"
]

llm = OllamaLLM(model="llama3.2")

# Process the queries and generate the dataset
ollama_lamma3_2_res = process_queries(vectordb, queries, llm)

# Print the dataset
print(ollama_lamma3_2_res)

# Save the dataset as a CSV file. to_csv does not create missing parent
# directories, so make sure the output directory exists first.
os.makedirs("llm_comparison_results", exist_ok=True)
ollama_lamma3_2_res.to_csv("llm_comparison_results/ollama_lamma3_2_results.csv", index=False)


In [None]:

# Benchmark the Ollama model "mistral" on a fixed set of queries.
# Requires `vectordb` from the indexing cell; `llm` is created in this cell.
queries = [
    "what is Machine Learning",
    "who is the Prime minister of India",
    "what is ensemble learning technique",
    "what is PCA",
    "who is the founder of Tesla"
]

llm = OllamaLLM(model="mistral")

# Process the queries and generate the dataset
ollama_mistral_res = process_queries(vectordb, queries, llm)

# Print the dataset
print(ollama_mistral_res)

# Save the dataset as a CSV file. to_csv does not create missing parent
# directories, so make sure the output directory exists first.
os.makedirs("llm_comparison_results", exist_ok=True)
ollama_mistral_res.to_csv("llm_comparison_results/ollama_mistral_results.csv", index=False)


In [None]:
import pandas as pd

# Read the saved Groq results back from disk; the bare `df` on the last line
# makes the notebook display the frame.
df = pd.read_csv("llm_comparison_results/groq_results.csv")
df

## HuggingFace Models


In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM

# Load the tokenizer and model
# NOTE(review): from_pretrained is given a hub-style name here, so this
# presumably downloads on first use — confirm access and cache setup.
model_name = "meta-llama/Llama-3.2-1B"  # or "PleIAs/Pleias-nano-1B-RAG"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)



In [39]:
def generate_output(model,tokenizer,input_query):
    """Generate a sampled completion for ``input_query`` and time it.

    Args:
        model: Causal language model exposing ``generate``.
        tokenizer: Tokenizer matching ``model``; called to encode the query
            and used to decode the generated sequence.
        input_query: Prompt text to complete.

    Returns:
        A ``(response, elapsed_seconds)`` tuple. ``response`` is the decoded
        first output sequence with special tokens skipped; the elapsed time
        covers tokenization, generation and decoding.
    """
    # perf_counter is monotonic, which makes it the right clock for durations.
    start_time = time.perf_counter()

    # Encode the caller's query. The previous version overwrote
    # `input_query` with a hard-coded example, so the argument was ignored.
    inputs = tokenizer(input_query, return_tensors="pt")

    # Generate a response. The attention mask is forwarded when the tokenizer
    # provides one, so generate() does not have to infer it.
    output_sequences = model.generate(
        inputs['input_ids'],
        attention_mask=inputs.get('attention_mask'),
        max_length=150,
        num_return_sequences=1,
        do_sample=True,
        top_k=50,
        top_p=0.95,
    )

    # Decode the output
    response = tokenizer.decode(output_sequences[0], skip_special_tokens=True)
    elapsed_time = time.perf_counter() - start_time
    return response, elapsed_time

## Saving Model

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer

In [2]:
# Fetch the tokenizer for `model_name` and save a copy under tokenizer/<model_name>.
# model_name contains a "/", so the resulting path is nested:
# tokenizer/Qwen/Qwen2.5-0.5B-Instruct.
model_name="Qwen/Qwen2.5-0.5B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.save_pretrained(f"tokenizer/{model_name}")

In [None]:
# Fetch the model named by `model_name` (set in the tokenizer cell) and save
# a copy under model/<model_name>.
model = AutoModelForCausalLM.from_pretrained(model_name)
model.save_pretrained(f"model/{model_name}")

## Loading the Saved Model

In [21]:
# Reload tokenizer and model from the directories written by the save cells.
# Relies on `model_name` still being defined in the kernel.
tokenizer = AutoTokenizer.from_pretrained(f"tokenizer/{model_name}")
model = AutoModelForCausalLM.from_pretrained(f"model/{model_name}")


In [22]:
# Generate up to 60 new tokens for a fixed prompt and print the decoded text.
input_text = f"""Your task is to generate poem on the text given below: happy man"""
# Despite its name, `input_ids` holds the full tokenizer output; it is
# unpacked with ** into keyword arguments for generate().
input_ids = tokenizer(input_text, return_tensors="pt")
outputs = model.generate(**input_ids, max_new_tokens= 60)
# No skip_special_tokens argument is passed to decode here.
print(tokenizer.decode(outputs[0]))

Your task is to generate poem on the text given below: happy man
The happy man, who's always smiling,
With a heart full of love and care.
He's not just a face in the crowd,
But a beacon of hope for all.

In his hands, he holds so much,
From a cup of tea to a meal.
A smile that brightens


In [25]:
from transformers import AutoTokenizer, AutoModelForCausalLM

# Specify your model name
model_name="Qwen/Qwen2.5-0.5B-Instruct"

# Load tokenizer and model from the locally saved copies
tokenizer = AutoTokenizer.from_pretrained(f"tokenizer/{model_name}")
model = AutoModelForCausalLM.from_pretrained(f"model/{model_name}")

# Move model to GPU. The device is hard-coded to "cuda", so this cell
# presumably fails on a machine without CUDA — TODO confirm or guard it.
model = model.to("cuda")


In [27]:
# Prepare input text
input_text = f"""Your task is to generate poem on the text given below: happy man"""
# Put the inputs on whatever device the model currently lives on, instead of
# a hard-coded "cuda" that has to be kept in sync with the model-loading cell.
input_ids = tokenizer(input_text, return_tensors="pt").to(model.device)

# Generate output (at most 60 newly generated tokens)
outputs = model.generate(**input_ids, max_new_tokens=60)

# Decode and print the output
print(tokenizer.decode(outputs[0], skip_special_tokens=True))

Your task is to generate poem on the text given below: happy man

The text provided is "happy man". You are required to write a poem about this word. The poem should be written in English and should not contain any other words apart from those mentioned in the text.

Please provide me with a poem that describes the concept of a "happy man" in your


In [16]:
def generate_output(input_text):
    """Generate up to 60 new tokens for ``input_text`` on the GPU and print them.

    Uses the notebook-level ``model`` and ``tokenizer``.

    Args:
        input_text: Prompt text to complete.

    Returns:
        None. The decoded text (special tokens skipped) is printed.
    """
    device = "cuda"

    # Bind the moved model to a new local name. Assigning to `model` inside
    # the function made it a local variable, so reading it on the same line
    # raised UnboundLocalError before anything ran.
    gpu_model = model.to(device)

    # Encode the caller's prompt (the previous version overwrote `input_text`
    # with a hard-coded question) and move the tensors to the same device.
    encoded = tokenizer(input_text, return_tensors="pt").to(device)

    # Generate output
    outputs = gpu_model.generate(**encoded, max_new_tokens=60)

    # Decode and print the output
    print(tokenizer.decode(outputs[0], skip_special_tokens=True))


What is machine learning? How does it work?

Machine Learning is a subfield of artificial intelligence that involves the development and application of algorithms to enable computers to learn from data without being explicitly programmed. It allows machines to automatically improve their performance based on new information or experience, rather than being manually programmed.

The basic idea behind machine
