In [1]:
#!pip install langchain-community==0.2.4 langchain==0.2.3 faiss-cpu==1.8.0 unstructured==0.14.5 unstructured[pdf]==0.14.5 transformers==4.41.2 sentence-transformers==3.0.1

In [30]:
import os

from langchain_community.llms import Ollama
from langchain.document_loaders import UnstructuredFileLoader
from langchain_community.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.chains import RetrievalQA
import random


In [3]:
# loading the LLM
llm = Ollama(
    model="laddo",
    temperature=0
)

In [4]:
# loading the document
loader = UnstructuredFileLoader("PP Unit 2 Tesseract.pdf")
documents = loader.load()

In [13]:
documents

[Document(metadata={'source': 'PP Unit 2 Tesseract.pdf'}, page_content='UNIT-2\n\nTOPIC-1\n\nParallel Programming on CPU-I\n\nVectorization\n\nVectorization is a technique used in computer science to perform operations on entire arrays or sequences\n\nof data elements simultaneously, instead of processing each element individually. It\'s commonly used in\n\nnumerical and scientific computing, as well as in various data analysis and machine learning tasks. In\n\nParallel computing, processors have special vector units that can load and operate on more than one data\n\nelement at a time.\n\nSIMD overview\n\nVectorization is an example of single instruction, multiple data (SIMD) processing because it executes a\n\nsingle operation (e.g., addition, division) over a large dataset. A scalar operation, in the context of\n\nmathematics and computer science, refers to an operation that is performed on a single scalar value, as\n\nopposed to a vector, matrix, or any other data structure. Scalars

In [5]:
# create document chunks
text_splitter = CharacterTextSplitter(separator="/n",
                                      chunk_size=7500,
                                      chunk_overlap=200)

In [6]:
text_chunks = text_splitter.split_documents(documents)

In [7]:
embeddings = HuggingFaceEmbeddings()

  embeddings = HuggingFaceEmbeddings()
  from tqdm.autonotebook import tqdm, trange


In [8]:
knowledge_base = FAISS.from_documents(text_chunks, embeddings)

In [9]:
# retrieval QA chain
qa_chain = RetrievalQA.from_chain_type(
    llm,
    retriever=knowledge_base.as_retriever())

In [24]:
Questions =input("Enter Noof Questions:")
Type = input("Enter the type(Mcqs/True or false/Fill in the blanks)")

In [25]:
Questions
Type

'True or false'

In [26]:
Prompt = f"Generate {Questions} {Type}"
Prompt


'Generate 5 True or false'

In [27]:
response = qa_chain.invoke({"query": Prompt})
print(response["result"])

  Sure! Here are five true or false statements related to Cartesian topology in MPI:

1. True: In Cartesian topology, each process has a unique rank within the communicator. (True)
2. False: The number of dimensions in a Cartesian grid is fixed and cannot be changed dynamically. (False - you can modify the number of dimensions at runtime using MPI_Cart_create with a non-zero value for ndims)
3. True: In a Cartesian topology, processes are arranged in a linear or one-dimensional structure. (True)
4. False: The MPI_Cart_coords function returns the coordinates of each process in the Cartesian grid. (False - it returns an integer array containing the coordinates of the process in the specified dimension(s))
5. True: In a Cartesian topology, communication between processes is always point-to-point. (False - you can perform collective operations like MPI_Allreduce or MPI_Gather on groups of processes)


In [10]:
question = "What is this document about?"
response = qa_chain.invoke({"query": question})
print(response["result"])

  This document appears to be a guide on how to use the Message Passing Interface (MPI) for parallel computing, specifically focusing on Cartesian topology support in MPI. The document covers the following topics:

1. Initializing MPI and determining the size and rank of processes in a communicator.
2. Creating a Cartesian communicator using MPI_Cart_create() and defining the dimensions and periodicity of the grid.
3. Retrieving the coordinates of each process in the Cartesian grid using MPI_Cart_coords().
4. Performing point-to-point communication or collective operations specific to your application's grid structure, such as MPI_Send, MPI_Recv, and collective operations like MPI_Allreduce, MPI_Gather, or MPI_Scatter.
5. Freeing the Cartesian communicator after use using MPI_Comm_free().


In [11]:
question = "generate 5 mcqs on Vectorization methods with answers"
response = qa_chain.invoke({"query": question})
print(response["result"])

  Sure! Here are five MCQs related to vectorization methods, along with their answers:

MCQ1: What is the primary advantage of using vectorization techniques in programming?
A. Improved code readability
B. Faster execution time
C. Simplified data manipulation
D. Better memory management
Answer: B. Faster execution time

MCQ2: Which of the following vectorization methods is most suitable for performing matrix operations?
A. Recursion
B. Iteration
C. Looping
D. Function calls
Answer: C. Looping

MCQ3: What is the purpose of using a hash table in vectorization?
A. To store data in a sorted order
B. To reduce memory usage by storing data in an array
C. To perform fast lookups for elements in a large dataset
D. To implement recursion in algorithms
Answer: C. To perform fast lookups for elements in a large dataset

MCQ4: Which of the following vectorization techniques is most efficient for searching an element in a sorted array?
A. Linear search
B. Binary search
C. Iteration through all elem

In [None]:
'''question = "generate 10 true/false questions  with answers on Vectorization methods with answers"
response = qa_chain.invoke({"query": question})
print(response["result"])'''

  Sure! Here are 10 true or false questions related to vectorization methods in parallel computing, along with their answers:

1. True or False: The Parallel Random Access (PRA) method is a vectorization technique that involves dividing the data into smaller chunks and processing them in parallel. (True)
2. True or False: The Data Dependence Graph (DDG) is a graphical representation of the dependencies between variables in a program, which can be used to identify potential parallelism. (False - DDG is actually a technique for analyzing the data dependencies in a program to identify opportunities for parallelization.)
3. True or False: The OpenMP compiler generates optimized code by automatically vectorizing loops. (True)
4. True or False: The MPI_Allreduce() function in MPI is used for collective operations such as summing the values of all processors. (False - MPI_Allreduce() is actually a function for reducing the values of all processors.)
5. True or False: The OpenMP directives #PA

In [39]:
from langchain.document_loaders import PyPDFLoader
from langchain.llms import Ollama
from fpdf import FPDF
import os

# Function to save response to a PDF
def save_to_pdf(response, output_file):
    pdf = FPDF()
    pdf.set_auto_page_break(auto=True, margin=15)
    pdf.add_page()
    pdf.set_font("Arial", size=12)
    pdf.multi_cell(0, 10, response)
    pdf.output(output_file)
    print(f"Response saved to {output_file}")

# RAG Setup and PDF Saving
def main():
    # Example response for testing purposes
    # In a real scenario, you will be working with your RAG pipeline to get this result.
    result = response["result"]

    # Get current working directory to save the output PDF
    current_directory = os.getcwd()
    print(f"Saving PDF in directory: {current_directory}")

    x = random.randint(1, 1000)  # Use a larger range to reduce conflicts
    output_file = os.path.join(current_directory, f"{Type}_{Questions}_{x}.pdf")
    # Save result to PDF
    save_to_pdf(result, output_file)

# Run the main function
if __name__ == "__main__":
    main()


Saving PDF in directory: c:\Users\Srujana\OneDrive\Desktop\MODEL
Response saved to c:\Users\Srujana\OneDrive\Desktop\MODEL\True or false_5_351.pdf
