In [1]:
#!pip install langchain-community==0.2.4 langchain==0.2.3 faiss-cpu==1.8.0 unstructured==0.14.5 unstructured[pdf]==0.14.5 transformers==4.41.2 sentence-transformers==3.0.1

In [2]:
import os

from langchain_community.llms import Ollama
from langchain.document_loaders import UnstructuredFileLoader
from langchain_community.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.docstore.document import Document

from langchain.text_splitter import CharacterTextSplitter
from langchain.chains import RetrievalQA
import random


In [3]:
# Instantiate the Ollama-backed LLM (model "laddo", temperature 0).
llm = Ollama(model="laddo", temperature=0)

In [4]:
    import os
    import speech_recognition as sr
    from pydub import AudioSegment

    # Step 2: Specify the path to the MP3 file
    mp3_file = r"C:\Users\Srujana\OneDrive\Desktop\MODEL\Ferrari.mp3"

    # Bind the result names up front so later cells can test them even when
    # conversion or recognition does not produce a value.
    wav_file = None
    audio_transcription = None

    # Step 3: Check the file path
    # Raise instead of calling exit(): exit() ends the interpreter/kernel
    # session, whereas an exception only stops this cell (and "Run All").
    if not os.path.exists(mp3_file):
        raise FileNotFoundError(
            f"Error: The file at {mp3_file} does not exist. Please check the path."
        )

    # Step 4: Convert MP3 to WAV
    try:
        print(f"Converting {mp3_file} to WAV format...")
        audio = AudioSegment.from_mp3(mp3_file)
        wav_file = "converted_audio.wav"
        audio.export(wav_file, format="wav")
        print(f"Converted {mp3_file} to {wav_file}")
    except Exception as e:
        # Report, then re-raise so the original traceback is preserved and
        # the cells that depend on the WAV file do not run.
        print(f"An error occurred during conversion: {e}")
        raise

    # Step 5: Transcribe the audio
    recognizer = sr.Recognizer()

    if wav_file and os.path.exists(wav_file):
        try:
            # Load the WAV file
            with sr.AudioFile(wav_file) as source:
                print("Recognizing speech...")
                audio_data = recognizer.record(source)
                # Convert speech to text
                audio_transcription = recognizer.recognize_google(audio_data)
                print("Transcription:")
                print(audio_transcription)
        except sr.UnknownValueError:
            # audio_transcription stays None in every failure branch below.
            print("Sorry, could not understand the audio.")
        except sr.RequestError as e:
            print(f"Error with the recognition service: {e}")
        except Exception as e:
            print(f"An error occurred during recognition: {e}")
    else:
        print("Error: WAV file not found. Conversion may have failed.")


Converting C:\Users\Srujana\OneDrive\Desktop\MODEL\Ferrari.mp3 to WAV format...
Converted C:\Users\Srujana\OneDrive\Desktop\MODEL\Ferrari.mp3 to converted_audio.wav
Recognizing speech...
Transcription:
hi today we are going to learn about scuderia Ferrari Ferrari is a team in Motorsport famous for winning the lemon series and dominating the Formula 1 calendar every year in fact they were the only team which has participated in each and every Grand Prix that has been conducted by Formula 1 hence they get a heritage income of 20 million dollars every year from the Formula 1 Association because they were loyal to the brand then the most second most privileged team on the grid is McLaren has had many great drivers such as Allen Frost and return and Ferrari have had very high intense level rivalries between them ever since the beginning which is also resulted in death of the famous written Cena and vertices that Michael Schumacher still regrets and by the way Michael Schumacher is in coma o

In [5]:

# Wrap the transcript in a LangChain Document for the later cells.
# Fail here with a clear message: silently skipping the assignment would
# leave `documents` unbound and surface later as an unrelated NameError.
if not audio_transcription:
    raise ValueError(
        "No transcription available; re-run the speech recognition cell "
        "before building documents."
    )
documents = [Document(page_content=audio_transcription)]


In [6]:
# Show the document list as the cell output.
documents

[Document(page_content='hi today we are going to learn about scuderia Ferrari Ferrari is a team in Motorsport famous for winning the lemon series and dominating the Formula 1 calendar every year in fact they were the only team which has participated in each and every Grand Prix that has been conducted by Formula 1 hence they get a heritage income of 20 million dollars every year from the Formula 1 Association because they were loyal to the brand then the most second most privileged team on the grid is McLaren has had many great drivers such as Allen Frost and return and Ferrari have had very high intense level rivalries between them ever since the beginning which is also resulted in death of the famous written Cena and vertices that Michael Schumacher still regrets and by the way Michael Schumacher is in coma or not in coma but he is not able to drive right now the greatest driver is not able to drive because he had a skiing accident back in 2014 after he retired from Formula 1 and his

In [7]:
# create document chunks
# Split on the newline character. "/n" (slash + n) is just two literal
# characters and would never match a line break in the text.
# NOTE(review): a speech transcript may contain no newlines at all, in which
# case the text stays in a single chunk; consider separator=" " if finer
# chunking is needed.
text_splitter = CharacterTextSplitter(separator="\n",
                                      chunk_size=7500,
                                      chunk_overlap=200)

In [8]:
# Split the documents into chunks with the splitter configured above.
text_chunks = text_splitter.split_documents(documents)

In [9]:
# Embedding model created with the library defaults (no model name is passed).
embeddings = HuggingFaceEmbeddings()

  embeddings = HuggingFaceEmbeddings()
  from tqdm.autonotebook import tqdm, trange


In [10]:
# Build the FAISS vector store from the text chunks using the embeddings object.
knowledge_base = FAISS.from_documents(text_chunks, embeddings)

In [11]:
# Wire the LLM to a retriever over the FAISS knowledge base.
qa_chain = RetrievalQA.from_chain_type(llm, retriever=knowledge_base.as_retriever())

In [12]:
# Ask how many questions to generate. The value is interpolated into the LLM
# prompt and into the output file name, so reject anything that is not a
# positive whole number before it propagates.
Questions = input("Enter Noof Questions:").strip()
if not Questions.isdecimal() or int(Questions) < 1:
    raise ValueError(
        f"Number of questions must be a positive integer, got {Questions!r}"
    )

# Ask for the question type; a blank answer would yield a meaningless prompt.
Type = input("Enter the type(Mcqs/True or false/Fill in the blanks)").strip()
if not Type:
    raise ValueError("Question type must not be empty")

In [13]:
# Compose the query string from the requested count and question type,
# then show it as the cell output.
Prompt = f"Generate {Questions} {Type}"
Prompt


'Generate 3 Mcqs'

In [14]:
# Run the retrieval QA chain on the prompt and print the "result" entry of its response.
response = qa_chain.invoke({"query": Prompt})
print(response["result"])

  Sure, here are three multiple-choice questions based on the context provided:

1. Which team has won the most Formula 1 championships?
A) Scuderia Ferrari
B) McLaren
C) Mercedes AMG
D) Red Bull Racing
2. Who is the greatest driver according to Michael Schumacher?
A) Lewis Hamilton
B) Sebastian Vettel
C) Fernando Alonso
D) Ayrton Senna
3. What happened to Michael Schumacher after he retired from Formula 1 in 2014?
A) He became a race car instructor
B) He suffered a skiing accident and is currently in a coma
C) He started his own racing team
D) He became a professional golfer


In [16]:
from langchain.document_loaders import PyPDFLoader
from langchain.llms import Ollama
from fpdf import FPDF
import os

# Function to save response to a PDF
def save_to_pdf(response, output_file):
    """Write ``response`` as wrapped text into a one-document PDF.

    Args:
        response: Text to write. ``None`` is treated as an empty string and
            other non-string values are converted with ``str``.
        output_file: Path of the PDF file to create.
    """
    # The built-in "Arial" core font only covers Latin-1. LLM output often
    # contains characters outside it (curly quotes, dashes, bullets), which
    # would abort PDF generation, so substitute "?" for anything that cannot
    # be encoded.
    text = "" if response is None else str(response)
    text = text.encode("latin-1", "replace").decode("latin-1")

    pdf = FPDF()
    pdf.set_auto_page_break(auto=True, margin=15)
    pdf.add_page()
    pdf.set_font("Arial", size=12)
    pdf.multi_cell(0, 10, text)
    pdf.output(output_file)
    print(f"Response saved to {output_file}")

# RAG Setup and PDF Saving
def main():
    """Save the current QA-chain answer to a PDF in the working directory.

    Reads the notebook-level names ``response``, ``Type`` and ``Questions``
    and writes ``<Type>_<Questions>_<random>.pdf`` via ``save_to_pdf``.
    """
    import re  # local import: only needed for file-name sanitising

    result = response["result"]

    # Get current working directory to save the output PDF
    current_directory = os.getcwd()
    print(f"Saving PDF in directory: {current_directory}")

    def _safe(part):
        # Type/Questions are free-form user input; path separators or other
        # special characters would break (or redirect) the output path.
        return re.sub(r"[^\w .-]+", "_", str(part)).strip() or "_"

    stem = f"{_safe(Type)}_{_safe(Questions)}"

    # Pick a random suffix, retrying while the name is already taken so an
    # earlier export is not silently overwritten. After the bounded number
    # of attempts the last candidate is used as-is.
    for _ in range(100):
        x = random.randint(1, 1000)
        output_file = os.path.join(current_directory, f"{stem}_{x}.pdf")
        if not os.path.exists(output_file):
            break

    # Save result to PDF
    save_to_pdf(result, output_file)

# Run the export only when this code is executed directly
# (i.e. not when it is imported as a module).
if __name__ == "__main__":
    main()


Saving PDF in directory: c:\Users\Srujana\OneDrive\Desktop\MODEL
Response saved to c:\Users\Srujana\OneDrive\Desktop\MODEL\Mcqs_6_494.pdf
