In [8]:
# Install necessary libraries
!pip install PyMuPDF pytesseract pillow google-generativeai

import os
import pytesseract
from PIL import Image
import fitz  # PyMuPDF for PDF handling
import google.generativeai as genai

# Configure API key
GOOGLE_AI_API_KEY = ''  # Replace with your actual Google AI API key
genai.configure(api_key=GOOGLE_AI_API_KEY)

# Dictionary to store extracted text for each user
user_text_data = {}

# OCR function using Tesseract
def extract_text_from_image(image_path):
    try:
        text = pytesseract.image_to_string(Image.open(image_path))
        return text
    except Exception as e:
        return f"Error processing image: {str(e)}"

# Extract text from PDF (PyMuPDF)
def extract_text_from_pdf(pdf_path):
    try:
        doc = fitz.open(pdf_path)
        text = ""
        for page in doc:
            text += page.get_text()
        return text
    except Exception as e:
        return f"Error processing PDF: {str(e)}"

# Function to call Google Generative AI API to answer questions based on extracted text
def ask_google_ai_question(question, context_text):
    prompt = f"Context: {context_text}\n\nQuestion: {question}\nAnswer:"
    # Create GenerativeModel instance
    model = genai.GenerativeModel('gemini-1.5-flash')
    response = model.generate_content(prompt)
    return response.text

# Function to upload a file and extract text
def upload_and_extract():
    from google.colab import files
    uploaded = files.upload()  # Allow users to upload files
    for filename in uploaded.keys():
        if filename.endswith('.pdf'):
            extracted_text = extract_text_from_pdf(filename)
        elif filename.endswith(('.png', '.jpg', '.jpeg')):
            extracted_text = extract_text_from_image(filename)
        else:
            extracted_text = "Unsupported file type. Please upload a PDF or an image."

        # Store the extracted text
        if extracted_text:
            user_text_data[0] = extracted_text  # Use a single user ID for simplicity
            print("Text has been extracted from the document.")
            print(extracted_text)
        else:
            print("Failed to extract text. Please try again with a different file.")

# Start chatting
def chat():
    print("Welcome to the Chatbot! Type 'exit' to stop chatting.")
    while True:
        if not user_text_data:
            print("Please upload a document or image first to extract text.")
            upload_and_extract()

        user_message = input("You: ")
        if user_message.lower() == 'exit':
            print("Goodbye!")
            break

        context_text = user_text_data.get(0, "")
        if context_text:
            answer = ask_google_ai_question(user_message, context_text)
            print(f"Bot: {answer}")
        else:
            print("No context available. Please upload a document or image first.")

# Run the chat function
chat()


Welcome to the Chatbot! Type 'exit' to stop chatting.
Please upload a document or image first to extract text.


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Over the past decade or so, health
and education indicators have come
to be widely used along with income
as a measure of development. For
instance, Human Development
Report published by UNDP compares
countries based on the educational
levels of the people, their health
status and per capita income. It
would be interesting to look at
certain relevant data regarding India
and its neighbours from Human
Development Report 2006.
TABLE 1.6 SOME DATA REGARDING INDIA AND ITS NEIGHBOURS
FOR 2004
Country
Per Capita
Life
Literacy rate
Gross enrolment
HDI rank in
Income
expectancy
for 15+ yrs
ratio for three
the world
 in US$
at birth
population
levels
Sri Lanka
4390
74
91
69
93
India
3139
64
61
60
126
Myanmar
1027
61
90
48
130
Pakistan
2225
63
50
35
134
Nepal
1490
62
50
61
138
Bangladesh
1870
63
41
53
137
NOTES
1. HDI stands for Human Development Index. HDI ranks in above table are out of 177 countries in all.
2. Life expectancy at

KeyboardInterrupt: Interrupted by user

In [1]:
pip install transformers PyPDF2 python-docx


Collecting PyPDF2
  Downloading pypdf2-3.0.1-py3-none-any.whl.metadata (6.8 kB)
Collecting python-docx
  Downloading python_docx-1.1.2-py3-none-any.whl.metadata (2.0 kB)
Downloading pypdf2-3.0.1-py3-none-any.whl (232 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m232.6/232.6 kB[0m [31m9.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading python_docx-1.1.2-py3-none-any.whl (244 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m244.3/244.3 kB[0m [31m15.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: python-docx, PyPDF2
Successfully installed PyPDF2-3.0.1 python-docx-1.1.2


In [5]:
import random
from transformers import pipeline
from PyPDF2 import PdfReader
import docx

# Function to extract text from a PDF
def extract_text_from_pdf(pdf_file_path):
    reader = PdfReader(pdf_file_path)
    text = ""
    for page in reader.pages:
        text += page.extract_text()
    return text

# Function to extract text from a DOCX
def extract_text_from_docx(docx_file_path):
    doc = docx.Document(docx_file_path)
    text = "\n".join([para.text for para in doc.paragraphs])
    return text

# Function to generate questions using NLP
def generate_questions(text, num_questions=5):
    question_generator = pipeline('text2text-generation', model='valhalla/t5-small-qg-hl')
    sentences = text.split('. ')
    random.shuffle(sentences)  # Shuffle sentences to get diverse questions
    selected_text = ". ".join(sentences[:10])  # Select a portion of the text
    input_text = f"generate questions: {selected_text}"

    # Using beam search for generating multiple questions
    questions = question_generator(input_text, max_length=128, num_return_sequences=num_questions, num_beams=num_questions)
    return questions, sentences

# Function to generate multiple choices for MCQs
def generate_mcq_options(correct_answer, sentences, num_choices=4):
    random.shuffle(sentences)  # Shuffle to get random distractors
    choices = [correct_answer]  # Start with the correct answer

    # Add random sentences as distractors
    for sentence in sentences:
        if len(choices) >= num_choices:
            break
        if sentence != correct_answer:
            choices.append(sentence)

    random.shuffle(choices)  # Shuffle the options
    return choices

# Chatbot function
def quiz_bot():
    print("Hello! I'm here to generate quizzes from your document.")

    # Upload file
    file_path = input("Please upload the document file path (PDF/DOCX): ")

    if file_path.endswith(".pdf"):
        text = extract_text_from_pdf(file_path)
    elif file_path.endswith(".docx"):
        text = extract_text_from_docx(file_path)
    else:
        print("Unsupported file format. Please upload a PDF or DOCX file.")
        return

    if text:
        print("\nDocument text extracted successfully!")
        print("\nHere is a portion of the extracted text for reference:\n")
        sentences = text.split('. ')
        print(". ".join(sentences[:10]) + ".\n")  # Print first few sentences as context

        # Generate questions
        num_questions = int(input("How many questions would you like to generate? "))
        questions, all_sentences = generate_questions(text, num_questions)

        print("\nHere are the generated quiz questions with multiple choices:\n")

        for i, question in enumerate(questions, 1):
            question_text = question['generated_text']
            correct_answer = question_text.split('Answer: ')[-1] if 'Answer: ' in question_text else "Not Available"

            # Generate multiple choice options
            mcq_choices = generate_mcq_options(correct_answer, all_sentences)

            # Print question
            print(f"{i}. {question_text}")
            print("Options:")
            for idx, choice in enumerate(mcq_choices):
                print(f"   {chr(65+idx)}. {choice}")
            print()

        # Adding different question types (True/False, Fill-in-the-Blank)
        print("Generating other types of questions:\n")
        for j, sentence in enumerate(all_sentences[:2], num_questions + 1):
            print(f"{j}. True/False: The following statement is true:\n   {sentence.strip()}.")
            print(f"   A. True\n   B. False\n")

        for k, sentence in enumerate(all_sentences[2:4], num_questions + 3):
            words = sentence.split()
            if len(words) > 4:
                missing_word = words[3]
                sentence_fill = sentence.replace(missing_word, "_____", 1)
                print(f"{k}. Fill-in-the-Blank:\n   {sentence_fill}")
                print(f"   Answer: {missing_word}\n")

    else:
        print("Failed to extract text from the document.")

# Run the chatbot
quiz_bot()


Hello! I'm here to generate quizzes from your document.
Please upload the document file path (PDF/DOCX): /content/NCERT-Class-10-Economics.pdf

Document text extracted successfully!

Here is a portion of the extracted text for reference:

~ . · .· .~· .. , ..... · 
~ .. 
•• 11'_ ••• .... 
...... · ..... ... . ,..,.· CONTENTS
Foreword iii
A few introductory words for teachers v
Chapter 1
Chapter 4Chapter 2
Chapter 3
Chapter 5
Suggested Readings 90DEVELOPMENT 2
SECTORS OF THE INDIAN ECONOMY 18
MONEY AND CREDIT 38
GLOBALISATION AND THE INDIAN ECONOMY 54
CONSUMER RIGHTS 74NOTES FOR THE TEACHER
This chapter proposes to discuss the issue of
consumer rights within the context of the ways
markets operate in our country.

How many questions would you like to generate? 3


Hardware accelerator e.g. GPU is available in the environment, but no `device` argument is passed to the `Pipeline` object. Model will be on CPU.



Here are the generated quiz questions with multiple choices:

1. What is the percentage of employment in the secondary sector?
Options:
   A. After
a year or two, if the group is regularin savings, it becomes eligible for
availing loan from the bank.
Loan is sanctioned in thename of the group and is
meant to create self-
employment opportunitiesfor the members
   B. Out of this Rs 32,000 million was generated
in the organised sector
   C. Not Available
   D. Do you keep the bill carefully?
3

2. What is the percentage of employment in India?
Options:
   A. Not Available
   B. If you
get a job in a far off place, beforeaccepting it you would try to consider
many factors, apart from income,
such as facilities for your family,working atmosphere, or opportunity
to learn
   C. Imagine how much moredifficult it would be if the shoemanufacturer had to directlyexchange shoes for wheat without the
use of money
   D. Insome of these transactions, services
are being exchanged with money

3. What

In [9]:
!pip install PyMuPDF


Collecting PyMuPDF
  Downloading PyMuPDF-1.24.11-cp38-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (3.4 kB)
Downloading PyMuPDF-1.24.11-cp38-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (19.6 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m19.6/19.6 MB[0m [31m40.9 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: PyMuPDF
Successfully installed PyMuPDF-1.24.11


In [7]:
!pip install python-telegram-bot


Collecting python-telegram-bot
  Downloading python_telegram_bot-21.6-py3-none-any.whl.metadata (17 kB)
Downloading python_telegram_bot-21.6-py3-none-any.whl (652 kB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/652.1 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m645.1/652.1 kB[0m [31m23.1 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m652.1/652.1 kB[0m [31m12.9 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: python-telegram-bot
Successfully installed python-telegram-bot-21.6


In [2]:
pip install openai


Collecting openai
  Downloading openai-1.52.0-py3-none-any.whl.metadata (24 kB)
Collecting httpx<1,>=0.23.0 (from openai)
  Downloading httpx-0.27.2-py3-none-any.whl.metadata (7.1 kB)
Collecting jiter<1,>=0.4.0 (from openai)
  Downloading jiter-0.6.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (5.2 kB)
Collecting httpcore==1.* (from httpx<1,>=0.23.0->openai)
  Downloading httpcore-1.0.6-py3-none-any.whl.metadata (21 kB)
Collecting h11<0.15,>=0.13 (from httpcore==1.*->httpx<1,>=0.23.0->openai)
  Downloading h11-0.14.0-py3-none-any.whl.metadata (8.2 kB)
Downloading openai-1.52.0-py3-none-any.whl (386 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m386.9/386.9 kB[0m [31m12.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading httpx-0.27.2-py3-none-any.whl (76 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m76.4/76.4 kB[0m [31m7.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading httpcore-1.0.6-py3-none-any.whl (78 kB)
[2K   [90m━

In [19]:
!pip install openai==0.28

Collecting openai==0.28
  Downloading openai-0.28.0-py3-none-any.whl.metadata (13 kB)
Downloading openai-0.28.0-py3-none-any.whl (76 kB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/76.5 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m76.5/76.5 kB[0m [31m3.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: openai
  Attempting uninstall: openai
    Found existing installation: openai 1.52.0
    Uninstalling openai-1.52.0:
      Successfully uninstalled openai-1.52.0
Successfully installed openai-0.28.0


In [7]:
import google.generativeai as genai

# Configure API key
genai.configure(api_key='AIzaSyBq-dB3v_fW0IM_7boKFIKYMZFZuSafZa4')
# Create GenerativeModel instance
model = genai.GenerativeModel('gemini-1.5-flash')
response = model.generate_content("What is python?")
print(response.text)

Python is a **high-level, interpreted, general-purpose programming language**.  Here's a breakdown of what that means:

**High-Level:** Python is designed to be easy for humans to read and write, focusing on readability and simplicity. It handles a lot of low-level details behind the scenes, making it easier for you to concentrate on the logic of your program.

**Interpreted:** Unlike languages like C or C++, which need to be compiled into machine code before running, Python code is executed line by line by an interpreter. This means you can write and run Python code quickly and easily.

**General-Purpose:** Python is incredibly versatile. It can be used for a wide range of applications, including:

* **Web Development:** Python frameworks like Django and Flask make it a popular choice for building websites and web applications.
* **Data Science and Machine Learning:** Python libraries like NumPy, Pandas, Scikit-learn, and TensorFlow are essential tools for data analysis, visualization

In [4]:
!pip install PyMuPDF pytesseract pillow


Collecting PyMuPDF
  Downloading PyMuPDF-1.24.11-cp38-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (3.4 kB)
Downloading PyMuPDF-1.24.11-cp38-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (19.6 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m19.6/19.6 MB[0m [31m57.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: PyMuPDF
Successfully installed PyMuPDF-1.24.11


In [2]:
!pip install pytesseract


Collecting pytesseract
  Downloading pytesseract-0.3.13-py3-none-any.whl.metadata (11 kB)
Downloading pytesseract-0.3.13-py3-none-any.whl (14 kB)
Installing collected packages: pytesseract
Successfully installed pytesseract-0.3.13


In [1]:
# Install necessary libraries
!pip install PyMuPDF pytesseract pillow

import os
import requests
import pytesseract
from PIL import Image
import fitz  # PyMuPDF for PDF handling

# Set Google AI Studio API Key
GOOGLE_AI_API_KEY = ''  # Replace with your actual Google AI API key

# Dictionary to store extracted text for each user
user_text_data = {}
keyword_count = {}
quiz_questions = []  # List to hold quiz questions

# OCR function using Tesseract
def extract_text_from_image(image_path):
    try:
        text = pytesseract.image_to_string(Image.open(image_path))
        return text
    except Exception as e:
        return f"Error processing image: {str(e)}"

# Extract text from PDF (PyMuPDF)
def extract_text_from_pdf(pdf_path):
    try:
        doc = fitz.open(pdf_path)
        text = ""
        for page in doc:
            text += page.get_text()
        return text
    except Exception as e:
        return f"Error processing PDF: {str(e)}"

# Function to call Google AI Studio API to answer questions based on extracted text
def ask_google_ai_question(question, context_text):
    prompt = f"Context: {context_text}\n\nQuestion: {question}\nAnswer:"
    url = "https://your-google-ai-studio-api-endpoint"  # Replace with the actual endpoint

    payload = {
        "prompt": prompt,
        "max_tokens": 150
    }

    headers = {
        "Authorization": f"Bearer {GOOGLE_AI_API_KEY}",
        "Content-Type": "application/json"
    }

    try:
        # Make the request to the Google AI API
        response = requests.post(url, json=payload, headers=headers)
        response.raise_for_status()  # Raise an error for bad responses

        # Get the response text
        answer = response.json().get("choices")[0].get("message").get("content").strip()
        return answer
    except requests.exceptions.HTTPError as e:
        return f"Error fetching answer from Google AI: {str(e)}"
    except Exception as e:
        return f"Error fetching answer from Google AI: {str(e)}"

# Function to upload a file and extract text
def upload_and_extract():
    from google.colab import files
    uploaded = files.upload()  # Allow users to upload files
    for filename in uploaded.keys():
        if filename.endswith('.pdf'):
            extracted_text = extract_text_from_pdf(filename)
        elif filename.endswith(('.png', '.jpg', '.jpeg')):
            extracted_text = extract_text_from_image(filename)
        else:
            extracted_text = "Unsupported file type. Please upload a PDF or an image."

        # Store the extracted text
        if extracted_text:
            user_text_data[0] = extracted_text  # Use a single user ID for simplicity
            print("Text has been extracted from the document.")
        else:
            print("Failed to extract text. Please try again with a different file.")

# Function to handle quiz questions
def handle_quiz():
    global quiz_questions
    if quiz_questions:
        for question in quiz_questions:
            print(question['question'])
            for idx, option in enumerate(question['options']):
                print(f"{idx + 1}. {option}")
            user_answer = input("Your answer (1-4): ")
            if user_answer.isdigit() and int(user_answer) - 1 == question['correct_answer']:
                print("Correct! Here's another question.")
                # Ask another question
                continue
            else:
                print("Incorrect! Here are two more questions.")
                for _ in range(2):  # Ask two more questions
                    if quiz_questions:
                        question = quiz_questions.pop(0)
                        print(question['question'])
                        for idx, option in enumerate(question['options']):
                            print(f"{idx + 1}. {option}")
                        user_answer = input("Your answer (1-4): ")
                        if user_answer.isdigit() and int(user_answer) - 1 == question['correct_answer']:
                            print("Correct!")
                        else:
                            print("Incorrect.")
                break
    else:
        print("No quiz questions available.")

# Start chatting
def chat():
    print("Welcome to the Chatbot! Type 'exit' to stop chatting.")
    while True:
        if not user_text_data:
            print("Please upload a document or image first to extract text.")
            upload_and_extract()

        user_message = input("You: ")
        if user_message.lower() == 'exit':
            print("Goodbye!")
            break

        context_text = user_text_data.get(0, "")
        if context_text:
            # Track keywords
            for keyword in user_message.split():
                keyword_count[keyword] = keyword_count.get(keyword, 0) + 1
                if keyword_count[keyword] > 2:  # Save keyword if asked multiple times
                    quiz_questions.append({
                        'question': f"What is related to the keyword '{keyword}'?",
                        'options': ["Option 1", "Option 2", "Option 3", "Option 4"],
                        'correct_answer': 0  # Index of the correct answer
                    })
            answer = ask_google_ai_question(user_message, context_text)
            print(f"Bot: {answer}")

            # Handle quiz after answering the question
            handle_quiz()
        else:
            print("No context available. Please upload a document or image first.")

# Run the chat function
chat()


Collecting PyMuPDF
  Downloading PyMuPDF-1.24.11-cp38-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (3.4 kB)
Collecting pytesseract
  Downloading pytesseract-0.3.13-py3-none-any.whl.metadata (11 kB)
Downloading PyMuPDF-1.24.11-cp38-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (19.6 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m19.6/19.6 MB[0m [31m17.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pytesseract-0.3.13-py3-none-any.whl (14 kB)
Installing collected packages: pytesseract, PyMuPDF
Successfully installed PyMuPDF-1.24.11 pytesseract-0.3.13
Welcome to the Chatbot! Type 'exit' to stop chatting.
Please upload a document or image first to extract text.


Saving NCERT-Class-10-Economics.pdf to NCERT-Class-10-Economics.pdf
Text has been extracted from the document.
You: Based on first chapter describe summary and explain average income
Bot: Error fetching answer from Google AI: HTTPSConnectionPool(host='your-google-ai-studio-api-endpoint', port=443): Max retries exceeded with url: / (Caused by NameResolutionError("<urllib3.connection.HTTPSConnection object at 0x78d43f6d0a60>: Failed to resolve 'your-google-ai-studio-api-endpoint' ([Errno -2] Name or service not known)"))
No quiz questions available.


KeyboardInterrupt: Interrupted by user

In [1]:
# Install necessary libraries
!pip install PyMuPDF pytesseract pillow google-generativeai

import os
import pytesseract
from PIL import Image
import fitz  # PyMuPDF for PDF handling
import google.generativeai as genai

# Configure API key
GOOGLE_AI_API_KEY = 'AIzaSyBq-dB3v_fW0IM_7boKFIKYMZFZuSafZa4'  # Replace with your actual Google AI API key
genai.configure(api_key=GOOGLE_AI_API_KEY)

# Dictionary to store extracted text for each user
user_text_data = {}
unclear_topics = []  # List to store topics the user is unclear about

# OCR function using Tesseract
def extract_text_from_image(image_path):
    try:
        text = pytesseract.image_to_string(Image.open(image_path))
        return text
    except Exception as e:
        return f"Error processing image: {str(e)}"

# Extract text from PDF (PyMuPDF)
def extract_text_from_pdf(pdf_path):
    try:
        doc = fitz.open(pdf_path)
        text = ""
        for page in doc:
            text += page.get_text()
        return text
    except Exception as e:
        return f"Error processing PDF: {str(e)}"

# Function to call Google Generative AI API to answer questions based on extracted text
def ask_google_ai_question(question, context_text):
    prompt = f"Context: {context_text}\n\nQuestion: {question}\nAnswer:"
    # Create GenerativeModel instance
    model = genai.GenerativeModel('gemini-1.5-flash')
    response = model.generate_content(prompt)
    return response.text

# Function to call Google AI to generate a quiz question based on the topic
def ask_quiz_question(topic):
    prompt = f"Generate a quiz question about {topic}"
    model = genai.GenerativeModel('gemini-1.5-flash')
    response = model.generate_content(prompt)
    return response.text

# Function to upload a file and extract text
def upload_and_extract():
    from google.colab import files
    uploaded = files.upload()  # Allow users to upload files
    for filename in uploaded.keys():
        if filename.endswith('.pdf'):
            extracted_text = extract_text_from_pdf(filename)
        elif filename.endswith(('.png', '.jpg', '.jpeg')):
            extracted_text = extract_text_from_image(filename)
        else:
            extracted_text = "Unsupported file type. Please upload a PDF or an image."

        # Store the extracted text
        if extracted_text:
            user_text_data[0] = extracted_text  # Use a single user ID for simplicity
            print("Text has been extracted from the document.")
        else:
            print("Failed to extract text. Please try again with a different file.")

# Start chatting
def chat():
    print("Welcome to the Chatbot! Type 'exit' to stop chatting.")
    while True:
        if not user_text_data:
            print("Please upload a document or image first to extract text.")
            upload_and_extract()

        user_message = input("You: ")
        if user_message.lower() == 'exit':
            print("Goodbye!")
            break

        context_text = user_text_data.get(0, "")
        if context_text:
            answer = ask_google_ai_question(user_message, context_text)
            print(f"Bot: {answer}")

            # Check if the user is unclear on a particular topic
            if 'unclear' in user_message or 'not sure' in user_message:
                topic = input("Bot: What topic are you unclear about? ")
                unclear_topics.append(topic)
                print(f"Bot: I've noted that you're unclear about {topic}. Let's quiz you on this.")

                # Ask a quiz question based on the unclear topic
                quiz_question = ask_quiz_question(topic)
                print(f"Bot: {quiz_question}")

                user_answer = input("Your answer: ")
                if "correct" in user_answer.lower():  # Placeholder for real validation
                    print("Bot: That's correct! Here's one more question.")
                    second_question = ask_quiz_question(topic)
                    print(f"Bot: {second_question}")
                    user_answer = input("Your answer: ")
                    print("Bot: Great job! We're done for now.")
                else:
                    print("Bot: That's not quite right. Let's revisit this topic later.")
        else:
            print("No context available. Please upload a document or image first.")

# Run the chat function
chat()


Collecting PyMuPDF
  Downloading PyMuPDF-1.24.11-cp38-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (3.4 kB)
Collecting pytesseract
  Downloading pytesseract-0.3.13-py3-none-any.whl.metadata (11 kB)
Downloading PyMuPDF-1.24.11-cp38-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (19.6 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m19.6/19.6 MB[0m [31m35.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pytesseract-0.3.13-py3-none-any.whl (14 kB)
Installing collected packages: pytesseract, PyMuPDF
Successfully installed PyMuPDF-1.24.11 pytesseract-0.3.13
Welcome to the Chatbot! Type 'exit' to stop chatting.
Please upload a document or image first to extract text.


Saving NCERT-Class-10-Economics.pdf to NCERT-Class-10-Economics.pdf
Text has been extracted from the document.
You: Explain about first chapter 
Bot: The first chapter of the textbook you provided is about **Development**.  Here's a breakdown of the key points:

**1. Development Means Different Things to Different People:**

* **Individual Aspirations:**  The chapter begins by emphasizing that development is a subjective concept. Different people have different goals and aspirations. A landless laborer might dream of better wages and education for their children, while a wealthy urban family might prioritize access to luxury goods and foreign travel. 
* **Conflicting Goals:** The chapter also highlights how development goals can conflict. For example, building a dam to provide electricity might displace tribal communities who depend on the land.
* **Beyond Income:** While income is a common measure of development, the chapter argues that non-material things like freedom, equality, and 

KeyboardInterrupt: Interrupted by user

In [None]:
# Install necessary libraries
!pip install PyMuPDF pytesseract pillow google-generativeai

import os
import pytesseract
from PIL import Image
import fitz  # PyMuPDF for PDF handling
import google.generativeai as genai

# Configure API key
GOOGLE_AI_API_KEY = 'AIzaSyBq-dB3v_fW0IM_7boKFIKYMZFZuSafZa4'  # Replace with your actual Google AI API key
genai.configure(api_key=GOOGLE_AI_API_KEY)

# Dictionary to store extracted text for each user
user_text_data = {}
unclear_topics = []  # List to store topics the user is unclear about

# OCR function using Tesseract
def extract_text_from_image(image_path):
    try:
        text = pytesseract.image_to_string(Image.open(image_path))
        return text
    except Exception as e:
        return f"Error processing image: {str(e)}"

# Extract text from PDF (PyMuPDF)
def extract_text_from_pdf(pdf_path):
    try:
        doc = fitz.open(pdf_path)
        text = ""
        for page in doc:
            text += page.get_text()
        return text
    except Exception as e:
        return f"Error processing PDF: {str(e)}"

# Function to call Google Generative AI API to answer questions based on extracted text
def ask_google_ai_question(question, context_text):
    prompt = f"Context: {context_text}\n\nQuestion: {question}\nAnswer:"
    # Create GenerativeModel instance
    model = genai.GenerativeModel('gemini-1.5-flash')
    response = model.generate_content(prompt)
    return response.text

# Function to call Google AI to generate a quiz question based on the topic
def ask_quiz_question(topic):
    prompt = f"Generate a quiz question about {topic}"
    model = genai.GenerativeModel('gemini-1.5-flash')
    response = model.generate_content(prompt)
    return response.text

# Function to provide the correct answer and definition if the user's answer is incorrect
def provide_correct_answer_and_definition(question):
    # Get the correct answer and definition for the question
    correct_answer_prompt = f"What is the correct answer to this question: {question}?"
    definition_prompt = f"Provide the definition for this concept: {question}"

    model = genai.GenerativeModel('gemini-1.5-flash')

    correct_answer_response = model.generate_content(correct_answer_prompt)
    definition_response = model.generate_content(definition_prompt)

    return correct_answer_response.text, definition_response.text

# Function to upload a file and extract text
def upload_and_extract():
    from google.colab import files
    uploaded = files.upload()  # Allow users to upload files
    for filename in uploaded.keys():
        if filename.endswith('.pdf'):
            extracted_text = extract_text_from_pdf(filename)
        elif filename.endswith(('.png', '.jpg', '.jpeg')):
            extracted_text = extract_text_from_image(filename)
        else:
            extracted_text = "Unsupported file type. Please upload a PDF or an image."

        # Store the extracted text
        if extracted_text:
            user_text_data[0] = extracted_text  # Use a single user ID for simplicity
            print("Text has been extracted from the document.")
        else:
            print("Failed to extract text. Please try again with a different file.")

# Start chatting
def chat():
    print("Welcome to the Chatbot! Type 'exit' to stop chatting.")
    while True:
        if not user_text_data:
            print("Please upload a document or image first to extract text.")
            upload_and_extract()

        user_message = input("You: ")
        if user_message.lower() == 'exit':
            print("Goodbye!")
            break

        context_text = user_text_data.get(0, "")
        if context_text:
            answer = ask_google_ai_question(user_message, context_text)
            print(f"Bot: {answer}")

            # Check if the user is unclear on a particular topic
            if 'unclear' in user_message or 'not sure' in user_message:
                topic = input("Bot: What topic are you unclear about? ")
                unclear_topics.append(topic)
                print(f"Bot: I've noted that you're unclear about {topic}. Let's quiz you on this.")

                # Ask a quiz question based on the unclear topic
                quiz_question = ask_quiz_question(topic)
                print(f"Bot: {quiz_question}")

                user_answer = input("Your answer: ")
                if "correct" in user_answer.lower():  # Placeholder for real validation
                    print("Bot: That's correct! Here's one more question.")
                    second_question = ask_quiz_question(topic)
                    print(f"Bot: {second_question}")
                    user_answer = input("Your answer: ")
                    print("Bot: Great job! We're done for now.")
                else:
                    # If the answer is incorrect, provide the correct answer and definition
                    print("Bot: That's not correct. Let me help you.")
                    correct_answer, definition = provide_correct_answer_and_definition(quiz_question)
                    print(f"Bot: The correct answer is: {correct_answer}")
                    print(f"Bot: Here's the definition for the concept: {definition}")
        else:
            print("No context available. Please upload a document or image first.")

# Run the chat function
chat()


Welcome to the Chatbot! Type 'exit' to stop chatting.
Please upload a document or image first to extract text.


Saving NCERT-Class-10-Economics.pdf to NCERT-Class-10-Economics (1).pdf
Text has been extracted from the document.
You: what is average income
Bot: Average income, also known as per capita income, is a measure of the total income of a country divided by its total population. It represents the average amount of income earned by each person in that country. 

You: unclear
Bot: The provided text is a textbook excerpt about consumer rights, and it appears to be missing a question.  Please provide the question you'd like me to answer, and I'll be happy to help! 

Bot: What topic are you unclear about? average income
Bot: I've noted that you're unclear about average income. Let's quiz you on this.
Bot: ## Quiz Question:

**In a certain country, the average income of a household is $50,000. However, the median income is only $35,000. What does this tell us about the distribution of income in this country?**

**A) The income distribution is skewed towards higher earners.**
**B) The income dist

In [None]:
pip install flask transformers pdfplumber moviepy openai


In [None]:
from flask import Flask, request, jsonify, send_file
from transformers import pipeline
from moviepy.editor import ImageClip, concatenate_videoclips
import pdfplumber
import fitz  # PyMuPDF
import openai  # For DALL·E integration (or use an alternative image generator)
import os

app = Flask(__name__)

# Initialize summarization pipeline
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")

# Function to extract text from a PDF document using PyMuPDF
def extract_text_from_pdf(file_path):
    text = ""
    with pdfplumber.open(file_path) as pdf:
        for page in pdf.pages:
            text += page.extract_text()
    return text

# Summarize text using transformers (Huggingface)
def generate_summary(text):
    return summarizer(text, max_length=100, min_length=30, do_sample=False)[0]['summary_text']

# Generate image using DALL·E or similar model via OpenAI API
def generate_image(prompt):
    response = openai.Image.create(
        prompt=prompt,
        n=1,
        size="1024x1024"
    )
    image_url = response['data'][0]['url']
    # Download the image
    image_path = "generated_image.png"
    os.system(f"wget {image_url} -O {image_path}")
    return image_path

# Generate video from images and text using MoviePy
def create_video_from_images(image_paths, text_summary):
    clips = []
    duration_per_image = 5  # seconds per image

    # Generate clips for each image and text
    for img_path in image_paths:
        clip = ImageClip(img_path).set_duration(duration_per_image)
        clips.append(clip)

    # Combine all image clips into a video
    video = concatenate_videoclips(clips, method="compose")
    output_video_path = "generated_video.mp4"
    video.write_videofile(output_video_path, fps=24)

    return output_video_path

# Flask route to upload PDF and generate video
@app.route('/upload', methods=['POST'])
def upload_file():
    if 'file' not in request.files:
        return jsonify({"error": "No file part"}), 400

    file = request.files['file']

    if file.filename == '':
        return jsonify({"error": "No selected file"}), 400

    # Save the uploaded PDF file
    file_path = os.path.join("uploads", file.filename)
    file.save(file_path)

    # Extract text from the PDF
    extracted_text = extract_text_from_pdf(file_path)

    # Generate summary from extracted text
    summary = generate_summary(extracted_text)

    # Generate image from summary (or key points)
    image_path = generate_image(summary)

    # Create a video using the generated image and summary text
    video_path = create_video_from_images([image_path], summary)

    # Return the generated video
    return send_file(video_path, as_attachment=True)

if __name__ == '__main__':
    if not os.path.exists("uploads"):
        os.makedirs("uploads")
    app.run(debug=True)
