In [1]:
# Install necessary libraries
!pip install python-docx openai scikit-learn nltk tiktoken

# Import all required libraries
from docx import Document
from openai import OpenAI
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
import nltk
from nltk.tokenize import sent_tokenize
import re
import os
from google.colab import files

# Fetch the NLTK tokenizer data: 'punkt', plus 'punkt_tab', which was added
# because it was reported as a missing resource.
for nltk_resource in ('punkt', 'punkt_tab'):
    nltk.download(nltk_resource)

print("All libraries installed successfully!")

Collecting python-docx
  Downloading python_docx-1.2.0-py3-none-any.whl.metadata (2.0 kB)
Downloading python_docx-1.2.0-py3-none-any.whl (252 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m253.0/253.0 kB[0m [31m5.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: python-docx
Successfully installed python-docx-1.2.0


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt_tab.zip.


All libraries installed successfully!


In [2]:

# SETUP OPENAI API

import getpass

# Read the key with getpass so it is not echoed into the notebook output.
print("Please enter your OpenAI API key:")
# Strip surrounding whitespace so a stray space or newline picked up while
# pasting is not sent as part of the key.
api_key = getpass.getpass("API key: ").strip()
if not api_key:
    # Stop here rather than reporting success for an empty key.
    raise ValueError("No API key entered. Re-run this cell and paste your OpenAI API key.")

client = OpenAI(api_key=api_key)

# NOTE: this only stores the key on the client; the key is not checked against
# the API until the first request is made in a later cell.
print("API key configured successfully!")

Please enter your OpenAI API key:
API key: ··········
API key configured successfully!


In [3]:

#  UPLOAD WORD FILE

print("Step 1: Upload your 'English For Today.docx' file")
print("Please click the 'Choose Files' button below and select your Word file...")

# Upload the file; the result is keyed by filename.
uploaded = files.upload()

# An empty result means no file was provided. Stop with a clear message
# instead of the IndexError that indexing an empty key list would raise.
if not uploaded:
    raise RuntimeError("No file was uploaded. Re-run this cell and select your Word file.")

# Use the first uploaded filename
filename = next(iter(uploaded))
print(f"File uploaded: {filename}")


Step 1: Upload your 'English For Today.docx' file
Please click the 'Choose Files' button below and select your Word file...


Saving English For Today.docx to English For Today.docx
File uploaded: English For Today.docx


In [4]:

# READ WORD FILE CONTENT

print("\nStep 2: Reading textbook content from Word file...")

try:
    doc = Document(filename)

    # Keep every non-blank paragraph, one per line. Joining once avoids
    # rebuilding the whole string for each paragraph.
    full_text = "".join(
        para.text + "\n" for para in doc.paragraphs if para.text.strip()
    )

    print("Textbook loaded successfully!")
    print(f"Total characters: {len(full_text)}")
    print(f"Sample content: {full_text[:200]}...")

except Exception as e:
    print(f"Error reading Word file: {e}")
    print("Please make sure you uploaded the correct file.")
    # Re-raise instead of calling exit(): exit() is meant for ending the whole
    # interpreter session, whereas re-raising stops this cell (and a Run All)
    # with the original traceback while keeping the session usable.
    raise



Step 2: Reading textbook content from Word file...
Textbook loaded successfully!
Total characters: 129111
Sample content: "English For Today - Class 5":
Preface
A child is a great wonder. There is no end to the thinking about his/her world of wonder. A child is a subject of contemplation for educationists, scientists, ph...


In [5]:

# PROCESS TEXT INTO CHUNKS

print("Step 3: Processing textbook into chunks...")

def create_smart_chunks(text, max_chunk_size=300):
    """Split textbook text into sentence-aligned chunks.

    The text is cleaned, split into sentences with ``sent_tokenize``, and the
    sentences are packed greedily, in order, into chunks joined by single
    spaces.

    Args:
        text: Raw textbook text.
        max_chunk_size: Maximum chunk length in characters, counting the
            spaces that join sentences. A single sentence longer than this is
            kept whole as its own chunk rather than being cut mid-sentence.

    Returns:
        List of non-empty chunk strings in document order ([] for empty text).
    """
    # Clean the text: drop markdown bold markers ...
    text = re.sub(r'\*\*', '', text)
    # ... and collapse runs of newlines into a single newline.
    text = re.sub(r'\n+', '\n', text)

    chunks = []
    current_sentences = []
    current_length = 0  # length of " ".join(current_sentences)

    def flush():
        """Append the pending sentences as one chunk, if there is any text."""
        chunk = " ".join(current_sentences).strip()
        if chunk:
            chunks.append(chunk)

    for sentence in sent_tokenize(text):
        if not sentence:
            continue

        # Count the joining space as part of the chunk, so the finished chunk
        # never exceeds max_chunk_size because of separators.
        separator_length = 1 if current_sentences else 0
        projected_length = current_length + separator_length + len(sentence)

        if projected_length <= max_chunk_size:
            current_sentences.append(sentence)
            current_length = projected_length
        else:
            # Save what we have and start a new chunk with this sentence
            flush()
            current_sentences = [sentence]
            current_length = len(sentence)

    # Add the last chunk if not empty
    flush()

    return chunks

# Create chunks from textbook
chunks = create_smart_chunks(full_text)
if not chunks:
    # Fail with a clear message: the sample print below indexes chunks[0] and
    # would otherwise raise a bare IndexError.
    raise ValueError("No text chunks were created. Check that the textbook file contains text.")
print(f"Created {len(chunks)} text chunks")
print(f"Sample chunk: {chunks[0][:100]}...")

Step 3: Processing textbook into chunks...
Created 479 text chunks
Sample chunk: "English For Today - Class 5":
Preface
A child is a great wonder. There is no end to the thinking ab...


In [6]:

#  CREATE EMBEDDINGS (ONLY ONCE)

print("\n Step 4: Creating embeddings from textbook content...")
print("This may take a few minutes depending on textbook size...")

def create_embeddings(chunks):
    """Create one embedding vector per text chunk.

    Makes one embeddings request per chunk through the module-level ``client``
    using the "text-embedding-3-small" model.

    A chunk whose request fails is reported and, as long as at least one other
    chunk succeeded, replaced by an all-zero vector of the same length as the
    successful embeddings so that positions stay aligned with ``chunks``.

    Args:
        chunks: List of text chunks to embed.

    Returns:
        List of embedding vectors, one per chunk, in the same order
        ([] when ``chunks`` is empty).

    Raises:
        RuntimeError: If every request failed (for example, a rejected API
            key). Returning only zero vectors would make every later
            similarity search meaningless.
    """
    embeddings = []
    failed_indices = []
    last_error = None

    for i, chunk in enumerate(chunks):
        try:
            # Create embedding for this chunk
            response = client.embeddings.create(
                input=chunk,
                model="text-embedding-3-small"
            )
            embeddings.append(response.data[0].embedding)

            # Show progress every 10 chunks
            if (i + 1) % 10 == 0:
                print(f"  📦 Processed {i + 1}/{len(chunks)} chunks")

        except Exception as e:
            print(f"Error with chunk {i}: {e}")
            # Placeholder; replaced by a zero vector once the dimension is known
            embeddings.append(None)
            failed_indices.append(i)
            last_error = e

    if failed_indices:
        if len(failed_indices) == len(embeddings):
            raise RuntimeError(
                f"All {len(embeddings)} embedding requests failed; "
                f"last error: {last_error}"
            ) from last_error

        # Size the fallback from a real embedding instead of assuming 1536.
        dimension = len(next(vec for vec in embeddings if vec is not None))
        for index in failed_indices:
            embeddings[index] = [0] * dimension
        print(
            f"Warning: {len(failed_indices)} of {len(embeddings)} chunks "
            "could not be embedded and use zero vectors."
        )

    return embeddings

#  THIS RUNS ONLY ONCE
embeddings = create_embeddings(chunks)

# create_embeddings substitutes an all-zero vector for a chunk whose API call
# failed, so count those before reporting success.
failed_count = sum(1 for vector in embeddings if not any(vector))
if embeddings and failed_count == len(embeddings):
    raise RuntimeError(
        "Every embedding request failed (see the errors above). "
        "Check your API key and re-run the setup cell."
    )
if failed_count:
    print(f" Warning: {failed_count} of {len(embeddings)} chunks have no embedding (zero vector).")
else:
    print(" Embeddings created successfully!")
print(f"Embeddings shape: {np.array(embeddings).shape}")


 Step 4: Creating embeddings from textbook content...
This may take a few minutes depending on textbook size...
Error with chunk 0: Error code: 401 - {'error': {'message': 'Incorrect API key provided: sk-AdfvF***************************************bggl. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}
Error with chunk 1: Error code: 401 - {'error': {'message': 'Incorrect API key provided: sk-AdfvF***************************************bggl. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}
Error with chunk 2: Error code: 401 - {'error': {'message': 'Incorrect API key provided: sk-AdfvF***************************************bggl. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}
Er

In [None]:

# QUESTION ANSWERING SYSTEM

print("\n Step 5: Q&A System is ready!")

def get_relevant_textbook_content(question, top_k=3):
    """Find the most relevant parts of the textbook for a question.

    Embeds the question with the module-level ``client`` and ranks the
    module-level ``chunks`` by cosine similarity against ``embeddings``.

    Args:
        question: The question text to search for.
        top_k: Maximum number of chunks to return. Values larger than the
            number of chunks return all of them; values <= 0 return no content.

    Returns:
        The selected chunks, most similar first, joined by blank lines.
        Returns "" when ``top_k`` <= 0. If anything fails, the error is
        printed and the first chunk (or "No content available" when there are
        no chunks) is returned as a fallback.
    """
    # Guard explicitly: a slice of [-0:] would select every chunk, not none.
    if top_k <= 0:
        return ""

    try:
        # Create embedding for the question
        response = client.embeddings.create(
            input=question,
            model="text-embedding-3-small"
        )
        question_embedding = response.data[0].embedding

        # Compare with all textbook embeddings
        similarities = cosine_similarity([question_embedding], embeddings)

        # Get top matching chunks: argsort is ascending, so take the last
        # top_k indices and reverse them to put the best match first.
        top_indices = np.argsort(similarities[0])[-top_k:][::-1]
        relevant_chunks = [chunks[i] for i in top_indices]

        return "\n\n".join(relevant_chunks)

    except Exception as e:
        print(f"Error finding relevant content: {e}")
        # NOTE(review): the fallback chunk is not chosen by relevance; callers
        # cannot tell it apart from a real search result.
        return chunks[0] if chunks else "No content available"

def answer_question(question):
    """Return a tutor-style answer to ``question`` based on retrieved textbook text.

    Looks up relevant content with ``get_relevant_textbook_content``, embeds it
    in a fixed instruction prompt, and sends that to the chat model through the
    module-level ``client``.

    Args:
        question: The question to answer.

    Returns:
        The content of the first completion choice, or the string
        "Error: <message>" if any step raises.
    """
    try:
        # Textbook passages the answer should be based on
        textbook_excerpt = get_relevant_textbook_content(question)

        # The prompt is sent exactly as written here, indentation included
        tutor_prompt = f"""
        You are a helpful tutor for Class 5 students using the "English For Today" textbook.

        Relevant Textbook Content:
        {textbook_excerpt}

        Question: {question}

        Instructions:
        1. Answer clearly and simply for a Class 5 student
        2. Use ONLY information from the textbook content above
        3. If the answer isn't in the textbook, say "I cannot find this in your textbook"
        4. Keep answer short and easy to understand
        5. Be friendly and encouraging

        Answer:
        """

        chat_messages = [
            {"role": "system", "content": "You are a helpful Class 5 English tutor."},
            {"role": "user", "content": tutor_prompt},
        ]

        # Ask the chat model for the answer
        completion = client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=chat_messages,
            temperature=0.3,
            max_tokens=150,
        )

        first_choice = completion.choices[0]
        return first_choice.message.content

    except Exception as error:
        # Errors are returned as text rather than raised
        return "Error: {}".format(error)


 Step 5: Q&A System is ready!


In [None]:

# TEST THE SYSTEM

print("\nTesting the system with sample questions...")

# Sample questions that are each passed through answer_question below
test_questions = [
    "What should we eat to be healthy?",
    "Tell me about Saikat's family",
    "How many days are in a leap year?",
    "What is the Food Pyramid?",
    "How do we greet people in English?",
]

# Print each question with its 1-based number, then the answer it receives
question_number = 0
for question in test_questions:
    question_number += 1
    print("\n{}.Question: {}".format(question_number, question))
    answer = answer_question(question)
    print("Answer: {}".format(answer))


Testing the system with sample questions...

1.Question: What should we eat to be healthy?
Error finding relevant content: Error code: 401 - {'error': {'message': 'Incorrect API key provided: sk-or-v1*************************************************************cc63. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}
Answer: Error: Error code: 401 - {'error': {'message': 'Incorrect API key provided: sk-or-v1*************************************************************cc63. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}

2.Question: Tell me about Saikat's family
Error finding relevant content: Error code: 401 - {'error': {'message': 'Incorrect API key provided: sk-or-v1*************************************************************cc63. You can find your API key at https://platform.op

In [None]:

#  INTERACTIVE Q&A SESSION
# Banner with the available commands
print("\n" + "="*60)
print("ENGLISH FOR TODAY - CLASS 5 Q&A SYSTEM")
print("="*60)
print("I can answer questions from your textbook!")
print("Type 'exit' to quit")
print("Type 'help' for example questions")
print("Type 'sample' to see textbook content")
print("="*60)

# Shown when the user types 'help'
example_questions = [
    "What should we eat to stay healthy?",
    "Tell me about the hare and tortoise story",
    "How many months are in a year?",
    "What are the tourist places in Bangladesh?",
    "How do I introduce myself in English?"
]

# Read-answer loop: runs until the user types 'exit' or input is interrupted
while True:
    print("\n" + "-"*40)
    try:
        user_input = input("\n Your question: ").strip()
    except (EOFError, KeyboardInterrupt):
        # Interrupting the cell or closing the input ends the session cleanly
        # instead of leaving a traceback.
        print("\n Goodbye! Happy learning!")
        break

    # Commands are case-insensitive; normalise once for all comparisons
    command = user_input.lower()

    if command == 'exit':
        print(" Goodbye! Happy learning!")
        break

    elif command == 'help':
        print("\n Try these questions:")
        for i, example in enumerate(example_questions, 1):
            print(f"   {i}. {example}")
        continue

    elif command == 'sample':
        print("\nTextbook sample (first 3 chunks):")
        for i, chunk in enumerate(chunks[:3], 1):
            print(f"   {i}. {chunk[:100]}...")
        continue

    elif not user_input:
        print("Please enter a question.")
        continue

    # Answer the question
    print("Searching textbook...")
    answer = answer_question(user_input)
    print(f"\nAnswer: {answer}")

# Farewell message (stray leading ':' removed)
print("\nThank you for using the English For Today Q&A System!")


ENGLISH FOR TODAY - CLASS 5 Q&A SYSTEM
I can answer questions from your textbook!
Type 'exit' to quit
Type 'help' for example questions
Type 'sample' to see textbook content

----------------------------------------

 Your question: sakirars father name
Searching textbook...
Error finding relevant content: Error code: 401 - {'error': {'message': 'Incorrect API key provided: sk-or-v1*************************************************************cc63. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}

Answer: Error: Error code: 401 - {'error': {'message': 'Incorrect API key provided: sk-or-v1*************************************************************cc63. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}

----------------------------------------

 Your question: exit
 Goodbye! Happy l