In [1]:
!pip install -q llama-cpp-python pypdf sentence-transformers faiss-cpu pdfminer.six tqdm PyPDF2
!wget -q https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/main/llama-2-7b-chat.Q4_K_M.gguf

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m67.9/67.9 MB[0m [31m9.0 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Installing backend dependencies ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m303.4/303.4 kB[0m [31m8.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m31.3/31.3 MB[0m [31m24.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m5.6/5.6 MB[0m [31m26.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m232.6/232.6 kB[0m [31m6.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m45.5/45.5 kB[0m [31m1.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[

# Model Setep



In [2]:
from llama_cpp import Llama
import os

# Initialize the model
llm = Llama(
    model_path="llama-2-7b-chat.Q4_K_M.gguf",
    n_ctx=2048,
    n_threads=4,
    n_gpu_layers=40 if 'CUDA_VISIBLE_DEVICES' in os.environ else 0
)


llama_model_loader: loaded meta data with 19 key-value pairs and 291 tensors from llama-2-7b-chat.Q4_K_M.gguf (version GGUF V2)
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = LLaMA v2
llama_model_loader: - kv   2:                       llama.context_length u32              = 4096
llama_model_loader: - kv   3:                     llama.embedding_length u32              = 4096
llama_model_loader: - kv   4:                          llama.block_count u32              = 32
llama_model_loader: - kv   5:                  llama.feed_forward_length u32              = 11008
llama_model_loader: - kv   6:                 llama.rope.dimension_count u32              = 128
llama_model_loader: - kv   7:                 llama.attention.head_count u32              = 

# PDF Processing

In [3]:
import PyPDF2
from io import BytesIO
from pdfminer.high_level import extract_text
from pdfminer.layout import LAParams
from pdfminer.high_level import extract_text_to_fp

def pdf_to_text(file_bytes):
    """Extract text from PDF using multiple methods"""
    text = ""

    # Method 1: Try pdfminer first
    try:
        output = BytesIO()
        laparams = LAParams(line_margin=0.5)
        extract_text_to_fp(BytesIO(file_bytes), output, laparams=laparams)
        text = output.getvalue().decode('utf-8')
    except Exception as e:
        print(f"pdfminer failed: {str(e)}")

    # Method 2: Fallback to PyPDF2
    if len(text) < 100:
        try:
            reader = PyPDF2.PdfReader(BytesIO(file_bytes))
            text = "\n".join([page.extract_text() for page in reader.pages if page.extract_text()])
        except Exception as e:
            print(f"PyPDF2 failed: {str(e)}")

    # Basic cleaning
    text = ' '.join(text.split())
    return text[:50000]  # Limit to 50k characters


# Questions generating

In [4]:
from tqdm import tqdm
import gc

def generate_questions_from_chunk(chunk):
    """Generate questions from a text chunk"""
    prompt = f"""Generate 3 quiz questions from this text:
    {chunk}

    Format each question like this:
    Q) [Question text]
    A) [Option A] B) [Option B] C) [Option C] D) [Option D]
    Answer: [Correct letter]
    Explanation: [Brief explanation]
    ---
    """

    try:
        response = llm.create_completion(
            prompt=prompt,
            max_tokens=1000,
            temperature=0.7,
            stop=["Q)"]
        )
        return response['choices'][0]['text']
    except Exception as e:
        print(f"Question generation failed: {str(e)}")
        return ""

def generate_quiz(text, num_questions=10):
    """Main quiz generation function"""
    if not text:
        return []

    chunk_size = 1500
    questions = []

    # Process text in chunks
    for i in tqdm(range(0, len(text), chunk_size), desc="Processing text"):
        chunk = text[i:i+chunk_size]
        if not chunk.strip():
            continue

        generated = generate_questions_from_chunk(chunk)
        if generated:
            questions.extend([q.strip() for q in generated.split('---') if q.strip()])

        if len(questions) >= num_questions:
            break

        gc.collect()  # Help with memory management

    return questions[:num_questions]

# Main Application

In [5]:
from google.colab import files
import time

class QuizGenerator:
    def __init__(self):
        self.llm = llm
        self.current_text = ""
        self.questions = []

    def upload_and_process(self):
        """Handle PDF upload and processing"""
        print("📤 Upload a PDF file:")
        uploaded = files.upload()

        if not uploaded:
            print("No file uploaded!")
            return False

        file_name = next(iter(uploaded))
        print(f"Processing {file_name}...")

        # Extract text
        self.current_text = pdf_to_text(uploaded[file_name])
        if not self.current_text:
            print("Failed to extract text from PDF")
            return False

        # Generate questions
        print("Generating questions...")
        self.questions = generate_quiz(self.current_text)
        return True

    def show_questions(self):
        """Display generated questions"""
        if not self.questions:
            print("No questions generated yet")
            return

        print("\nGenerated Questions:")
        for i, q in enumerate(self.questions, 1):
            print(f"\nQuestion {i}:")
            print(q)
            print("-" * 50)

    def chat_about_content(self):
        """Chat about the PDF content"""
        if not self.current_text:
            print("No content available - upload a PDF first")
            return

        print("\nChat about the content (type 'quit' to exit):")
        while True:
            user_input = input("\nYour question: ").strip()
            if user_input.lower() == 'quit':
                break

            try:
                response = self.llm.create_chat_completion(
                    messages=[{
                        "role": "system",
                        "content": "You're an assistant helping with document understanding."
                    },{
                        "role": "user",
                        "content": f"Document content: {self.current_text[:3000]}\n\nQuestion: {user_input}"
                    }],
                    max_tokens=500,
                    temperature=0.7
                )
                print("\nAssistant:", response['choices'][0]['message']['content'])
            except Exception as e:
                print(f"Error: {str(e)}")


# Main Execution

In [6]:
if __name__ == "__main__":
    generator = QuizGenerator()

    if generator.upload_and_process():
        generator.show_questions()
        generator.chat_about_content()

📤 Upload a PDF file:


Saving GC22BEDE250- Operation Dimension of Education.pdf to GC22BEDE250- Operation Dimension of Education.pdf
Processing GC22BEDE250- Operation Dimension of Education.pdf...
Generating questions...


Processing text:   0%|          | 0/11 [00:00<?, ?it/s]llama_perf_context_print:        load time =  198151.62 ms
llama_perf_context_print: prompt eval time =  198150.65 ms /   534 tokens (  371.07 ms per token,     2.69 tokens per second)
llama_perf_context_print:        eval time =     841.20 ms /     1 runs   (  841.20 ms per token,     1.19 tokens per second)
llama_perf_context_print:       total time =  198994.49 ms /   535 tokens
Processing text:   9%|▉         | 1/11 [03:19<33:11, 199.11s/it]Llama.generate: 13 prefix-match hit, remaining 366 prompt tokens to eval
llama_perf_context_print:        load time =  198151.62 ms
llama_perf_context_print: prompt eval time =  123254.15 ms /   366 tokens (  336.76 ms per token,     2.97 tokens per second)
llama_perf_context_print:        eval time =    2023.78 ms /     3 runs   (  674.59 ms per token,     1.48 tokens per second)
llama_perf_context_print:       total time =  125282.52 ms /   369 tokens
Processing text:  18%|█▊        | 2/1


Generated Questions:

Question 1:
1.
--------------------------------------------------

Question 2:
Q1) Which of the following is an example of a macro-level policy that can regulate education systems?
    A) Compulsory education legislation
    B) Quality assurance structures
    C) Accreditation rules
    D) All of the above

Answer: D) All of the above

Explanation: Macro-level policies can include compulsory education legislation, quality assurance structures, and accreditation rules, which are all examples of regulating education systems.

Q2) Which of the following macro-level choices determines the patterns of distribution of financial resources across and between regions, education levels, and special programs?
    A) Allocations of education's proportion of national budgets
    B) Sub-national governments' funding formulas
    C) Investment priorities (e.g., STEM education versus vocational training)
    D) All of the above

Answer: A) Allocations of education's proportion o