# Project 1: Read a PDF File and Save Content to Text
**Objective:**  
Read a PDF file named *"Chemistry Questions.pdf"* from the `/content` folder, extract the text, and write it to a file named *"output.txt"* in the same folder.  
Include proper error handling for:
- Missing folder
- Missing PDF file
- Writing issues

In [3]:
!pip install PyPDF2

Collecting PyPDF2
  Downloading pypdf2-3.0.1-py3-none-any.whl.metadata (6.8 kB)
Downloading pypdf2-3.0.1-py3-none-any.whl (232 kB)
Installing collected packages: PyPDF2
Successfully installed PyPDF2-3.0.1


In [28]:
import os
from PyPDF2 import PdfReader

In [29]:
# Working folder plus the input/output file names used by the cells below.
folder_path = './content'
pdf_file, output_file = 'Chemistry Questions.pdf', 'output.txt'

# Both files live inside the working folder.
pdf_path, output_path = (
    os.path.join(folder_path, name) for name in (pdf_file, output_file)
)

In [30]:
# Report whether the working folder is present; create it when it is not.
if os.path.exists(folder_path):
    print(f"Folder '{folder_path}' exists")
else:
    print(f"Folder '{folder_path}' not found. Creating...")
    os.makedirs(folder_path)

Folder './content' exists


In [31]:
# Read the whole PDF into `pdf_text`: the text of every page, each followed
# by a newline. `pdf_text` starts empty and is only assigned after all pages
# were extracted, so a failure never leaves partial text behind.
pdf_text = ''

try:
    if not os.path.exists(pdf_path):
        raise FileNotFoundError(f"'{pdf_file}' not found in '{folder_path}'")

    pdf_reader = PdfReader(pdf_path)

    # `or ''` keeps the concatenation safe if a page yields no text;
    # a single join replaces repeated `+=` on a growing string.
    pdf_text = ''.join(
        (page.extract_text() or '') + '\n' for page in pdf_reader.pages
    )

    print("Pdf file read successfully!")

except FileNotFoundError as e:
    print("Error:", e)
except Exception as e:
    # Notebook-level boundary: an unreadable or malformed PDF should be
    # reported here rather than abort the cell with a traceback.
    print("Error reading PDF:", e)

Pdf file read successfully!


In [32]:
# Write the extracted text to the output file. Nothing is written when the
# text is empty or whitespace-only; every failure is printed, not raised.
try:
    if pdf_text.strip() == '':
        raise ValueError("No content to write to output file!")

    # The context manager closes (and therefore flushes) the file even if
    # the write fails; the original left the handle open.
    with open(output_path, 'w', encoding='utf-8') as f:
        f.write(pdf_text)

    print(f"Text written successfully to '{output_file}'")

except Exception as e:
    print("Error writing to file:", e)
    

Text written successfully to 'output.txt'


# Project 3: Read Content from a Particular Page in a PDF

In [34]:
# Ask for a 1-based page number. Non-numeric or non-positive input is
# reported and leaves `page_num` as None so later cells can skip their work.
try:
    page_num = int(input("Enter page number you want to read:"))
    if not page_num >= 1:
        raise ValueError("Page number must be 1 or greater")

except ValueError as err:
    print(f"Invalid input: {err}")
    page_num = None

Enter page number you want to read: 1


In [35]:
# Extract the text of the requested page from the already-opened
# `pdf_reader`. `page_text` is always defined afterwards (empty on invalid
# input or on any error) so the write cell below can test it safely; the
# original left it undefined when `page_num` was None.
page_text = ''

if page_num:
    try:
        total_pages = len(pdf_reader.pages)

        if page_num > total_pages:
            raise IndexError(f"Page number out of range! Pdf has only '{total_pages}' pages")

        # page_num is 1-based, the pages sequence is 0-based.
        page = pdf_reader.pages[page_num - 1]
        # `or ''` keeps page_text a string if extraction yields nothing.
        page_text = page.extract_text() or ''

        print(f"Successfully extracted text form page '{page_num}'")

    except (FileNotFoundError, IndexError) as e:
        print(f"Error: {e}")
        page_text = ''
    except Exception as e:
        # Also covers `pdf_reader` not existing because the read cell failed.
        print(f"Unexpected error: {e}")
        page_text = ''
    

Successfully extracted text form page '1'


In [37]:
# Write the single page's text to the output file, replacing its previous
# content. Empty or whitespace-only text is rejected; failures are printed.
try:
    if not page_text.strip():
        raise ValueError("No text extracted from the page.")

    # Context manager guarantees the file is closed and flushed; the
    # original never closed the handle.
    with open(output_path, 'w', encoding='utf-8') as f:
        f.write(page_text)

    print(f"Page {page_num} content written successfully to '{output_file}'.")

except Exception as e:
    print(f"Error writing to output file: {e}")


Page 1 content written successfully to 'output.txt'.


# Project 4: Extract PDF Content Using Regular Expression from a Config File

In [5]:
import os
import re
from PyPDF2 import PdfReader

In [7]:
# Working folder and the three file names this project reads or writes.
folder_path = './content'
pdf_file, config_file, output_file = (
    'Chemistry Questions.pdf',
    'config.txt',
    'output.txt',
)

# Full paths, all inside the working folder.
pdf_path, config_path, output_path = (
    os.path.join(folder_path, name)
    for name in (pdf_file, config_file, output_file)
)

In [8]:
# Report whether the working folder is present; create it when it is not.
if os.path.exists(folder_path):
    print(f"Folder '{folder_path}' exists.")
else:
    print(f"'{folder_path}' not found. Creating folder...")
    os.makedirs(folder_path)

Folder './content' exists.


In [9]:
# Ask for a 1-based page number. Non-numeric or non-positive input is
# reported and leaves `page_num` as None so the extraction cell is skipped.
try:
    page_num = int(input("Enter page number to read: "))
    if not page_num >= 1:
        raise ValueError("Page number must be 1 or greater.")
except ValueError as err:
    print(f" Invalid input: {err}")
    page_num = None

Enter page number to read:  10


In [10]:
# Open the PDF and pull the text of the requested page into `page_text`.
# `page_text` stays '' when no valid page number was entered, when the file
# is missing, or when the page number exceeds the page count.
page_text = ''
if page_num:
    try:
        if not os.path.exists(pdf_path):
            raise FileNotFoundError(f"'{pdf_file}' not found in '{folder_path}'.")

        pdf_reader = PdfReader(pdf_path)
        total_pages = len(pdf_reader.pages)

        if total_pages < page_num:
            raise IndexError(f"Page number out of range. PDF has only {total_pages} pages.")

        # page_num is 1-based, the pages sequence is 0-based.
        page = pdf_reader.pages[page_num - 1]
        page_text = page.extract_text()
        if not page_text:
            # Normalise a missing/empty extraction result to ''.
            page_text = ''
        print(f" Extracted text from page {page_num}.")

    except (FileNotFoundError, IndexError) as err:
        print(f" Error: {err}")
        page_text = ''

 Extracted text from page 10.


In [19]:
# Load the regex from the config file: the first line whose stripped text
# starts with "regex" supplies the pattern (everything after the first '=').
# `regex_pattern` stays None when the file or the key is missing.
regex_pattern = None

try:
    if not os.path.exists(config_path):
        raise FileNotFoundError("Configuration file 'config.txt' not found.")

    # Context manager closes the config file on every path, including the
    # early `break`; the original never closed the handle.
    with open(config_path, 'r', encoding='utf-8') as f:
        for line in f:
            if line.strip().startswith("regex"):
                # A "regex" line without '=' raises ValueError here and is
                # reported by the generic handler below.
                _, value = line.split('=', 1)
                regex_pattern = value.strip()
                break

    if not regex_pattern:
        raise KeyError("No 'regex' key found in configuration file.")

    print(f" Loaded regex pattern: {regex_pattern}")

except FileNotFoundError as e:
    print(f" {e}")
except KeyError as e:
    # str(KeyError) is the repr of its argument (extra quotes); print the
    # raw message instead.
    print(f" {e.args[0]}")
except Exception as e:
    print(f" Error reading config: {e}")

 Loaded regex pattern: \b[A-Z][a-z]+\b


In [13]:
# Apply the configured regex to the page text. `matches` stays [] when
# either input is missing or the pattern does not compile.
matches = []
if regex_pattern and page_text:
    try:
        matches = re.findall(regex_pattern, page_text)
    except re.error as err:
        print(f" Invalid regex pattern: {err}")
    else:
        print(
            f" Found {len(matches)} matches."
            if matches
            else " No matches found for the given regex."
        )

 Found 27 matches.


In [20]:
# Write the regex matches to the output file, one per line. Nothing is
# written when there are no matches; failures are printed, not raised.
try:
    if not matches:
        raise ValueError("No data to write to output file.")

    # Context manager guarantees the file is closed and flushed; the
    # original never closed the handle.
    with open(output_path, 'w', encoding='utf-8') as f:
        f.write('\n'.join(matches))

    print(f" Extracted matches written successfully to '{output_file}'.")

except Exception as e:
    print(f" Error writing to file: {e}")

 Extracted matches written successfully to 'output.txt'.


# Project 5: Store Extracted Questions in a SQL Database (implemented with SQL Server rather than MySQL)

In [29]:
!pip install pyodbc



In [47]:
import os
import re
import pyodbc
from PyPDF2 import PdfReader

In [73]:
# Working folder and the two input file names this project reads.
folder_path = './content'
pdf_file, config_file = 'Chemistry Questions.pdf', 'config.txt'

# Full paths, both inside the working folder.
pdf_path, config_path = (
    os.path.join(folder_path, name) for name in (pdf_file, config_file)
)

In [74]:
# Report whether the working folder is present; create it when it is not.
if os.path.exists(folder_path):
    print(f"Folder '{folder_path}' exists.")
else:
    print(f"'{folder_path}' not found. Creating folder...")
    os.makedirs(folder_path)

Folder './content' exists.


In [75]:
# Load the regex from the config file: the first line whose stripped text
# starts with "regex" supplies the pattern (everything after the first '=').
# `regex_pattern` stays None when the file or the key is missing.
regex_pattern = None

try:
    if not os.path.exists(config_path):
        raise FileNotFoundError("Configuration file not found.")

    # Context manager closes the config file on every path, including the
    # early `break`; the original never closed the handle.
    with open(config_path, 'r', encoding='utf-8') as f:
        for line in f:
            if line.strip().startswith("regex"):
                _, value = line.split('=', 1)
                regex_pattern = value.strip()
                break

    if not regex_pattern:
        raise KeyError("No 'regex' key found in configuration file.")

    print(f" Loaded regex: {regex_pattern}")

except KeyError as e:
    # str(KeyError) is the repr of its argument (extra quotes); print the
    # raw message instead.
    print(f" Config error: {e.args[0]}")
except Exception as e:
    print(f" Config error: {e}")

 Loaded regex: \d+\.\s*(.*?\?)


In [76]:
# Read the whole PDF and collect every match of the configured regex into
# `extracted_questions`. The list stays empty when no pattern was loaded,
# nothing matches, or reading/extraction fails.
extracted_questions = []

try:
    if not os.path.exists(pdf_path):
        raise FileNotFoundError(f"PDF file '{pdf_file}' not found.")

    reader = PdfReader(pdf_path)

    # Text of every page, each followed by a newline. `or ""` keeps the
    # concatenation safe if a page yields no text; one join replaces
    # repeated `+=` on a growing string.
    all_text = "".join(
        (page.extract_text() or "") + "\n" for page in reader.pages
    )

    if regex_pattern:
        # DOTALL lets '.' cross line breaks, so a match may span lines.
        extracted_questions = re.findall(regex_pattern, all_text, re.MULTILINE | re.DOTALL)

    if extracted_questions:
        print(f" Extracted {len(extracted_questions)} questions.")
    else:
        print(" No matches found for the regex pattern.")

except Exception as e:
    print(f" PDF reading or extraction error: {e}")

 Extracted 46 questions.


In [77]:
# --- Connect to SQL Server ---
# Creates the `questions` table if needed and inserts every extracted
# question with fixed subject/chapter values. Errors are printed, not raised.
#
# `conn` is reset first so the cleanup below only ever closes the connection
# opened by THIS run. The original checked `'conn' in locals()`, which in a
# notebook also matches a stale, already-closed `conn` from an earlier cell.
conn = None

try:
    conn = pyodbc.connect(
        'DRIVER={ODBC Driver 17 for SQL Server};'
        'SERVER=localhost\\SQLEXPRESS;'
        'DATABASE=python_assignment;'
        'Trusted_Connection=yes;'
    )
    cursor = conn.cursor()
    print(" Connected to SQL Server successfully!")

    # --- Create table if not exists ---
    cursor.execute('''
        IF NOT EXISTS (SELECT * FROM sysobjects WHERE name='questions' AND xtype='U')
        CREATE TABLE questions (
            id INT IDENTITY(1,1) PRIMARY KEY,
            subject_name NVARCHAR(100),
            chapter_name NVARCHAR(100),
            question_text NVARCHAR(MAX),
            answer_options NVARCHAR(MAX)
        )
    ''')
    conn.commit()

    # --- Insert extracted data ---
    # Parameter markers keep the question text out of the SQL string.
    for q in extracted_questions:
        cursor.execute('''
            INSERT INTO questions (subject_name, chapter_name, question_text, answer_options)
            VALUES (?, ?, ?, ?)
        ''', ("Chemistry", "Chapter 1", q, "N/A"))

    # One commit for the whole batch.
    conn.commit()
    print(f" Inserted {len(extracted_questions)} questions successfully!")

except pyodbc.Error as e:
    print(f" Database connection or SQL error: {e}")

finally:
    if conn is not None:
        conn.close()

 Connected to SQL Server successfully!
 Inserted 46 questions successfully!


# Project 6: Load All Questions from a Chapter

In [82]:
import pyodbc

# Open a connection and cursor for the query cell below.
# `conn` is None afterwards when connecting fails.
try:
    conn = pyodbc.connect(
        ';'.join((
            'DRIVER={ODBC Driver 17 for SQL Server}',
            'SERVER=localhost\\SQLEXPRESS',
            'DATABASE=python_assignment',
            'Trusted_Connection=yes',
        )) + ';'
    )
    cursor = conn.cursor()
    print(" Connected to SQL Server successfully!")

except pyodbc.Error as err:
    print(f" Database connection error: {err}")
    conn = None

✅ Connected to SQL Server successfully!


In [84]:
# Ask for a chapter name and print every stored question for that chapter.
# An empty name is rejected and leaves the connection open so the cell can
# simply be re-run; once a query has been attempted the connection is closed.
if conn:
    chapter_name = input("Enter chapter name: ").strip()

    # --- Validate input ---
    if not chapter_name:
        print(" Error: Chapter name cannot be empty.")
    else:
        try:
            # Check if chapter exists
            cursor.execute("SELECT question_text FROM questions WHERE chapter_name = ?", (chapter_name,))
            rows = cursor.fetchall()

            if not rows:
                print(f" No questions found for chapter: '{chapter_name}'")
            else:
                print(f"\n Found {len(rows)} question(s) in '{chapter_name}':\n")
                for i, row in enumerate(rows, start=1):
                    # question_text is a nullable column; treat NULL as ''.
                    print(f"Q{i}. {(row[0] or '').strip()}")

        except pyodbc.Error as e:
            print(f" SQL query error: {e}")

        finally:
            conn.close()
            # Drop the reference so re-running this cell skips the block
            # instead of reusing (and re-closing) a closed connection.
            conn = None


Enter chapter name:  


 Error: Chapter name cannot be empty.


# Project 8: Support Multiple Question Types Using OOP and Inheritance

In [85]:
import pyodbc
from abc import ABC, abstractmethod

# --- Database Connection Setup ---
def get_db_connection():
    """Open a connection to the local ``python_assignment`` database.

    Returns the pyodbc connection on success. On a ``pyodbc.Error`` the
    error is printed and ``None`` is returned instead of raising.
    """
    settings = (
        'DRIVER={ODBC Driver 17 for SQL Server}',
        'SERVER=localhost\\SQLEXPRESS',
        'DATABASE=python_assignment',
        'Trusted_Connection=yes',
    )
    try:
        conn = pyodbc.connect(';'.join(settings) + ';')
    except pyodbc.Error as err:
        print(f" Database connection error: {err}")
        return None

    print(" Connected to SQL Server.")
    return conn

In [86]:
# --- Abstract Base Class ---
class Question(ABC):
    """Abstract base holding the fields every question type shares.

    Concrete subclasses must override :meth:`store`; the base class itself
    cannot be instantiated.
    """

    def __init__(self, subject_name, chapter_name, question_text):
        self.subject_name, self.chapter_name, self.question_text = (
            subject_name,
            chapter_name,
            question_text,
        )

    @abstractmethod
    def store(self, cursor):
        """Persist the question through *cursor*; defined by each subclass."""

In [87]:
# --- Objective Question ---
class ObjectiveQuestion(Question):
    """Multiple-choice question; its options are stored newline-separated."""

    def __init__(self, subject_name, chapter_name, question_text, options):
        super().__init__(subject_name, chapter_name, question_text)
        # list of options
        self.options = options

    def store(self, cursor):
        """Insert this question via *cursor* and print a confirmation.

        The options are joined with newlines into ``answer_options``.
        Committing is left to the caller.
        """
        row = (
            self.subject_name,
            self.chapter_name,
            self.question_text,
            "\n".join(self.options),
        )
        cursor.execute('''
            INSERT INTO questions (subject_name, chapter_name, question_text, answer_options)
            VALUES (?, ?, ?, ?)
        ''', row)
        print(f"✅ Stored Objective Question: {self.question_text}")


# --- True/False Question ---
class TrueFalseQuestion(Question):
    """True/False question that records its correct answer."""

    def __init__(self, subject_name, chapter_name, question_text, correct_answer):
        super().__init__(subject_name, chapter_name, question_text)
        self.correct_answer = correct_answer

    def store(self, cursor):
        """Insert this question via *cursor* and print a confirmation.

        ``answer_options`` holds the "True/False" marker followed by the
        correct answer. Committing is left to the caller.
        """
        answer_field = f"True/False\nAnswer: {self.correct_answer}"
        row = (self.subject_name, self.chapter_name, self.question_text, answer_field)
        cursor.execute('''
            INSERT INTO questions (subject_name, chapter_name, question_text, answer_options)
            VALUES (?, ?, ?, ?)
        ''', row)
        print(f"✅ Stored True/False Question: {self.question_text}")


# --- Subjective Question ---
class SubjectiveQuestion(Question):
    """Free-text question stored together with a sample answer."""

    def __init__(self, subject_name, chapter_name, question_text, sample_answer):
        super().__init__(subject_name, chapter_name, question_text)
        self.sample_answer = sample_answer

    def store(self, cursor):
        """Insert this question via *cursor* and print a confirmation.

        ``answer_options`` holds the sample answer prefixed with "Answer: ".
        Committing is left to the caller.
        """
        answer_field = f"Answer: {self.sample_answer}"
        row = (self.subject_name, self.chapter_name, self.question_text, answer_field)
        cursor.execute('''
            INSERT INTO questions (subject_name, chapter_name, question_text, answer_options)
            VALUES (?, ?, ?, ?)
        ''', row)
        print(f"✅ Stored Subjective Question: {self.question_text}")


In [88]:
def add_question():
    """Interactively collect one question and insert it into ``questions``.

    Prompts for the question type, subject, chapter and text, then for the
    type-specific details (options, correct answer, or sample answer),
    builds the matching ``Question`` subclass and stores it.

    Returns ``None`` in every case. Nothing is raised for ordinary errors:
    a failed connection returns early, an unknown question type prints a
    message, and any other exception is printed. The connection is closed
    on every path once it has been opened.
    """
    conn = get_db_connection()
    if not conn:
        return

    try:
        # Cursor creation and the table DDL live inside the try block so a
        # failure here still reaches `finally` and closes the connection
        # (the original ran them before the try and could leak it).
        cursor = conn.cursor()

        # Ensure table exists
        cursor.execute('''
            IF NOT EXISTS (SELECT * FROM sysobjects WHERE name='questions' AND xtype='U')
            CREATE TABLE questions (
                id INT IDENTITY(1,1) PRIMARY KEY,
                subject_name NVARCHAR(100),
                chapter_name NVARCHAR(100),
                question_text NVARCHAR(MAX),
                answer_options NVARCHAR(MAX)
            )
        ''')
        conn.commit()

        print("\n--- Add a Question ---")
        qtype = input("Enter question type (objective / truefalse / subjective): ").strip().lower()
        subject = input("Enter subject name: ").strip()
        chapter = input("Enter chapter name: ").strip()
        question_text = input("Enter question text: ").strip()

        if qtype == "objective":
            options = []
            print("Enter multiple-choice options (type 'done' to finish):")
            while True:
                opt = input("Option: ").strip()
                if opt.lower() == "done":
                    break
                options.append(opt)
            q = ObjectiveQuestion(subject, chapter, question_text, options)

        elif qtype == "truefalse":
            correct = input("Enter correct answer (True/False): ").strip().capitalize()
            q = TrueFalseQuestion(subject, chapter, question_text, correct)

        elif qtype == "subjective":
            sample_answer = input("Enter sample answer: ").strip()
            q = SubjectiveQuestion(subject, chapter, question_text, sample_answer)

        else:
            print(" Invalid question type.")
            return  # `finally` below still closes the connection

        q.store(cursor)
        conn.commit()
        print(" Question saved successfully!")

    except Exception as e:
        print(f" Error: {e}")

    finally:
        conn.close()


In [89]:
# Run the interactive flow once: connect, prompt for the question details,
# and store the resulting question.
add_question()

 Connected to SQL Server.

--- Add a Question ---


Enter question type (objective / truefalse / subjective):  objective
Enter subject name:  Chemistry
Enter chapter name:  Basics of Chemistry
Enter question text:  What is the SI unit of mass


Enter multiple-choice options (type 'done' to finish):


Option:  A
Option:  B
Option:  done


✅ Stored Objective Question: What is the SI unit of mass
 Question saved successfully!
