In [None]:
"""
PDF String Search Tool

This script provides functionality to search for a specific string within PDF files.
It includes two main functions: one that searches within a single PDF file and another 
that recursively searches through all PDF files in a specified directory (and its subdirectories).

Functions:
- search_in_pdf(file_path, search_string, context=200): 
  Searches for a string in a single PDF file. For the first page that contains the string, it prints the 
  page number and a snippet of the text around the first occurrence on that page, then stops; 
  later occurrences in the same file are not reported. 
  It accepts a file path to the PDF, the search string, and an optional 'context' parameter 
  that sets how many characters of surrounding text are shown on each side of the match.

- search_in_directory(search_string, directory=os.getcwd()): 
  Recursively searches for the string in all PDF files located in a specified directory and its subdirectories. 
  It takes the search string and an optional directory path, defaulting to the current working directory.

Example:
To search for the phrase "Ramana Maharshi" in all PDFs in the current directory:
    search_string = "Ramana Maharshi"
    search_in_directory(search_string)

Dependencies:
- PyPDF2
- os
"""


In [None]:
import os
import PyPDF2

def search_in_pdf(file_path, search_string, context=200):
    """
    Reports the first page of a PDF file whose text contains a given string.

    Pages are scanned in order. For the first page whose extracted text contains
    'search_string' (exact, case-sensitive substring match), the function prints the
    1-based page number and a snippet of text around the first occurrence on that page,
    then stops scanning. Nothing is printed when no page matches.

    Parameters:
    file_path (str): Path of the PDF file to open.
    search_string (str): The text to look for.
    context (int, optional): The number of characters to include on each side of the
                             search string in the printed snippet. Defaults to 200.

    Returns:
    None: Only outputs the results to the console. Any exception raised while opening
          or reading the file is caught and reported as an "Error reading ..." line.
    """
    try:
        with open(file_path, 'rb') as pdf_file:
            pdf = PyPDF2.PdfReader(pdf_file)
            for page_number, page in enumerate(pdf.pages, start=1):
                page_text = page.extract_text()
                # Skip pages with no extractable text or without a match.
                if not page_text or search_string not in page_text:
                    continue
                match_start = page_text.index(search_string)
                match_end = match_start + len(search_string)
                # Clamp the context window to the bounds of the page text.
                window_start = max(match_start - context, 0)
                window_end = min(match_end + context, len(page_text))
                snippet = page_text[window_start:window_end]
                print(f"'{search_string}' found in {file_path} on page {page_number}")
                print(f"Context: {snippet}")
                # Only the first matching page is reported.
                break
    except Exception as error:
        print(f"Error reading {file_path}: {error}")

def search_in_directory(search_string, directory=None):
    """
    Recursively searches for a string in all PDF files within a specified directory and its subdirectories.

    This function walks through the given directory and its subdirectories, looking for files
    whose name ends in '.pdf' (case-insensitive). Each such file is passed to 'search_in_pdf',
    which prints any match to the console.

    Parameters:
    search_string (str): The text to look for in each PDF file.
    directory (str, optional): Root directory of the search. When omitted (or None), the
                               current working directory at the time of the call is used.

    Returns:
    None: Prints the results.
    """
    if directory is None:
        # Resolve the working directory at call time. Writing os.getcwd() as the
        # default in the signature would freeze the directory that was current when
        # the function was defined and ignore any later os.chdir().
        directory = os.getcwd()

    for root, _dirs, files in os.walk(directory):
        for file_name in files:
            if file_name.lower().endswith('.pdf'):
                search_in_pdf(os.path.join(root, file_name), search_string)

# Example run: look for the phrase below in every PDF under the default
# directory of search_in_directory.
search_string = "Ramana Maharshi"
search_in_directory(search_string=search_string)