In [1]:
import pdfplumber

def extract_text_from_pdf(pdf_path: str) -> str:
    """
    Read every page of a PDF with pdfplumber and return the combined text.

    Pages for which pdfplumber yields no text are skipped; the text of each
    remaining page is followed by a newline. Any exception raised while
    opening or reading the file is caught and reported with ``print``; the
    text collected before the failure is still returned (an empty string if
    nothing was read).

    Args:
        pdf_path (str): The path to the PDF file.

    Returns:
        str: The text gathered from the PDF, with a newline after each page.
    """
    page_chunks = []

    try:
        with pdfplumber.open(pdf_path) as document:
            for current_page in document.pages:
                page_text = current_page.extract_text()
                if not page_text:
                    # Nothing extractable on this page (None or "").
                    continue
                page_chunks.append(page_text + "\n")
    except Exception as e:
        # Best-effort: report the problem and fall through with what we have.
        print(f"Error reading PDF file: {e}")

    return "".join(page_chunks)


In [3]:
import psycopg2

import os


def load_db_config(env=None) -> dict:
    """
    Build the keyword arguments passed to ``psycopg2.connect``.

    Values are read from the standard libpq environment variables so that no
    credentials are stored in the notebook. Non-secret settings fall back to
    the local defaults this notebook used before.

    Args:
        env: Mapping to read settings from. Defaults to ``os.environ``.

    Returns:
        dict: Keys ``dbname``, ``user``, ``password``, ``host`` and ``port``
        (``port`` is always an ``int``). ``password`` is ``None`` when
        ``PGPASSWORD`` is not set.
    """
    if env is None:
        env = os.environ
    return {
        "dbname": env.get("PGDATABASE", "postgres"),
        "user": env.get("PGUSER", "postgres"),
        # Never hard-code the password; export PGPASSWORD before running.
        "password": env.get("PGPASSWORD"),
        "host": env.get("PGHOST", "localhost"),
        "port": int(env.get("PGPORT", 5432)),
    }


# NOTE(review): a literal password used to live here; treat it as exposed and
# rotate it.
db_config = load_db_config()

def create_table():
    """
    Create the ``guidelines`` table if it does not already exist.

    Connects with the module-level ``db_config``, runs the CREATE TABLE
    statement in a transaction, and always closes the connection afterwards.
    Failures are reported with ``print`` rather than raised.

    Returns:
        None
    """
    create_query = """
    CREATE TABLE IF NOT EXISTS guidelines (
        id SERIAL PRIMARY KEY,
        prog_number VARCHAR(100),
        text TEXT,
        date VARCHAR(10)  -- stores in dd-mm-yy format
    );
    """
    try:
        conn = psycopg2.connect(**db_config)
        try:
            # psycopg2's `with conn:` commits on success and rolls back on
            # error, but it does NOT close the connection.
            with conn:
                with conn.cursor() as cur:
                    cur.execute(create_query)
            print("✅ Table created successfully.")
        finally:
            # Close explicitly so repeated cell runs do not leak connections.
            conn.close()
    except Exception as e:
        print(f"❌ Error creating table: {e}")
# Create the guidelines table when this cell/script is executed directly
# (not when it is imported). The query uses CREATE TABLE IF NOT EXISTS, so
# running this more than once does not recreate an existing table.
if __name__ == "__main__":
    create_table()


✅ Table created successfully.


In [6]:
import requests
import tempfile
import pdfplumber

def extract_pdf_text_from_url(pdf_url: str, timeout: float = 30.0) -> str:
    """
    Download a PDF from a URL and extract its full text using pdfplumber.

    The downloaded bytes are parsed from memory, so no temporary file is left
    on disk. Pages with no extractable text are skipped; the remaining page
    texts are joined with newlines and the result is stripped of leading and
    trailing whitespace.

    Args:
        pdf_url (str): URL of the PDF document.
        timeout (float): Seconds to wait for the HTTP request before giving
            up. Without a timeout ``requests`` can block indefinitely.

    Returns:
        str: The extracted text.

    Raises:
        RuntimeError: If the download or the extraction fails; the original
            exception is attached as ``__cause__``.
    """
    import io  # local import keeps this cell self-contained

    try:
        response = requests.get(pdf_url, timeout=timeout)
        response.raise_for_status()

        page_texts = []
        # pdfplumber.open accepts a binary file-like object as well as a path.
        with pdfplumber.open(io.BytesIO(response.content)) as pdf:
            for page in pdf.pages:
                text = page.extract_text()
                if text:
                    page_texts.append(text)

        return "\n".join(page_texts).strip()
    except Exception as e:
        raise RuntimeError(f"Failed to extract PDF: {e}") from e
    
# Source document: BEE "Recommended Guidelines" press release (PDF).
GUIDELINES_PDF_URL = "https://beeindia.gov.in/sites/default/files/press_releases/Recommended%20Guidelines.pdf"

# Download the PDF, extract its text, and show the result.
data = extract_pdf_text_from_url(GUIDELINES_PDF_URL)
print(data)

Bureau of Energy Efficiency, Ministry of Power
Government of India
Recommended Guidelines
Subject: Energy Conservation in Building Space Cooling through recommended
optimum temperature setting
Bureau of Energy Efficiency (BEE) under the aegis of Ministry of Power (MoP)
implementing various energy efficiency and conservation schemes stipulated under Energy
Conservation Act 2001, with the primary objective of reducing the energy intensity of Indian
economy. Bureau in consultation with Ministry of Power, also develops policies and strategies
that emphasize self-regulation and market principles to achieve objectives of energy conservation
and energy savings. As per clause 14 (t) of the EC Act 2001, Central Govt. shall take “all measures
necessary to create awareness and disseminate information for efficient use of energy and its
conservation”.
With the objective to reduce energy intensity, in the area of space cooling, BEE initiated
Star Labeling of Air Conditioners to improve efficiency i