# Informe CEV (v.2) - Page 1

### Import `pandas` and `PyMuPDF`

In [None]:
import pandas as pd
import fitz  # PyMuPDF

### Load the PDF

In [None]:
# Path of the CEV report to parse; point this at another PDF to process a
# different report.
pdf_file_path = './Informe_CEV_v2.pdf'
pdf_report = fitz.open(pdf_file_path)
page_number = 0  # Zero-based page index: 0 is the first page of the report
page = pdf_report[page_number]


## Página 1

In [None]:
def extract_text_from_area(page, area):
    """
    Return the text found inside a rectangular region of a PDF page.

    The region is expressed in the report's nominal coordinate system
    (a 215.9 x 330.0 sheet, labelled mm below) and is rescaled to the
    page's actual dimensions before the text is read.

    Args:
    - page (fitz.Page): Page object to read from.
    - area (tuple): (x1, y1, x2, y2) corners of the region, in report coordinates.

    Returns:
    - str: Text inside the region, or an empty string if any step raised
      (the error message is printed, not re-raised).
    """
    # Nominal size of the report sheet, in mm
    sheet_w_mm, sheet_h_mm = 215.9, 330.0

    try:
        # Clean the page contents first to avoid misplaced item insertions
        page.clean_contents()

        # Actual page size, used to rescale the report coordinates
        page_w = page.rect.width
        page_h = page.rect.height

        left, top, right, bottom = area

        # Report coordinates -> fraction of the sheet -> page coordinates
        clip = fitz.Rect(
            left / sheet_w_mm * page_w,
            top / sheet_h_mm * page_h,
            right / sheet_w_mm * page_w,
            bottom / sheet_h_mm * page_h,
        )
        return page.get_textbox(clip)
    except Exception as err:
        print(f"Error: {err}")
        return ""

### Sección 1: Datos vivienda y Evaluación

In [None]:
# Region of the page to read for section 1, in the coordinate system expected
# by extract_text_from_area.
area_coordinates = (8.3, 10.3, 165.6, 65.1)  # (x1, y1, x2, y2)
extracted_text = extract_text_from_area(page, area_coordinates)
# Keep only the last 8 lines of the extracted text (fewer if less was extracted)
datos_vivienda = extracted_text.splitlines()[-8:]
datos_vivienda

In [None]:
# Field names for the section 1 values, in the order they are paired with them
index = [
    'tipo_evaluacion',
    'codigo_evaluacion',
    'region',
    'comuna',
    'direccion',
    'rol_vivienda_proyecto',
    'tipo_vivienda',
    'superficie_interior_util_m2',
]

### Convert list to dictionary

In [None]:
# Pair each field name with its extracted value. zip() silently stops at the
# shorter input, so a short extraction would drop trailing fields and pair the
# remaining values with the wrong names; fail early with an explicit message.
if len(datos_vivienda) != len(index):
    raise ValueError(
        f"Expected {len(index)} values for section 1, "
        f"got {len(datos_vivienda)}: {datos_vivienda!r}"
    )
_dict = dict(zip(index, datos_vivienda))
_dict

In [None]:
# Normalise the evaluation type to title case
_dict['tipo_evaluacion'] = _dict['tipo_evaluacion'].title()
# Swap ',' for '.' so float() can parse a value written with a decimal comma
_dict['superficie_interior_util_m2'] = float(_dict['superficie_interior_util_m2'].replace(',', '.'))


In [None]:
# Convert the dictionary to a single-row DataFrame: from_dict(orient='index')
# puts the keys on the row axis, and .T transposes them into columns.
df = pd.DataFrame.from_dict(_dict, orient='index').T
# Display the row transposed, one field per line
df.T

### Sección 2: Letra de eficiencia energética - Diseño de arquitectura

In [None]:
# Region of the page to read for section 2, in the coordinate system expected
# by extract_text_from_area.
area_coordinates = (5.6, 78.6, 165.8, 191.3)  # (x1, y1, x2, y2)
extracted_text = extract_text_from_area(page, area_coordinates)
# One list entry per line of extracted text
porcentaje_ahorro_list = extracted_text.splitlines()
porcentaje_ahorro_list

In [None]:
# Take the first extracted line that is a (possibly negative) integer as the
# savings percentage; the value stays None when no such line exists.
porcentaje_ahorro = None
for item in porcentaje_ahorro_list:
    if not item.replace('-', '').isdigit():
        continue
    try:
        porcentaje_ahorro = int(item)
    except ValueError:
        # The line passed the digit pre-check but is not a valid integer
        # literal (e.g. '5-3', '12-' or a superscript digit): keep scanning
        # instead of aborting the cell.
        continue
    break
df['porcentaje_ahorro'] = porcentaje_ahorro
df.T

In [None]:
def _from_procentaje_ahorro_to_letra(porcentaje_ahorro: float) -> str:
    """
    Convert a savings percentage to a corresponding letter grade.

    Args:
    - porcentaje_ahorro (float): The savings percentage value, should be between -1 and 100.

    Returns:
    - letra (str): The corresponding letter grade based on the savings percentage.
    """
    if porcentaje_ahorro > 0.85 and porcentaje_ahorro <= 100:
        letra = 'A+'
    elif porcentaje_ahorro > 0.7 and porcentaje_ahorro <= 0.85:
        letra = 'A'
    elif porcentaje_ahorro > 0.55 and porcentaje_ahorro <= 0.7:
        letra = 'B'
    elif porcentaje_ahorro > 0.4 and porcentaje_ahorro <= 0.55:
        letra = 'C'
    elif porcentaje_ahorro > 0.2 and porcentaje_ahorro <= 0.4:
        letra = 'D'
    elif porcentaje_ahorro > -0.1 and porcentaje_ahorro <= 0.20:        
        letra = 'E'
    elif porcentaje_ahorro > -0.35 and porcentaje_ahorro <= -0.1:        
        letra = 'F'
    elif porcentaje_ahorro <= -0.35:
        letra = 'G'
    else:
        letra = None
    return letra


In [None]:
# The grading function works on fractions, so the integer percentage is divided
# by 100. porcentaje_ahorro is still None when no integer line was found in the
# extracted text; None / 100 would raise TypeError, so the letter is left empty.
df['letra_eficiencia_energetica_dem'] = (
    _from_procentaje_ahorro_to_letra(porcentaje_ahorro / 100)
    if porcentaje_ahorro is not None
    else None
)
df.T

### Section 3: Requerimientos anuales de energía para calefacción y enfriamiento

### Subsection 1: Demanda energética para calefacción

In [None]:
# Region of the page to read for the heating demand value
area_coordinates = (15.6, 220.0, 73.0, 230.0)  # (x1, y1, x2, y2)
extracted_text = extract_text_from_area(page, area_coordinates)
# The last extracted line is taken as the value; ',' is swapped for '.' so
# float() can parse a decimal comma.
# NOTE(review): raises IndexError if no text was extracted (extract_text_from_area returns "" on error).
demanda_calefaccion_kwh_m2_ano = float(extracted_text.splitlines()[-1].replace(',', '.'))
df['demanda_calefaccion_kwh_m2_ano'] = demanda_calefaccion_kwh_m2_ano
df.T

### Subsection 2: Demanda energética para enfriamiento

In [None]:
# Region of the page to read for the cooling demand value
area_coordinates = (90.0, 220.0, 151.5, 230.0)  # (x1, y1, x2, y2)
extracted_text = extract_text_from_area(page, area_coordinates)
# The last extracted line is taken as the value; ',' is swapped for '.' so
# float() can parse a decimal comma.
# NOTE(review): raises IndexError if no text was extracted (extract_text_from_area returns "" on error).
demanda_enfriamiento_kwh_m2_ano = float(extracted_text.splitlines()[-1].replace(',', '.'))
df['demanda_enfriamiento_kwh_m2_ano'] = demanda_enfriamiento_kwh_m2_ano
df.T

### Subsection 3: Demanda energética total

In [None]:
# Region of the page to read for the total demand value
area_coordinates = (167.0, 225.0, 209.0, 245.0)  # (x1, y1, x2, y2)
extracted_text = extract_text_from_area(page, area_coordinates)
# The last extracted line is taken as the value; ',' is swapped for '.' so
# float() can parse a decimal comma.
# NOTE(review): raises IndexError if no text was extracted (extract_text_from_area returns "" on error).
demanda_total_kwh_m2_ano = float(extracted_text.splitlines()[-1].replace(',', '.'))
df['demanda_total_kwh_m2_ano'] = demanda_total_kwh_m2_ano
df.T

### Subsection 4: Fecha de Emisión

In [None]:
# Region of the page to read for the issue date
area_coordinates = (35.5, 247.5, 57.0, 255.0)  # (x1, y1, x2, y2)
extracted_text = extract_text_from_area(page, area_coordinates)
# The last extracted line is kept as-is (a string; no date parsing is done here).
# NOTE(review): raises IndexError if no text was extracted (extract_text_from_area returns "" on error).
emitida_el = extracted_text.splitlines()[-1]
emitida_el

In [None]:
# Store the issue date string as a new column and display the row transposed
df['emitida_el'] = emitida_el
df.T

# END