# What is Google Lens?
Google Lens is a set of vision-based computing capabilities that can understand what you’re looking at and use that information to copy or translate text, identify plants and animals, explore locales or menus, discover products, find visually similar images and take other useful actions.
# How Google Lens works
Lens compares objects in your picture to other images, and ranks those images based on their similarity and relevance to the objects in the original picture. Lens also uses its understanding of the objects in your picture to find other relevant results from the web. Lens may also use other helpful signals, such as words, language and other metadata on the image’s host site, to determine ranking and relevance.

# Cloud Vision API
Cloud Vision API allows developers to easily integrate vision detection features within applications, including image labeling, face and landmark detection, optical character recognition (OCR), and tagging of explicit content.

https://cloud.google.com/vision?hl=en#extract-insights-from-images-documents-and-videos



In [28]:
from google.cloud import vision_v1
import os

# Set environment variable for Google Cloud credentials
# You can download the JSON key file from your service account and set the path.
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "keys/complete-tube-421007-208a4862c992.json"

def detect_text(image_path):
    """Detects text in an image using the Google Cloud Vision API."""

    client = vision_v1.ImageAnnotatorClient()

    with open(image_path, 'rb') as image_file:
        content = image_file.read()

    image = vision_v1.Image(content=content)

    response = client.text_detection(image=image)
    texts = response.text_annotations

    # Print detected text and bounding boxes (optional)
    if texts:
        print("Detected Text:")
        for text in texts:
            print(f'"{text.description}"')  # The detected text
            # Print bounding box vertices (optional)
            # vertices = [(vertex.x, vertex.y) for vertex in text.bounding_poly.vertices]
            # print(f"  Bounding Box: {vertices}")

        return texts  # Return the list of text annotations
    else:
        print("No text detected.")
        return None



def detect_labels(image_path):
    """Detects labels in an image using the Google Cloud Vision API."""
    client = vision_v1.ImageAnnotatorClient()

    with open(image_path, 'rb') as image_file:
        content = image_file.read()

    image = vision_v1.Image(content=content)

    response = client.label_detection(image=image)
    labels = response.label_annotations

    if labels:
        print("\nDetected Labels:")
        for label in labels:
            print(f"{label.description} (Confidence: {label.score:.2f})")
        return labels # Return the list of label annotations
    else:
        print("No labels detected.")
        return None



def detect_objects(image_path):
    """Detects objects in an image using the Google Cloud Vision API."""
    client = vision_v1.ImageAnnotatorClient()

    with open(image_path, 'rb') as image_file:
        content = image_file.read()

    image = vision_v1.Image(content=content)

    response = client.object_localization(image=image)
    objects = response.localized_object_annotations

    if objects:
        print("\nDetected Objects:")
        for object_ in objects:
            print(f"{object_.name} (Confidence: {object_.score:.2f})")
            # Print bounding box vertices (optional)
            vertices = [(vertex.x, vertex.y) for vertex in object_.bounding_poly.vertices]
            print(f"  Bounding Box: {vertices}")
        return objects # Return the list of object annotations
    else:
        print("No objects detected.")
        return None





image_path = "desk.png"  # Replace with the path to your image

detected_texts = detect_text(image_path)
detected_labels = detect_labels(image_path)
detected_objects = detect_objects(image_path)



No text detected.

Detected Labels:
Blue (Confidence: 0.99)
Water (Confidence: 0.99)
Body of water (Confidence: 0.98)
Building (Confidence: 0.98)
City (Confidence: 0.96)
Waterway (Confidence: 0.96)
Architecture (Confidence: 0.94)
Watercourse (Confidence: 0.93)
Reflection (Confidence: 0.92)
Channel (Confidence: 0.91)

Detected Objects:
Building (Confidence: 0.51)
  Bounding Box: []


In [30]:
client = vision_v1.ImageAnnotatorClient()

In [29]:
with open(image_path, 'rb') as image_file:
        content = image_file.read()

image = vision_v1.Image(content=content)

In [33]:
response = client.landmark_detection(image=image)
landmarks = response.landmark_annotations
if landmarks:
    for landmark in landmarks:
        print(f"Landmark: {landmark.description}, Lat/Lng: {landmark.locations}")

Landmark: Chapel Bridge, Lat/Lng: [lat_lng {
  latitude: 47.051648899999989
  longitude: 8.3075350999999973
}
]
Landmark: Brasserie Bodu, Lat/Lng: [lat_lng {
  latitude: 47.0519132
  longitude: 8.3057106
}
]
Landmark: Mcdonald’s, Lat/Lng: [lat_lng {
  latitude: 47.054548999999994
  longitude: 8.3086636
}
]
Landmark: Luzerner Theater, Lat/Lng: [lat_lng {
  latitude: 47.0505906
  longitude: 8.3059906
}
]


In [40]:
from google.cloud import vision_v1
import os
import os
from google.cloud import vision_v1
from PIL import Image  # Pillow library for image manipulation
import fitz  # PyMuPDF (fitz) library for PDF handling
import io
# Set environment variable for Google Cloud credentials
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] ="keys/complete-tube-421007-208a4862c992.json"



def pdf_to_images(pdf_path):
    """Converts a PDF to a list of PIL Image objects."""
    doc = fitz.open(pdf_path)  # Open the PDF
    images = []
    for page_num in range(doc.page_count):
        page = doc[page_num]
        pix = page.get_pixmap(matrix=fitz.Matrix(3, 3)) # Increase resolution for better OCR
        img = Image.open(io.BytesIO(pix.tobytes())) # Convert to PIL Image
        images.append(img)
    return images

def detect_text_from_image(image):
    """Detects text in a PIL Image using the Vision API."""
    client = vision_v1.ImageAnnotatorClient()
    buffered = io.BytesIO()
    image.save(buffered, format="PNG") # Use PNG for better OCR quality
    content = buffered.getvalue()

    image = vision_v1.Image(content=content)
    response = client.text_detection(image=image)
    texts = response.text_annotations

    extracted_text = ""
    if texts:
        for text in texts:
            extracted_text += text.description + " "  # Add extracted text to the string
    return extracted_text

def ocr_pdf(pdf_path):
    """Performs OCR on a PDF file."""
    images = pdf_to_images(pdf_path)
    all_text = ""
    for image in images:
        page_text = detect_text_from_image(image)
        all_text += page_text + "\n\n"  # Add page break between pages
    return all_text



pdf_path = "9190014302.PDF" # Replace with your PDF file path

try:
    extracted_text = ocr_pdf(pdf_path)
    print(extracted_text)

    # Save to a text file:
    with open("extracted_text.txt", "w", encoding="utf-8") as f:
        f.write(extracted_text)

except Exception as e:
    print(f"An error occurred: {e}")







GESVALT Sociedad de Tasación S.A. Inscrita en el registro especial de sociedades de tasación del Banco de España con el nº 4455 el día 27 de mayo de 1994.
Gesvalt
Sociedad de
Tasación S.A.
Motivo:
Visado Digital
Fecha:
gesvaltⓇ
Consultoría y
Valoración
Exp Nº:
GV190004182V2
04/07/2019 16:53:49
Advisory &
Valuation
INFORME DE VALORACIÓN:
Finca rústica de labor secano con edificaciones. en PARAJE CERRO GORDO,
Polígono 17, Parcela 19.
06225 - Ribera del Fresno (Badajoz - Extremadura. España)
SOLICITANTE: BUILDINGCENTER, S.A.U.
FINALIDAD: Circular 4/2017 (Adjudicados/Activos Propios)
FECHA DEL INFORME: 24/06/2019
Ref Cliente: 9190014302 GESVALT Sociedad de Tasación S.A. Inscrita en el registro especial de sociedades de tasación del Banco de España con el nº 4455 el día 27 de mayo de 1994 . Gesvalt Sociedad de Tasación S.A. Motivo : Visado Digital Fecha : gesvaltⓇ Consultoría y Valoración Exp Nº : GV190004182V2 04/07/2019 16:53:49 Advisory & Valuation INFORME DE VALORACIÓN : Finca rústica d