<a href="https://colab.research.google.com/github/jottaVLF/BLIP-integrado-com-Google-Drive/blob/main/IA.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install --upgrade google-api-python-client google-auth-httplib2 google-auth-oauthlib
!pip install transformers torch pillow

Collecting google-api-python-client
  Downloading google_api_python_client-2.172.0-py3-none-any.whl.metadata (7.0 kB)
Downloading google_api_python_client-2.172.0-py3-none-any.whl (13.6 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.6/13.6 MB[0m [31m24.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: google-api-python-client
  Attempting uninstall: google-api-python-client
    Found existing installation: google-api-python-client 2.171.0
    Uninstalling google-api-python-client-2.171.0:
      Successfully uninstalled google-api-python-client-2.171.0
Successfully installed google-api-python-client-2.172.0
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-

In [2]:
from google.colab import auth
from googleapiclient.discovery import build
from googleapiclient.http import MediaIoBaseDownload

# Run the Colab authentication flow for the current user before any API
# client is created.
auth.authenticate_user()

# Google Drive v3 API client shared by the cells below.
# NOTE(review): presumably picks up the credentials from the authentication
# step above — confirm if this is ever run outside Colab.
drive_service = build('drive', 'v3')

In [None]:
# VERSÃO_FULL01
import io
import os
import json
from PIL import Image
from transformers import BlipProcessor, BlipForConditionalGeneration, AutoTokenizer, AutoModelForSeq2SeqLM
import torch

# User inputs: the Drive folder to scan and the keyword to look for.
# The keyword is lower-cased because stored captions are lower-cased too.
FOLDER_ID = input("Cole o ID da pasta do Google Drive (ex: 1ABcD...): ").strip()
PALAVRA_CHAVE = (
    input("Digite a palavra-chave para buscar na legenda (em português): ")
    .strip()
    .lower()
)

# Caption bank: one JSON file per folder, keyed by Drive file id.
# Start from an empty bank and overwrite it when a saved file already exists.
banco_path = f"banco_legendas_{FOLDER_ID}.json"
banco_legendas = {}
if os.path.exists(banco_path):
    with open(banco_path, "r", encoding="utf-8") as arquivo_banco:
        banco_legendas = json.load(arquivo_banco)

# BLIP: image -> English caption.
processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
modelo_blip = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")

# NLLB: English caption -> Portuguese caption.
modelo_nllb = "facebook/nllb-200-distilled-600M"
tokenizer = AutoTokenizer.from_pretrained(modelo_nllb)
tradutor = AutoModelForSeq2SeqLM.from_pretrained(modelo_nllb)

# Language codes used by the tokenizer (English source, Portuguese target).
tokenizer.src_lang = "eng_Latn"
tokenizer.tgt_lang = "por_Latn"

# Translate a caption from English to Portuguese.
def traduzir_legenda(legenda_en):
    """Translate an English caption to Portuguese with the NLLB model.

    Relies on the module-level ``tokenizer`` and ``tradutor``. The output
    language is selected by forcing the id of the ``"por_Latn"`` token as the
    first generated token.

    Args:
        legenda_en: Text to translate.

    Returns:
        The first generated sequence decoded to a string, with special
        tokens removed. Generation is capped at ``max_length=100``.
    """
    id_portugues = tokenizer.convert_tokens_to_ids("por_Latn")
    entrada = tokenizer(
        legenda_en,
        return_tensors="pt",
        padding=True,
        truncation=True,
    )
    ids_gerados = tradutor.generate(
        **entrada,
        forced_bos_token_id=id_portugues,
        max_length=100,
    )
    primeira_sequencia = ids_gerados[0]
    return tokenizer.decode(primeira_sequencia, skip_special_tokens=True)

# List the image files inside the Drive folder, following result pagination.
def listar_imagens_na_pasta(folder_id):
    """Return every non-trashed image file directly inside a Drive folder.

    The Drive ``files.list`` endpoint returns results in pages; this function
    keeps requesting pages until the response carries no ``nextPageToken``,
    so large folders are listed completely instead of being cut at the first
    page.

    Args:
        folder_id: Id of the Drive folder whose direct children are listed.

    Returns:
        A list of dicts with the keys ``id`` and ``name``, one per image
        file. Empty when the folder has no matching files.
    """
    query = f"'{folder_id}' in parents and mimeType contains 'image/' and trashed = false"
    arquivos = []
    page_token = None
    while True:
        resposta = drive_service.files().list(
            q=query,
            pageSize=1000,
            # nextPageToken must be requested explicitly, otherwise the
            # response never tells us that more pages exist.
            fields="nextPageToken, files(id, name)",
            pageToken=page_token,
        ).execute()
        arquivos.extend(resposta.get('files', []))
        page_token = resposta.get('nextPageToken')
        if not page_token:
            break
    return arquivos

# Build the standard Google Drive link (does not change any permissions).
def gerar_link_padrao(file_id):
    """Return the standard Drive "view" URL for a file id.

    Only formats a string: no API call is made and sharing settings are
    left untouched.

    Args:
        file_id: Drive file id to embed in the URL.

    Returns:
        ``https://drive.google.com/file/d/<file_id>/view`` as a string.
    """
    link_visualizacao = f"https://drive.google.com/file/d/{file_id}/view"
    return link_visualizacao

# Process images that are not yet in the caption bank and persist the bank.
def processar_novas_imagens(files):
    """Caption every file not yet in ``banco_legendas`` and save the bank.

    For each new file the image is downloaded from Drive, captioned in
    English with BLIP, translated to Portuguese and stored (lower-cased) in
    the module-level ``banco_legendas`` under its Drive file id.

    A failure on one image is reported and skipped so the remaining images
    are still processed. The bank is written in a ``finally`` block, so the
    captions produced so far are kept even when the loop is interrupted
    (for example by a KeyboardInterrupt, which ``except Exception`` does
    not catch).

    Args:
        files: Iterable of dicts with the keys ``id`` and ``name``.

    Returns:
        None. Updates ``banco_legendas`` in place and rewrites ``banco_path``.
    """
    try:
        for file in files:
            file_id = file['id']
            nome = file['name']
            if file_id in banco_legendas:
                # Already captioned in a previous run.
                continue
            try:
                # Download the whole file into memory, chunk by chunk.
                request = drive_service.files().get_media(fileId=file_id)
                buffer = io.BytesIO()
                downloader = MediaIoBaseDownload(buffer, request)
                done = False
                while not done:
                    _, done = downloader.next_chunk()
                buffer.seek(0)

                imagem = Image.open(buffer).convert("RGB")
                inputs = processor(images=imagem, return_tensors="pt")
                # Inference only: no gradients needed.
                with torch.no_grad():
                    saida = modelo_blip.generate(**inputs)
                legenda_en = processor.decode(saida[0], skip_special_tokens=True).strip()
                # Stored lower-cased so keyword search can be a plain substring test.
                legenda_pt = traduzir_legenda(legenda_en).strip().lower()
                link = gerar_link_padrao(file_id)

                banco_legendas[file_id] = {
                    "nome": nome,
                    "legenda_pt": legenda_pt,
                    "link": link
                }

                print(f"Imagem processada: {nome}")

            except Exception as e:
                # Best effort: report and move on to the next image.
                print(f"Erro ao processar {nome}: {e}")
    finally:
        # Write to a temporary file and swap it in, so an interruption during
        # the dump cannot leave a truncated JSON file that would break the
        # next run's json.load.
        caminho_tmp = f"{banco_path}.tmp"
        with open(caminho_tmp, "w", encoding="utf-8") as f:
            json.dump(banco_legendas, f, ensure_ascii=False, indent=2)
        os.replace(caminho_tmp, banco_path)

# Search the caption bank for the keyword.
def buscar_palavra_chave(palavra):
    """Return the bank entries whose Portuguese caption contains ``palavra``.

    The match is a plain, case-sensitive substring test against each entry's
    ``legenda_pt`` in the module-level ``banco_legendas``.

    Args:
        palavra: Text to look for inside each caption.

    Returns:
        A list of ``(nome, legenda_pt, link)`` tuples in the bank's iteration
        order; empty when nothing matches.
    """
    return [
        (registro['nome'], registro['legenda_pt'], registro['link'])
        for registro in banco_legendas.values()
        if palavra in registro['legenda_pt']
    ]

# Main execution: list the folder, caption anything new, then search.
arquivos = listar_imagens_na_pasta(FOLDER_ID)
processar_novas_imagens(arquivos)
resultados = buscar_palavra_chave(PALAVRA_CHAVE)

# Report the matches, or say that there were none.
if resultados:
    print(f"\nResultados encontrados com a palavra '{PALAVRA_CHAVE}':")
    for nome, legenda, link in resultados:
        print(f"\nNome: {nome}\nLegenda: {legenda}\nLink: {link}")
else:
    print("Nenhum resultado encontrado.")