In [None]:
import glob
import io
import os
import time
import warnings

import pandas as pd
from google.cloud import vision

warnings.filterwarnings('ignore')

In [None]:
# Path to the GCP service-account key file. It defaults to `gcp_key.json` in the
# working directory and can be overridden through the GCP_KEY_PATH environment
# variable, so the notebook can run without editing this cell.
GCP_KEY_PATH = os.environ.get("GCP_KEY_PATH", "gcp_key.json")

# Shared Vision API client; `imagen_analisis` reads it as a module-level global.
client = vision.ImageAnnotatorClient.from_service_account_file(GCP_KEY_PATH)




In [None]:
def imagen_analisis(image_path, object_min_score=0.65, label_min_score=0.75):
    """Run text, object, label and face detection on a single image.

    The module-level ``client`` is used to issue one Vision API request per
    feature (text, object localization, labels, faces).

    Parameters
    ----------
    image_path : str
        Path of the image file to analyse. It is also stored in the ``Image``
        column of both returned frames.
    object_min_score : float, default 0.65
        Localized objects are kept only when their score is strictly greater
        than this value.
    label_min_score : float, default 0.75
        Labels are kept only when their score is strictly greater than this
        value.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame)
        ``df_faces``: one row per detected face with the columns ``Rostro``,
        ``Detection Confidence``, ``Anger Likelihood``, ``Joy Likelihood``,
        ``Image`` and ``Categoria``. The columns are present even when no face
        is detected, so callers can filter on them unconditionally.

        ``df_description``: labels, the single text row and objects stacked in
        that order (columns ``Name``/``Score`` for labels and objects,
        ``Texto`` for the text row, plus ``Image`` and ``Categoria``).

    Side effects
    ------------
    Prints the elapsed wall-clock time of the call.
    """
    start_time = time.perf_counter()

    with open(image_path, "rb") as image_file:
        image = vision.Image(content=image_file.read())

    # 01 Text: the first annotation carries the whole detected text block.
    text_annotations = client.text_detection(image=image).text_annotations
    if len(text_annotations) > 0:
        text_block = text_annotations[0].description.replace("\n", " ")
    else:
        # No text detected: keep the single-space placeholder.
        text_block = " "
    df_texto = pd.DataFrame({"Texto": [text_block], "Image": [image_path], "Categoria": ["Texto"]})

    # 02 Objects: keyed by name, so repeated detections of the same object
    # collapse into one row (the last score seen wins).
    objects = client.object_localization(image=image).localized_object_annotations
    filtered_objects = {obj.name: obj.score for obj in objects if obj.score > object_min_score}
    df_objeto = pd.DataFrame(list(filtered_objects.items()), columns=['Name', 'Score']).assign(
        Image=image_path, Categoria='Objetos'
    )

    # 03 Labels
    labels = client.label_detection(image=image).label_annotations
    filtered_labels = {label.description: label.score for label in labels if label.score > label_min_score}
    df_labels = pd.DataFrame(list(filtered_labels.items()), columns=['Name', 'Score']).assign(
        Image=image_path, Categoria='Etiquetas'
    )

    # 04 Faces
    faces = client.face_detection(image=image).face_annotations

    # Likelihood values are used as indexes into this tuple.
    likelihood_name = ("UNKNOWN", "VERY_UNLIKELY", "UNLIKELY", "POSSIBLE", "LIKELY", "VERY_LIKELY")

    faces_data = [
        {
            "Rostro": f"Rostro {i}",
            "Detection Confidence": face.detection_confidence,
            "Anger Likelihood": likelihood_name[getattr(face, "anger_likelihood", 0)],
            "Joy Likelihood": likelihood_name[getattr(face, "joy_likelihood", 0)],
        }
        for i, face in enumerate(faces, start=1)
    ]

    # Explicit columns keep the schema stable when no face is detected.
    face_columns = ["Rostro", "Detection Confidence", "Anger Likelihood", "Joy Likelihood"]
    df_faces = pd.DataFrame(faces_data, columns=face_columns).assign(Image=image_path, Categoria="Faces")

    df_description = pd.concat([df_labels, df_texto, df_objeto])

    execution_time = time.perf_counter() - start_time
    print(f"Tiempo de ejecución: {execution_time:.4f} segundos")

    return df_faces, df_description

In [None]:
# MAIN RUN: analyse every frame inside each video subfolder of `video_frames`
# and write one Excel workbook per video under `video_results`.

# Input root: one subfolder per video, each holding frame_*.jpg files.
base_folder = "video_frames"
# Output root: one subfolder per video with its faces_<video>.xlsx workbook.
ruta_carpeta = r'video_results'

# Post-filters applied to the per-video tables.
MIN_FACE_CONFIDENCE = 0.95      # keep faces detected with more than 95% confidence
MIN_DESCRIPTION_SCORE = 0.85    # keep labels/objects scoring above this value

start_time = time.perf_counter()

for subfolder in os.listdir(base_folder):
    subfolder_path = os.path.join(base_folder, subfolder)

    # Only directories are treated as videos.
    if not os.path.isdir(subfolder_path):
        continue

    image_files = sorted(glob.glob(os.path.join(subfolder_path, "frame_*.jpg")))
    if not image_files:
        # Nothing to analyse. Skipping here also prevents results from being
        # written under the name of a previously processed video.
        continue

    # The video name is the subfolder name; bind it before processing so it
    # never depends on the image loop having run.
    nombre_base = subfolder

    # Analyse all frames first, then concatenate once per video.
    resultados = [imagen_analisis(image_path=image_path) for image_path in image_files]
    faces_images = pd.concat([faces for faces, _ in resultados]).assign(Video=nombre_base)
    descr_images = pd.concat([descr for _, descr in resultados]).assign(Video=nombre_base)

    # The confidence column is absent when no frame produced a face row, so
    # check for it explicitly instead of swallowing the KeyError.
    if 'Detection Confidence' in faces_images.columns:
        faces_images = faces_images[faces_images['Detection Confidence'] > MIN_FACE_CONFIDENCE]

    # Text rows carry no score; give them 1 so they survive the score filter.
    descr_images = descr_images.fillna({'Score': 1})
    descr_images = descr_images[descr_images['Score'] > MIN_DESCRIPTION_SCORE]
    descr_images = (
        descr_images
        .drop_duplicates(subset=['Name', 'Texto'])
        .sort_values(by=['Categoria', 'Score'], ascending=False)
    )

    carpeta_archivo = os.path.join(ruta_carpeta, nombre_base)
    os.makedirs(carpeta_archivo, exist_ok=True)

    # One workbook per video with two sheets: Faces and Descriptions.
    ruta_resultados = os.path.join(carpeta_archivo, f"faces_{nombre_base}.xlsx")

    with pd.ExcelWriter(ruta_resultados, engine="openpyxl") as writer:
        faces_images.to_excel(writer, sheet_name="Faces", index=False)
        descr_images.to_excel(writer, sheet_name="Descriptions", index=False)

end_time = time.perf_counter()
execution_time = end_time - start_time
print(f"-Tiempo de ejecución de todas las imagenes: {execution_time:.4f} segundos\n")



In [None]:
# Consolidate the per-video workbooks found under `video_results` into a single
# workbook with one sheet per original sheet name.

# Root directory holding the per-video subfolders with their Excel files.
directorio_base = r'video_results'

# Find every faces_*.xlsx under the root; sorted so the row order of the
# consolidated file is deterministic across runs.
archivos_excel = sorted(
    glob.glob(os.path.join(directorio_base, "**", "faces_*.xlsx"), recursive=True)
)

# Sheet name -> list of DataFrames read from that sheet across all workbooks.
dataframes_por_hoja = {}

for archivo in archivos_excel:
    # sheet_name=None loads every sheet as {sheet name: DataFrame}; going
    # through read_excel with a path avoids leaving an ExcelFile handle open.
    hojas = pd.read_excel(archivo, sheet_name=None, engine="openpyxl")
    for hoja, df in hojas.items():
        # Tag each row with the workbook it came from.
        df_origen = df.assign(Archivo=os.path.basename(archivo))
        dataframes_por_hoja.setdefault(hoja, []).append(df_origen)

# Consolidated output path.
ruta_consolidado = os.path.join(directorio_base, "analisis_video_tiktok.xlsx")

if dataframes_por_hoja:
    with pd.ExcelWriter(ruta_consolidado, engine="openpyxl") as writer:
        for hoja, lista_df in dataframes_por_hoja.items():
            # errors='ignore' tolerates sheets that carry no `Video` column.
            df_concatenado = pd.concat(lista_df, ignore_index=True).drop(columns='Video', errors='ignore')
            df_concatenado.to_excel(writer, sheet_name=hoja, index=False)

    print(f"Archivo consolidado guardado en: {ruta_consolidado}")
else:
    # Writing a workbook with no sheets fails, so report the empty input instead.
    print(f"No se encontraron archivos faces_*.xlsx en: {directorio_base}")

Archivo consolidado guardado en: video_results\consolidado.xlsx
