# Video Intelligence API

In this notebook, we'll use the Video Intelligence API.

## Setup

In [None]:
!pip3 install ipython google-cloud-videointelligence
!pip3 install pandas 
!pip3 install Pillow
!pip3 install opencv-python
!pip3 install google-cloud-vision

You might have to restart your runtime to load these packages. The next cell does this for you.

In [None]:
import IPython

# Shut the current kernel down so the freshly installed packages are loaded
# on the next start. NOTE(review): the True argument presumably requests a
# restart rather than a plain shutdown - confirm against the ipykernel docs.
app = IPython.Application.instance()
app.kernel.do_shutdown(True)

In [None]:
# Google Cloud project id interpolated into the `gcloud services enable`
# command in the next code cell. Replace the placeholder before running.
PROJECT_ID = "your-project-id"

### Enable video intelligence API (Execute one time)

In [None]:
# Enable the Video Intelligence API for PROJECT_ID (only needs to run once).
! gcloud services enable videointelligence.googleapis.com --project {PROJECT_ID}

In [None]:
import sys
import os
import json
import math
import pandas as pd
from PIL import Image, ImageDraw
from google.cloud import videointelligence_v1 as vi

### Authentication (Colab only)

If you are running this notebook on Colab, you need to run the following cell to authenticate. This step is not required on Vertex AI Workbench, which is pre-authenticated.

In [None]:
import sys

# The Colab runtime is detected by the presence of the `google.colab` module
# among the already-loaded modules; only then is the interactive user
# authentication triggered. In any other environment this cell does nothing.
if 'google.colab' in sys.modules:
    from google.colab import auth
    auth.authenticate_user()

In [None]:
# Cloud Storage bucket and object name of the video to analyse.
BUCKET = "bucket-name"
video_file = "video1.mp4"

# The same video addressed two ways: as a gs:// URI (used for the Video
# Intelligence requests) and as a local file path (used by is_shorts_format,
# get_video_info and is_audio_silent_librosa).
video_gcs_uri = "gs://" + BUCKET + "/" + video_file
video_path = "./video/" + video_file

## Logo Detection

In [None]:
from datetime import timedelta
from typing import Optional, Sequence, cast

from google.cloud import videointelligence_v1 as vi


def detect_logos(
    video_uri: str, segments: Optional[Sequence[vi.VideoSegment]] = None
) -> vi.VideoAnnotationResults:
    """Run logo recognition on one video and return its annotation results.

    Args:
        video_uri: Value sent as ``input_uri`` of the annotate request (the
            notebook passes a ``gs://`` URI).
        segments: Optional segments forwarded through the video context;
            ``None`` is passed on unchanged.

    Returns:
        The first element of the response's ``annotation_results``.
    """
    client = vi.VideoIntelligenceServiceClient()
    request = vi.AnnotateVideoRequest(
        input_uri=video_uri,
        features=[vi.Feature.LOGO_RECOGNITION],
        video_context=vi.VideoContext(segments=segments),
    )

    print(f'Processing video "{video_uri}"...')

    # result() blocks until the annotation operation has finished.
    response = cast(
        vi.AnnotateVideoResponse, client.annotate_video(request).result()
    )

    # Only one video was submitted, so only the first result is of interest.
    return response.annotation_results[0]
    

In [None]:
def print_detected_logos(results: vi.VideoAnnotationResults):
    """Print a header and one summary row per logo track.

    Each row shows, separated by `` | ``: track confidence, segment start and
    end in seconds, number of timestamped objects, entity id and description.

    Args:
        results: Annotation results whose ``logo_recognition_annotations``
            are listed.
    """
    logo_annotations = results.logo_recognition_annotations

    print(f" Detected logos: {len(logo_annotations)} ".center(80, "-"))
    for logo in logo_annotations:
        entity_id = logo.entity.entity_id
        description = logo.entity.description
        for track in logo.tracks:
            start_s = track.segment.start_time_offset.total_seconds()
            end_s = track.segment.end_time_offset.total_seconds()
            columns = [
                f"{track.confidence:4.0%}",
                f"{start_s:>7.3f}",
                f"{end_s:>7.3f}",
                f"{len(track.timestamped_objects):>3} fr.",
                f"{entity_id:<15}",
                f"{description}",
            ]
            print(" | ".join(columns))

In [None]:
from google.cloud import vision
def detect_logo_text(img_path):
    """Return the text found in an image file by Vision API text detection.

    Args:
        img_path: Path of the image file; its raw bytes are sent to the API.

    Returns:
        The ``full_text_annotation.text`` field of the response.
    """
    client = vision.ImageAnnotatorClient()

    with open(img_path, "rb") as fh:
        image_bytes = fh.read()

    response = client.text_detection(image=vision.Image(content=image_bytes))
    return response.full_text_annotation.text

In [None]:
import cv2

def extract_logo_frames(index:int, video_name, start_time:float, end_time:float, box):
    """Save the frame at ``start_time`` and the logo region cropped from it.

    The frame is written to ``./logos/<video_name>/<index>_frame.jpg`` and the
    crop to ``./logos/<video_name>/logo_<index>.jpg``. The crop is then passed
    to ``detect_logo_text`` and a one-line summary is printed before the crop
    is displayed.

    Args:
        index: Number used in the output file names and the printed header.
        video_name: File name of the video inside ``./video/``.
        start_time: Position, in seconds, of the frame to extract.
        end_time: Accepted for interface compatibility; not used.
        box: Object with ``left``/``top``/``right``/``bottom`` attributes
            normalized to the frame size (0..1).

    Returns:
        Path of the saved frame image, or ``""`` when the video cannot be
        opened or no frame could be read.
    """
    print(f" extract frame: {index} ".center(80, "-"))

    cap = cv2.VideoCapture(f"./video/{video_name}")
    try:
        if not cap.isOpened():
            # Report and return "no image" instead of calling exit(), which
            # in a notebook can take the whole kernel down.
            print("Error opening video file")
            return ""

        # Seek to the frame that corresponds to start_time and read it.
        fps = cap.get(cv2.CAP_PROP_FPS)
        cap.set(cv2.CAP_PROP_POS_FRAMES, start_time * fps)
        ret, frame = cap.read()
    finally:
        # Always release the capture, including on the early-return path.
        cap.release()

    if not ret:
        return ""

    directory_path = f"./logos/{video_name}"
    os.makedirs(directory_path, exist_ok=True)
    image_name = f"{directory_path}/{index}_frame.jpg"
    cv2.imwrite(image_name, frame)

    with Image.open(image_name) as im:
        # Scale the normalized box by the real frame size rather than an
        # assumed 1280x720, so videos of any resolution are cropped correctly.
        frame_width, frame_height = im.size
        left = box.left * frame_width
        bottom = box.bottom * frame_height
        right = box.right * frame_width
        top = box.top * frame_height
        im_crop = im.crop([left, top, right, bottom])

    crop_width, crop_height = im_crop.size
    logo_path = f"{directory_path}/logo_{index}.jpg"
    im_crop.save(logo_path, "JPEG")
    detected_text = detect_logo_text(logo_path)
    print(
        f"Logo Size : {im_crop.size}",
        f"Logo w:h Ratio : {crop_width/crop_height:.1f} : 1",
        f"Logo Location (top,left) : ({top:.1f}, {left:.1f})",
        f"Text : {detected_text}",
        sep=" | ",
    )

    display(im_crop)

    return image_name

In [None]:
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec

def print_image_frames(images, cols=4):
    """Show the given image files in a grid, filled row by row.

    Args:
        images: Sequence of image file paths readable by ``plt.imread``.
        cols: Number of grid columns (default 4).

    Nothing is drawn when ``images`` is empty.
    """
    if not images:
        return

    # Ceiling division: exactly as many rows as needed, without the spare
    # empty row that `len(images) // cols + 1` adds when the count is a
    # multiple of `cols`.
    n_rows = (len(images) + cols - 1) // cols

    fig = plt.figure(figsize=(20, 12))  # Adjust figure size as needed
    gs = gridspec.GridSpec(n_rows, cols, figure=fig)

    # One subplot per image; gs[i] addresses the grid cells in row-major order.
    for i, image_path in enumerate(images):
        ax = fig.add_subplot(gs[i])
        ax.imshow(plt.imread(image_path))
        ax.axis('off')  # Turn off axes for cleaner display

    plt.tight_layout()
    plt.show()

In [None]:
def print_logo_frames(results: vi.VideoAnnotationResults, entity_id: str):
    """Print every timestamped bounding box of the logos matching an entity.

    Args:
        results: Annotation results containing ``logo_recognition_annotations``.
        entity_id: Only annotations whose ``entity.entity_id`` equals this
            value are printed.
    """
    matching = [
        candidate
        for candidate in results.logo_recognition_annotations
        if candidate.entity.entity_id == entity_id
    ]
    for annotation in matching:
        description = annotation.entity.description
        for track in annotation.tracks:
            header = (
                f" {description},"
                f" confidence: {track.confidence:.0%},"
                f" frames: {len(track.timestamped_objects)} "
            )
            print(header.center(80, "-"))
            for stamped in track.timestamped_objects:
                seconds = stamped.time_offset.total_seconds()
                box = stamped.normalized_bounding_box
                # Raw box first, then a compact "time | top-left | bottom-right" row.
                print(box)
                row = [
                    f"{seconds:>7.3f}",
                    f"({box.left:.5f}, {box.top:.5f})",
                    f"({box.right:.5f}, {box.bottom:.5f})",
                ]
                print(" | ".join(row))

In [None]:
def print_detected_logos_extract_frames(
    results: vi.VideoAnnotationResults, video_name, target_entity_id="/m/03068d"
):
    """Print all logo tracks and extract a frame for each track of one entity.

    Every track is printed as a summary row. For tracks whose entity id equals
    ``target_entity_id``, ``extract_logo_frames`` is called and the frames it
    saved are shown in a grid at the end.

    Args:
        results: Annotation results containing ``logo_recognition_annotations``.
        video_name: File name of the local video, forwarded to
            ``extract_logo_frames``.
        target_entity_id: Entity id to extract frames for. The default
            "/m/03068d" is labelled "LG Electronics" in the original notebook.
    """
    annotations = results.logo_recognition_annotations

    index = 0
    images = []

    print(f" Detected logos: {len(annotations)} ".center(80, "-"))
    for annotation in annotations:
        entity = annotation.entity
        entity_id = entity.entity_id
        description = entity.description
        for track in annotation.tracks:
            confidence = track.confidence
            t1 = track.segment.start_time_offset.total_seconds()
            t2 = track.segment.end_time_offset.total_seconds()
            objects = track.timestamped_objects
            logo_frames = len(objects)

            # Tracks without any timestamped object have no box to crop.
            if entity_id == target_entity_id and logo_frames > 0:
                if logo_frames >= 2:
                    # Use the second detection's box when one is available.
                    reference = objects[1]
                    end_time = t1 + reference.time_offset.total_seconds()
                else:
                    reference = objects[0]
                    end_time = t2

                # video_name is passed on both paths; the single-object path
                # used to omit it and failed with a TypeError.
                image = extract_logo_frames(
                    index, video_name, t1, end_time, reference.normalized_bounding_box
                )
                if image:
                    images.append(image)
                index += 1

            print(
                f"{confidence:4.0%}",
                f"{t1:>7.3f}",
                f"{t2:>7.3f}",
                f"{t2-t1:>7.3f} secs",
                f"{logo_frames:>3} fr.",
                f"{entity_id:<15}",
                f"{description}",
                sep=" | ",
            )

    print_image_frames(images)


In [None]:
video_uri = video_gcs_uri
# The triple-quoted string below is disabled example code, not executed:
# it shows how to limit detection to the 146s-156s segment of the video.
'''
segment = vi.VideoSegment(
    start_time_offset=timedelta(seconds=146),
    end_time_offset=timedelta(seconds=156),
)
results = detect_logos(video_uri, [segment])
'''
# Run logo detection without segments.
results = detect_logos(video_uri )

In [None]:
# Alternative reports, left disabled:
#print_detected_logos(results)
#print_logo_frames(results)
# Print every logo track and extract frames from the local copy of the video.
video_name = video_file
print(video_file)
print_detected_logos_extract_frames(results, video_name)

# Detect Shot Change
Shot Change와 Object Tracking을 통해서 움직임이 없는 동영상 검수
- Shot Change : 동영상이 장면이 바뀌는 듯한 변화가 감지되는것 
- shot이 1개 인 동영상에 대해서만 Object Tracking을 추가로 검수
- Tracking된 Object가 shot의 start에서 부터 end까지 보여지는 지 확인
- start에서 end까지 보여진다면 위치 변화가 없는지 확인 : 어느 정도의 px까지의 움직임을 허용할 것인지 정할 필요가 있음. 움직임이 없다고 보내준 동영상에서도 실제로는 object의 px 위치에 6px 정도의 움직임이 있었음 (아래 예시는 10px 이하의 움직임을 허용하는 것으로 구현)

In [None]:
from datetime import timedelta
from typing import Optional, Sequence, cast

from google.cloud import videointelligence_v1 as vi


def detect_shot_change(
    video_uri: str, segments: Optional[Sequence[vi.VideoSegment]] = None
) -> vi.VideoAnnotationResults:
    """Run shot-change detection and object tracking on one video.

    Args:
        video_uri: Value sent as ``input_uri`` of the annotate request.
        segments: Optional segments forwarded through the video context;
            ``None`` is passed on unchanged.

    Returns:
        The first element of the response's ``annotation_results``.
    """
    client = vi.VideoIntelligenceServiceClient()
    request = vi.AnnotateVideoRequest(
        input_uri=video_uri,
        features=[
            vi.Feature.SHOT_CHANGE_DETECTION,
            vi.Feature.OBJECT_TRACKING,
        ],
        video_context=vi.VideoContext(segments=segments),
    )

    print(f'Processing video "{video_uri}"...')

    # result() blocks until the annotation operation has finished.
    response = cast(
        vi.AnnotateVideoResponse, client.annotate_video(request).result()
    )

    # Only one video was submitted, so only the first result is of interest.
    return response.annotation_results[0]

In [None]:
import cv2

def extract_shot_frames(index:int, video_name, start_time:float, end_time:float):
    """Save the middle frame of a shot as a JPEG.

    The frame is written to ``./shots/<video_name>/<index>_frame.jpg``.

    Args:
        index: Number used in the output file name and the printed header.
        video_name: File name of the video inside ``./video/``.
        start_time: Shot start in seconds.
        end_time: Shot end in seconds.

    Returns:
        Path of the saved image, or ``""`` when the video cannot be opened or
        no frame could be read.
    """
    print(f" extract frame: {index} ".center(80, "-"))

    cap = cv2.VideoCapture(f"./video/{video_name}")
    try:
        if not cap.isOpened():
            # Report and return "no image" instead of calling exit(), which
            # in a notebook can take the whole kernel down.
            print("Error opening video file")
            return ""

        fps = cap.get(cv2.CAP_PROP_FPS)

        # Number of frames in the shot, and the offset of its middle frame.
        total_frames = int((end_time - start_time) * fps)
        middle_frame = total_frames // 2

        cap.set(cv2.CAP_PROP_POS_FRAMES, start_time * fps + middle_frame)
        ret, frame = cap.read()
    finally:
        # Always release the capture, including on the early-return path.
        cap.release()

    if not ret:
        return ""

    directory_path = f"./shots/{video_name}"
    os.makedirs(directory_path, exist_ok=True)
    image_name = f"{directory_path}/{index}_frame.jpg"
    cv2.imwrite(image_name, frame)
    print(f"image name : {image_name}")

    return image_name

In [None]:
def print_detected_shot_change(
    results: vi.VideoAnnotationResults,
    width=1280,
    height=720,
    max_shift_px=10,
):
    """Report whether a video is a single shot without object motion.

    When exactly one shot was detected, every tracked object whose last frame
    offset lies within 0.1 s of the shot duration is treated as visible
    throughout the shot. Each of its bounding boxes is compared, in pixels,
    with the box of its first frame. A shift of more than ``max_shift_px`` on
    any edge counts as motion.

    Args:
        results: Annotation results providing ``shot_annotations`` and
            ``object_annotations``.
        width: Frame width in pixels used to scale normalized boxes.
        height: Frame height in pixels used to scale normalized boxes.
        max_shift_px: Largest per-edge shift, in pixels, still considered
            "no motion".
    """
    shot_annotations = results.shot_annotations
    shot_len = len(shot_annotations)
    print(f" Shot Changess: {shot_len} ".center(80, "-"))

    if shot_len != 1:
        print("This video has more than one shots")
        return

    t1 = shot_annotations[0].start_time_offset.total_seconds()
    t2 = shot_annotations[0].end_time_offset.total_seconds()
    shot_secs = t2 - t1
    print(f"{t1:>7.3f} | {t2:>7.3f} | {shot_secs:>7.3f} secs")

    def to_pixels(box):
        # (left, top, right, bottom) of a normalized box in whole pixels.
        return (
            int(box.left * width),
            int(box.top * height),
            int(box.right * width),
            int(box.bottom * height),
        )

    spanning_object_found = False
    motion_detected = False

    for annotation in results.object_annotations:
        frames = annotation.frames
        if not frames:
            continue

        # NOTE(review): the last frame offset is compared with the shot
        # duration, which equals the shot end only if the shot starts at 0.
        object_sec = frames[-1].time_offset.total_seconds()
        if abs(shot_secs - object_sec) >= 0.1:
            # Skip objects that are not shown to the end of the shot. The
            # previous code aborted the whole scan at the first such object.
            continue

        print(f" object is shown throuhput the shot : Diff : {abs(shot_secs - object_sec):.3f} secs".center(80, "-"))
        spanning_object_found = True
        b_left, b_top, b_right, b_bottom = to_pixels(frames[0].normalized_bounding_box)

        for f_index, frame in enumerate(frames):
            f_left, f_top, f_right, f_bottom = to_pixels(frame.normalized_bounding_box)
            largest_shift = max(
                abs(f_left - b_left),
                abs(f_right - b_right),
                abs(f_top - b_top),
                abs(f_bottom - b_bottom),
            )
            if largest_shift > max_shift_px:
                motion_detected = True
                print(f" > frame [{f_index}] : Object is moved")
                print("    base box", f"({b_left}, {b_top})", f"({b_right}, {b_bottom})", sep=" | ")
                print("    frame box", f"({f_left}, {f_top})", f"({f_right}, {f_bottom})", sep=" | ")
                break

        if motion_detected:
            break

    if spanning_object_found and not motion_detected:
        print("This video has just one shot and no motion")
    else:
        # Single shot, but either an object moved or no tracked object was
        # visible to the end of the shot. This case used to be reported as
        # "more than one shots".
        print("This video has just one shot, but motion was detected or no object is shown throughout the shot")

In [None]:
def print_detected_shot_change_extract_frame(results: vi.VideoAnnotationResults, video_name):
    """Extract one frame per detected shot and show the frames in a grid.

    Args:
        results: Annotation results providing ``shot_annotations``.
        video_name: File name of the local video, forwarded to
            ``extract_shot_frames``.
    """
    annotations = results.shot_annotations

    index = 0
    images = []

    print(f" Detected shots: {len(annotations)} ".center(80, "-"))
    for annotation in annotations:
        t1 = annotation.start_time_offset.total_seconds()
        t2 = annotation.end_time_offset.total_seconds()

        image = extract_shot_frames(index, video_name, t1, t2)
        if image:
            images.append(image)
            # The index only advances when a frame was actually saved.
            index += 1

    # Render the grid once, after every shot has been processed. It used to
    # be redrawn inside the loop with a growing list of images.
    print_image_frames(images)

In [None]:
# Request shot-change detection and object tracking for the configured video.
# video_name is kept for the frame-extraction cell further down.
video_uri = video_gcs_uri
video_name = video_file
results = detect_shot_change(video_uri )

# Shot 변경 여부 감지 및 결과 출력

In [None]:
# Report the number of shots and, for a single-shot video, the motion check.
print_detected_shot_change(results)

# shot 변경 감지 및 shot의 Frame 이미지 extract

In [None]:
# Save one frame per detected shot from the local video and display them.
print_detected_shot_change_extract_frame(results, video_name)

# 이미지 품질 검사

In [None]:
!pip install --upgrade scikit-image 

In [None]:
import cv2
import numpy as np
from skimage.metrics import mean_squared_error
#from skimage.feature import brisque

def is_low_quality(image_path, blur_threshold=100, noise_threshold=0.005, contrast_threshold=0.9):
    """Flag an image as low quality using three simple heuristics.

    The image is converted to grayscale and checked for:
      * blur     - variance of the Laplacian below ``blur_threshold``;
      * "noise"  - mean squared error against a flat gray (128) image above
                   ``noise_threshold``;
      * contrast - ``(cdf[255] - cdf[0]) / cdf[255]`` of the normalized
                   cumulative histogram below ``contrast_threshold``.
    Each measured value is printed.

    NOTE(review): the MSE against mid-gray measures deviation from gray, not
    noise as such, and will exceed 0.005 for almost any real image - confirm
    the intended metric and threshold.

    Args:
        image_path: Path of the image file to check.
        blur_threshold: Minimum acceptable Laplacian variance.
        noise_threshold: Maximum acceptable MSE against flat gray.
        contrast_threshold: Minimum acceptable contrast ratio.

    Returns:
        True if at least one check flags the image, otherwise False.

    Raises:
        FileNotFoundError: If the image cannot be read.
    """
    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise FileNotFoundError(f"Could not read image: {image_path}")
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    # One flag is both updated and returned. The previous code initialised
    # `low_quality` but assigned and returned `quality`, which failed with
    # UnboundLocalError whenever no check fired.
    low_quality = False

    # Blur detection
    laplacian_var = cv2.Laplacian(gray, cv2.CV_64F).var()
    print(f"laplacian_var : {laplacian_var}")
    if laplacian_var < blur_threshold:
        print("low blur")
        low_quality = True

    # Noise detection
    mse = mean_squared_error(gray, np.full_like(gray, 128))  # Compare to flat gray
    print(f"mse : {mse}")
    if mse > noise_threshold:
        print("noise")
        low_quality = True

    # Contrast detection
    hist = cv2.calcHist([gray], [0], None, [256], [0, 256])
    cdf = hist.cumsum() / hist.sum()
    contrast_ratio = (cdf[255] - cdf[0]) / cdf[255]
    print(f"contrast_ratio : {contrast_ratio}")
    if contrast_ratio < contrast_threshold:
        print("contrast")
        low_quality = True

    return low_quality

In [None]:
# Example usage
# Check a frame image at the path written by extract_logo_frames for
# video1.mp4, index 0 (the logo-detection cells must have run first).
image_path = './logos/video1.mp4/0_frame.jpg'
if is_low_quality(image_path):
    print("Low quality image detected")
else:
    print("Image quality acceptable")

In [None]:
# Example usage
# NOTE(review): this cell is an exact duplicate of the previous example cell.
image_path = './logos/video1.mp4/0_frame.jpg'
if is_low_quality(image_path):
    print("Low quality image detected")
else:
    print("Image quality acceptable")

In [None]:
import cv2
import numpy as np

def edge_quality_check(image_path, threshold=50):
    """Flag an image as low quality when it contains few strong edges.

    Canny edge detection (thresholds 50 and 300) is run on the grayscale
    image, and the share of edge pixels with a value above 128 is compared
    with ``threshold``.

    NOTE(review): the measured share is a fraction in [0, 1], so with the
    default ``threshold=50`` the function always returns True. Either pass a
    fractional threshold or confirm whether a percentage was intended.

    Args:
        image_path: Path of the image file to check.
        threshold: The image is flagged when the fraction of strong edge
            pixels is below this value.

    Returns:
        True when the image is considered low quality, otherwise False.

    Raises:
        FileNotFoundError: If the image cannot be read.
    """
    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise FileNotFoundError(f"Could not read image: {image_path}")
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    edges = cv2.Canny(gray, 50, 300)
    percent_strong_edges = np.sum(edges > 128) / edges.size
    print(f"{edges} { edges.size} {percent_strong_edges}")

    return bool(percent_strong_edges < threshold)

In [None]:
# Run the edge-based check on the frame saved by the logo extraction step.
# As the last expression of the cell, the boolean result is displayed.
image_path = './logos/video1.mp4/0_frame.jpg'
edge_quality_check(image_path)

# Image 속성, 조작 관련 함수들

### RGB 값 추출

In [None]:
from PIL import Image

def extract_rgb_pillow(image_path):
    """Return every pixel of an image as an ``(r, g, b)`` tuple.

    Args:
        image_path: Path of the image file.

    Returns:
        A list of ``(r, g, b)`` tuples in row-major order (top row first,
        left to right), with ``width * height`` entries.
    """
    # The context manager closes the underlying file once the pixels are read.
    with Image.open(image_path) as img:
        # Convert to RGB mode if needed
        rgb_img = img if img.mode == 'RGB' else img.convert('RGB')

        width, height = rgb_img.size
        # load() returns a pixel-access object, avoiding a getpixel() call
        # for every single pixel.
        pixels = rgb_img.load()
        return [pixels[x, y] for y in range(height) for x in range(width)]

# Example usage
image_path = './logos/video1.mp4/0_frame.jpg'
rgb_data = extract_rgb_pillow(image_path)
# A full frame holds hundreds of thousands of pixels, so print only the
# count and a small sample instead of the whole list.
print(f"{len(rgb_data)} pixels, first 5: {rgb_data[:5]}")

### 그림 위에 격자 그리기

In [None]:
import cv2
from PIL import Image, ImageDraw

def draw_grid(image_name, grid_shape, color='red', thickness=1):
    """Draw the inner lines of a rows x cols grid onto an image.

    Args:
        image_name: Path of the image file to open.
        grid_shape: ``(rows, cols)`` of the grid.
        color: Line colour.
        thickness: Line width in pixels.

    Returns:
        The opened image with the grid lines drawn on it (not saved here).
    """
    img = Image.open(image_name)
    n_rows, n_cols = grid_shape
    img_w, img_h = img.size
    pen = ImageDraw.Draw(img)

    # Inner lines only: cols - 1 vertical and rows - 1 horizontal segments.
    vertical = [
        (img_w * k / n_cols, 0, img_w * k / n_cols, img_h)
        for k in range(1, n_cols)
    ]
    horizontal = [
        (0, img_h * k / n_rows, img_w, img_h * k / n_rows)
        for k in range(1, n_rows)
    ]
    for segment in vertical + horizontal:
        pen.line(segment, fill=color, width=thickness)

    return img

# Example usage: overlay a 10-row x 20-column grid on the cropped logo,
# save the result next to the notebook and display it.
image_path = './logos/video1.mp4/logo_0.jpg'
grid_img = draw_grid(image_path, (10, 20))  # Draw red grid lines
grid_img.save("./logo_grid.jpg")

display(grid_img)

### RGB 값 가져오기

In [None]:
from PIL import Image
import numpy as np

def get_grid_average_rgb(image_path, grid_shape, grid_position):
    """Return the average colour of one cell of a grid laid over an image.

    Args:
        image_path: Path of the image file.
        grid_shape: ``(rows, cols)`` of the grid.
        grid_position: ``(row_index, col_index)`` of the cell.

    Returns:
        NumPy integer array: the cell's pixel values averaged over its two
        leading axes.
    """
    img = Image.open(image_path)

    n_rows, n_cols = grid_shape
    row_index, col_index = grid_position

    # Integer cell size; remainder pixels at the right/bottom edge are ignored.
    img_w, img_h = img.size
    cell_w = img_w // n_cols
    cell_h = img_h // n_rows

    left = col_index * cell_w
    top = row_index * cell_h
    cell = img.crop((left, top, left + cell_w, top + cell_h))

    return np.array(cell).mean(axis=(0, 1)).astype(int)

def get_grid_rgb(image_path, grid_shape, grid_position, select_corner="top_left"):
    """Return the colour of one pixel of a grid cell.

    Args:
        image_path: Path of the image file.
        grid_shape: ``(rows, cols)`` of the grid.
        grid_position: ``(row_index, col_index)`` of the cell.
        select_corner: ``"top_left"`` for the cell's top-left pixel or
            ``"center"`` for its centre pixel.

    Returns:
        The value returned by ``getpixel`` for the selected position.

    Raises:
        ValueError: If ``select_corner`` is neither ``"top_left"`` nor
            ``"center"``.
    """
    img = Image.open(image_path)

    n_rows, n_cols = grid_shape
    row_index, col_index = grid_position

    cell_w = img.size[0] // n_cols
    cell_h = img.size[1] // n_rows
    origin_x = col_index * cell_w
    origin_y = row_index * cell_h

    if select_corner == "center":
        target = (origin_x + cell_w // 2, origin_y + cell_h // 2)
    elif select_corner == "top_left":
        target = (origin_x, origin_y)
    else:
        raise ValueError("Invalid select_corner value. Use 'top_left' or 'center'")

    return img.getpixel(target)

In [None]:
# Read the centre pixel of grid cell (row 4, column 6) of a 10 x 20 grid
# laid over the cropped logo image.
image_path = './logos/video1.mp4/logo_0.jpg'
grid_shape = (10, 20)
grid_position = (4, 6)

rgb_color = get_grid_rgb(image_path, grid_shape, grid_position, select_corner="center")
# NOTE(review): the cell coordinates in the message are hard-coded and do
# not follow grid_position.
print(f"RGB color of the center pixel in grid cell (4, 6): {rgb_color}")

### RGB 색상 확인 용 

In [None]:
from PIL import Image, ImageDraw
# Render a small solid-colour swatch to check an RGB value visually.
width = 20
height = 20
color = (165, 0, 52)  # RGB color tuple

# Create a new RGB image
img = Image.new('RGB', (width, height), color)

# Show the image (once; the duplicate display call was removed)
display(img)

In [None]:
import numpy as np
from PIL import Image, ImageDraw, ImageFont

rows = 20
cols = 40

# Sample the centre pixel of every cell of a rows x cols grid laid over
# `image_path`, which must already be defined by an earlier cell.
data = np.zeros((rows, cols), dtype='object')
for grid_position in np.ndindex(rows, cols):
    data[grid_position] = get_grid_rgb(
        image_path, (rows, cols), grid_position, select_corner="center"
    )

# Size, in pixels, of the tile drawn for each grid cell.
image_size = (40, 30)

# Canvas big enough for all tiles, white background.
canvas_width = cols * image_size[0]
canvas_height = rows * image_size[1]
canvas = Image.new('RGB', (canvas_width, canvas_height), 'white')

# Font for the marker text.
font = ImageFont.load_default()

# Paint one solid tile per cell and label the tiles whose colour is exactly
# (165, 0, 52) with "LR".
draw = ImageDraw.Draw(canvas)
for row in range(rows):
    for col in range(cols):
        x = col * image_size[0]
        y = row * image_size[1]

        color = tuple(data[row, col])
        img = Image.new('RGB', image_size, color)
        canvas.paste(img, (x, y))

        text = ""
        if color[:3] == (165, 0, 52):
            print("color match")
            text = "LR"

        text_position = (x + 2, y + 2)  # Small offset from the tile corner
        draw.text(text_position, f"{text}", font=font, fill='black')

canvas.save("logo_1.jpg")
display(canvas)

In [None]:
from PIL import Image

def get_matching_pixels(image_path, target_rgb):
    """Return the coordinates of all pixels equal to ``target_rgb``.

    Args:
        image_path: Path of the image file.
        target_rgb: Pixel value to look for; compared with ``==`` against
            each value of the image's pixel-access object.

    Returns:
        A list of ``(x, y)`` tuples in row-major order.
    """
    img = Image.open(image_path)
    img_w, img_h = img.size
    px = img.load()  # pixel-access object, indexed as px[x, y]

    return [
        (x, y)
        for y in range(img_h)
        for x in range(img_w)
        if px[x, y] == target_rgb
    ]

# Example usage: list every pixel of the cropped logo with the target colour.
image_path = './logos/video1.mp4/logo_0.jpg'
target_rgb = (165, 0, 52)  # Replace with your desired RGB value

matching_pixels = get_matching_pixels(image_path, target_rgb)

# One line per matching coordinate.
for x, y in matching_pixels:
    print(f"Pixel at ({x}, {y}) has the RGB value {target_rgb}")

# Audio 추출

In [None]:
!pip install librosa

In [None]:
import librosa

def is_audio_silent_librosa(video_file, threshold=0.02):
    """Tell whether the audio of a file is silent, based on RMS energy.

    Args:
        video_file: Path of the media file loaded with ``librosa.load``.
        threshold: The audio counts as silent only if every RMS value is
            below this number.

    Returns:
        True when all RMS values are below ``threshold``. False otherwise,
        and also when loading or analysis fails (the error is printed).
    """
    try:
        samples, _sample_rate = librosa.load(video_file)
        energy = librosa.feature.rms(y=samples)  # Root-Mean-Square energy
        return bool((energy < threshold).all())
    except Exception as e:
        print(f"Error processing file '{video_file}': {e}")
        return False

In [None]:
# Check the local video file; the boolean result is shown as the cell output.
is_audio_silent_librosa(video_path)

# Vision API로 Image에서 Logo Detect & 이미지 자르기

In [None]:
client = vision.ImageAnnotatorClient()

image_file = "./logos/video1.mp4/0_frame.jpg"
# Read the image bytes in this cell. `content` was previously never defined
# here, so the cell failed or silently used bytes left over from elsewhere.
with open(image_file, "rb") as f:
    content = f.read()
image = vision.Image(content=content)
im = Image.open(image_file)

response = client.logo_detection(image=image)

# Check for an API error before using the annotations.
if response.error.message:
    raise Exception(
        "{}\nFor more info on error messages, check: "
        "https://cloud.google.com/apis/design/errors".format(response.error.message)
    )

logos = response.logo_annotations
print("Logos:")

for logo in logos:
    print(logo)
    vects = logo.bounding_poly.vertices

    # Crop the logo using vertices 0 and 2 as opposite corners of the box.
    # NOTE(review): the same output file names are reused for every logo, so
    # only the last detection is kept on disk.
    im2 = im.crop([vects[0].x, vects[0].y , vects[2].x , vects[2].y ])
    im2.save("vision-crop2.jpg", "JPEG")

    # Draw the bounding polygon in red on a fresh copy of the frame.
    im3 = Image.open(image_file)
    draw = ImageDraw.Draw(im3)
    draw.polygon(
        [
            vects[0].x,
            vects[0].y,
            vects[1].x,
            vects[1].y,
            vects[2].x,
            vects[2].y,
            vects[3].x,
            vects[3].y,
        ],
        None,
        "red",
    )
    im3.save("output-hint.jpg", "JPEG")


# Vision API로 Logo Detect하는 예시

In [None]:
def extract_logo_by_vision(image_name):
    """Detect logos in an image with the Vision API and save each as a crop.

    Every detected logo is cropped from ``image_name`` using vertices 0 and 2
    of its bounding polygon as opposite corners, saved as ``logo_<n>.jpg`` in
    the current working directory, and finally shown via
    ``print_image_frames``.

    Args:
        image_name: Path of the image file to analyse.

    Raises:
        Exception: If the Vision API response carries an error message.
    """
    client = vision.ImageAnnotatorClient()

    with open(image_name, "rb") as image_file:
        content = image_file.read()

    response = client.logo_detection(image=vision.Image(content=content))

    # Check for an API error before using the annotations.
    if response.error.message:
        raise Exception(
            "{}\nFor more info on error messages, check: "
            "https://cloud.google.com/apis/design/errors".format(response.error.message)
        )

    logo_names = []
    # Crop from the image that was analysed. The previous code relied on a
    # global `im` that another cell happened to define.
    with Image.open(image_name) as source_image:
        for logo_count, logo in enumerate(response.logo_annotations):
            print(logo.description)
            vects = logo.bounding_poly.vertices

            crop = source_image.crop([vects[0].x, vects[0].y, vects[2].x, vects[2].y])
            # The .jpg extension lets plt.imread pick the right decoder.
            # Without an extension it falls back to PNG and fails on JPEG data.
            logo_name = f"logo_{logo_count}.jpg"
            crop.save(logo_name, "JPEG")
            logo_names.append(logo_name)

    print_image_frames(logo_names)

In [None]:
# Detect and crop logos in the frame saved by the logo extraction step.
image_file = "./logos/video1.mp4/0_frame.jpg"
extract_logo_by_vision(image_file)

# 전체 프레임 수 및 해상도 확인

In [None]:
import cv2

def get_video_info(video_path):
    """Print frame count, frame rate and resolution of a video file.

    Args:
        video_path: Path of the video file to inspect.

    Returns:
        None. When the video cannot be opened, an error line is printed and
        nothing else is reported.
    """
    cap = cv2.VideoCapture(video_path)
    try:
        if not cap.isOpened():
            # Report and return instead of calling exit(), which in a
            # notebook can take the whole kernel down.
            print("Error opening video file")
            return

        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        frame_rate = cap.get(cv2.CAP_PROP_FPS)
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    finally:
        # Always release the capture, including on the early-return path.
        cap.release()

    print("* Total frames:", total_frames)
    print("* Frame rate:", frame_rate)
    print("* Resolution:", width, "x", height)

In [None]:
# Print the (width, height) of the frame saved by the logo extraction step.
im3 = Image.open("./logos/video1.mp4/0_frame.jpg") 
print(im3.size)

# 9:16 사이즈 오용 검사

In [None]:
import cv2

def is_shorts_format(video_path, border_px=400):
    """Detect a video whose first frame has black bars on both sides.

    The first frame is written to ``./shorts_temp.jpg``, read back, converted
    to grayscale, and its leftmost and rightmost ``border_px`` columns are
    checked for being entirely black (value 0).

    Args:
        video_path: Path of the video file.
        border_px: Width, in pixels, of the left and right regions that must
            be completely black (default 400).

    Returns:
        True when both side regions are all black. False otherwise, and also
        when the first frame cannot be read.
    """
    cap = cv2.VideoCapture(video_path)
    try:
        ret, first_frame = cap.read()
    finally:
        cap.release()

    if not ret:
        # Stop here: continuing would analyse a stale ./shorts_temp.jpg from
        # an earlier run, or crash if no such file exists.
        print("error to read video")
        return False

    # Kept as a JPEG round trip so the analysed pixel values match what this
    # check has always looked at.
    cv2.imwrite("./shorts_temp.jpg", first_frame)
    image = cv2.imread("./shorts_temp.jpg")

    # Convert to grayscale for easier processing
    grayscale_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    height, width = grayscale_image.shape[:2]

    left_region = grayscale_image[:, 0:border_px]
    # Clamp the start so frames narrower than border_px do not produce a
    # negative slice index.
    right_region = grayscale_image[:, max(width - border_px, 0):width]

    # A region is all black exactly when it has no non-zero pixel. any() runs
    # vectorised in NumPy instead of a Python loop over every pixel.
    left_is_all_black = not left_region.any()
    right_is_all_black = not right_region.any()

    is_shorts = left_is_all_black and right_is_all_black
    if is_shorts:
        print("This is a shorts format video")

    return is_shorts

In [None]:
# Check the local video for side black bars, then print its frame count,
# frame rate and resolution, followed by the result of the check.
is_shorts = is_shorts_format(video_path)
get_video_info(video_path)
print(f"* Is this shorts format : {is_shorts}")