In [None]:
import cv2
import os
from tqdm import tqdm
import requests
import logging
import base64

In [None]:
## HERE IS THE INPUT VIDEO. YOU SET THIS.
input_video_path = 'Backyard-picnic.mp4'

# API key from platform.openai.com.
# Preferred: export it as the OPENAI_API_KEY environment variable so the
# secret never gets saved inside the notebook. If that variable is unset or
# empty, the placeholder below is used; replace it with your key only as a
# last resort and never commit/share the notebook with a real key in it.
api_key = os.environ.get("OPENAI_API_KEY") or "YOUR OPENAI API KEY"

In [None]:

## Derived output locations. Computed from input_video_path; not meant to be edited.
# Text file that receives one description line per sampled frame.
output_framestext_file = f"{input_video_path}_frametext_out.txt"
# Text file that receives the overall video summary.
output_summary_file = f"{input_video_path}_summary_out.txt"

## Folder that receives the sampled frame images.
output_frames_dir = "frames"

In [None]:
# Set up logging: DEBUG and above goes to video_labeler.log and is echoed
# to the console with the same format.
LOG_FORMAT = '%(asctime)s - %(levelname)s - %(message)s'
CONSOLE_HANDLER_NAME = 'video_labeler_console'

# Note: basicConfig does nothing when the root logger already has handlers,
# so re-running this cell does not add a second file handler.
logging.basicConfig(level=logging.DEBUG, filename='video_labeler.log', filemode='w', format=LOG_FORMAT)

root_logger = logging.getLogger('')
formatter = logging.Formatter(LOG_FORMAT)

# addHandler, unlike basicConfig, is NOT idempotent: attaching a fresh
# StreamHandler on every run of this cell would print each message once per
# run. Reuse the handler installed by a previous run when there is one.
console = next(
    (handler for handler in root_logger.handlers
     if handler.get_name() == CONSOLE_HANDLER_NAME),
    None,
)
if console is None:
    console = logging.StreamHandler()
    console.set_name(CONSOLE_HANDLER_NAME)
    root_logger.addHandler(console)
console.setLevel(logging.DEBUG)
console.setFormatter(formatter)


In [None]:
## Given an image the following function
## returns OpenAI's response.
## The function uses the global variable api_key
## to communicate with OpenAI

def generate_image_description(image_path, prompt="What’s in this image?",
                               model="gpt-4o", max_tokens=300, timeout=60,
                               session=None):
    """Ask the OpenAI chat-completions endpoint to describe one image file.

    The image is read from disk, base64-encoded and sent inline as a data URL
    together with a text prompt. The API key is taken from the module-level
    variable ``api_key``.

    Args:
        image_path: Path of the image file to send. The data URL is always
            labelled ``image/jpeg``, whatever the file actually contains.
        prompt: Text question sent alongside the image.
        model: Model name placed in the request payload.
        max_tokens: Value of the ``max_tokens`` field in the request payload.
        timeout: Seconds passed to ``post`` as its ``timeout`` argument, so a
            stalled connection cannot block the notebook forever.
        session: Optional object with a requests-style ``post`` method. Pass
            one shared ``requests.Session`` to reuse connections across many
            frames; when omitted, a temporary session is created and closed
            for this single call.

    Returns:
        The message content of the first choice, stripped of surrounding
        whitespace, when the HTTP status is 200. For any other status the
        error is logged and the literal string
        ``"Error in getting description"`` is returned.

    Raises:
        OSError: If the image file cannot be opened or read.
        Exception: Anything raised by ``post`` (for example a timeout) or by
            decoding/indexing a 200 response propagates to the caller.
    """
    # Read and encode the image first so a bad path fails before any network setup.
    with open(image_path, "rb") as image_file:
        image_base64 = base64.b64encode(image_file.read()).decode('utf-8')

    url = "https://api.openai.com/v1/chat/completions"
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}"
    }
    payload = {
        "model": model,
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": prompt
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:image/jpeg;base64,{image_base64}"
                        }
                    }
                ]
            }
        ],
        "max_tokens": max_tokens
    }

    if session is None:
        # No shared session supplied: open a throwaway one for this request.
        with requests.Session() as own_session:
            response = own_session.post(url, headers=headers, json=payload,
                                        timeout=timeout)
    else:
        response = session.post(url, headers=headers, json=payload,
                                timeout=timeout)

    if response.status_code == 200:
        return response.json()['choices'][0]['message']['content'].strip()

    logging.error(f"OpenAI API error: {response.status_code}, {response.text}")
    return "Error in getting description"



In [None]:

# Given the output_framestext_file as the input parameter
# the function returns the summary text for all the frames.
# The function uses OpenAI API Key (in global variable api_key)

def summarizeViaOpenAI(frames_description_file, model="gpt-4o",
                       max_tokens=300, timeout=60, session=None):
    """Summarize a video from a text file of per-frame descriptions.

    The whole file is read as UTF-8 (undecodable bytes are replaced) and sent
    to the OpenAI chat-completions endpoint together with an instruction to
    summarize the video. The API key is taken from the module-level variable
    ``api_key``.

    Args:
        frames_description_file: Path of the text file to summarize.
        model: Model name placed in the request payload.
        max_tokens: Value of the ``max_tokens`` field in the request payload.
        timeout: Seconds passed to ``post`` as its ``timeout`` argument, so a
            stalled connection cannot block the notebook forever.
        session: Optional object with a requests-style ``post`` method. When
            omitted, a temporary ``requests.Session`` is created and closed
            for this single call.

    Returns:
        The message content of the first choice when the HTTP status is 200.
        If the file cannot be read, or the API answers with any other status,
        a string starting with ``"An error occurred: "`` is returned instead
        (API errors are also logged), so callers always receive text.

    Raises:
        Exception: Anything raised by ``post`` (for example a timeout) or by
            decoding/indexing a 200 response propagates to the caller.
    """
    # Read the input before any network setup; a missing file needs no request.
    try:
        with open(frames_description_file,
                  'r', encoding='utf-8',
                  errors='replace') as file:
            file_content = file.read()
    except Exception as e:
        return f"An error occurred: {e}"

    url = "https://api.openai.com/v1/chat/completions"
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}"
    }
    payload = {
        "model": model,
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": "The content has descriptions of frames in a video. Please summarize the video."
                    },
                    {
                        "type": "text",
                        "text": file_content
                    }
                ]
            }
        ],
        "max_tokens": max_tokens
    }

    if session is None:
        with requests.Session() as own_session:
            response = own_session.post(url, headers=headers, json=payload,
                                        timeout=timeout)
    else:
        response = session.post(url, headers=headers, json=payload,
                                timeout=timeout)

    # An error reply has no 'choices' key; report it clearly instead of
    # failing with an unrelated KeyError.
    if response.status_code != 200:
        logging.error(f"OpenAI API error: {response.status_code}, {response.text}")
        return f"An error occurred: OpenAI API error {response.status_code}"

    return response.json()['choices'][0]['message']['content']


In [None]:
# Open the input video for reading and stop immediately if that fails,
# so later cells never run against an unusable capture.
cap = cv2.VideoCapture(input_video_path)
video_is_open = cap.isOpened()
if not video_is_open:
    open_error = f"Error: Could not open input video file: {input_video_path}"
    raise Exception(open_error)

In [None]:

# Seconds of video between two frames that get sent for description.
FRAME_SAMPLE_SECONDS = 2
# Frame rate assumed when the capture does not report a usable one.
FALLBACK_FPS = 30.0

width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps = cap.get(cv2.CAP_PROP_FPS)

# cap.get() yields 0 for properties the backend cannot provide. A zero (or
# otherwise non-positive/NaN) frame rate would make frame_interval 0 and the
# later `frame_count % frame_interval` raise ZeroDivisionError.
if not fps > 0:
    logging.warning(f"Video reports unusable FPS ({fps}); assuming {FALLBACK_FPS}")
    fps = FALLBACK_FPS

# Number of frames between two sampled frames; never below 1 so the modulo
# in the processing loop is always defined, even for very low frame rates.
frame_interval = max(1, int(fps * FRAME_SAMPLE_SECONDS))

In [None]:

# Make sure the folder for saved frames is there; an existing folder is fine.
os.makedirs(name=output_frames_dir, exist_ok=True)


In [None]:
# Walk through the video, and for every frame_interval-th frame:
#   1. save the frame as a JPEG in output_frames_dir,
#   2. ask OpenAI to describe it,
#   3. append "Frame N: <description>" to output_framestext_file.
# A failure on one frame is logged and the loop carries on with the next.

total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

try:
    # The descriptions file is read back as UTF-8 by the summary step, so it
    # is written as UTF-8 here instead of the platform default encoding.
    # tqdm gets total=None when the frame count is not a positive number.
    with open(output_framestext_file, "w", encoding="utf-8") as file, \
            tqdm(total=total_frames if total_frames > 0 else None,
                 desc="Processing frames") as pbar:
        frame_count = 0
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                # No more frames (or a read error): stop processing.
                break

            if frame_count % frame_interval == 0:
                frame_path = os.path.join(output_frames_dir, f"frame_{frame_count}.jpg")

                # imwrite signals failure through its return value.
                if not cv2.imwrite(frame_path, frame):
                    logging.error(f"Could not save frame {frame_count} to {frame_path}")
                else:
                    # Send frame to OpenAI API
                    try:
                        description = generate_image_description(frame_path)
                        # Write object descriptions to file
                        file.write(f"Frame {frame_count}: {description}\n")
                        # Flush per frame so partial results survive an interruption.
                        file.flush()
                        logging.info(f"Processed frame {frame_count}: {description}")
                    except Exception as e:
                        logging.error(f"Error processing frame {frame_count} with OpenAI API: {e}")

            frame_count += 1
            pbar.update(1)
finally:
    # Release the capture even when the loop is interrupted or fails.
    cap.release()
    try:
        cv2.destroyAllWindows()
    except cv2.error:
        # NOTE(review): GUI-less OpenCV builds may raise here; this cell
        # opens no windows, so there is nothing to clean up in that case.
        pass

In [None]:
# Summarize the per-frame descriptions and show the result under a heading.
summary = summarizeViaOpenAI(output_framestext_file)
print("THE SUMMARY OF THE VIDEO IS AS FOLLOWS: ", summary, sep="\n")

In [None]:
## Save the summary text to output_summary_file as UTF-8.
## Any failure is reported on screen instead of stopping the notebook.
try:
    with open(output_summary_file, mode='w', encoding='utf-8') as summary_out:
        # Leaving the with-block closes the file, which also flushes it.
        summary_out.write(summary)
    print(f"Content successfully written to {output_summary_file}")
except Exception as write_error:
    print(f"An error occurred: {write_error}")