-
Notifications
You must be signed in to change notification settings - Fork 3
/
buildtranscripts-video.py
179 lines (143 loc) · 6.18 KB
/
buildtranscripts-video.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
import os
import time
import cv2
from openai import OpenAI
import base64
import requests
import pandas as pd
import utils.openAiRateLimit as oirt
# from VectorizeDataset import Preprocessing
# from EmbedChunks import EmbedChunks
# from SaveData import SaveData
# use_serverless = True
# Frame sampling stride: convert_video_to_frames writes a frame to disk only
# when its index in the video is a multiple of SKIP_RATE.
SKIP_RATE = 30
# NOTE(review): GPT_ACCEPT_RATE is not referenced anywhere else in this file;
# presumably it mirrors the max_frames=12 passed to
# convert_frames_to_transcript further down -- confirm or remove.
GPT_ACCEPT_RATE = 12
# API key used to build the "Authorization: Bearer ..." header in
# convert_frames_to_transcript. os.getenv returns None when the variable is
# unset, in which case the header would literally read "Bearer None".
api_key=os.getenv("OPENAI_API_KEY")
# NOTE(review): Client is not used by any code in this file; the requests in
# this script go through the utils.openAiRateLimit helpers instead.
Client=OpenAI()
# Load the working DataFrame produced by earlier runs; every stage below
# updates it in place and writes it back to the same pickle file.
# NOTE(review): unpickling can execute arbitrary code -- only load a
# dataframe.pickle that comes from a trusted source.
df = pd.read_pickle('dataframe.pickle')
# Root directory under which convert_video_to_frames creates the
# <creatorid>/<video name>/ frame folders.
video_outfolder = "video-out"
# Function to convert video to frames
def convert_video_to_frames(video_path, creatorid, video_outfolder, overwrite=False):
    """Extract sampled JPEG frames from a video into a per-video folder.

    Frames are written to ``<video_outfolder>/<creatorid>/<video stem>/`` as
    ``frameNNN.jpg``, where ``NNN`` is the zero-padded index of the frame
    within the video. Only the first 500 frames are scanned and only every
    ``SKIP_RATE``-th of those is saved.

    Args:
        video_path: Path or URL passed to ``cv2.VideoCapture``. Its basename
            without extension names the output folder.
        creatorid: Name of the per-creator sub-folder (converted with
            ``str`` so numeric ids also work).
        video_outfolder: Root output directory.
        overwrite: When False and the output folder already contains
            ``.jpg`` files, extraction is skipped.

    Returns:
        The output folder path, whether or not extraction ran.
    """
    # Upper bound on frames read per video (about 16.6 s of footage if the
    # video runs at 30 fps).
    max_scanned_frames = 500

    video_name = os.path.basename(video_path)
    folder_name = os.path.join(
        video_outfolder, str(creatorid), os.path.splitext(video_name)[0]
    )
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(folder_name, exist_ok=True)

    # Re-use frames from an earlier run unless the caller asks to overwrite.
    already_extracted = any(
        name.endswith('.jpg') for name in os.listdir(folder_name)
    )
    if not overwrite and already_extracted:
        print("JPG files already exist in the folder. Skipping conversion.")
        return folder_name

    print("running video to frames conversion")
    video = cv2.VideoCapture(video_path)
    try:
        i = 0
        while video.isOpened() and i < max_scanned_frames:
            success, frame = video.read()
            if not success:
                break
            if i % SKIP_RATE == 0:
                frames_path = os.path.join(
                    folder_name, 'frame' + str(i).zfill(3) + ".jpg"
                )
                # imwrite reports failure through its return value instead
                # of raising, so surface it rather than dropping the frame
                # silently.
                if cv2.imwrite(frames_path, frame):
                    print(frames_path)
                else:
                    print(f"failed to write frame: {frames_path}")
            i += 1
    finally:
        # Always free the capture handle, even if reading or writing raised.
        video.release()
    return folder_name
# Function to encode the image
def encode_image(image_path):
    """Return the file at ``image_path`` as a base64-encoded text string.

    The file is read in binary mode; the base64 bytes are decoded as UTF-8
    so the result can be embedded directly in text such as a data URL.
    """
    with open(image_path, "rb") as handle:
        raw_bytes = handle.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode('utf-8')
def convert_frames_to_transcript(frames, max_frames=15):
    """Ask the OpenAI chat-completions endpoint to describe a folder of frames.

    Args:
        frames: Directory holding the JPEG frames of one video.
        max_frames: Maximum number of frames attached to the request.

    Returns:
        A ``(transcript, res_header)`` tuple. On success ``transcript`` is
        the text of the first choice and ``res_header`` is the HTTP response
        headers. If the folder holds no frames, or the request or response
        parsing raises, the result is ``('skipped transcript', None)`` --
        callers must be prepared for ``res_header`` being ``None``.
    """
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}"
    }

    # os.listdir returns entries in arbitrary order. Sorting makes the
    # selected subset deterministic, and the zero-padded frame names produced
    # by convert_video_to_frames then sort chronologically. Non-JPEG entries
    # are ignored because each part is declared as image/jpeg below.
    frame_files = sorted(
        name for name in os.listdir(frames)
        if name.lower().endswith(('.jpg', '.jpeg'))
    )
    print('frames loaded (max):', len(frame_files), '(', max_frames, ')')

    if not frame_files:
        # An image-less request costs tokens and can only yield a transcript
        # that describes nothing, so skip the call.
        print(f"No frames found in {frames}; skipping transcript request.")
        return 'skipped transcript', None

    # Encode only the frames that are actually sent.
    image_urls = [
        {
            "type": "image_url",
            "image_url": {
                "url": f"data:image/jpeg;base64,{encode_image(os.path.join(frames, name))}",
                "detail": "low"
            }
        }
        for name in frame_files[:max_frames]
    ]

    # NOTE(review): "gpt-4-vision-preview" is a preview model name -- confirm
    # it is still served before relying on this call.
    payload = {
        "model": "gpt-4-vision-preview",
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": "The below pictures are frames from a video sequence. Extract all text and product labels mentioned in the image. Describe all products along with color, material in 1-2 sentences. If something is not clear, do not make stuff up. products are mostly clothes and fashion accessroies."
                    },
                    *image_urls,
                ]
            }
        ],
        "max_tokens": 500
    }

    try:
        response = oirt.completions_http_with_backoff("https://api.openai.com/v1/chat/completions", headers=headers, json=payload)
        transcript = response.json()['choices'][0]['message']['content']
        res_header = response.headers
    except Exception as e:
        # Deliberate best-effort: one failed video must not abort the batch.
        # The placeholder text is short enough to be picked up by the
        # script's retry pass for transcripts of 50 characters or fewer.
        print(f"Exception occurred: {e}")
        transcript = 'skipped transcript'
        res_header = None
    return transcript, res_header
def _is_missing(value):
    """Return True for an empty DataFrame cell (NaN/None/NA or the text 'nan')."""
    return bool(pd.isna(value)) or str(value) == 'nan'


def _transcribe_into_df(frames):
    """Transcribe one frames folder and store the text on its row(s) in df."""
    print(f"frame: {frames}")
    transcript, res_header = convert_frames_to_transcript(frames, max_frames=12)
    df.loc[df["frames_url"] == frames, "video_transcript"] = transcript
    # res_header is None when the request failed; indexing it would raise
    # TypeError and abort the whole run.
    if res_header is not None:
        # .get assumes a mapping-like headers object -- TODO confirm against
        # utils.openAiRateLimit.
        print(f"remaining requests: {res_header.get('x-ratelimit-remaining-requests')}")
        print(f"remaining tokens tokens: {res_header.get('x-ratelimit-remaining-tokens')}")


# Columns read by the stages below; create them empty when this is the first
# run (or when there are no video rows) so the reads cannot raise KeyError.
for _column in ("frames_url", "video_transcript"):
    if _column not in df.columns:
        df[_column] = None

# Stage 1: extract frames for every video row.
# Rows are addressed by index label rather than looked up again by URL value,
# so NaN or duplicated URLs cannot select the wrong row.
video_rows = df[df['typename'] == 'GraphVideo']
for row_label, url, creatorid in zip(video_rows.index, video_rows['url'], video_rows['creatorid']):
    frames_path = convert_video_to_frames(url, creatorid, video_outfolder, overwrite=False)
    df.loc[row_label, "frames_url"] = frames_path
df.to_pickle("dataframe.pickle")

# Stage 2: transcribe every frames folder that has no transcript yet.
# dict.fromkeys de-duplicates the folders while keeping row order.
pending_frames = list(dict.fromkeys(
    frames
    for frames, video_transcript in zip(df["frames_url"], df["video_transcript"])
    if not _is_missing(frames) and _is_missing(video_transcript)
))
for frames in pending_frames:
    _transcribe_into_df(frames)
df.to_pickle("dataframe.pickle")

#loop for missed videos
# Stage 3: retry folders whose stored transcript is 50 characters or fewer,
# which includes the 'skipped transcript' placeholder. The folder is taken
# from the same row as the short transcript; looking it up by transcript text
# would always return the first row holding that text.
missed_videos = list(dict.fromkeys(
    frames
    for frames, video_transcript in zip(df["frames_url"], df["video_transcript"])
    if not _is_missing(frames)
    and not _is_missing(video_transcript)
    and len(str(video_transcript)) <= 50
))
for frames in missed_videos:
    _transcribe_into_df(frames)
df.to_pickle("dataframe.pickle")

#create combined transcript
# Stage 4: merge audio and video transcripts into one summary per row.
for index, row in df.iterrows():
    print("processing transcript: ", index)
    # NOTE(review): the scraped source lost its indentation, so it is unclear
    # whether rows lacking either transcript were meant to receive a
    # "transcript" value. They are skipped here so a summary from a previous
    # row is never reused -- confirm against the original file.
    if _is_missing(row['en_audio_transcript']) or _is_missing(row['video_transcript']):
        continue
    audio_transcript = str(row['en_audio_transcript'])
    video_transcript = str(row['video_transcript'])
    input_msg = f"AUDIO TRANSCRIPT: {audio_transcript}\n\n\nVIDEO TRANSCRIPT: {video_transcript}"
    try:
        response = oirt.completions_with_backoff(
            model="gpt-3.5-turbo",
            messages=[
                {"role": "system", "content": "combine the below audio transcript and video transcript into a single summary. Provide a list of any products and brands mentioned in the transcript. List the products and brands with special emphasis to the AUDIO TRANSCRIPT. Be truthful, if no products or brands are available, say N/A"},
                {"role": "user", "content": input_msg}
            ]
        )
        transcript = response.choices[0].message.content
    except Exception as e:
        # Best-effort: record a placeholder and continue with the next row.
        print(f"OpenAI API request error: {e}")
        transcript = 'nan'
    title = str(row['caption'])
    tags = str(row['caption_hashtags'])
    transcript_full = f"TRANSCRIPT: {transcript}\n\n\nTITLE: {title}\n\n\nTAGS: {tags}"
    df.loc[index, "transcript"] = transcript_full
df.to_pickle("dataframe.pickle")