diff --git a/requirements.txt b/requirements.txt
index eccc469..e979296 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,5 +1,4 @@
-blrec
-ffmpeg_python==0.2.0
+blrec==2.0.0b4
 fontTools==4.55.3
 librosa==0.9.1
 more_itertools==10.6.0
@@ -11,8 +10,7 @@ tiktoken==0.4.0
 torch==1.11.0
 triton==3.1.0
 zhconv==1.4.3
-openai
-google-genai
-bilitool
+bilitool==0.1.2
+google-generativeai>=0.7.2 # don't change this part
 zhipuai
-pysqlite3
+openai
\ No newline at end of file
diff --git a/src/autoslice/mllm_sdk/gemini_sdk.py b/src/autoslice/mllm_sdk/gemini_new_sdk.py
similarity index 100%
rename from src/autoslice/mllm_sdk/gemini_sdk.py
rename to src/autoslice/mllm_sdk/gemini_new_sdk.py
diff --git a/src/autoslice/mllm_sdk/gemini_old_sdk.py b/src/autoslice/mllm_sdk/gemini_old_sdk.py
new file mode 100644
index 0000000..c4cc8d1
--- /dev/null
+++ b/src/autoslice/mllm_sdk/gemini_old_sdk.py
@@ -0,0 +1,61 @@
+import google.generativeai as genai
+from src.config import GEMINI_API_KEY
+from src.log.logger import scan_log
+import time
+
+# The new Gemini SDK conflicts with the pydantic version used in this project,
+# so the old (deprecated) SDK is used instead.
+# https://github.com/google-gemini/deprecated-generative-ai-python
+
+def gemini_generate_title(video_path, artist):
+    """Generate a title for a live-stream slice with Gemini 2.0 Flash.
+
+    Uploads the video, waits while the upload is in the PROCESSING state,
+    asks the model for a title and deletes the uploaded file again.
+
+    Args:
+        video_path: Path of the video file to upload.
+        artist: Streamer name interpolated into the prompt.
+
+    Returns:
+        The text of the model response.
+
+    Raises:
+        ValueError: If the uploaded file ends up in the "FAILED" state.
+    """
+    genai.configure(api_key=GEMINI_API_KEY)
+
+    # 2GB in size, 20GB in total
+    # https://github.com/google-gemini/cookbook/blob/28fc33fbc2189a30a682148165ea6049ffa93db0/quickstarts/Video.ipynb
+    video_file = genai.upload_file(path=video_path)
+
+    try:
+        # Poll every 10 seconds until the file leaves the PROCESSING state.
+        while video_file.state.name == "PROCESSING":
+            time.sleep(10)
+            video_file = genai.get_file(video_file.name)
+
+        if video_file.state.name == "FAILED":
+            raise ValueError(video_file.state.name)
+
+        # Create the prompt.
+        prompt = f"视频是{artist}的直播的切片,请根据该视频中的内容及弹幕信息,为这段视频起一个调皮并且吸引眼球的标题,只返回该标题即可,无需返回其他内容"
+
+        # Set the model to Gemini Flash.
+        model = genai.GenerativeModel(model_name="models/gemini-2.0-flash")
+
+        response = model.generate_content(
+            [prompt, video_file],
+            request_options={"timeout": 600},
+        )
+        title = response.text
+    finally:
+        # Delete the uploaded file even when processing or generation fails,
+        # so that failed runs do not leave files behind on the service.
+        genai.delete_file(video_file.name)
+
+    scan_log.info("使用 Gemini-2.0-flash 生成切片标题")
+    # Log the prompt that was actually sent rather than a hand-copied variant.
+    scan_log.info(f"Prompt: {prompt}")
+    scan_log.info(f"生成的切片标题为: {title}")
+    return title
\ No newline at end of file
diff --git a/src/autoslice/title_generator.py b/src/autoslice/title_generator.py
index 39dc4a1..ecfbe38 100644
--- a/src/autoslice/title_generator.py
+++ b/src/autoslice/title_generator.py
@@ -15,7 +15,7 @@ def wrapper(video_path, artist):
                 from .mllm_sdk.zhipu_sdk import zhipu_glm_4v_plus_generate_title
                 return zhipu_glm_4v_plus_generate_title(video_path, artist)
             elif model_type == "gemini":
-                from .mllm_sdk.gemini_sdk import gemini_generate_title
+                from .mllm_sdk.gemini_old_sdk import gemini_generate_title
                 return gemini_generate_title(video_path, artist)
             elif model_type == "qwen":
                 from .mllm_sdk.qwen_sdk import qwen_generate_title