In [None]:
import os,ffmpeg,configparser,logging
from openai import OpenAI
import logging
from py_logging_config import setup_logging
from tqdm import tqdm
# Configure the logging system (setup_logging comes from py_logging_config).
setup_logging()

def get_logger_my(logger_name):
    """Return the logger registered under ``logger_name``.

    Thin wrapper around ``logging.getLogger``; repeated calls with the same
    name yield the same logger object.
    """
    named_logger = logging.getLogger(logger_name)
    return named_logger

In [2]:
def openai_speech2text_my(audio_file_paths, config_file='config.ini', api_prefix='sk',
                          chunk_size=1024 * 1024):
    """Transcribe audio files through an OpenAI-compatible API and save the text.

    Each file is read fully into memory, cut into fixed-size byte chunks, and
    every chunk is sent as its own transcription request. The returned texts
    are concatenated in order and written next to the audio file, using the
    same path with a ``.txt`` extension.

    Args:
        audio_file_paths: Iterable of paths of the audio files to transcribe.
        config_file: INI file with an ``[openai]`` section that holds
            ``api_key_<api_prefix>`` and ``api_base_<api_prefix>``.
        api_prefix: Suffix selecting which key / base-URL pair is used.
        chunk_size: Size in bytes of each uploaded slice (default 1 MB).

    Raises:
        ValueError: If ``chunk_size`` is not positive.
        KeyError: If the ``[openai]`` section or the requested keys are
            missing. ``ConfigParser.read`` silently ignores a missing file,
            so a wrong ``config_file`` path also surfaces here.
    """
    logger = get_logger_my('my_logger')
    if chunk_size <= 0:
        raise ValueError('chunk_size must be a positive number of bytes')

    # Read the configuration file.
    config = configparser.ConfigParser()
    config.read(config_file)
    client = OpenAI(
        api_key=config['openai']['api_key_' + api_prefix],
        base_url=config['openai']['api_base_' + api_prefix],
    )

    # Every chunk is staged in this scratch file before upload; make sure its
    # directory exists so the first write does not fail on a fresh checkout.
    temp_chunk_path = './tmp/temp_chunk.mp3'
    os.makedirs(os.path.dirname(temp_chunk_path), exist_ok=True)

    for audio_file_path in audio_file_paths:
        logger.info('processing {}'.format(audio_file_path))
        # Read the audio file and split it into chunks.
        # NOTE(review): the cut points are raw byte offsets, not audio frame
        # boundaries -- presumably the service tolerates that; confirm.
        with open(audio_file_path, "rb") as audio_file:
            audio_data = audio_file.read()
        chunks = [audio_data[i:i + chunk_size] for i in range(0, len(audio_data), chunk_size)]

        # Transcription result of every chunk, in order.
        transcriptions = []
        # Send one request per chunk; tqdm shows the progress.
        for chunk in tqdm(chunks, desc=f"Processing {os.path.basename(audio_file_path)}"):
            with open(temp_chunk_path, 'wb') as temp_file:
                temp_file.write(chunk)

            with open(temp_chunk_path, "rb") as temp_audio_file:
                response = client.audio.transcriptions.create(
                    model="FunAudioLLM/SenseVoiceSmall",
                    file=temp_audio_file,
                    response_format="json"
                )

            transcriptions.append(response.text)

        file_name, _ = os.path.splitext(audio_file_path)
        out_text_file_name = file_name + '.txt'
        # Explicit UTF-8: the platform default encoding may be unable to
        # represent every character of the transcript.
        with open(out_text_file_name, 'w', encoding='utf-8') as out_file:
            out_file.write(''.join(transcriptions))
        logger.info('translated audio file to {}'.format(out_text_file_name))

def vedio2mp3_my(folder_path):
    """Convert every MP4 file directly inside ``folder_path`` to an MP3.

    For ``<name>.mp4`` the audio is written to
    ``<folder_path>/audio_<name>.mp3`` via ffmpeg (output options ``q=0`` and
    ``map='a'``), overwriting an existing output file.

    Args:
        folder_path: Directory that is scanned (non-recursively) for MP4 files.

    Returns:
        List of the MP3 paths that were written, in listing order.
    """
    logger = get_logger_my('my_logger')
    # List the folder and keep only names ending in .mp4 (case-insensitive).
    video_names = [
        entry for entry in os.listdir(folder_path)
        if entry.lower().endswith('.mp4')
    ]
    converted = []
    for video_name in video_names:
        stem = os.path.splitext(video_name)[0]
        target = folder_path + '/audio_' + stem + '.mp3'
        logger.info('transleting {}'.format(stem))
        source = folder_path + '/' + video_name
        stream = ffmpeg.input(source).output(target, q=0, map='a')
        stream.run(overwrite_output=True)
        logger.info('saved to {}'.format(target))
        converted.append(target)
    return converted

In [4]:
# Convert every MP4 in folder_path to MP3; the returned list of MP3 paths is
# the last expression and therefore shown as the cell output.
folder_path = "E:/告别原生家庭直通/tmp"
vedio2mp3_my(folder_path)

2024-12-04 17:23:43,584 - my_logger - INFO - transleting 第1讲-从察觉原生家庭的问题到自我成长的意识
2024-12-04 17:24:12,341 - my_logger - INFO - saved to E:/告别原生家庭直通/tmp/audio_第1讲-从察觉原生家庭的问题到自我成长的意识.mp3


['E:/告别原生家庭直通/tmp/audio_第1讲-从察觉原生家庭的问题到自我成长的意识.mp3']

In [5]:
# Scratch check: split a generated MP3 path into (root, extension).
res='E:/告别原生家庭直通/tmp/audio_第1讲-从察觉原生家庭的问题到自我成长的意识.mp3'
os.path.splitext(res)

('E:/告别原生家庭直通/tmp/audio_第1讲-从察觉原生家庭的问题到自我成长的意识', '.mp3')

In [3]:
# Transcribe the extracted MP3; credentials are read from the [openai] section
# of config.ini using the 'sk' key suffix.
audio_file_paths=['E:/告别原生家庭直通/tmp/audio_第1讲-从察觉原生家庭的问题到自我成长的意识.mp3']
openai_speech2text_my(audio_file_paths,config_file='config.ini',api_prefix='sk')

2024-12-04 18:02:52 - my_logger - INFO - processing E:/告别原生家庭直通/tmp/audio_第1讲-从察觉原生家庭的问题到自我成长的意识.mp3


Processing E:/告别原生家庭直通/tmp/audio_第1讲-从察觉原生家庭的问题到自我成长的意识.mp3: 100%|██████████| 53/53 [01:09<00:00,  1.31s/it]

2024-12-04 18:04:02 - my_logger - INFO - translated audio file to E:/告别原生家庭直通/tmp/audio_第1讲-从察觉原生家庭的问题到自我成长的意识.txt





### 批量修改文件

In [16]:
import os
def get_files(folder_path, file_type=None, max_files=0):
    """List files in ``folder_path``, optionally filtered by extension.

    Args:
        folder_path: Directory to list (non-recursive).
        file_type: Suffix, or iterable of suffixes, a name must end with.
            Matching is case-insensitive on both sides, so ``'MKV'`` and
            ``'mkv'`` select the same files. ``None``/empty keeps everything.
        max_files: Maximum number of entries to return; ``0`` means no limit.

    Returns:
        List of ``folder_path + '/' + name`` strings, sorted by name so that
        ``max_files`` always selects the same subset.
    """
    # os.listdir returns entries in arbitrary order; sort for determinism.
    names = sorted(os.listdir(folder_path))
    # Filter by suffix.
    if file_type:
        if isinstance(file_type, str):
            suffixes = file_type.lower()
        else:
            suffixes = tuple(suffix.lower() for suffix in file_type)
        names = [name for name in names if name.lower().endswith(suffixes)]
    if max_files:
        names = names[:max_files]
    res = [folder_path + '/' + name for name in names]
    print('get {} files'.format(len(res)))
    return res

In [45]:
# One-off batch rename of the files directly inside root_folder.
# The commented-out block below is an earlier variant kept for reference.
root_folder = 'E:/video/reply1988'
# for i in range(1,11):
#     file_folder = '{}SE{:02d}'.format(root_folder,i)
#     # print(file_folder)
#     for line in get_files(file_folder, file_type='mkv'):
#         print(line)
# for line in get_files(root_folder, file_type='mkv'):
#     old_file_path = os.path
#     new_file_path = line.replace('[Henshin]_Shin_Seiki_Evangelion_', 'Neon_Genesis_Evangelion_S01E').replace('_', 'E', 1)
#     # new_name = new_name.replace('(', '_(')
#     # os.rename(old_file_path, new_file_path)
#     print(f'Renamed: {old_file_path} -> {new_file_path}')
#     # print(new_file_path)

folder_path = root_folder
all_files = os.listdir(folder_path)
for file_name in all_files:
    # Only .cht.ass subtitle files and .mkv videos are touched (case-insensitive).
    if file_name.lower().endswith('.cht.ass') or file_name.lower().endswith('.mkv'):
        # Strip the bracketed release tags, e.g.
        # '[TSKS][Reply.1988][E001(720P)][KO_CN].mkv' -> 'Reply.1988.S01E01.KO_CN.mkv'.
        new_name = file_name.replace('[TSKS][', '').replace('][E0','.S01E').replace('(720P)][','.').replace(']','')
        old_file_path = os.path.join(folder_path, file_name)
        new_file_path = os.path.join(folder_path, new_name)
        # Nothing was replaced (e.g. already renamed): leave the file alone.
        if new_name == file_name:
            continue
        os.rename(old_file_path, new_file_path)
        print(f'Renamed: {old_file_path} -> {new_file_path}')

Renamed: E:/video/reply1988\[TSKS][Reply.1988][E001(720P)][KO_CN].mkv -> E:/video/reply1988\Reply.1988.S01E01.KO_CN.mkv
Renamed: E:/video/reply1988\[TSKS][Reply.1988][E002(720P)][KO_CN].mkv -> E:/video/reply1988\Reply.1988.S01E02.KO_CN.mkv
Renamed: E:/video/reply1988\[TSKS][Reply.1988][E003(720P)][KO_CN].mkv -> E:/video/reply1988\Reply.1988.S01E03.KO_CN.mkv
Renamed: E:/video/reply1988\[TSKS][Reply.1988][E004(720P)][KO_CN].mkv -> E:/video/reply1988\Reply.1988.S01E04.KO_CN.mkv
Renamed: E:/video/reply1988\[TSKS][Reply.1988][E005(720P)][KO_CN].mkv -> E:/video/reply1988\Reply.1988.S01E05.KO_CN.mkv
Renamed: E:/video/reply1988\[TSKS][Reply.1988][E006(720P)][KO_CN].mkv -> E:/video/reply1988\Reply.1988.S01E06.KO_CN.mkv
Renamed: E:/video/reply1988\[TSKS][Reply.1988][E007(720P)][KO_CN].mkv -> E:/video/reply1988\Reply.1988.S01E07.KO_CN.mkv
Renamed: E:/video/reply1988\[TSKS][Reply.1988][E008(720P)][KO_CN].mkv -> E:/video/reply1988\Reply.1988.S01E08.KO_CN.mkv
Renamed: E:/video/reply1988\[TSKS][Reply

In [15]:

def rename_files(folder_path):
    """Rename ``<tag>.SE<season>.<episode>.mkv`` files to the Friends scheme.

    A name such as ``1080P.SE02.01.mkv`` becomes ``Friends.S02E01.1080p.mkv``.
    Only ``.mkv`` files with at least four dot-separated parts are considered,
    and only when the season field (second part minus its two-character
    prefix) and the episode field (third part) are purely numeric. That check
    makes the function safe to run again: an already renamed
    ``Friends.S02E01.1080p.mkv`` is skipped instead of being mangled.

    Args:
        folder_path: Directory whose direct entries are renamed in place.
    """
    for file_name in os.listdir(folder_path):
        if not file_name.lower().endswith('.mkv'):
            continue
        parts = file_name.split('.')
        if len(parts) < 4:
            continue
        season = parts[1][2:]  # drop the two-character prefix, e.g. 'SE02' -> '02'
        episode = parts[2]
        # Skip names that do not follow the expected source pattern.
        if not (season.isdigit() and episode.isdigit()):
            continue
        new_name = f"Friends.S{season.zfill(2)}E{episode.zfill(2)}.1080p.mkv"
        old_file_path = os.path.join(folder_path, file_name)
        new_file_path = os.path.join(folder_path, new_name)
        os.rename(old_file_path, new_file_path)
        print(f'Renamed: {old_file_path} -> {new_file_path}')

# Rename files with rename_files, one season folder at a time.
# NOTE(review): root_folder has no trailing separator, so file_folder becomes
# e.g. 'E:/video/EVA_Neon_Genesis_EvangelionSE02' -- confirm the intended path.
# The rename call is commented out, so this loop currently only builds the path.
root_folder = 'E:/video/EVA_Neon_Genesis_Evangelion'
for i in range(2, 11):
    file_folder = '{}SE{:02d}'.format(root_folder, i)

    # rename_files(file_folder)

Renamed: E:/video/friendsHD/SE02\1080P.SE02.01.mkv -> E:/video/friendsHD/SE02\Friends.S02E01.1080p.mkv
Renamed: E:/video/friendsHD/SE02\1080P.SE02.02.mkv -> E:/video/friendsHD/SE02\Friends.S02E02.1080p.mkv
Renamed: E:/video/friendsHD/SE02\1080P.SE02.03.mkv -> E:/video/friendsHD/SE02\Friends.S02E03.1080p.mkv
Renamed: E:/video/friendsHD/SE02\1080P.SE02.04.mkv -> E:/video/friendsHD/SE02\Friends.S02E04.1080p.mkv
Renamed: E:/video/friendsHD/SE02\1080P.SE02.05.mkv -> E:/video/friendsHD/SE02\Friends.S02E05.1080p.mkv
Renamed: E:/video/friendsHD/SE02\1080P.SE02.06.mkv -> E:/video/friendsHD/SE02\Friends.S02E06.1080p.mkv
Renamed: E:/video/friendsHD/SE02\1080P.SE02.07.mkv -> E:/video/friendsHD/SE02\Friends.S02E07.1080p.mkv
Renamed: E:/video/friendsHD/SE02\1080P.SE02.08.mkv -> E:/video/friendsHD/SE02\Friends.S02E08.1080p.mkv
Renamed: E:/video/friendsHD/SE02\1080P.SE02.09.mkv -> E:/video/friendsHD/SE02\Friends.S02E09.1080p.mkv
Renamed: E:/video/friendsHD/SE02\1080P.SE02.10.mkv -> E:/video/friendsHD/