In [1]:
!pip install inaSpeechSegmenter 'https://github.com/Numenorean/ShazamAPI/archive/master.zip' loguru zhconv
!apt-get install aria2 ffmpeg
!wget https://github.com/nilaoda/BBDown/releases/download/1.5.4/BBDown_1.5.4_20221019_linux-x64.zip
!unzip BBDown_1.5.4_20221019_linux-x64.zip
!chmod +x BBDown
!./BBDown https://www.bilibili.com/video/BV1tY411r7GU/ --use-aria2c -F '<ownerMid>'
# colab 内存不够一次处理全部。
!ffmpeg -i '/content/10850238.mp4' -ss 00:00:00 -to 01:30:00 -c:v copy -c:a copy '/content/10850238.1.mp4'

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting https://github.com/Numenorean/ShazamAPI/archive/master.zip
  Downloading https://github.com/Numenorean/ShazamAPI/archive/master.zip (9.7 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting inaSpeechSegmenter
  Downloading inaSpeechSegmenter-0.7.6-py3-none-any.whl (26 kB)
Collecting loguru
  Downloading loguru-0.6.0-py3-none-any.whl (58 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m58.3/58.3 KB[0m [31m6.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting zhconv
  Downloading zhconv-1.4.3.tar.gz (211 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m211.6/211.6 KB[0m [31m13.2 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting pyannote.parser
  Downloading pyannote.parser-0.8-py3-none-any.whl (24 kB)
Collecting pyannote.core
  Downloading pyannote.core-5.0.0-py3-none-any.whl (58

In [None]:
!ffmpeg -i '/content/10850238.mp4' -ss 00:00:00 -to 01:30:00 -c:v copy -c:a copy '/content/10850238.1.mp4'

In [3]:
import shutil
import subprocess
from os import makedirs, rename, listdir, system
from os.path import basename, splitext, dirname, exists, join, isfile

import ShazamAPI
from inaSpeechSegmenter import Segmenter
from loguru import logger
from zhconv import convert


@logger.catch
def segment(media: str, batch_size: int = 32, energy_ratio: float = 0.02):
    """Run inaSpeechSegmenter over ``media`` and return its segmentation.

    :param media: path of the audio/video file to analyse.
    :param batch_size: forwarded to ``Segmenter``; the original note says to
        choose it according to the available GPU.
    :param energy_ratio: forwarded to ``Segmenter`` unchanged. The original
        author marked its meaning as unclear; presumably the energy threshold
        used to label silent frames -- confirm against the library docs.
    :return: whatever ``Segmenter(...)(media)`` returns; ``extract_music``
        reads each item as ``(label, start, end)``.
    """
    logger.info(f'开始为 {media} 分段。')
    segmenter = Segmenter(
        # 'smn' distinguishes speech / music / noise (better); 'sm' only speech / music.
        vad_engine='sm',
        # Gender detection is not needed for music extraction.
        detect_gender=False,
        energy_ratio=energy_ratio,
        batch_size=batch_size,
    )
    return segmenter(media)


def _format_timestamp(seconds) -> str:
    """Render a non-negative duration in seconds as zero-padded ``HH:MM:SS`` (fraction dropped)."""
    minutes, secs = divmod(int(seconds), 60)
    hours, minutes = divmod(minutes, 60)
    return f'{hours:02d}:{minutes:02d}:{secs:02d}'


@logger.catch
def extract_music(
        segmentation,  # segmentation result: items indexed as (label, start, end)
        music_segment_threshold: int = 60,  # minimum length of a music segment before merging
        segment_connect: int = 3,  # music segments closer than this are merged
        music_segment_threshold_final: int = 90,  # minimum length of a music segment after merging
        no_energy_gap: float = 2,  # 'noEnergy' gaps shorter than this are bridged
):
    """Reduce a segmentation to the ``[start, end]`` stamps of long music passages.

    Steps:
      1. A ``'noEnergy'`` segment shorter than ``no_energy_gap`` whose two
         neighbours share a label is bridged: the neighbours become one segment.
      2. Only ``'music'`` segments longer than ``music_segment_threshold`` are kept.
      3. Kept segments separated by less than ``segment_connect`` are merged.
      4. Merged segments not longer than ``music_segment_threshold_final`` are dropped.

    All durations are in seconds. The input list is not modified.

    :param segmentation: sequence of ``(label, start, end)`` items, or ``None``.
    :return: list of ``[start, end]`` pairs formatted ``HH:MM:SS``; ``None``
        (after a warning) when ``segmentation`` is ``None``.
    """
    if segmentation is None:
        logger.warning('切片信息为空，可能在分割的时候发生了异常，请检查。')
        return

    # Work on a copy so the caller's segmentation stays untouched.
    segments = list(segmentation)

    # Walk backwards so a chain music / gap / music / gap / music collapses into
    # one span. Absorbed entries are only marked here and removed afterwards, so
    # the indices used by the remaining iterations stay valid.
    absorbed = set()
    for i in range(len(segments) - 2, 0, -1):
        gap, before, after = segments[i], segments[i - 1], segments[i + 1]
        if gap[0] == 'noEnergy' and \
                gap[2] - gap[1] < no_energy_gap and \
                before[0] == after[0]:
            segments[i - 1] = (before[0], before[1], after[2])
            absorbed.update((i, i + 1))
    segments = [seg for idx, seg in enumerate(segments) if idx not in absorbed]

    music = [
        seg for seg in segments
        if seg[0] == 'music' and seg[2] - seg[1] > music_segment_threshold
    ]

    # Join music segments that are separated by only a short interruption.
    for i in range(len(music) - 1, 0, -1):
        if music[i][1] - music[i - 1][2] < segment_connect:
            music[i - 1] = (music[i - 1][0], music[i - 1][1], music[i][2])
            del music[i]

    return [
        [_format_timestamp(seg[1]), _format_timestamp(seg[2])]
        for seg in music
        if seg[2] - seg[1] > music_segment_threshold_final
    ]


@logger.catch
def extract_mah_stuff(
        media,
        segmented_stamps,
        result_ext=None,  # keep the source file's extension when not given
        output_dir=None  # defaults to a 'segmented' folder next to the source file
):
    """Cut every ``[start, end]`` stamp out of ``media`` with ffmpeg (stream copy).

    Clips are written as ``<output_dir>/<media name without ext>_<index><ext>``.

    :param media: path of the source audio/video file.
    :param segmented_stamps: sequence of ``[start, end]`` pairs passed to
        ffmpeg's ``-ss`` / ``-to``; ``None`` makes the function a no-op.
    :param result_ext: extension (including the dot) for the clips; the
        source extension is used when ``None``.
    :param output_dir: directory for the clips; created if missing.
    :return: the output directory, or ``None`` when ``segmented_stamps`` is ``None``.
    """
    if segmented_stamps is None:
        return

    if output_dir is None:
        output_dir = join(dirname(media), 'segmented')
    makedirs(output_dir, exist_ok=True)
    logger.info(f'将分割结果写入到 {output_dir}。')
    filename_without_ext, file_ext = splitext(basename(media))
    if result_ext is None:
        result_ext = file_ext

    logger.info(f'共 {len(segmented_stamps)} 段内容被识别。')
    for index, stamp in enumerate(segmented_stamps):
        target = f'{join(output_dir, filename_without_ext)}_{index}{result_ext}'
        # Argument list instead of a shell string: paths containing quotes,
        # '$' or backticks are passed to ffmpeg verbatim and cannot inject commands.
        completed = subprocess.run([
            'ffmpeg', '-i', str(media),
            '-ss', str(stamp[0]),
            '-to', str(stamp[1]),
            '-c:v', 'copy',
            '-c:a', 'copy',
            target,
        ])
        if completed.returncode != 0:
            # Keep going with the remaining clips, but do not hide the failure.
            logger.warning(f'ffmpeg 处理第 {index} 段失败，返回码 {completed.returncode}。')
    return output_dir


@logger.catch
def shazam(mp3, stop_at_first_match=True):
    """Ask Shazam to identify the audio in ``mp3``.

    :param mp3: path of the media file whose bytes are sent for recognition.
    :param stop_at_first_match: stop after the first usable response instead
        of consuming every response the recognizer yields.
    :return: list of recognizer responses whose payload (element ``[1]``)
        has a non-empty ``'matches'`` entry and a ``'track'`` entry; empty
        when nothing was recognized.
    """
    logger.info(f'开始识别 {mp3} 。')
    # Read through a context manager so the file handle is closed right away.
    with open(mp3, 'rb') as audio_file:
        audio_bytes = audio_file.read()

    recognize_generator = ShazamAPI.Shazam(
        audio_bytes,
        lang='cn',
        time_zone='Asia/Shanghai'
    ).recognizeSong()

    matches = []
    for match in recognize_generator:
        payload = match[1]
        if payload.get('matches') and payload.get('track'):
            matches.append(match)
            if stop_at_first_match:
                break
    return matches


@logger.catch
def legalize_filename(file_name):
    """Strip characters that are awkward in file names from ``file_name``.

    ``:`` becomes a space; ``"``, ``/``, ``?`` and ``*`` are removed.
    """
    cleanup_table = str.maketrans({':': ' ', '"': None, '/': None, '?': None, '*': None})
    return file_name.translate(cleanup_table)


@logger.catch
def shazam_title(match):
    """Build ``<title>_<subtitle>`` from a recognizer response, sanitised for use in a file name.

    Reads ``match[1]['track']['title']`` and ``match[1]['track']['subtitle']``.
    """
    track = match[1]['track']
    safe_title = legalize_filename(track['title'])
    # Plain concatenation on purpose: a failed sanitisation (None) must raise here.
    return safe_title + '_' + legalize_filename(track['subtitle'])


@logger.catch
def recognize_song(song_dir: str, dist_dir: str):
    """Identify every clip in ``song_dir`` and move recognized ones to ``dist_dir``.

    A recognized clip is renamed to ``<original name>_<title><ext>``, with the
    title converted to Simplified Chinese. Unrecognized clips stay in place.

    :param song_dir: directory holding the clips; ``None`` makes the function a no-op.
    :param dist_dir: destination directory; created if missing.
    """
    if song_dir is None:
        return
    makedirs(dist_dir, exist_ok=True)
    logger.info(f'移动结果到 {dist_dir}。')
    for file in listdir(song_dir):
        file_path = join(song_dir, file)
        if not isfile(file_path):
            continue
        recognize_result = shazam(file_path)
        # shazam() / shazam_title() yield a falsy result / None when they fail;
        # treat both as "not recognized" so one bad clip does not abort the batch.
        raw_title = shazam_title(recognize_result[0]) if recognize_result else None
        if raw_title is None:
            logger.warning('识别失败。')
            continue
        title = convert(raw_title, 'zh-cn')
        logger.info(f'识别结果为: {title} 。')
        filename_without_ext, file_ext = splitext(file)
        # shutil.move also works when dist_dir is on another filesystem
        # (e.g. a mounted drive), where os.rename raises OSError.
        shutil.move(file_path, join(dist_dir, filename_without_ext) + '_' + title + file_ext)

In [4]:
# Input: the 90-minute excerpt written by the ffmpeg trim step.
raw_file_path = '/content/10850238.1.mp4'
# Directory that receives the cut-out music clips.
seg_out_dir = r'/content/convert2music'
# Directory that receives clips once they have been identified and renamed.
recognized_dir = r'/content/recognized'

# Segment the file, then reduce the segmentation to [start, end] stamps of music passages.
extracted_info = extract_music(segment(raw_file_path, batch_size=512))
logger.info(f'分段情况：{extracted_info}。')
# Cut every stamp out of the source file; yields the clip directory (None when there were no stamps to cut).
this_seg_out_dir = extract_mah_stuff(raw_file_path, extracted_info, output_dir=seg_out_dir)
# Identify each clip and move the recognized ones into recognized_dir.
recognize_song(this_seg_out_dir, recognized_dir)

2023-02-28 12:28:10.946 | INFO     | __main__:segment:12 - 开始为 /content/10850238.1.mp4 分段。


Downloading data from https://github.com/ina-foss/inaSpeechSegmenter/releases/download/models/keras_speech_music_cnn.hdf5


  return np.vstack(
  return np.vstack(
  data = (data - np.mean(data, axis=1).reshape((len(data), 1))) / np.std(data, axis=1).reshape((len(data), 1))
  x = asanyarray(arr - arrmean)


499/499 - 15s - 15s/epoch - 31ms/step


2023-02-28 12:29:06.128 | INFO     | __main__:<module>:6 - 分段情况：[['00:03:36', '00:07:47'], ['00:07:54', '00:11:47'], ['00:14:14', '00:18:48'], ['00:20:57', '00:22:48'], ['00:26:21', '00:28:44'], ['00:29:14', '00:30:58'], ['00:32:01', '00:34:50'], ['00:40:13', '00:45:05'], ['00:55:41', '00:59:26'], ['01:04:58', '01:06:42'], ['01:10:08', '01:13:04'], ['01:14:59', '01:18:07']]。
2023-02-28 12:29:06.130 | INFO     | __main__:extract_mah_stuff:69 - 将分割结果写入到 /content/convert2music。
2023-02-28 12:29:06.134 | INFO     | __main__:extract_mah_stuff:75 - 共 12 段内容被识别。
2023-02-28 12:29:14.166 | INFO     | __main__:recognize_song:124 - 移动结果到 /content/recognized。
2023-02-28 12:29:14.169 | INFO     | __main__:shazam:89 - 开始识别 /content/convert2music/10850238.1_3.mp4 。
2023-02-28 12:29:28.676 | INFO     | __main__:recognize_song:132 - 识别结果为: 我不难过_Yanzi Sun 。
2023-02-28 12:29:28.678 | INFO     | __main__:shazam:89 - 开始识别 /content/convert2music/10850238.1_7.mp4 。
2023-02-28 12:31:11.719 | INFO     | __main

In [5]:
from google.colab import drive
drive.mount('/content/drive')

!mkdir /content/drive/MyDrive/594461
!mv /content/recognized /content/drive/MyDrive/594461
!mv /content/convert2music /content/drive/MyDrive/594461

Mounted at /content/drive
