# B站视频列表总时长获取
输入视频url链接，输出视频列表总时长  
保存网页中的body整个组件到html文件即可

In [None]:
import requests
from bs4 import BeautifulSoup
import re
from collections import defaultdict

def parse_time_str(time_str):
    """
    Convert an 'HH:MM:SS' or 'MM:SS' time string into a number of seconds.

    Surrounding whitespace is ignored. A string that does not split into
    exactly two or three colon-separated fields yields 0.
    """
    fields = time_str.strip().split(':')
    if len(fields) not in (2, 3):
        return 0

    # Fold the fields left to right as base-60 digits.
    seconds = 0
    for field in fields:
        seconds = seconds * 60 + int(field)
    return seconds

def format_seconds(total_seconds):
    """
    Render a number of seconds as a compact hours/minutes/seconds string.

    Zero-valued hour and minute parts are omitted. The seconds part is
    omitted when it is zero, unless nothing else would be shown.
    """
    hours = total_seconds // 3600
    minutes = (total_seconds % 3600) // 60
    seconds = total_seconds % 60

    pieces = [
        f"{value}{unit}"
        for value, unit in ((hours, "小时"), (minutes, "分"))
        if value > 0
    ]
    # Always emit something: fall back to the seconds part for a zero total.
    if seconds > 0 or not pieces:
        pieces.append(f"{seconds}秒")
    return ''.join(pieces)

def extract_time_string(text, method="default"):
    """
    Pull the first 'M:SS', 'MM:SS' or 'H:MM:SS'-style time string out of text.

    Only the "default" matching method is implemented; any other method
    returns None. Returns None as well when no time string is found.
    """
    if method != "default":
        return None

    found = re.search(r'\b(\d{1,2}:\d{2}(?::\d{2})?)\b', text)
    return found.group(1) if found else None

def extract_group_name(text, method="default"):
    """
    Derive a group name from a video title using the selected method.

    Supported methods:
        "pian":    the title starts with a prefix ending in "篇", followed by
                   an optional dash and a number; the prefix is the group.
        "chapter": the title starts with a chapter number such as 1.1 or 2.3;
                   the group is "<major>章".
        "default": no grouping.

    Returns "未分组" when the title does not match, and also when the method
    is not recognised, so that callers always get a usable string key.
    """
    ungrouped = "未分组"
    # re.match is anchored at the start, and text taken from HTML elements
    # often begins with whitespace/newlines, so drop it before matching.
    title = text.lstrip()

    if method == "pian":
        # "xx篇" prefix method, e.g. "基础篇-01".
        match = re.match(r'(.+?篇)[-－]?\d+', title)
        return match.group(1) if match else ungrouped

    if method == "chapter":
        # Chapter-number method, e.g. 1.1, 2.3.
        match = re.match(r'(\d+)\.\d+', title)
        return f"{match.group(1)}章" if match else ungrouped

    # "default" and any unrecognised method: everything goes in one bucket.
    # More methods can be added above.
    return ungrouped

def get_total_video_time_from_url(url, group_method="default"):
    """
    Fetch a video page and sum the durations listed in its video list.

    The page is downloaded with requests, the element with class
    "video-pod__body" is located, and every descendant div whose class
    contains "video-pod__item" is scanned for a time string. Durations are
    accumulated both overall and per group, then printed.

    Args:
        url: address of the video page to fetch.
        group_method: grouping method passed to extract_group_name
            ("default", "pian" or "chapter").

    Returns:
        The total duration in seconds, or None when the expected list
        elements are not found in the page.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)'
    }
    # Without a timeout requests can wait forever on an unresponsive server.
    response = requests.get(url, headers=headers, timeout=10)
    html = response.text

    soup = BeautifulSoup(html, 'html.parser')

    body_div = soup.find('div', class_='video-pod__body')
    if not body_div:
        print('没有找到 class=video-pod__body 的内容')
        return

    video_items = body_div.find_all('div', class_=lambda x: x and 'video-pod__item' in x)
    if not video_items:
        print('没有找到 class=video-pod__item 的内容')
        return

    group_totals = defaultdict(int)
    total_seconds = 0
    for item in video_items:
        text = item.get_text()
        time_str = extract_time_string(text)
        if not time_str:
            # Items without a recognisable duration are skipped.
            continue

        seconds = parse_time_str(time_str)
        total_seconds += seconds
        group_name = extract_group_name(text, method=group_method)
        group_totals[group_name] += seconds

    # Print per-group results. The loop variable must not be named
    # total_seconds, otherwise it overwrites the grand total used below.
    print('每个分组累计时间：')
    for group, group_seconds in group_totals.items():
        print(f"{group}: {format_seconds(group_seconds)}")

    print('所有视频累计时间：', format_seconds(total_seconds))
    return total_seconds

### 正则表达式分组方法（`get_total_video_time_from_url` 的 `group_method` 参数，内部传给 `extract_group_name` 的 `method` 参数）
1. default - 无分组
2. pian - XX篇 ...
3. chapter - 1.1 1.2 、2.1 2.2 ...

In [17]:
# Example run: fetch this video page and print the total duration of its
# video list, using the default grouping method (everything in one group).
url = 'https://www.bilibili.com/video/BV1gY4y1L7M6/?spm_id_from=333.337.search-card.all.click&vd_source=5f19aac781288b6cb89fbc29d2be6ece'
get_total_video_time_from_url(url)

每个分组累计时间：
未分组: 3小时49分20秒
