In [None]:
from datetime import datetime
# Reporting window: stime_readable is the start and etime_readable the end, written as
# "YYYY-MM-DD HH:MM:SS" strings.
stime_readable, etime_readable = "2025-05-01 00:00:00", "2025-06-01 00:00:00"
# Label used in chart titles. Setting it to False skips the routine analysis cells
# (the ones guarded by `if current_month:`).
current_month = "五月"

# Parse both bounds with the same pattern. The resulting datetimes are naive, so
# .timestamp() interprets them in the machine's local timezone.
stime_obj, etime_obj = (
    datetime.strptime(text, "%Y-%m-%d %H:%M:%S")
    for text in (stime_readable, etime_readable)
)
stime_unix, etime_unix = int(stime_obj.timestamp()), int(etime_obj.timestamp())

# NOTE: despite its name, seconds_diff is a timedelta; only its whole-day part is used.
seconds_diff = etime_obj - stime_obj
days_diff = seconds_diff.days
print(f"天差距: {days_diff}, 起时间: {stime_unix}, 止时间: {etime_unix}")

In [None]:
# 读入所有主题帖数据
import os, pickle
# Load every pickled topic found under topic_datas/.
# SECURITY NOTE(review): pickle.load can execute arbitrary code while unpickling.
# Only point this at files produced by your own crawler, never at downloaded data.
data_dir = "topic_datas"
# os.listdir returns entries in arbitrary order; sorting keeps the load order, and
# therefore every later stable-sort tie-break, reproducible between runs.
file_list = sorted(os.listdir(data_dir))
topic_list = list()
for x in file_list:
    path = os.path.join(data_dir, x)
    # Skip sub-directories; open() would raise on them.
    if not os.path.isfile(path): continue
    with open(path, "rb") as f:
        topic_list.append(pickle.load(f))
print(f"共读入主题帖数: {len(topic_list)}")
# Reshape the data so that every reply also carries its topic's board_belong and
# topic_title; later cells group and print replies without the parent topic at hand.
for topic in topic_list:
    # Falsy entries are placeholders, not topic dicts (same check as the
    # valid_topic_list filter uses).
    if not topic: continue
    for reply in topic["reply_list"]:
        reply["board_belong"] = topic["board_belong"]
        reply["topic_title"] = topic["topic_title"]

In [None]:
def is_in_timeframe(d, stime_unix=None, etime_unix=None):
    """Return True when a topic/reply dict was posted inside the time window.

    The window is half-open, ``stime_unix <= t < etime_unix``: a post stamped exactly
    at the end instant belongs to the next period, and consecutive windows (for
    example per-day slices) never count the same post twice.

    Args:
        d: a topic dict (has "topic_time") or a reply dict (has "reply_time").
            "topic_time" takes precedence when both keys are present.
        stime_unix: window start as a Unix timestamp. When omitted, the module-level
            ``stime_unix`` is read at call time, so re-running the configuration cell
            takes effect without redefining this function.
        etime_unix: window end (exclusive) as a Unix timestamp. When omitted, the
            module-level ``etime_unix`` is read at call time.

    Returns:
        bool: whether the post time falls inside the window.

    Raises:
        ValueError: if ``d`` has neither "topic_time" nor "reply_time".
    """
    # The parameters shadow the globals of the same name, hence the globals() lookup.
    if stime_unix is None:
        stime_unix = globals()["stime_unix"]
    if etime_unix is None:
        etime_unix = globals()["etime_unix"]
    if "topic_time" in d:
        t = d["topic_time"]
    elif "reply_time" in d:
        t = d["reply_time"]
    else:
        raise ValueError("input must be a topic/reply dict.")
    return stime_unix <= t < etime_unix
def unix2readable(unix_timestamp, format_type):
    """Format a Unix timestamp as local time.

    Args:
        unix_timestamp: seconds since the epoch.
        format_type: 1 for a compact "yymmdd" day key (four-digit year with its first
            two characters dropped), 2 for "YYYY-MM-DD HH:MM".

    Returns:
        str: the formatted local time.

    Raises:
        ValueError: if format_type is neither 1 nor 2.
    """
    if not (format_type == 1 or format_type == 2):
        raise ValueError("format_type error.")
    moment = datetime.fromtimestamp(unix_timestamp)
    if format_type == 1:
        return moment.strftime("%Y%m%d")[2:]
    return moment.strftime("%Y-%m-%d %H:%M")
import calendar
def readable2weekday(readable_timestamp):
    """Return the three-letter weekday name for a "yymmdd" day key.

    The key is prefixed with "20" to rebuild a four-digit year, so only dates in
    20xx are supported. The name comes from calendar.day_name, truncated to its
    first three characters.
    """
    parsed = datetime.strptime(f"20{readable_timestamp}", "%Y%m%d")
    full_name = calendar.day_name[parsed.weekday()]
    return full_name[:3]

In [None]:
# 图表相关函数
import matplotlib.pyplot as plt
def output_plot_bar(x, y, fn, w, color="lightpink"):
    """Draw a bar chart, save it as "<fn>.png" and show it.

    Args:
        x: category labels, one per bar.
        y: bar heights; may be negative (the chart is also used for signed deltas).
        fn: chart title, also used as the PNG file name stem.
        w: figure height in inches (the width is fixed at 16 inches).
        color: bar colour, light pink by default.
    """
    plt.rcParams['font.sans-serif'] = ['Microsoft YaHei']
    plt.figure(figsize=(16, w))
    bars = plt.bar(x, y, color=color)
    # Baseline at zero so positive and negative bars are easy to tell apart.
    plt.axhline(y=0, zorder=0, color="black")
    for bar in bars:
        height = bar.get_height()
        # Keep the value label outside the bar: above the tip of a positive bar,
        # below the tip of a negative one.
        plt.text(bar.get_x() + bar.get_width()/2., height,
                 f'{int(height)}', ha='center',
                 va='bottom' if height >= 0 else 'top')
    plt.title(fn, fontsize=15)
    plt.xticks(rotation=45, fontsize=12)
    plt.tight_layout()
    plt.savefig(fn+".png", dpi=100)
    plt.show()
def output_plot_line(x, y, fn, w):
    """Draw a line chart, save it as "<fn>.png" and show it.

    Args:
        x: x-axis labels, one per data point.
        y: values, one per label.
        fn: chart title, also used as the PNG file name stem.
        w: figure height in inches (the width is fixed at 16 inches).

    The line is always sky blue with round markers; every point is annotated with
    its integer value.
    """
    plt.rcParams['font.sans-serif'] = ['Microsoft YaHei']
    plt.figure(figsize=(16, w))
    plt.plot(x, y,
             color='skyblue',
             marker='o',
             markersize=8,
             linewidth=2.5)
    for xi, yi in zip(x, y):
        plt.text(xi, yi,
                 f'{int(yi)}',
                 ha='center',
                 va='bottom',
                 fontsize=12)
    # Grid configuration does not depend on the data points, so set it once.
    plt.grid(axis='y', alpha=0.3)
    plt.title(fn, fontsize=15, pad=20)
    plt.xticks(rotation=45, fontsize=12)
    plt.tight_layout()
    plt.savefig(fn+".png", dpi=100)
    plt.show()

In [None]:
# Reference lists/dicts used by the analysis cells.

# All 23 boards, in their default display order.
all_boards = [
    "自由讨论区",
    "个人日记",
    "论坛管理",
    "自绘美少女",
    "ACG实物讨论区",
    "电子产品讨论区",
    "文字类作品区",
    "图片/作品出处询问版",
    "寻求资源",
    "动漫综合讨论区",
    "动画资源共享区",
    "漫画轻小说共享区",
    "ACG音乐资源共享区",
    "LIVE类资源分享区",
    "Galgame BitTorrent区",
    "Galgame 网络硬盘区",
    "CG画册资源共享区",
    "GAL本子区",
    "无限制资源区",
    "GalGame推荐区",
    "GalGame新作动态",
    "GalGame综合讨论区",
    "游戏安装疑难互助",
]

# The nine resource-sharing boards; removing them leaves the 14 non-resource boards,
# which keep the same relative order as in all_boards.
_resource_boards = {
    "动画资源共享区",
    "漫画轻小说共享区",
    "ACG音乐资源共享区",
    "LIVE类资源分享区",
    "Galgame BitTorrent区",
    "Galgame 网络硬盘区",
    "CG画册资源共享区",
    "GAL本子区",
    "无限制资源区",
}
boards_without_resource = [board for board in all_boards if board not in _resource_boards]

# Image file name -> emoji tag: em01.gif .. em48.gif map to [s:10] .. [s:57],
# i.e. the tag number is the file number plus 9.
emoji_trans = {f"em{number:02d}.gif": f"[s:{number + 9}]" for number in range(1, 49)}

In [None]:
# 定义一些符合时间限定的子集
import copy
# Topics whose own post time lies inside the window.
valid_topic_list = [topic for topic in topic_list if topic and is_in_timeframe(topic)]
# Topics (deep copies) that received at least one in-window reply; each copy's
# reply_list is trimmed to just those replies.
valid_reply_list = []
# The same in-window replies, flattened out of their topics.
valid_reply_list_unstructured = []

# Work on a deep copy so trimming reply_list never touches topic_list itself.
inn_topic_list = copy.deepcopy(topic_list)
for topic in inn_topic_list:
    if topic == False:
        continue
    inn = [reply for reply in topic["reply_list"] if is_in_timeframe(reply)]
    if not inn:
        continue
    topic["reply_list"] = inn
    valid_reply_list.append(topic)
    valid_reply_list_unstructured.extend(inn)

# valid_reply_list_unstructured ordered by ascending reply time.
valid_reply_list_unstructured_sorted = list(valid_reply_list_unstructured)
valid_reply_list_unstructured_sorted.sort(key=lambda item: item["reply_time"])

In [None]:
# Group the in-window topics/replies by board and by day ("yymmdd" key).
# There is no structured per-board reply dict; no use case for one has come up yet.
valid_topic_dict_board = {}
valid_reply_dict_board_unstructured = {}
valid_topic_dict_day = {}
valid_reply_dict_day_unstructured = {}

for topic in valid_topic_list:
    valid_topic_dict_board.setdefault(topic["board_belong"], []).append(topic)
    t = unix2readable(topic["topic_time"], format_type=1)
    valid_topic_dict_day.setdefault(t, []).append(topic)

for reply in valid_reply_list_unstructured:
    valid_reply_dict_board_unstructured.setdefault(reply["board_belong"], []).append(reply)
    t = unix2readable(reply["reply_time"], format_type=1)
    valid_reply_dict_day_unstructured.setdefault(t, []).append(reply)

In [None]:
# Build the username -> [replies] index in a single pass.
# Keys are inserted in order of first appearance, so the index order (and with it
# the tie-break order of every ranking sorted from it) is the same on every run;
# going through set() would make it depend on string hashing.
username_search_dict = dict()
for reply in valid_reply_list_unstructured:
    username_search_dict.setdefault(reply["username"], list()).append(reply)
# Unique usernames, in the same first-appearance order as the index.
all_usernames_list = list(username_search_dict)
print(f"共建立{len(all_usernames_list)}位用户的索引字典")

In [None]:
# Analysis: new topics per board and per day, plus the change against a previous month.
from datetime import timedelta
if current_month:
    # Per-board totals, restricted to boards that actually received topics.
    boards_labels_all = [board for board in all_boards if board in valid_topic_dict_board]
    counts_labels_all = [len(valid_topic_dict_board[board]) for board in boards_labels_all]
    print(f"统计期间内，论坛各板块共发表主题帖{sum(counts_labels_all)}个，平均每天发表{sum(counts_labels_all)/days_diff:.2f}个")
    boards_labels_without_resource = [board for board in boards_without_resource if board in valid_topic_dict_board]
    counts_labels_without_resource = [len(valid_topic_dict_board[board]) for board in boards_labels_without_resource]
    print(f"排除九个资源区后，统计期间内，论坛各板块共发表主题帖{sum(counts_labels_without_resource)}个，平均每天发表{sum(counts_labels_without_resource)/days_diff:.2f}个")
    output_plot_bar(boards_labels_all, counts_labels_all, f"{current_month}各板块新增主题帖数量", 5)

    # Day axis: exactly days_diff consecutive "yymmdd" keys starting at stime. Built
    # from the calendar rather than from the dict keys, so a day without any topic is
    # plotted as 0 and a stray key for a post stamped exactly at etime is ignored.
    inn = [(stime_obj + timedelta(days=offset)).strftime("%Y%m%d")[2:] for offset in range(days_diff)]
    weekdays_labels = [f"{day[2:]}{readable2weekday(day)}" for day in inn]
    counts_labels = [len(valid_topic_dict_day.get(day, [])) for day in inn]
    output_plot_line(weekdays_labels, counts_labels, f"{current_month}每天新增主题帖数量", 4)

    # Current-month vector: one count per board in all_boards order, followed by the
    # daily average over all boards and over the non-resource boards.
    mc_counts = [len(valid_topic_dict_board[board]) if board in valid_topic_dict_board else 0 for board in all_boards]
    mc_counts += [sum(counts_labels_all)/days_diff, sum(counts_labels_without_resource)/days_diff]
    # Recorded vectors in the same layout -- presumably the printed mc_counts of
    # March/April/May; verify before relying on them.
    m3_counts = [469, 283, 32, 10, 12, 10, 1, 149, 423, 52, 9, 0, 227, 1, 0, 238, 41, 0, 15, 1, 42, 50, 42, 67.96774193548387, 50.83870967741935]
    m4_counts = [472, 254, 34, 5, 16, 12, 0, 121, 357, 34, 15, 1, 263, 0, 0, 254, 12, 0, 10, 2, 33, 79, 20, 66.46666666666667, 47.96666666666667]
    m5_counts = [537, 253, 37, 5, 12, 19, 0, 141, 364, 28, 22, 2, 265, 0, 0, 212, 18, 0, 13, 4, 43, 67, 30, 66.83870967741936, 49.67741935483871]
    # Baseline the current month is compared against.
    plast = m4_counts
    # Per-board difference only; the two trailing averages are reported as growth rates below.
    counts_diff = [mc_counts[i]-plast[i] for i in range(len(all_boards))]
    output_plot_bar(all_boards, counts_diff, "各板块主题帖发帖数变化量", 5, color="orange")
    print(f"总体增率{mc_counts[-2]/plast[-2]-1:.2%}，排除九个资源区后增率{mc_counts[-1]/plast[-1]-1:.2%}")
    # Printed so the vector can be pasted in as next month's baseline.
    print(mc_counts)

In [None]:
# Analysis: new replies per board and per day, plus the change against a previous month.
from datetime import timedelta
if current_month:
    # Per-board totals, restricted to boards that actually received replies.
    boards_labels_all = [board for board in all_boards if board in valid_reply_dict_board_unstructured]
    counts_labels_all = [len(valid_reply_dict_board_unstructured[board]) for board in boards_labels_all]
    print(f"统计期间内，论坛各板块共发表回复贴{sum(counts_labels_all)}个，平均每天发表{sum(counts_labels_all)/days_diff:.2f}个")
    boards_labels_without_resource = [board for board in boards_without_resource if board in valid_reply_dict_board_unstructured]
    counts_labels_without_resource = [len(valid_reply_dict_board_unstructured[board]) for board in boards_labels_without_resource]
    print(f"排除九个资源区后，统计期间内，论坛各板块共发表回复贴{sum(counts_labels_without_resource)}个，平均每天发表{sum(counts_labels_without_resource)/days_diff:.2f}个")
    output_plot_bar(boards_labels_all, counts_labels_all, f"{current_month}各板块新增回复贴数量", 5)

    # Day axis: exactly days_diff consecutive "yymmdd" keys starting at stime. Built
    # from the calendar rather than from the dict keys, so a day without any reply is
    # plotted as 0 and a stray key for a post stamped exactly at etime is ignored.
    inn = [(stime_obj + timedelta(days=offset)).strftime("%Y%m%d")[2:] for offset in range(days_diff)]
    weekdays_labels = [f"{day[2:]}{readable2weekday(day)}" for day in inn]
    counts_labels = [len(valid_reply_dict_day_unstructured.get(day, [])) for day in inn]
    output_plot_line(weekdays_labels, counts_labels, f"{current_month}每天新增回复贴数量", 4)

    # Current-month vector: one count per board in all_boards order, followed by the
    # daily average over all boards and over the non-resource boards.
    mc_counts = [len(valid_reply_dict_board_unstructured[board]) if board in valid_reply_dict_board_unstructured else 0 for board in all_boards]
    mc_counts += [sum(counts_labels_all)/days_diff, sum(counts_labels_without_resource)/days_diff]
    # Recorded vectors in the same layout -- presumably the printed mc_counts of
    # March/April/May; verify before relying on them.
    m3_counts = [8414, 5389, 90, 148, 43, 342, 5, 432, 1799, 1134, 14, 1, 264, 2, 0, 811, 55, 2, 61, 9, 898, 830, 213, 676.0, 636.9677419354839]
    m4_counts = [9201, 6682, 71, 38, 137, 158, 0, 432, 1433, 551, 21, 2, 308, 0, 0, 1086, 38, 0, 51, 20, 791, 1132, 131, 742.7666666666667, 692.5666666666667]
    m5_counts = [10789, 5244, 96, 97, 65, 215, 0, 598, 1454, 483, 29, 3, 299, 1, 0, 703, 36, 0, 39, 55, 868, 914, 189, 715.3870967741935, 679.5806451612904]
    # Baseline the current month is compared against.
    plast = m4_counts
    # Per-board difference only; the two trailing averages are reported as growth rates below.
    counts_diff = [mc_counts[i]-plast[i] for i in range(len(all_boards))]
    output_plot_bar(all_boards, counts_diff, "各板块回复贴发帖数变化量", 5, color="orange")
    print(f"总体增率{mc_counts[-2]/plast[-2]-1:.2%}，排除九个资源区后增率{mc_counts[-1]/plast[-1]-1:.2%}")
    # Printed so the vector can be pasted in as next month's baseline.
    print(mc_counts)

In [None]:
# Analysis: hottest topics per day.
from datetime import timedelta
# A topic counts as "hot" on a day when it gained at least `limit` replies that day.
# The day's top topic is always printed, even if it is below the limit.
limit = 40
# day -> {topic_belong: [topic_title, reply_count]}; after the loop each value is
# replaced by a list of (topic_belong, [topic_title, reply_count]) sorted by count, descending.
day_popular_topic_dict = {k: dict() for k in valid_reply_dict_day_unstructured}
for day, replies in valid_reply_dict_day_unstructured.items():
    for reply in replies:
        if reply["topic_belong"] in day_popular_topic_dict[day]:
            day_popular_topic_dict[day][reply["topic_belong"]][1] += 1
        else:
            day_popular_topic_dict[day][reply["topic_belong"]] = [reply["topic_title"], 1]
    day_popular_topic_dict[day] = sorted(list(day_popular_topic_dict[day].items()), key=lambda x:x[1][1], reverse=True)
# Report only the days_diff calendar days starting at stime, skipping days without
# replies. This leaves out a stray key for a post stamped exactly at etime without
# ever dropping a real day.
window_days = [(stime_obj + timedelta(days=offset)).strftime("%Y%m%d")[2:] for offset in range(days_diff)]
inn = [day for day in window_days if day in day_popular_topic_dict]
for day in inn:
    url, (title, count) = day_popular_topic_dict[day][0]
    print(f"{day}：新增{count}回复：[url={url}]{title}[/url]")
    for url, (title, count) in day_popular_topic_dict[day][1:]:
        if count >= limit: print(f"　　　　  新增{count}回复：[url={url}]{title}[/url]")
# Analysis: hottest topics over the whole window, by number of in-window replies.
print("="*130)
total_popular_topic_list = sorted(valid_reply_list, key=lambda x:len(x["reply_list"]), reverse=True)
for index, topic in enumerate(total_popular_topic_list):
    print(f'第{index+1:^4}名：共增{len(topic["reply_list"])}回复：[url={topic["topic_url"]}]{topic["topic_title"]}[/url]')

In [None]:
# Analysis: most prolific posters.
# Ranks users by the number of entries they have in username_search_dict, descending
# (the original note says all posts are counted, topics and replies alike).
waterking_list = list(username_search_dict.items())
waterking_list.sort(key=lambda pair: len(pair[1]), reverse=True)
for rank, (poster_name, posts) in enumerate(waterking_list, start=1):
    print(f'第{rank:>2}名|共发{len(posts):>3}帖：{poster_name}')

In [None]:
# "Most helpful user" ranking. Three optional stages, each behind its own flag:
#   flag10 - collect helpful replies from the source-identification and
#            resource-request boards (weight 1); may prompt for manual input
#   flag11 - collect helpful replies from the install-troubleshooting board (weight 0.75)
#   flag12 - aggregate the collected replies per user and print the ranking
flag10 = False
flag11 = False
flag12 = False
# Make sure the shared accumulator exists even when flag10 is off, so the later
# stages cannot fail with a NameError. An existing list from an earlier run is kept.
if "goodguy_reply_list" not in globals():
    goodguy_reply_list = list()
if flag10:
    import re
    goodguy_boards = ["图片/作品出处询问版", "寻求资源"]
    goodguy_reply_list = list()
    i = 1
    # Chinese numerals in titles are normalised to digits before floor numbers are extracted.
    num_map = {'一': '1', '二': '2', '三': '3', '四': '4',
               '五': '5', '六': '6', '七': '7', '八': '8', '九': '9'}
    p = re.compile(r'[一二三四五六七八九]')
    # Matches floor references in the title such as "3L", "3楼" or "3、5楼".
    patterns = r"(\d+)[Ll楼樓][、，,(, )]?|(\d+)[、，,(, )]\d+[Ll楼樓]"
    for topic in valid_reply_list:
        disp = False
        if topic["board_belong"] in goodguy_boards:
            topic_title = p.sub(lambda x: num_map[x.group()], topic["topic_title"])
            matches = re.findall(patterns, topic_title)
            goodguy_floor_list = list({int(floor) for ele in matches for floor in ele if floor}) if matches else []
            # The original poster is only known when floor 0 survived the time filter.
            poster = topic["reply_list"][0]["username"] if topic["reply_list"][0]["floor"] == 0 else "#unknown#"
            # No floor named in the title, not marked as self-solved/unanswered, and
            # somebody other than the poster replied: fall back to the "excellent post"
            # marker, then to manual input.
            if not goodguy_floor_list and \
               all(word not in topic["topic_title"] for word in ["自己找到", "无人回应"]) and \
               any(reply["username"] != poster for reply in topic["reply_list"]):
                disp = True
                print(topic["topic_url"], topic["topic_title"])
                for reply in topic["reply_list"]:
                    for fieldset in reply["fieldset_list"]:
                        if "<legend>↓</legend>本帖为优秀帖" in fieldset:
                            goodguy_floor_list = [reply["floor"]]
                if not goodguy_floor_list:
                    for reply in topic["reply_list"]:
                        print(f'floor{reply["floor"]}: {reply["reply_text"]}')
                    # Operator types the helpful floor numbers separated by spaces
                    # (empty input means none).
                    inn = input()
                    goodguy_floor_list = [int(floor) for floor in inn.split(" ")] if inn else []
            goodguy_reply_list_current = [reply for reply in topic["reply_list"] if reply["floor"] in goodguy_floor_list]
            goodguy_reply_list += goodguy_reply_list_current
            if disp:
                print(f"提取({i})：", [reply["username"] for reply in goodguy_reply_list_current])
                print("="*130)
                i += 1
    # A bare expression inside an `if` block displays nothing, so print the count.
    print(len(goodguy_reply_list))
# Helpful replies from the install-troubleshooting board (weight 0.75).
# Some topics in valid_reply_list have lost floor 0 to the time filter, so the
# original poster is looked up by topic_url in the unfiltered topic_list.
if flag11:
    def search_poster(topic_url):
        """Return the username of floor 0 of the topic with this URL.

        Returns None when no topic in topic_list has that URL; raises Exception when
        the topic's first reply is not floor 0.
        """
        for topic in topic_list:
            if topic and topic_url == topic["topic_url"]:
                if topic["reply_list"][0]["floor"] == 0:
                    return topic["reply_list"][0]["username"]
                else:
                    raise Exception("error.")
    for topic in valid_reply_list:
        if topic["board_belong"] == "游戏安装疑难互助":
            goodguy_username_list_current = list()
            # Usernames already credited for this topic: each helper counts once per topic.
            seen_usernames = set()
            poster = search_poster(topic["topic_url"])
            for reply in topic["reply_list"]:
                if reply["username"] != poster and reply["username"] not in seen_usernames:
                    seen_usernames.add(reply["username"])
                    goodguy_username_list_current.append(reply)
            goodguy_reply_list += goodguy_username_list_current
    print(len(goodguy_reply_list))
# Aggregation: per user [weighted total, resource requests, source requests, troubleshooting].
if flag12:
    goodguy_dict = dict()
    for reply in goodguy_reply_list:
        if reply["username"] not in goodguy_dict:
            goodguy_dict[reply["username"]] = [0, 0, 0, 0]
        if reply["board_belong"] == "游戏安装疑难互助":
            goodguy_dict[reply["username"]][0] += 0.75
            goodguy_dict[reply["username"]][3] += 0.75
        elif reply["board_belong"] == "图片/作品出处询问版":
            goodguy_dict[reply["username"]][0] += 1
            goodguy_dict[reply["username"]][2] += 1
        else:
            goodguy_dict[reply["username"]][0] += 1
            goodguy_dict[reply["username"]][1] += 1
        # NOTE(review): looks like a leftover debugging print for one specific user -- confirm and remove.
        if reply["username"] == "1208276875": print(reply["topic_belong"])
    goodguy_list = sorted(list(goodguy_dict.items()), key=lambda x:x[1][0], reverse=True)
    for index, guy in enumerate(goodguy_list):
        # NOTE(review): int() truncates the 0.75-weighted values for display -- confirm this is intended.
        print(f"第{index+1:>2}名 | {int(guy[1][0]):>3}次应求：[b]{guy[0]}[/b] (资源{guy[1][1]}, 出处{guy[1][2]}, 疑难{int(guy[1][3])})")

In [None]:
# Analysis: emoji / sticker popularity ranking.
from collections import Counter
# Occurrences of every image URL across all in-window replies, counted in one pass.
# Keys keep first-seen order.
all_imgs_url = Counter(img_url for reply in valid_reply_list_unstructured for img_url in reply["image_list"])
# Merge the counts: built-in emoji are identified by the last 8 characters of the URL
# (e.g. "em01.gif") and keyed by their tag; URLs containing "sticker" are kept as-is;
# every other image is ignored.
all_imgs_url_integrated = dict()
for img_url, count in all_imgs_url.items():
    inn = img_url[-8:]
    if inn in emoji_trans:
        tag = emoji_trans[inn]
        all_imgs_url_integrated[tag] = all_imgs_url_integrated.get(tag, 0) + count
    elif "sticker" in img_url:
        all_imgs_url_integrated[img_url] = count
# From here on all_imgs_url is the ranking: (tag_or_url, count) pairs, most used first.
all_imgs_url = sorted(list(all_imgs_url_integrated.items()), key=lambda x:x[1], reverse=True)
others = list()
# Set of known tags, built once for the membership checks below.
emoji_tags = set(emoji_trans.values())
for x in all_imgs_url:
    if x[0] in emoji_tags: print(f"{x[0]}（{x[1]}次）", end="")
    elif "pesoguin" in x[0] or "em" in x[0]: print(f"[img]{x[0]}[/img]（{x[1]}次）", end="")
    else: others.append(x)
print("\n" + "="*130)
# Remaining stickers are listed after the separator.
for x in others:
    print(f"[img]{x[0]}[/img]（{x[1]}次）", end="")

In [None]:
# Analysis: number of distinct users who posted, overall and per day.
from datetime import timedelta
print(f"统计期间内共有{len(username_search_dict)}位用户发言。")
# day -> distinct usernames that replied on that day, in first-appearance order.
user_dict_day = dict()
for day in valid_reply_dict_day_unstructured:
    user_dict_day[day] = list(dict.fromkeys([reply['username'] for reply in valid_reply_dict_day_unstructured[day]]))
# Day axis: exactly days_diff consecutive "yymmdd" keys starting at stime. Built from
# the calendar rather than from the dict keys, so a silent day is plotted as 0 and a
# stray key for a post stamped exactly at etime is ignored.
inn = [(stime_obj + timedelta(days=offset)).strftime("%Y%m%d")[2:] for offset in range(days_diff)]
weekdays_labels = [f"{x[2:]}{readable2weekday(x)}" for x in inn]
counts_labels = [len(user_dict_day.get(x, [])) for x in inn]
output_plot_line(weekdays_labels, counts_labels, f"{current_month}每日发言用户数", 4)
# The daily average must keep users who appear on several days, so it is the mean of
# the per-day counts and not len(username_search_dict)/days_diff.
average = sum(counts_labels)/days_diff
print(f"统计期间平均每天有{average:.2f}位用户发言。")

In [None]:
# Per-user analysis: posting history and activity pattern over time.
flag2 = False
if flag2:
    from collections import Counter
    username = "kisaragizen"
    his_all_replies = username_search_dict[username]
    # Hour-of-day histogram ("00".."23", local time), counted in one pass.
    time_list   = [datetime.fromtimestamp(x["reply_time"]).strftime("%H") for x in his_all_replies]
    time_labels = [f"{x:>02}" for x in range(24)]
    hour_counts = Counter(time_list)
    time_dict   = {x: hour_counts[x] for x in time_labels}
    output_plot_bar(time_labels, list(time_dict.values()), f"{username}的回复时间分布（每天）", 5)
    # "yymmdd" key (e.g. 250501) -> all of this user's replies on that day.
    # Each day is the half-open 86400-second slice [day_start, day_start + 86400), so
    # a reply stamped exactly on a day boundary lands in one day only.
    user_reply_dict_day = dict()
    for day in range(days_diff):
        day_start = stime_unix + 86400*day
        readable = unix2readable(day_start, 1)
        bucket = user_reply_dict_day.setdefault(readable, list())
        bucket.extend(reply for reply in his_all_replies
                      if day_start <= reply["reply_time"] < day_start + 86400)
    inn = sorted(user_reply_dict_day)
    weekdays_labels = [f"{x[2:]}{readable2weekday(x)}" for x in inn]
    counts_labels = [len(user_reply_dict_day[x]) for x in inn]
    output_plot_line(weekdays_labels, counts_labels, f"{username}的回复时间分布（期间）", 4)
    # Full chronological dump of the user's replies.
    for reply in sorted(his_all_replies, key=lambda x:x["reply_time"]):
        print(f"{unix2readable(reply['reply_time'], 2)} {reply['topic_title']}")
        print(f"{reply['topic_belong']}&spid={reply['pid'][3:]}\n{reply['reply_text']}\n")

In [None]:
# Overall statistics: reply-box colour preference.
flag0 = False
if flag0:
    # Each user's colour is the one on their latest reply: the list is sorted by time
    # ascending, so later replies overwrite earlier ones.
    user_color_dict = dict()
    for reply in valid_reply_list_unstructured_sorted:
        user_color_dict[reply["username"]] = reply["replyboxcolor"]
    all_color_list = list(user_color_dict.values())

    # color_count_dict0: colour -> number of users currently on that colour.
    color_count_dict0 = dict()
    for color in set(all_color_list):
        color_count_dict0[color] = all_color_list.count(color)
    color_count_list0 = sorted(list(color_count_dict0.items()), key=lambda pair: pair[-1], reverse=True)

    # color_count_dict1: colour -> number of replies posted with that colour.
    color_count_dict1 = dict()
    for reply in valid_reply_list_unstructured_sorted:
        box_color = reply["replyboxcolor"]
        color_count_dict1[box_color] = color_count_dict1.get(box_color, 0) + 1

    print(len(user_color_dict), len(valid_reply_list_unstructured_sorted))
    # Share of users per colour, most popular first; the swatch drops the colour's last character.
    for color, user_count in color_count_list0:
        print(f"[backcolor={color[:-1]}]　　　　　　　　　　　　　　　　　　　　　　　　　[/backcolor]{user_count/len(user_color_dict):>7.2%} users in {color_count_dict1[color]:>5} posts: {color}")

    # Turn the user counts into "replies per user" for each colour and rank again.
    for key in color_count_dict0:
        color_count_dict0[key] = color_count_dict1[key]/color_count_dict0[key]
    color_count_list0 = sorted(list(color_count_dict0.items()), key=lambda pair: pair[-1], reverse=True)
    print("="*130)
    for color, posts_per_user in color_count_list0:
        print(f"[backcolor={color[:-1]}]　　　　　　　　　　　　　　　　　　　　　　　　　[/backcolor]{posts_per_user:>7.2f} posts: {color}")