In [1]:
import pandas as pd
from collections import Counter
import re
import matplotlib.pyplot as plt
import numpy as np

# Use the SimHei font so the Chinese labels used in the charts below can be
# rendered, and disable the Unicode minus glyph for axis tick labels.
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False

# Hex colour for each composition name. plot_ban_to_comp looks pie-slice colours
# up here after stripping any parenthesised suffix from the composition name.
# The blank lines separate groups of related compositions.
comp_color = {
    '魔坦刺': '#cfcf95',
    '坦刺': '#858561',
    '扶桑法刺': '#9e71ad',
    '扶桑群雄刺': '#968795',
    '扶桑群雄法刺': '#6603fc',
    '扶桑刺': '#6603fc',
    '魔种': '#a35956',
    '扶桑男刺': '#bfae91',
    '扶桑法': '#8000ff',

    '七射': '#14ffa5',
    '坦射': '#ccff14',
    '攻辅射': '#42d7f5',
    '弟弟射': '#fceb30',
    '攻辅蛋': '#ffac1c',

    '九战': '#ff0000',
    '稷下战': '#ff5e5e',
    '轻战守约': '#ff8e5e',
    '魏战': '#ff0084',
    '长城战': '#f8ff6b',

    '蜀国': '#00cc18',
    '弟弟蜀': '#a1eb34',

    '法奶': '#0093f5',
    '弟妹法': '#7900fa',
    '坦法': '#ff5900'
    }

# Maps a detailed composition name to the brief name used for aggregation.
# Names that are not listed are kept as-is by the lookup in preprocess_data.
# Built group by group; the insertion order matches the flat literal it replaces.
comp_to_brief = {
    **{detail: '扶桑刺' for detail in ('扶桑法刺', '扶桑刺', '扶桑群雄刺', '扶桑群雄法刺', '扶桑男刺')},
    **{detail: '魔种' for detail in ('魔种(无天赋)', '魔种(有天赋)')},
    **{detail: '攻辅射' for detail in ('封神攻辅射', '吴国攻辅射', '稷下攻辅射', '尧天攻辅射')},
}

# Points per finishing place: index 0 is 1st place (12 points), index 7 is 8th
# place (0 points). preprocess_data indexes this list with `rank - 1`.
rank_to_score = [12, 9, 7, 5, 3, 2, 1, 0]

class Config:
    """Notebook-wide switches read by the preprocessing step."""
    # When True, preprocess_data copies '阵容' into '详细阵容' and replaces '阵容'
    # with its brief name from comp_to_brief.
    use_brief_comp = True

# Do not truncate wide DataFrames: show every column when a table is displayed.
pd.set_option('display.max_columns', None)

In [2]:
def preprocess_data(df):
    """Clean the raw results table and add the derived columns used downstream.

    Rows containing any missing value are dropped. The following columns are
    then added:

    * '分数'     - points for the row's '排名', looked up in ``rank_to_score``.
    * '详细阵容' - the original '阵容' value. It is always present so that
      reports which read it work whatever ``Config.use_brief_comp`` is.
    * '回合数'   - '胜' + '负'.

    When ``Config.use_brief_comp`` is true, '阵容' is replaced by its brief name
    from ``comp_to_brief`` (unknown names are kept unchanged).

    Args:
        df: raw results DataFrame with at least '排名', '阵容', '胜' and '负'.

    Returns:
        A new DataFrame; the caller's frame is not modified.
    """
    # Explicit copy: dropna() may hand back an object pandas treats as derived
    # from `df`, and assigning columns onto it triggers SettingWithCopyWarning.
    df = df.dropna().copy()
    df['分数'] = df['排名'].apply(lambda x: rank_to_score[int(x) - 1])
    df['详细阵容'] = df['阵容']
    if Config.use_brief_comp:
        df['阵容'] = df['详细阵容'].apply(lambda x: comp_to_brief.get(x, x))
    df['回合数'] = df['胜'] + df['负']
    return df

def check_data_integrity(df):
    """Assert that the results table is internally consistent; return it unchanged.

    Per game (grouped by '周数', '分组', '局数'):
      * '排名' must read exactly 1..8 in row order;
      * '回合数' must be monotonically decreasing in row order;
      * the first two rows must have the same '回合数';
      * no '俱乐部' may appear twice.

    Per club within one week and group ('周数', '分组', '俱乐部'):
      * every 'ban位' value must be distinct (global ban/pick).

    Raises:
        AssertionError: on the first violated rule.
    """
    full_lobby = [1, 2, 3, 4, 5, 6, 7, 8]

    for _, game in df.groupby(['周数', '分组', '局数']):
        placements = game['排名'].tolist()
        assert placements == full_lobby, placements

        rounds = game['回合数']
        assert rounds.is_monotonic_decreasing, game
        round_list = rounds.tolist()
        assert round_list[0] == round_list[1], rounds

        clubs = game['俱乐部']
        distinct_clubs = len(clubs.unique())
        assert distinct_clubs == len(clubs), distinct_clubs

    # Global ban/pick: a club may not repeat a ban slot within one week and group.
    for key, club_games in df.groupby(['周数', '分组', '俱乐部']):
        bans = club_games['ban位']
        distinct_bans = len(bans.unique())
        assert distinct_bans == len(bans), (key, distinct_bans)

    return df

# Load the results workbook, add the derived columns, then validate the table.
# check_data_integrity raises AssertionError on inconsistent data and otherwise
# returns the frame it was given.
df = pd.read_excel('result.xlsx')
df = preprocess_data(df)
df = check_data_integrity(df)

In [3]:
# Column labels shared by every statistics table. The order is relied upon by
# plot_ban_to_comp, which reads the rows positionally.
_STAT_COLUMNS = [
    '平均分数', '平均排名', '排名方差', '出场次数',
    '登顶次数', '登顶率', '前三次数', '前三率', '阵容选用',
]


def _group_statistics(df, group_col, usage_col, as_ratio=False):
    """Aggregate placement statistics per value of ``group_col``.

    Args:
        df: preprocessed results with '排名', '分数', ``group_col`` and ``usage_col``.
        group_col: column to group by.
        usage_col: column whose value distribution is reported per group in the
            last output column ('阵容选用').
        as_ratio: if True the distribution holds ``(value, share_of_group)``
            pairs, otherwise ``(value, count)`` pairs. Pairs are ordered from
            most to least common.

    Returns:
        DataFrame indexed by ``group_col`` with the columns in ``_STAT_COLUMNS``,
        sorted by mean score (descending), mean rank (ascending), first-place
        rate (descending) and top-three rate (descending).
    """
    groups = df.groupby(group_col)
    ranks = groups['排名']
    sum_occur = ranks.count()
    top1_cnt = ranks.apply(lambda r: (r == 1).sum())
    top3_cnt = ranks.apply(lambda r: (r <= 3).sum())

    def usage(values):
        total = len(values)
        return [
            (item, count / total if as_ratio else count)
            for item, count in Counter(values).most_common()
        ]

    res = pd.concat(
        [
            groups['分数'].mean(),
            ranks.mean(),
            ranks.var(),
            sum_occur,
            top1_cnt,
            top1_cnt / sum_occur,
            top3_cnt,
            top3_cnt / sum_occur,
            # Apply on the selected column only; applying over whole sub-frames
            # (including the grouping column) is deprecated in recent pandas.
            groups[usage_col].apply(usage),
        ],
        axis=1,
    )
    res.columns = _STAT_COLUMNS
    return res.sort_values(
        ['平均分数', '平均排名', '登顶率', '前三率'],
        ascending=[False, True, False, False],
    )


def get_comp_statistics(df):
    """Statistics per composition ('阵容').

    The last column, although labelled '阵容选用', holds the distribution of
    'ban位' values as ``(ban, share)`` pairs. Missing values (e.g. the variance
    of a composition seen only once) are shown as '-'.
    """
    return _group_statistics(df, '阵容', 'ban位', as_ratio=True).fillna('-')


def get_ban_statistics(df):
    """Statistics per 'ban位'; '阵容选用' holds ``(composition, count)`` pairs."""
    return _group_statistics(df, 'ban位', '阵容')


def get_team_statistics(df):
    """Statistics per club ('俱乐部'); '阵容选用' holds ``(composition, count)`` pairs."""
    return _group_statistics(df, '俱乐部', '阵容')


def get_player_statistics(df):
    """Statistics per player ('选手'); '阵容选用' holds ``(composition, count)`` pairs."""
    return _group_statistics(df, '选手', '阵容')

# Show the four statistics tables: per composition, per ban slot, per club, per player.
display(get_comp_statistics(df))
display(get_ban_statistics(df))
display(get_team_statistics(df))
display(get_player_statistics(df))

Unnamed: 0_level_0,平均分数,平均排名,排名方差,出场次数,登顶次数,登顶率,前三次数,前三率,阵容选用
阵容,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
坦射,10.5,1.5,0.5,2,1,0.5,2,1.0,"[(尧, 0.5), (蜀, 0.5)]"
攻辅蛋,9.0,2.0,-,1,0,0.0,1,1.0,"[(蜀, 1.0)]"
弟弟射,6.422222,3.577778,4.65859,45,10,0.222222,24,0.533333,"[(蜀, 1.0)]"
坦刺,5.75,4.25,10.9167,4,1,0.25,2,0.5,"[(稷, 0.5), (魏, 0.25), (尧, 0.25)]"
攻辅射,5.34375,4.28125,6.07964,32,5,0.15625,15,0.46875,"[(蜀, 0.375), (卫, 0.3125), (魏, 0.28125), (尧, 0...."
弟弟蜀,5.285714,4.428571,7.95238,7,1,0.142857,4,0.571429,"[(卫, 1.0)]"
扶桑法,5.0,4.0,-,1,0,0.0,0,0.0,"[(卫, 1.0)]"
长城战,5.0,4.0,-,1,0,0.0,0,0.0,"[(安, 1.0)]"
魏战,5.0,4.0,-,1,0,0.0,0,0.0,"[(安, 1.0)]"
稷下战,4.876712,4.39726,4.21499,73,6,0.082192,27,0.369863,"[(安, 0.6164383561643836), (魏, 0.16438356164383..."


Unnamed: 0_level_0,平均分数,平均排名,排名方差,出场次数,登顶次数,登顶率,前三次数,前三率,阵容选用
ban位,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
蜀,5.8,3.9375,4.920095,80,14,0.175,38,0.475,"[(弟弟射, 45), (攻辅射, 12), (稷下战, 8), (法奶, 6), (扶桑刺..."
安,5.0,4.402985,5.123021,67,9,0.134328,26,0.38806,"[(稷下战, 45), (蜀国, 11), (魔种, 3), (九战, 3), (轻战守约,..."
卫,4.9625,4.4,4.774684,80,9,0.1125,30,0.375,"[(扶桑刺, 44), (法奶, 14), (攻辅射, 10), (弟弟蜀, 7), (蜀国..."
稷,4.681159,4.623188,5.26769,69,9,0.130435,23,0.333333,"[(扶桑刺, 41), (魔种, 11), (魔坦刺, 10), (蜀国, 3), (坦刺,..."
魏,4.529412,4.75,5.772388,68,6,0.088235,24,0.352941,"[(扶桑刺, 23), (七射, 17), (稷下战, 12), (攻辅射, 9), (蜀国..."
吴,4.0625,5.0625,6.195833,16,2,0.125,5,0.3125,"[(稷下战, 6), (扶桑刺, 3), (蜀国, 2), (九战, 2), (轻战守约, ..."
尧,2.947368,5.736842,4.982456,19,1,0.052632,4,0.210526,"[(七射, 9), (扶桑刺, 3), (稷下战, 2), (攻辅射, 1), (坦法, 1..."
神,2.0,6.0,,1,0,0.0,0,0.0,"[(魔坦刺, 1)]"


Unnamed: 0_level_0,平均分数,平均排名,排名方差,出场次数,登顶次数,登顶率,前三次数,前三率,阵容选用
俱乐部,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
XQ,6.333333,3.666667,5.52381,15,2,0.133333,8,0.533333,"[(扶桑刺, 6), (弟弟射, 3), (稷下战, 2), (坦刺, 2), (攻辅射, ..."
DYG,6.2,3.8,5.6,15,4,0.266667,7,0.466667,"[(稷下战, 4), (扶桑刺, 3), (攻辅射, 3), (法奶, 1), (七射, 1..."
CW,6.066667,3.933333,6.92381,15,4,0.266667,7,0.466667,"[(扶桑刺, 5), (弟弟射, 3), (魔种, 2), (稷下战, 2), (蜀国, 1..."
KSSC,6.0,3.933333,6.495238,15,3,0.2,8,0.533333,"[(攻辅射, 7), (扶桑刺, 4), (魔种, 1), (九战, 1), (蜀国, 1)..."
重庆QGScholar,6.0,4.066667,8.352381,15,4,0.266667,8,0.533333,"[(扶桑刺, 6), (稷下战, 5), (弟弟射, 3), (弟弟蜀, 1)]"
RSG,5.933333,3.733333,3.352381,15,2,0.133333,8,0.533333,"[(稷下战, 3), (扶桑刺, 2), (弟弟射, 2), (法奶, 2), (魔坦刺, ..."
成都AG超玩会,5.733333,3.933333,4.352381,15,2,0.133333,6,0.4,"[(扶桑刺, 5), (弟弟射, 3), (蜀国, 2), (七射, 1), (扶桑法, 1..."
MQ,5.733333,4.2,7.742857,15,5,0.333333,6,0.4,"[(扶桑刺, 6), (魔种, 4), (稷下战, 3), (七射, 1), (弟弟射, 1)]"
西安WE,5.4,4.3,6.852632,20,4,0.2,8,0.4,"[(扶桑刺, 7), (稷下战, 2), (攻辅射, 2), (弟弟射, 2), (坦刺, ..."
RNG.M,4.933333,4.4,4.828571,15,1,0.066667,6,0.4,"[(扶桑刺, 5), (稷下战, 3), (轻战守约, 2), (攻辅射, 2), (蜀国,..."


Unnamed: 0_level_0,平均分数,平均排名,排名方差,出场次数,登顶次数,登顶率,前三次数,前三率,阵容选用
选手,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
奕将,9.8,1.800000,0.700000,5,2,0.400000,5,1.000000,"[(弟弟射, 3), (稷下战, 2)]"
ray,8.5,2.500000,3.500000,6,2,0.333333,5,0.833333,"[(攻辅射, 5), (扶桑刺, 1)]"
小笨,8.4,2.400000,0.800000,5,1,0.200000,5,1.000000,"[(稷下战, 2), (弟妹法, 1), (蜀国, 1), (坦射, 1)]"
简若,8.4,2.800000,7.200000,5,3,0.600000,3,0.600000,"[(弟弟射, 3), (扶桑刺, 1), (稷下战, 1)]"
噢吼,8.0,2.666667,2.333333,3,1,0.333333,2,0.666667,"[(扶桑刺, 1), (弟弟蜀, 1), (九战, 1)]"
...,...,...,...,...,...,...,...,...,...
莫然,1.0,7.000000,,1,0,0.000000,0,0.000000,"[(九战, 1)]"
风流,1.0,7.000000,,1,0,0.000000,0,0.000000,"[(扶桑刺, 1)]"
小泥巴,0.0,8.000000,,1,0,0.000000,0,0.000000,"[(扶桑刺, 1)]"
年久,0.0,8.000000,,1,0,0.000000,0,0.000000,"[(法奶, 1)]"


In [4]:
%%time

def report_mvp_comp(comp_stats):
    """Pick the MVP composition and report its metrics next to its ranks.

    Only compositions with at least ``MIN_OCCURENCE`` appearances compete. Each
    metric is ranked across them (ties share the best rank); the composition
    with the lowest weighted sum of ranks wins.

    Args:
        comp_stats: table as produced by ``get_comp_statistics``.

    Returns:
        DataFrame indexed by metric with two columns: the winner's metric values
        (column named after the winner) and '全阵容对比排名', its rank per metric.

    Raises:
        ValueError: if no composition reaches ``MIN_OCCURENCE`` appearances.
    """
    MIN_OCCURENCE = 5
    # metric -> [weight, ascending]; ascending=True means "smaller is better".
    metric_weights = {
        '平均分数': [15, False],
        '平均排名': [5, True],
        '排名方差': [3, True],
        '出场次数': [5, False],
        '登顶率': [5, False],
        '前三率': [3, False]
    }
    comp_stats = comp_stats[comp_stats['出场次数'] >= MIN_OCCURENCE]
    if comp_stats.empty:
        raise ValueError(f'no composition has at least {MIN_OCCURENCE} appearances')

    metrics = list(metric_weights)
    total = pd.Series(0, index=comp_stats.index)
    metric_ranks = {}
    for metric, (weight, ascending) in metric_weights.items():
        # The table may carry '-' placeholders (object dtype); coerce them to
        # NaN and rank NaN last so the integer cast below cannot fail.
        values = pd.to_numeric(comp_stats[metric], errors='coerce')
        rank = values.rank(ascending=ascending, method='min', na_option='bottom').astype(int)
        metric_ranks[metric] = rank
        total = total + rank * weight

    mvp_comp = total.sort_values().index[0]
    mvp_values = comp_stats.loc[mvp_comp][metrics]
    mvp_ranks = pd.Series({metric: metric_ranks[metric].loc[mvp_comp] for metric in metrics})
    res = pd.concat([mvp_values, mvp_ranks], axis=1)
    res.columns = [mvp_comp, '全阵容对比排名']
    return res
# Rebuild the per-composition table and show the MVP composition report.
comp_stats = get_comp_statistics(df)
display(report_mvp_comp(comp_stats))

def report_mvp_player(player_stats):
    """Pick the MVP player and report their metrics next to their ranks.

    Only players with at least ``MIN_OCCURENCE`` appearances compete. Metrics
    with a positive weight are ranked across them (ties share the best rank) and
    the player with the lowest weighted sum of ranks wins. Metrics with weight 0
    are shown for information only and get '/' instead of a rank.

    Args:
        player_stats: table as produced by ``get_player_statistics``.

    Returns:
        DataFrame indexed by metric with two columns: the winner's metric values
        (column named after the winner) and '全选手对比排名'.

    Raises:
        ValueError: if no player reaches ``MIN_OCCURENCE`` appearances.
    """
    MIN_OCCURENCE = 3
    # metric -> [weight, ascending]; ascending=True means "smaller is better".
    metric_weights = {
        '平均分数': [15, False],
        '平均排名': [10, True],
        '排名方差': [5, True],
        '登顶率': [10, False],
        '前三率': [5, False],
        '出场次数': [0, False],
        '阵容选用': [0, False]
    }
    player_stats = player_stats[player_stats['出场次数'] >= MIN_OCCURENCE]
    if player_stats.empty:
        raise ValueError(f'no player has at least {MIN_OCCURENCE} appearances')

    metrics = list(metric_weights)
    total = pd.Series(0, index=player_stats.index)
    metric_ranks = {}
    for metric, (weight, ascending) in metric_weights.items():
        if weight == 0:
            # Display-only metric (may not even be numeric, e.g. '阵容选用'):
            # it contributes nothing to the score, so it is not ranked at all.
            continue
        # Rank NaN (e.g. undefined variance) last so the integer cast is safe.
        values = pd.to_numeric(player_stats[metric], errors='coerce')
        rank = values.rank(ascending=ascending, method='min', na_option='bottom').astype(int)
        metric_ranks[metric] = rank
        total = total + rank * weight

    mvp_player = total.sort_values().index[0]
    mvp_values = player_stats.loc[mvp_player][metrics]
    mvp_ranks = pd.Series({
        metric: metric_ranks[metric].loc[mvp_player] if metric in metric_ranks else '/'
        for metric in metrics
    })
    res = pd.concat([mvp_values, mvp_ranks], axis=1)
    res.columns = [mvp_player, '全选手对比排名']
    return res

# Build the per-player table and show the MVP player report.
player_stats = get_player_statistics(df)
display(report_mvp_player(player_stats))

def report_mvp_team(team_stats):
    """Pick the MVP team and report its metrics next to its ranks.

    Metrics with a positive weight are ranked across all teams (ties share the
    best rank) and the team with the lowest weighted sum of ranks wins. Metrics
    with weight 0 are shown for information only and get '/' instead of a rank.

    Args:
        team_stats: table as produced by ``get_team_statistics``.

    Returns:
        DataFrame indexed by metric with two columns: the winner's metric values
        (column named after the winner) and '全队伍对比排名'.

    Raises:
        ValueError: if ``team_stats`` has no rows.
    """
    # metric -> [weight, ascending]; ascending=True means "smaller is better".
    metric_weights = {
        '平均分数': [15, False],
        '平均排名': [10, True],
        '排名方差': [5, True],
        '登顶率': [10, False],
        '前三率': [5, False],
        '出场次数': [0, False],
        '阵容选用': [0, False]
    }
    if team_stats.empty:
        raise ValueError('team_stats is empty')

    metrics = list(metric_weights)
    total = pd.Series(0, index=team_stats.index)
    metric_ranks = {}
    for metric, (weight, ascending) in metric_weights.items():
        if weight == 0:
            # Display-only metric (may not even be numeric, e.g. '阵容选用'):
            # it contributes nothing to the score, so it is not ranked at all.
            continue
        # A team with a single game has NaN variance; rank NaN last so the
        # integer cast below cannot fail.
        values = pd.to_numeric(team_stats[metric], errors='coerce')
        rank = values.rank(ascending=ascending, method='min', na_option='bottom').astype(int)
        metric_ranks[metric] = rank
        total = total + rank * weight

    mvp_team = total.sort_values().index[0]
    mvp_values = team_stats.loc[mvp_team][metrics]
    mvp_ranks = pd.Series({
        metric: metric_ranks[metric].loc[mvp_team] if metric in metric_ranks else '/'
        for metric in metrics
    })
    res = pd.concat([mvp_values, mvp_ranks], axis=1)
    res.columns = [mvp_team, '全队伍对比排名']
    return res

# Build the per-club table and show the MVP team report.
team_stats = get_team_statistics(df)
display(report_mvp_team(team_stats))

def report_interesting_data(df):
    """Print record-holding rows: longest/shortest game and extreme '质量' / '输出'.

    Works on a copy, so ``df`` is not modified. In the printed rows '选手' is
    shown as ``club.player`` and '局数' as a readable week/group/game label.

    * Longest game: every row whose '回合数' equals the overall maximum.
    * Shortest game: among rows with '排名' <= 2, those with the minimum '回合数'.
    * Highest / lowest '质量' and '输出': the single row at the extreme.

    Args:
        df: preprocessed results. If '详细阵容' is missing, '阵容' is shown in
            its place.
    """
    b_df = df.copy()
    if '详细阵容' not in b_df.columns:
        # The detailed name is only split off when brief names are enabled.
        b_df['详细阵容'] = b_df['阵容']
    b_df['俱乐部'] = b_df['俱乐部'].astype(str)
    b_df['选手'] = b_df['俱乐部'] + '.' + b_df['选手'].astype(str)
    b_df['局数'] = (
        '第' + b_df['周数'].astype(str) + '周'
        + b_df['分组'].astype(str)
        + '第' + b_df['局数'].astype(str) + '局'
    )

    game_cols = ['选手', '详细阵容', '回合数', '局数', '排名']
    # Longest game (by number of rounds).
    longest_game = b_df.loc[b_df['回合数'] == b_df['回合数'].max(), game_cols]
    print(f'最长游戏:\n{longest_game}\n')
    # Shortest game, judged on the top two finishers only.
    first_twos = b_df.loc[b_df['排名'] <= 2]
    shortest_game = first_twos.loc[first_twos['回合数'] == first_twos['回合数'].min(), game_cols]
    print(f'最短游戏:\n{shortest_game}\n')

    extremes = (
        ('最高质量', '质量', pd.Series.idxmax),
        ('最低质量', '质量', pd.Series.idxmin),
        ('最高输出', '输出', pd.Series.idxmax),
        ('最低输出', '输出', pd.Series.idxmin),
    )
    for title, column, pick in extremes:
        # idxmax/idxmin return an index *label*, so the row must be fetched with
        # .loc; .iloc would select the wrong row once dropna() leaves gaps.
        row = b_df.loc[pick(b_df[column]), ['选手', '详细阵容', column, '局数', '排名']]
        print(f'{title}:\n{row}\n')

# Print the record-holding games and rows for the whole data set.
report_interesting_data(df)

Unnamed: 0,弟弟射,全阵容对比排名
平均分数,6.42222,1
平均排名,3.57778,1
排名方差,4.65859,3
出场次数,45.0,3
登顶率,0.222222,1
前三率,0.533333,2


Unnamed: 0,奕将,全选手对比排名
平均分数,9.8,1
平均排名,1.8,1
排名方差,0.7,1
登顶率,0.4,4
前三率,1,1
出场次数,5,/
阵容选用,"[(弟弟射, 3), (稷下战, 2)]",/


Unnamed: 0,XQ,全队伍对比排名
平均分数,6.33333,1
平均排名,3.66667,1
排名方差,5.52381,14
登顶率,0.133333,8
前三率,0.533333,1
出场次数,15,/
阵容选用,"[(扶桑刺, 6), (弟弟射, 3), (稷下战, 2), (坦刺, 2), (攻辅射, ...",/


最长游戏:
                 选手    详细阵容  回合数         局数  排名
120     武汉eStar.小玄策  扶桑群雄法刺   36  第4周低分组第1局   1
121        XROCK.痘痘     弟弟射   36  第4周低分组第1局   2
144          RW侠.种子   稷下攻辅射   36  第4周低分组第4局   1
145      杭州LGD大鹅.六月     稷下战   36  第4周低分组第4局   2
296       XROCK.水长东    扶桑法刺   36  第5周中分组第3局   1
297        WB.TS.刺儿   封神攻辅射   36  第5周中分组第3局   2
352           XQ.奕将     弟弟射   36  第5周高分组第5局   1
353  重庆QGScholar.鸵鸟     稷下战   36  第5周高分组第5局   2

最短游戏:
                 选手 详细阵容  回合数         局数  排名
328  重庆QGScholar.大牛  弟弟蜀   24  第5周高分组第2局   1
329       KS.YTG.安然  稷下战   24  第5周高分组第2局   2

最高质量:
选手          CW.简若
详细阵容         扶桑法刺
质量            177
局数      第3周中分组第1局
排名              1
Name: 40, dtype: object

最低质量:
选手        MQ.暴躁黑猪
详细阵容      魔种(无天赋)
质量             33
局数      第4周高分组第5局
排名              8
Name: 239, dtype: object

最高输出:
选手      重庆QGScholar.大牛
详细阵容               弟弟蜀
输出                 296
局数           第5周高分组第2局
排名                   1
Name: 328, dtype: object

最低输出:
选手      KSSC.抓只好棋
详细阵容 

In [5]:
def plot_ban_to_comp(ban_stats, filepath=None):
    """Draw one pie chart per ban slot showing which compositions were played.

    Args:
        ban_stats: table as produced by ``get_ban_statistics``. Rows are read
            positionally: column 1 is the mean score, column 4 the number of
            appearances and the last column the list of
            ``(composition, count)`` pairs.
        filepath: if given, the figure is saved there (dpi=250); otherwise it is
            shown interactively.
    """
    fallback_color = '#999999'  # used for compositions missing from comp_color
    # squeeze=False keeps `axs` a 2-D array even when there is one ban slot.
    fig, axs = plt.subplots(1, len(ban_stats), figsize=(30, 4), squeeze=False)
    for ax, ban in zip(axs.flat, ban_stats.itertuples(index=True, name=None)):
        usage = ban[-1]
        labels = [comp for comp, _ in usage]
        ys = [count for _, count in usage]
        # Colours are keyed by the name without any parenthesised suffix.
        colors = [
            comp_color.get(re.sub(r'\([^)]*\)', '', c).strip(), fallback_color)
            for c in labels
        ]
        explode = [0.015] * len(ys)
        ax.pie(ys, labels=labels, colors=colors, explode=explode, autopct='%.2f%%', shadow=False, pctdistance=0.6)
        ax.set_title(f'{ban[0]} (均分{ban[1]:.2f},出场{ban[4]})', fontsize=15, x=0.5, y=1.01)

    fig.tight_layout()
    if filepath:
        fig.savefig(filepath, dpi=250)
    else:
        # plt.show() takes no figure argument; its only parameter is `block`.
        plt.show()
    plt.close(fig)

def plot_comp_data(comp_stats, filepath=None):
    """Scatter each composition by appearances (x) against mean rank (y).

    The y axis is inverted so better (lower) mean ranks sit at the top, and a
    dashed line marks 4.5, the mean rank of an eight-place game.

    Args:
        comp_stats: table with '平均排名' and '出场次数' columns, indexed by
            composition name (as produced by ``get_comp_statistics``).
        filepath: if given, the figure is saved there (dpi=250); otherwise it is
            shown interactively.
    """
    fig, ax = plt.subplots(figsize=(6, 4))
    y = comp_stats['平均排名']
    x = comp_stats['出场次数']
    # Right edge of the plot; an empty table still yields a valid axis range.
    x_right = (max(x) if len(x) else 0) + 1
    ax.scatter(x, y)
    ax.set_title('阵容数据')
    ax.set_ylabel('平均排名')
    ax.set_xlabel('出场次数')
    ax.set_xlim(left=0, right=x_right)
    ax.set_ylim(bottom=1, top=8)
    ax.hlines(y=4.5, xmin=0, xmax=x_right, colors='orange', linestyles='--', lw=2, label='理论均值')
    ax.legend(loc="lower right")
    # Iterate values directly: integer subscripts on these label-indexed Series
    # would depend on pandas' deprecated positional fallback.
    for name, x_val, y_val in zip(comp_stats.index, x, y):
        ax.annotate(name, (x_val + 0.25, y_val))
    ax.invert_yaxis()
    if filepath:
        fig.savefig(filepath, dpi=250)
    else:
        # plt.show() takes no figure argument; its only parameter is `block`.
        plt.show()
    plt.close(fig)

# Recompute both statistics tables and save the two charts as PNG files under data/.
comp_stats = get_comp_statistics(df)
plot_comp_data(comp_stats, "data/comp_data.png")
# Alternative: pass None as the path to show the figure instead of saving it.
# plot_comp_data(comp_stats, None)
ban_stats = get_ban_statistics(df)
plot_ban_to_comp(ban_stats, "data/ban_to_comp.png")
# Alternative: pass None as the path to show the figure instead of saving it.
# plot_ban_to_comp(ban_stats, None)

In [6]:
# Load the crawled Weibo posts and keep only those whose text mentions '决赛圈'.
weibo_csv_path = 'weibo_crawler/weibo/王者模拟战职业大师赛/5464294919.csv'
weibo_df = pd.read_csv(weibo_csv_path)
# na=False: a post with no text is read as NaN; without it the mask would hold
# NaN and boolean indexing would fail. regex=False: match the keyword literally.
weibo_df = weibo_df.loc[weibo_df['正文'].str.contains('决赛圈', regex=False, na=False)]
display(weibo_df.head(1))
print(weibo_df.shape)



Unnamed: 0,id,bid,正文,头条文章url,原始图片url,视频url,位置,日期,工具,点赞数,评论数,转发数,话题,@用户
4,4558497624883491,JoxN0kupZ,王者荣耀超话 KPL超话 #王者模拟战# #王者模拟战职业大师赛#·秋季赛·常规赛第四周...,,https://wx1.sinaimg.cn/large/005XNCJ1gy1gjk9bn...,,,2020-10-10,搜狗高速浏览器,0,4,0,"王者模拟战,王者模拟战职业大师赛",


(30, 14)
