# 多分类评估

In [1]:
import os
import pandas as pd
from sklearn.metrics import f1_score

In [2]:
# Ground-truth labels: a tab-separated file in which every column is read as
# text, so the label codes stay strings and are compared as strings.
y_true_df = pd.read_csv(
    "y_true.tsv",
    dtype=str,
    sep="\t",
)

In [3]:
# Display the ground-truth table (columns: sample, true).
y_true_df

Unnamed: 0,sample,true
0,JS03547,164890007
1,JS05376,426783006
2,JS03258,427084000
3,JS01739,426177001
4,JS30186,426177001
...,...,...
4230,JS41604,427084000
4231,JS10730,426177001
4232,JS17947,164890007
4233,JS19847,164890007


In [4]:
# Path of the predictions CSV that is evaluated against the ground truth.
predict_file = "pred.csv"

In [20]:
# Load the predictions; dtype=object keeps every column (including the label
# codes) as text instead of letting pandas parse them as numbers.
predict_df = pd.read_csv(predict_file, dtype=object)

In [21]:
# Display the predictions table (columns: sample, pred).
predict_df

Unnamed: 0,sample,pred
0,JS08532,426177001
1,JS42711,427084000
2,JS14674,164890007
3,JS08837,426783006
4,JS13472,164890007
...,...,...
4230,JS21500,426177001
4231,JS38369,426783006
4232,JS27620,426177001
4233,JS23515,426177001


In [22]:
# Align predictions with the ground truth by sample id.
# - how="inner" (the pandas default, spelled out here): samples present in only
#   one of the two files are dropped and do not contribute to the metrics.
# - validate="one_to_one": pandas raises MergeError if a sample id is
#   duplicated in either table, instead of silently multiplying rows and
#   skewing the per-class counts and scores.
data_merge = pd.merge(
    y_true_df,
    predict_df,
    on="sample",
    how="inner",
    validate="one_to_one",
)

In [23]:
# Display the merged table (columns: sample, true, pred).
data_merge

Unnamed: 0,sample,true,pred
0,JS03547,164890007,164890007
1,JS05376,426783006,426783006
2,JS03258,427084000,427084000
3,JS01739,426177001,426177001
4,JS30186,426177001,426177001
...,...,...,...
4230,JS41604,427084000,427084000
4231,JS10730,426177001,426177001
4232,JS17947,164890007,164890007
4233,JS19847,164890007,164890007


## 评估

In [24]:
# Ground-truth labels of the merged table as a NumPy array, in row order.
y_true = data_merge["true"].values

In [25]:
# Predicted labels of the merged table as a NumPy array, in row order.
y_pred = data_merge["pred"].values

In [26]:
# Class distribution of the ground-truth labels (number of rows per label).
data_merge["true"].value_counts()

426177001    1664
426783006     805
164890007     789
427084000     701
164889003     182
426761007      79
713422000      15
Name: true, dtype: int64

In [27]:
# Class distribution of the predicted labels (number of rows per label).
data_merge["pred"].value_counts()

426177001    1696
164890007     831
426783006     791
427084000     707
426761007     106
164889003     100
713422000       4
Name: pred, dtype: int64

In [28]:
import numpy as np

In [29]:
# Classes to score. Start with the predicted labels, most frequently predicted
# first, then append any label that occurs in the ground truth but was never
# predicted, so that such a class still appears in the per-class report
# instead of being silently left out. dict.fromkeys de-duplicates while
# keeping first-seen order.
labels = np.array(
    list(
        dict.fromkeys(
            [
                *data_merge["pred"].value_counts().index,
                *data_merge["true"].value_counts().index,
            ]
        )
    ),
    dtype=object,
)

In [30]:
# Show the label codes that will be scored.
labels

array(['426177001', '164890007', '426783006', '427084000', '426761007',
       '164889003', '713422000'], dtype=object)

In [31]:
# Shape of the ground-truth label array.
y_true.shape

(4235,)

In [32]:
# Inspect the ground-truth label array.
y_true

array(['164890007', '426783006', '427084000', ..., '164890007',
       '164890007', '426783006'], dtype=object)

In [33]:
# Inspect the predicted label array.
y_pred

array(['164890007', '426783006', '427084000', ..., '164890007',
       '164890007', '426783006'], dtype=object)

In [34]:
# Number of samples whose predicted label equals the ground-truth label.
# The array's own vectorized reduction is used instead of the built-in sum(),
# which would walk the boolean array element by element in Python.
(y_true == y_pred).sum()

3824

In [35]:
# Display the merged table again before computing the per-class scores.
data_merge

Unnamed: 0,sample,true,pred
0,JS03547,164890007,164890007
1,JS05376,426783006,426783006
2,JS03258,427084000,427084000
3,JS01739,426177001,426177001
4,JS30186,426177001,426177001
...,...,...,...
4230,JS41604,427084000,427084000
4231,JS10730,426177001,426177001
4232,JS17947,164890007,164890007
4233,JS19847,164890007,164890007


In [36]:
def cal_f1(data, class_type):
    """Return the one-vs-rest F1 score of a single class.

    Rows whose label equals ``class_type`` are encoded as 1 and all other rows
    as 0, separately for the ``"true"`` and the ``"pred"`` column. The two 0/1
    vectors are then handed to ``f1_score``.

    Args:
        data: Table providing a ``"true"`` and a ``"pred"`` column.
        class_type: Label value to score; it is compared to the column values
            for equality, so it must have the same type as the stored labels.

    Returns:
        Whatever ``f1_score`` returns for the two binary vectors.
    """
    is_class_in_truth = data["true"].eq(class_type).values.astype(int)
    is_class_in_pred = data["pred"].eq(class_type).values.astype(int)
    return f1_score(is_class_in_truth, is_class_in_pred)

In [41]:
# Spot-check the F1 score of a single class.
cal_f1(data_merge, "713422000")

0.0

In [42]:
# Report the one-vs-rest F1 score of every class in `labels`, one line each.
for label in labels:
    f1 = cal_f1(data_merge, label)
    print(f"{label} f1 = {f1}")

426177001 f1 = 0.9863095238095237
164890007 f1 = 0.7864197530864199
426783006 f1 = 0.9649122807017544
427084000 f1 = 0.9573863636363636
426761007 f1 = 0.6918918918918918
164889003 f1 = 0.15602836879432624
713422000 f1 = 0.0
