In [1]:
# 本次以 sklearn 库为例进行说明，使用 sklearn 可以简单方便地得到结果
# 评估模型时可以使用其中的指标模块 sklearn.metrics


In [2]:
# sklearn.metrics 中，用户可以自己设定评分器（scorer）
# Model Selection Interface  模型选择接口
# 1、根据用户选项确定评分器
# check_scoring(estimator, scoring=None, allow_none=False)
from sklearn.metrics import check_scoring
#metrics.check_scoring(estimator, scoring=None, allow_none=False)

# 2、根据字符串获取评分器，如 "accuracy", "f1", "roc_auc"
from sklearn.metrics import get_scorer
# get_scorer("f1") 等返回的是 make_scorer(f1_score, ...)

# 3、根据性能指标或损失函数构造评分器
from sklearn.metrics import make_scorer
# other_scorer = make_scorer(score_func, 
#                greater_is_better=True, 
#                 needs_proba=False, 
#                 needs_threshold=False, **kwargs)
# 使用 make_scorer() 可以自定义评分器，生成的新评分器 other_scorer
# 可以作为 scoring 参数传入模型选择工具（如 GridSearchCV）。如下面的例子
# from sklearn.metrics import fbeta_score, make_scorer
# ftwo_scorer = make_scorer(fbeta_score, beta=2)
# ftwo_scorer
# make_scorer(fbeta_score, beta=2)
# from sklearn.model_selection import GridSearchCV
# from sklearn.svm import LinearSVC
# grid = GridSearchCV(LinearSVC(), param_grid={'C': [1, 10]},
#                     scoring=ftwo_scorer)


In [3]:
# sklearn.metrics 中的 
# Classification metrics 分类指标
# 1、分类准确率（accuracy）
from sklearn.metrics import accuracy_score
# accuracy_score(y_true, y_pred, normalize=True, sample_weight=None)
# Examples
# >>> from sklearn.metrics import accuracy_score
# >>> y_pred = [0, 2, 1, 3]
# >>> y_true = [0, 1, 2, 3]
# >>> accuracy_score(y_true, y_pred)
# 0.5
# >>> accuracy_score(y_true, y_pred, normalize=False)
# 2

# 2、使用梯形法则计算曲线下面积（AUC） ROC下面的面积
from sklearn.metrics import auc
# auc(x, y) 需要配合其他函数一起使用，如先用 roc_curve 得到假正率 FPR 和真正率 TPR
# Returns auc: float
# 如下例子
# >>> import numpy as np
# >>> from sklearn import metrics
# >>> y = np.array([1, 1, 2, 2])
# >>> pred = np.array([0.1, 0.4, 0.35, 0.8])
# >>> fpr, tpr, thresholds = metrics.roc_curve(y, pred, pos_label=2)
# >>> metrics.auc(fpr, tpr)
# 0.75

# 3、根据预测分数计算平均精度（AP）
from sklearn.metrics import average_precision_score
# average_precision_score(y_true, y_score, 
#                 average='macro', pos_label=1, sample_weight=None)
# Returns average_precision: float
# average 可取四个字符串值（也可为 None）：'micro', 'macro' (default), 'samples', 'weighted'
# >>> import numpy as np
# >>> from sklearn.metrics import average_precision_score
# >>> y_true = np.array([0, 0, 1, 1])
# >>> y_scores = np.array([0.1, 0.4, 0.35, 0.8])
# >>> average_precision_score(y_true, y_scores)
# 0.83...

# 4、计算平衡准确率（balanced accuracy）
# 二元和多类分类问题中的平衡准确率可以用来处理不平衡的数据集。
# 它定义为各个类别召回率的平均值。
# from sklearn.metrics import balanced_accuracy_score
# balanced_accuracy_score(y_true, y_pred, 
#                         sample_weight=None, 
#                         adjusted=False)
# Returns balanced_accuracy:float
# >>> from sklearn.metrics import balanced_accuracy_score
# >>> y_true = [0, 1, 0, 0, 1, 0]
# >>> y_pred = [0, 1, 0, 0, 0, 1]
# >>> balanced_accuracy_score(y_true, y_pred)
# 0.625

# 5、计算Brier分数。
# Brier分数越小越好，因此函数名中带有“loss”。
# 对于一组N个预测，Brier分数衡量的是（1）为第i项的可能结果分配的预测概率与（2）实际结果之间的均方差。
# 因此，一组预测的Brier分数越低，说明预测的校准越好。
# 请注意，Brier分数始终取0到1之间的值，因为这是预测概率（必须在0到1之间）和实际结果（只能取0或1）之间的最大可能差值。
# Brier损失由细化损失（refinement loss）和校准损失（calibration loss）组成。Brier分数适用于可以表示为真或假的二元结果和类别结果，
# 但不适用于可以取三个或更多值的有序变量（这是因为Brier分数假定所有可能的结果彼此之间的“距离”相等）。
# 哪个标签被视为正类标签由参数pos_label控制，该参数的默认值为1。
from sklearn.metrics import brier_score_loss
# brier_score_loss(y_true, y_prob, 
#                  sample_weight=None, 
#                  pos_label=None)
# Returns score: float
# >>> import numpy as np
# >>> from sklearn.metrics import brier_score_loss
# >>> y_true = np.array([0, 1, 1, 0])
# >>> y_true_categorical = np.array(["spam", "ham", "ham", "spam"])
# >>> y_prob = np.array([0.1, 0.9, 0.8, 0.3])
# >>> brier_score_loss(y_true, y_prob)
# 0.037...
# >>> brier_score_loss(y_true, 1-y_prob, pos_label=0)
# 0.037...
# >>> brier_score_loss(y_true_categorical, y_prob, pos_label="ham")
# 0.037...
# >>> brier_score_loss(y_true, np.array(y_prob) > 0.5)
# 0.0

#6、建立文字报告，显示主要的分类指标
from sklearn.metrics import classification_report
# classification_report(y_true, y_pred, 
#                       labels=None, 
#                       target_names=None, 
#                       sample_weight=None, 
#                       digits=2, 
#                       output_dict=False, 
#                       zero_division='warn')
# >>> from sklearn.metrics import classification_report
# >>> y_true = [0, 1, 2, 2, 2]
# >>> y_pred = [0, 0, 2, 2, 1]
# >>> target_names = ['class 0', 'class 1', 'class 2']
# >>> print(classification_report(y_true, y_pred, target_names=target_names))
#               precision    recall  f1-score   support
# <BLANKLINE>
#      class 0       0.50      1.00      0.67         1
#      class 1       0.00      0.00      0.00         1
#      class 2       1.00      0.67      0.80         3
# <BLANKLINE>
#     accuracy                           0.60         5
#    macro avg       0.50      0.56      0.49         5
# weighted avg       0.70      0.60      0.61         5
# <BLANKLINE>
# >>> y_pred = [1, 1, 0]
# >>> y_true = [1, 1, 1]
# >>> print(classification_report(y_true, y_pred, labels=[1, 2, 3]))
#               precision    recall  f1-score   support
# <BLANKLINE>
#            1       1.00      0.67      0.80         3
#            2       0.00      0.00      0.00         0
#            3       0.00      0.00      0.00         0
# <BLANKLINE>
#    micro avg       1.00      0.67      0.80         3
#    macro avg       0.33      0.22      0.27         3
# weighted avg       1.00      0.67      0.80         3
# <BLANKLINE>

# 7、Cohen's kappa：一种衡量标注者之间一致性（inter-annotator agreement）的统计量。
from sklearn.metrics import cohen_kappa_score
# cohen_kappa_score(y1, y2, 
#                   labels=None, 
#                   weights=None, 
#                   sample_weight=None)

# Returns kappa: float

# 8、计算混淆矩阵以评估分类的准确性。
from sklearn.metrics import confusion_matrix
# confusion_matrix(y_true, y_pred, 
#                  labels=None, 
#                  sample_weight=None, 
#                  normalize=None)
# >>> from sklearn.metrics import confusion_matrix
# >>> y_true = [2, 0, 2, 2, 0, 1]
# >>> y_pred = [0, 0, 2, 2, 0, 2]
# >>> confusion_matrix(y_true, y_pred)
# array([[2, 0, 0],
#        [0, 0, 1],
#        [1, 0, 2]])
# >>> y_true = ["cat", "ant", "cat", "cat", "ant", "bird"]
# >>> y_pred = ["ant", "ant", "cat", "cat", "ant", "cat"]
# >>> confusion_matrix(y_true, y_pred, labels=["ant", "bird", "cat"])
# array([[2, 0, 0],
#        [0, 0, 1],
#        [1, 0, 2]])

# 9、计算折损累计增益（DCG, Discounted Cumulative Gain）。
# from sklearn.metrics import dcg_score
# dcg_score(y_true, y_score, k=None, 
#           log_base=2, 
#           sample_weight=None, 
#           ignore_ties=False)
# Returns discounted_cumulative_gain:float
# >>> from sklearn.metrics import dcg_score
# >>> # we have ground-truth relevance of some answers to a query:
# >>> true_relevance = np.asarray([[10, 0, 0, 1, 5]])
# >>> # we predict scores for the answers
# >>> scores = np.asarray([[.1, .2, .3, 4, 70]])
# >>> dcg_score(true_relevance, scores) # doctest: +ELLIPSIS
# 9.49...
# >>> # we can set k to truncate the sum; only top k answers contribute
# >>> dcg_score(true_relevance, scores, k=2) # doctest: +ELLIPSIS
# 5.63...
# >>> # now we have some ties in our prediction
# >>> scores = np.asarray([[1, 0, 0, 0, 1]])
# >>> # by default ties are averaged, so here we get the average true
# >>> # relevance of our top predictions: (10 + 5) / 2 = 7.5
# >>> dcg_score(true_relevance, scores, k=1) # doctest: +ELLIPSIS
# 7.5
# >>> # we can choose to ignore ties for faster results, but only
# >>> # if we know there aren't ties in our scores, otherwise we get
# >>> # wrong results:
# >>> dcg_score(true_relevance,
# ...           scores, k=1, ignore_ties=True) # doctest: +ELLIPSIS
# 5.0

# 10、计算F1分数，也称为平衡F分数或F测量
from sklearn.metrics import f1_score
# f1_score(y_true, y_pred, 
#          labels=None, 
#          pos_label=1, 
#          average='binary', 
#          sample_weight=None, 
#          zero_division='warn')
# # F1 = 2 * (precision * recall) / (precision + recall)
# >>> from sklearn.metrics import f1_score
# >>> y_true = [0, 1, 2, 0, 1, 2]
# >>> y_pred = [0, 2, 1, 0, 0, 1]
# >>> f1_score(y_true, y_pred, average='macro')
# 0.26...
# >>> f1_score(y_true, y_pred, average='micro')
# 0.33...
# >>> f1_score(y_true, y_pred, average='weighted')
# 0.26...
# >>> f1_score(y_true, y_pred, average=None)
# array([0.8, 0. , 0. ])
# >>> y_true = [0, 0, 0, 0, 0, 0]
# >>> y_pred = [0, 0, 0, 0, 0, 0]
# >>> f1_score(y_true, y_pred, zero_division=1)
# 1.0...

# TODO：其余分类指标待后续补充










In [4]:
# Regression metrics 回归评估


In [5]:
# Multilabel ranking metrics 多标签排名指标

In [6]:
# Clustering metrics 聚类指标

In [7]:
# Biclustering metrics 双聚类指标

In [8]:
# Pairwise metrics 成对指标

In [9]:
# Plotting 绘图