In [1]:
# python 3.6
# tensorflow 2.3.1 cpu
# sklearn.__version__ == 0.23.2

import pandas as pd
import numpy as np
import pickle 
from time import time

from sklearn.utils import shuffle # shuffle打乱样本的顺序，它只会打乱样本的顺序，每个样本的数据维持不变。
from sklearn.metrics import f1_score, precision_score, recall_score, accuracy_score
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.model_selection import GridSearchCV

from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer

def save_obj(obj, file):
    with open(file, 'wb') as f:
        pickle.dump(obj, f, pickle.HIGHEST_PROTOCOL)

def load_obj(file ):
    with open(file, 'rb') as f:
        return pickle.load(f)
  

In [2]:
# 评价指标函数
def f1_score_get(precision, recall):
    # tf.keras.backend.epsilon() 的值为 1e-07
    # 1.0e-7 == tf.keras.backend.epsilon() 得到 True
    f1 = 2 * precision * recall/( precision + recall + 1.0e-7 )
    return f1


# valid_y:真实标签
# predict_y:预测标签
def eval_p_r_f1(valid_y, predict_y):
    # precision 0 1 2 3
    # setting labels=[pos_label] and average != 'binary' will report scores for that label only.
    accuracy = accuracy_score(
                    y_true = valid_y,
                    y_pred = predict_y
                              )
    
    precision_0 = precision_score(
                    y_true = valid_y,
                    y_pred = predict_y,
                    labels = [0],
                    pos_label = 0,
                    average = 'micro'
                    )

    precision_1 = precision_score(
                    y_true = valid_y,
                    y_pred = predict_y,
                    labels = [1],
                    pos_label = 1,
                    average = 'micro'
                    )

    precision_2 = precision_score(
                    y_true = valid_y,
                    y_pred = predict_y,
                    labels = [2],
                    pos_label = 2,
                    average = 'micro'
                    )

    precision_3 = precision_score(
                    y_true = valid_y,
                    y_pred = predict_y,
                    labels = [3],
                    pos_label = 3,
                    average = 'micro'
                    )
    # recall 0 1 2 3
    recall_0 = recall_score(
                    y_true = valid_y,
                    y_pred = predict_y,
                    labels = [0],
                    pos_label = 0,
                    average = 'micro'
                    )

    recall_1 = recall_score(
                    y_true = valid_y,
                    y_pred = predict_y,
                    labels = [1],
                    pos_label = 1,
                    average = 'micro'
                    )

    recall_2 = recall_score(
                    y_true = valid_y,
                    y_pred = predict_y,
                    labels = [2],
                    pos_label = 2,
                    average = 'micro'
                    )

    recall_3 = recall_score(
                    y_true = valid_y,
                    y_pred = predict_y,
                    labels = [3],
                    pos_label = 3,
                    average = 'micro'
                    )
    
    # f1_score 0 1 2 3
    f1_score_0 = f1_score_get( precision_0, recall_0 )
    f1_score_1 = f1_score_get( precision_1, recall_1 )
    f1_score_2 = f1_score_get( precision_2, recall_2 )
    f1_score_3 = f1_score_get( precision_3, recall_3 )
    
    #由y_true 计算各标签权重
    num_0 = np.sum( valid_y == 0 )
    num_1 = np.sum( valid_y == 1 )
    num_2 = np.sum( valid_y == 2 )
    num_3 = np.sum( valid_y == 3 )
    
    total = num_0 + num_1 + num_2 + num_3
    p_0 = num_0/total
    p_1 = num_1/total
    p_2 = num_2/total
    p_3 = num_3/total
    
    precision_avg = p_0 * precision_0 + p_1 * precision_1 + p_2 * precision_2 + p_3 * precision_3
    recall_avg = p_0 * recall_0 + p_1 * recall_1 + p_2 * recall_2 + p_3 * recall_3
    f1_score_avg = p_0 * f1_score_0 + p_1 * f1_score_1 + p_2 * f1_score_2 + p_3 * f1_score_3
    
    return accuracy, precision_avg, recall_avg, f1_score_avg

In [3]:
# train = pd.read_csv('./data/train_location.csv', usecols=['content', 'label'])
# valid = pd.read_csv('./data/valid_location.csv', usecols=['content', 'label'])

# train = pd.read_csv('./data/train_service.csv', usecols=['content', 'label'])
# valid = pd.read_csv('./data/valid_service.csv', usecols=['content', 'label'])

# train = pd.read_csv('./data/train_price.csv', usecols=['content', 'label'])
# valid = pd.read_csv('./data/valid_price.csv', usecols=['content', 'label'])

# train = pd.read_csv('./data/train_environment.csv', usecols=['content', 'label'])
# valid = pd.read_csv('./data/valid_environment.csv', usecols=['content', 'label'])

train = pd.read_csv( './data/train_dish.csv', usecols = ['content', 'label'] )
valid = pd.read_csv( './data/valid_dish.csv', usecols = ['content', 'label'] )

train = shuffle(train, random_state = 2020)
valid = shuffle(valid, random_state = 2020)

train = shuffle(train, random_state = 42)
valid = shuffle(valid, random_state = 42)

print(train.shape)
print(valid.shape)


(105000, 2)
(15000, 2)


In [4]:
# 读取 X
# 不同aspect，文本数据一样，不同体现在标签上

train_x = train.content.values.tolist()
valid_x = valid.content.values.tolist()

print(train_x[0:3])

[' 大众点评 十月份 VIP 会员 聚餐 十几个 兴趣相投 吃货们 点评 聚在 周日 晚上 万家 一炉 划分 音乐餐厅 自助餐厅 两个 区域 装修 看图片 点评 菜品 十七个 菜 只说 我爱吃 先赞 农家 扒茄条 口感鲜香 入口微甜 老少咸宜 必点 菜品 鲍汁 一品豆腐 入味 不油腻 美味 金丝虾球 也很 不错 土豆丝 炸的 酥脆 小朋友 肯定 喜欢 我还 喜欢 七彩鱼 皮 越往 吃芥末 越冲 过瘾 米粉肉 甜而不腻 美味 田七 爽口凉菜 酸汤肥牛 … 量少 … 吃了 忘记 味道 … 说 的是 音乐餐厅 八点 点歌 爱唱歌 的你 千万不要错过 媲美 专业音响 … ', ' 第一次 抽中 同城 试吃活动 很开心 ✌ ️ 听说 小伙伴 都是 老会员 参加 活动 好有爱 ❤ ️ 这家店 位置 不是很好 找 到了 饭店 没什么 顾客 地理位置 劣势 原因 服务员态度 搞错 家的 菜名 尴尬 招牌菜 金椒脆鱼 锅 每个人 习惯 鱼 口感 可以接受 还有一个 猪脚 虾 很好吃 猪脚 炖 入味 凉拌 蕨根粉 入味 酸辣粉 味道 店里 凉拌菜 觉 居多 凉拌菜 各类 肉品 锅 火锅 会吃 过多 拒绝 店家 提议 火锅 榴莲飞饼 味道 很好 榴莲肉 多点 更好 说 点的 口水鸡 味道 也很 鸡肉 上夹 肥 油太多 腻味 桂花糖莲藕 江浙菜馆 做的好 凉拌菜 川菜馆 特别 感受 果汁 兑水 兑多 没什么味道 店家 可以增加 炒菜 顾客 多点 选择 ^ _ ^ ', ' 同事 阅兵 小长假 美好 广场 吃的 说是 好吃 下班 直奔 旗舰店 想着 肯定 好吃 点菜 妹纸 讲 不出 道道 问 好吃 说 没吃过 上个 点评 评价点 上菜 搞不清 桌球 错了 想着 价位 比某 底捞 便宜 算了 人也 不容易 锅底 开吃 刚涮 根干 吧吧 鸭肠 妹纸 加汤 可乐 来瓶 茶杯 喝了 上了 三 玻璃杯 华丽丽 的囧 饮料杯 干嘛 早上 服务 无力吐槽 菜品 鸭肠 挂 架子 上的 看着 甩 洋气 表面 干 一夹 长条 缠 架子 自己动手 放 盘子 里 夹 琥珀 肉 口感 还能 吃到 筋 原谅我 牙口 不好 鲜毛肚 也是醉了 黑黑白白 都有 安慰 极品毛肚 求 新鲜 就好 天下 宽粉 味道还行 QQ 弹弹 极品 五花肉 真的很 极品 白白的 肥肉 无从 下口 首相 丸子 姜 喜欢 吃姜 这是 不愉快 拔

In [5]:
# 读取 Y
# 标签
train_y = train.label.values+2
valid_y = valid.label.values+2

In [6]:
n_features = 1000

tfidf_vectorizer = TfidfVectorizer(
                                        max_df=0.95, 
                                        min_df=2,
                                        max_features=n_features,
                                   )
corpus = train_x + valid_x
print(len(corpus))

t0 = time()
tfidf = tfidf_vectorizer.fit(corpus)

120000


In [7]:
print("done in %0.3fs." % (time() - t0))
train_x = tfidf.transform(train_x)
valid_x = tfidf.transform(valid_x)

done in 8.390s.


In [8]:
print(train_x.shape)
print(valid_x.shape)

(105000, 1000)
(15000, 1000)


In [9]:
# 标准化
scaler = StandardScaler(with_mean=False)
train_x = scaler.fit_transform(train_x)
valid_x = scaler.fit_transform(valid_x)

In [None]:
# The fit time scales at least quadratically with the number of samples and may be impractical beyond tens of thousands of samples.
# 拟合时间至少与样本数量成二次关系，如果样本数量超过数万个，拟合时间可能不现实
# Parameters of the RBF Kernel:
#     C: A low C makes the decision surface smooth, while a high C aims at classifying all training examples correctly.
#     gamma: gamma defines how much influence a single training example has. 
#            The larger gamma is, the closer other examples must be to be affected.
#            gamma定义了单个训练示例的影响程度。gamma越大，其他例子受到的影响就越小。
# One is advised to use sklearn.model_selection.GridSearchCV with C and gamma spaced exponentially指数地 far apart to choose good values.
# RBF SVM parameters example：https://scikit-learn.org/stable/auto_examples/svm/plot_rbf_parameters.html#sphx-glr-auto-examples-svm-plot-rbf-parameters-py

'''
clf_svm = SVC(
                C=1.0, # Regularization parameter.
                       # The strength of the regularization is inversely proportional to C. 
                       # Must be strictly positive. The penalty is a squared l2 penalty.
                    
                kernel='rbf', 
                degree=3, 
                gamma='scale', # Changed in version 0.22: The default value of gamma changed from ‘auto’ to ‘scale’.
                coef0=0.0, 
                shrinking=True, 
                probability=False, 
                tol=0.001, 
                cache_size=200, 
                class_weight=None, 
                verbose=False, 
                max_iter=-1, 
                decision_function_shape='ovr', 
                break_ties=False, 
                random_state=None
            )

'''

In [10]:
C_range = [0.01, 0.1, 1.0, 10.0] # C越小，正则化程度越大，拟合程度越小

# 默认值 1.0

In [None]:
for C in C_range:
    print('C = %f' % C)
    clf = SVC(
                C = C,
                kernel= 'rbf',
                gamma= 'scale', # Changed in version 0.22: The default value of gamma changed from ‘auto’ to ‘scale’.
                probability= True,
                class_weight= 'balanced',
                verbose= True,
                decision_function_shape= 'ovr',
                random_state = 2020
             )
    # 训练过程
    t0 = time()
    clf.fit(train_x, train_y)
    print("done in %0.3fs." % (time() - t0))
    predict_y = clf.predict(valid_x)
    # predict_prob_y = clf.predict_proba(valid_x)
    # 评价指标值
    accuracy, precision_avg, recall_avg, f1_score_avg = eval_p_r_f1(valid_y, predict_y)
    print('accuracy：%0.5f' % accuracy)
    print('precision_avg: %0.5f' % precision_avg)
    print('recall_avg: %0.5f' % recall_avg)
    print('f1_score_avg: %0.5f' % f1_score_avg)
    print('>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>')
            

C = 0.010000
[LibSVM]done in 68998.195s.
accuracy：0.60273
precision_avg: 0.71359
recall_avg: 0.60273
f1_score_avg: 0.50388
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
C = 0.100000
[LibSVM]done in 60378.139s.
accuracy：0.70640
precision_avg: 0.79372
recall_avg: 0.70640
f1_score_avg: 0.73060
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
C = 1.000000
[LibSVM]

In [10]:
clf = SVC(
            C = 1.0,
            kernel= 'rbf',
            gamma= 'scale', # Changed in version 0.22: The default value of gamma changed from ‘auto’ to ‘scale’.
            probability= True,
            class_weight= 'balanced',
            verbose= True,
            decision_function_shape= 'ovr',
            random_state = 2020
         )
# 训练过程
t0 = time()
clf.fit(train_x, train_y)
print("done in %0.3fs." % (time() - t0))
predict_y = clf.predict(valid_x)
# predict_prob_y = clf.predict_proba(valid_x)
# 评价指标值
accuracy, precision_avg, recall_avg, f1_score_avg = eval_p_r_f1(valid_y, predict_y)
print('accuracy：%0.5f' % accuracy)
print('precision_avg: %0.5f' % precision_avg)
print('recall_avg: %0.5f' % recall_avg)
print('f1_score_avg: %0.5f' % f1_score_avg)
print('>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>')


[LibSVM]done in 60629.044s.
accuracy：0.62547
precision_avg: 0.63636
recall_avg: 0.62547
f1_score_avg: 0.61923
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>


In [18]:
clf = SVC(
            C = 1.0,
            kernel= 'rbf',
            gamma= 'scale', # Changed in version 0.22: The default value of gamma changed from ‘auto’ to ‘scale’.
            probability= True,
            class_weight= 'balanced',
            verbose= True,
            decision_function_shape= 'ovr',
            random_state = 2020
         )
# 训练过程
t0 = time()
clf.fit(train_x, train_y)
print("done in %0.3fs." % (time() - t0))
predict_y = clf.predict(valid_x)
# predict_prob_y = clf.predict_proba(valid_x)
# 评价指标值
accuracy, precision_avg, recall_avg, f1_score_avg = eval_p_r_f1(valid_y, predict_y)
print('accuracy：%0.5f' % accuracy)
print('precision_avg: %0.5f' % precision_avg)
print('recall_avg: %0.5f' % recall_avg)
print('f1_score_avg: %0.5f' % f1_score_avg)
print('>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>')


[LibSVM]done in 64033.915s.
accuracy：0.59480
precision_avg: 0.60604
recall_avg: 0.59480
f1_score_avg: 0.58688
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>


In [26]:
clf = SVC(
            C = 1.0,
            kernel= 'rbf',
            gamma= 'scale', # Changed in version 0.22: The default value of gamma changed from ‘auto’ to ‘scale’.
            probability= True,
            class_weight= 'balanced',
            verbose= True,
            decision_function_shape= 'ovr',
            random_state = 2020
         )
# 训练过程
t0 = time()
clf.fit(train_x, train_y)
print("done in %0.3fs." % (time() - t0))
predict_y = clf.predict(valid_x)
# predict_prob_y = clf.predict_proba(valid_x)
# 评价指标值
accuracy, precision_avg, recall_avg, f1_score_avg = eval_p_r_f1(valid_y, predict_y)
print('accuracy：%0.5f' % accuracy)
print('precision_avg: %0.5f' % precision_avg)
print('recall_avg: %0.5f' % recall_avg)
print('f1_score_avg: %0.5f' % f1_score_avg)
print('>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>')


[LibSVM]done in 53816.905s.
accuracy：0.66827
precision_avg: 0.67005
recall_avg: 0.66827
f1_score_avg: 0.66068
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>


In [12]:
clf = SVC(
            C = 1.0,
            kernel= 'rbf',
            gamma= 'scale', # Changed in version 0.22: The default value of gamma changed from ‘auto’ to ‘scale’.
            probability= True,
            class_weight= 'balanced',
            verbose= True,
            decision_function_shape= 'ovr',
            random_state = 2020
         )
# 训练过程
t0 = time()
clf.fit(train_x, train_y)
print("done in %0.3fs." % (time() - t0))
predict_y = clf.predict(valid_x)
# predict_prob_y = clf.predict_proba(valid_x)
# 评价指标值
accuracy, precision_avg, recall_avg, f1_score_avg = eval_p_r_f1(valid_y, predict_y)
print('accuracy：%0.5f' % accuracy)
print('precision_avg: %0.5f' % precision_avg)
print('recall_avg: %0.5f' % recall_avg)
print('f1_score_avg: %0.5f' % f1_score_avg)
print('>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>')


[LibSVM]done in 45912.060s.
accuracy：0.65340
precision_avg: 0.68567
recall_avg: 0.65340
f1_score_avg: 0.66606
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>


In [11]:
for C in C_range:
    print('C = %f' % C)
    clf = SVC(
                C = C,
                kernel= 'rbf',
                gamma= 'scale', # Changed in version 0.22: The default value of gamma changed from ‘auto’ to ‘scale’.
                probability= True,
                class_weight= 'balanced',
                verbose= True,
                decision_function_shape= 'ovr',
                random_state = 2020
             )
    # 训练过程
    t0 = time()
    clf.fit(train_x, train_y)
    print("done in %0.3fs." % (time() - t0))
    predict_y = clf.predict(valid_x)
    # predict_prob_y = clf.predict_proba(valid_x)
    # 评价指标值
    accuracy, precision_avg, recall_avg, f1_score_avg = eval_p_r_f1(valid_y, predict_y)
    print('accuracy：%0.5f' % accuracy)
    print('precision_avg: %0.5f' % precision_avg)
    print('recall_avg: %0.5f' % recall_avg)
    print('f1_score_avg: %0.5f' % f1_score_avg)
    print('>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>')
            

C = 0.010000
[LibSVM]done in 50869.633s.
accuracy：0.36240
precision_avg: 0.62643
recall_avg: 0.36240
f1_score_avg: 0.25639
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
C = 0.100000
[LibSVM]done in 46093.488s.
accuracy：0.57033
precision_avg: 0.62008
recall_avg: 0.57033
f1_score_avg: 0.56691
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
C = 1.000000
[LibSVM]done in 56561.114s.
accuracy：0.62547
precision_avg: 0.63636
recall_avg: 0.62547
f1_score_avg: 0.61923
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
C = 10.000000
[LibSVM]done in 107227.098s.
accuracy：0.65173
precision_avg: 0.61704
recall_avg: 0.65173
f1_score_avg: 0.61642
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>


In [11]:
for C in C_range:
    print('C = %f' % C)
    clf = SVC(
                C = C,
                kernel= 'rbf',
                gamma= 'scale', # Changed in version 0.22: The default value of gamma changed from ‘auto’ to ‘scale’.
                probability= True,
                class_weight= 'balanced',
                verbose= True,
                decision_function_shape= 'ovr',
                random_state = 2020
             )
    # 训练过程
    t0 = time()
    clf.fit(train_x, train_y)
    print("done in %0.3fs." % (time() - t0))
    predict_y = clf.predict(valid_x)
    # predict_prob_y = clf.predict_proba(valid_x)
    # 评价指标值
    accuracy, precision_avg, recall_avg, f1_score_avg = eval_p_r_f1(valid_y, predict_y)
    print('accuracy：%0.5f' % accuracy)
    print('precision_avg: %0.5f' % precision_avg)
    print('recall_avg: %0.5f' % recall_avg)
    print('f1_score_avg: %0.5f' % f1_score_avg)
    print('>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>')
            

C = 0.010000
[LibSVM]done in 46237.830s.
accuracy：0.34527
precision_avg: 0.55059
recall_avg: 0.34527
f1_score_avg: 0.27572
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
C = 0.100000
[LibSVM]done in 42361.197s.
accuracy：0.53460
precision_avg: 0.58560
recall_avg: 0.53460
f1_score_avg: 0.52511
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
C = 1.000000
[LibSVM]done in 58379.965s.
accuracy：0.59480
precision_avg: 0.60604
recall_avg: 0.59480
f1_score_avg: 0.58688
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
C = 10.000000
[LibSVM]done in 123987.400s.
accuracy：0.59600
precision_avg: 0.57326
recall_avg: 0.59600
f1_score_avg: 0.57177
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>


In [None]:
for C in C_range:
    print('C = %f' % C)
    clf = SVC(
                C = C,
                kernel= 'rbf',
                gamma= 'scale', # Changed in version 0.22: The default value of gamma changed from ‘auto’ to ‘scale’.
                probability= True,
                class_weight= 'balanced',
                verbose= True,
                decision_function_shape= 'ovr',
                random_state = 2020
             )
    # 训练过程
    t0 = time()
    clf.fit(train_x, train_y)
    print("done in %0.3fs." % (time() - t0))
    predict_y = clf.predict(valid_x)
    # predict_prob_y = clf.predict_proba(valid_x)
    # 评价指标值
    accuracy, precision_avg, recall_avg, f1_score_avg = eval_p_r_f1(valid_y, predict_y)
    print('accuracy：%0.5f' % accuracy)
    print('precision_avg: %0.5f' % precision_avg)
    print('recall_avg: %0.5f' % recall_avg)
    print('f1_score_avg: %0.5f' % f1_score_avg)
    print('>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>')
            

C = 0.010000
[LibSVM]done in 50577.191s.
accuracy：0.41400
precision_avg: 0.60619
recall_avg: 0.41400
f1_score_avg: 0.31356
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
C = 0.100000
[LibSVM]done in 46436.640s.
accuracy：0.58100
precision_avg: 0.65747
recall_avg: 0.58100
f1_score_avg: 0.59587
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
C = 1.000000
[LibSVM]done in 59374.150s.
accuracy：0.66827
precision_avg: 0.67005
recall_avg: 0.66827
f1_score_avg: 0.66068
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
C = 10.000000
[LibSVM]

In [11]:
for C in C_range:
    print('C = %f' % C)
    clf = SVC(
                C = C,
                kernel= 'rbf',
                gamma= 'scale', # Changed in version 0.22: The default value of gamma changed from ‘auto’ to ‘scale’.
                probability= True,
                class_weight= 'balanced',
                verbose= True,
                decision_function_shape= 'ovr',
                random_state = 2020
             )
    # 训练过程
    t0 = time()
    clf.fit(train_x, train_y)
    print("done in %0.3fs." % (time() - t0))
    predict_y = clf.predict(valid_x)
    # predict_prob_y = clf.predict_proba(valid_x)
    # 评价指标值
    accuracy, precision_avg, recall_avg, f1_score_avg = eval_p_r_f1(valid_y, predict_y)
    print('accuracy：%0.5f' % accuracy)
    print('precision_avg: %0.5f' % precision_avg)
    print('recall_avg: %0.5f' % recall_avg)
    print('f1_score_avg: %0.5f' % f1_score_avg)
    print('>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>')
            

C = 0.010000
[LibSVM]done in 57460.372s.
accuracy：0.64420
precision_avg: 0.64453
recall_avg: 0.64420
f1_score_avg: 0.62997
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
C = 0.100000
[LibSVM]done in 48125.855s.
accuracy：0.58147
precision_avg: 0.68456
recall_avg: 0.58147
f1_score_avg: 0.61393
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
C = 1.000000
[LibSVM]done in 49224.899s.
accuracy：0.65340
precision_avg: 0.68567
recall_avg: 0.65340
f1_score_avg: 0.66606
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
C = 10.000000
[LibSVM]done in 109481.897s.
accuracy：0.69447
precision_avg: 0.65396
recall_avg: 0.69447
f1_score_avg: 0.65616
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
