In [1]:
# python 3.6
# tensorflow 2.3.1 cpu
# sklearn.__version__ == 0.23.2

import pandas as pd
import numpy as np
import pickle 
from time import time

from sklearn.utils import shuffle # shuffle打乱样本的顺序，它只会打乱样本的顺序，每个样本的数据维持不变。
from sklearn.metrics import f1_score, precision_score, recall_score, accuracy_score
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.model_selection import GridSearchCV

from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer


def save_obj(obj, file):
    with open(file, 'wb') as f:
        pickle.dump(obj, f, pickle.HIGHEST_PROTOCOL)

def load_obj(file ):
    with open(file, 'rb') as f:
        return pickle.load(f)

In [2]:
# 评价指标函数
def f1_score_get(precision, recall):
    # tf.keras.backend.epsilon() 的值为 1e-07
    # 1.0e-7 == tf.keras.backend.epsilon() 得到 True
    f1 = 2 * precision * recall/( precision + recall + 1.0e-7 )
    return f1

# valid_y:真实标签
# predict_y:预测标签
def eval_p_r_f1(valid_y, predict_y):
    # precision 0 1 2 3
    # setting labels=[pos_label] and average != 'binary' will report scores for that label only.
    accuracy = accuracy_score(
                    y_true = valid_y,
                    y_pred = predict_y
                              )
    
    precision_0 = precision_score(
                    y_true = valid_y,
                    y_pred = predict_y,
                    labels = [0],
                    pos_label = 0,
                    average = 'micro'
                    )

    precision_1 = precision_score(
                    y_true = valid_y,
                    y_pred = predict_y,
                    labels = [1],
                    pos_label = 1,
                    average = 'micro'
                    )

    precision_2 = precision_score(
                    y_true = valid_y,
                    y_pred = predict_y,
                    labels = [2],
                    pos_label = 2,
                    average = 'micro'
                    )

    precision_3 = precision_score(
                    y_true = valid_y,
                    y_pred = predict_y,
                    labels = [3],
                    pos_label = 3,
                    average = 'micro'
                    )
    # recall 0 1 2 3
    recall_0 = recall_score(
                    y_true = valid_y,
                    y_pred = predict_y,
                    labels = [0],
                    pos_label = 0,
                    average = 'micro'
                    )

    recall_1 = recall_score(
                    y_true = valid_y,
                    y_pred = predict_y,
                    labels = [1],
                    pos_label = 1,
                    average = 'micro'
                    )

    recall_2 = recall_score(
                    y_true = valid_y,
                    y_pred = predict_y,
                    labels = [2],
                    pos_label = 2,
                    average = 'micro'
                    )

    recall_3 = recall_score(
                    y_true = valid_y,
                    y_pred = predict_y,
                    labels = [3],
                    pos_label = 3,
                    average = 'micro'
                    )
    
    # f1_score 0 1 2 3
    f1_score_0 = f1_score_get( precision_0, recall_0 )
    f1_score_1 = f1_score_get( precision_1, recall_1 )
    f1_score_2 = f1_score_get( precision_2, recall_2 )
    f1_score_3 = f1_score_get( precision_3, recall_3 )
    
    #由y_true 计算各标签权重
    num_0 = np.sum( valid_y==0 )
    num_1 = np.sum( valid_y==1 )
    num_2 = np.sum( valid_y==2 )
    num_3 = np.sum( valid_y==3 )
    total = num_0 + num_1 + num_2 + num_3
    print( 'num_0: %d' %num_0)
    print( 'num_1: %d' %num_1)
    print( 'num_2: %d' %num_2)
    print( 'num_3: %d' %num_3)
    print( 'total: %d' %total)
    
    p_0 = num_0/total
    p_1 = num_1/total
    p_2 = num_2/total
    p_3 = num_3/total
    
    precision_avg = p_0 * precision_0 + p_1 * precision_1 + p_2 * precision_2 + p_3 * precision_3
    recall_avg = p_0 * recall_0 + p_1 * recall_1 + p_2 * recall_2 + p_3 * recall_3
    f1_score_avg = p_0 * f1_score_0 + p_1 * f1_score_1 + p_2 * f1_score_2 + p_3 * f1_score_3
    
    return accuracy, precision_avg, recall_avg, f1_score_avg

In [3]:
# train = pd.read_csv('./data/train_location.csv', usecols=['content', 'label'])
# valid = pd.read_csv('./data/valid_location.csv', usecols=['content', 'label'])

# train = pd.read_csv('./data/train_service.csv', usecols=['content', 'label'])
# valid = pd.read_csv('./data/valid_service.csv', usecols=['content', 'label'])

# train = pd.read_csv('./data/train_price.csv', usecols=['content', 'label'])
# valid = pd.read_csv('./data/valid_price.csv', usecols=['content', 'label'])

# train = pd.read_csv('./data/train_environment.csv', usecols=['content', 'label'])
# valid = pd.read_csv('./data/valid_environment.csv', usecols=['content', 'label'])

train = pd.read_csv( './data/train_dish.csv', usecols=['content', 'label'])
valid = pd.read_csv( './data/valid_dish.csv', usecols=['content', 'label'])

train = shuffle( train, random_state = 2020)
valid = shuffle( valid, random_state = 2020)

train = shuffle(train, random_state = 42)
valid = shuffle(valid, random_state = 42)

print(train.shape)
print(valid.shape)

(105000, 2)
(15000, 2)


In [4]:
# 读取 X
# 不同aspect，文本数据一样，不同体现在标签上

train_x = train.content.values.tolist()
valid_x = valid.content.values.tolist()

print( train_x[0:3])

[' 大众点评 十月份 VIP 会员 聚餐 十几个 兴趣相投 吃货们 点评 聚在 周日 晚上 万家 一炉 划分 音乐餐厅 自助餐厅 两个 区域 装修 看图片 点评 菜品 十七个 菜 只说 我爱吃 先赞 农家 扒茄条 口感鲜香 入口微甜 老少咸宜 必点 菜品 鲍汁 一品豆腐 入味 不油腻 美味 金丝虾球 也很 不错 土豆丝 炸的 酥脆 小朋友 肯定 喜欢 我还 喜欢 七彩鱼 皮 越往 吃芥末 越冲 过瘾 米粉肉 甜而不腻 美味 田七 爽口凉菜 酸汤肥牛 … 量少 … 吃了 忘记 味道 … 说 的是 音乐餐厅 八点 点歌 爱唱歌 的你 千万不要错过 媲美 专业音响 … ', ' 第一次 抽中 同城 试吃活动 很开心 ✌ ️ 听说 小伙伴 都是 老会员 参加 活动 好有爱 ❤ ️ 这家店 位置 不是很好 找 到了 饭店 没什么 顾客 地理位置 劣势 原因 服务员态度 搞错 家的 菜名 尴尬 招牌菜 金椒脆鱼 锅 每个人 习惯 鱼 口感 可以接受 还有一个 猪脚 虾 很好吃 猪脚 炖 入味 凉拌 蕨根粉 入味 酸辣粉 味道 店里 凉拌菜 觉 居多 凉拌菜 各类 肉品 锅 火锅 会吃 过多 拒绝 店家 提议 火锅 榴莲飞饼 味道 很好 榴莲肉 多点 更好 说 点的 口水鸡 味道 也很 鸡肉 上夹 肥 油太多 腻味 桂花糖莲藕 江浙菜馆 做的好 凉拌菜 川菜馆 特别 感受 果汁 兑水 兑多 没什么味道 店家 可以增加 炒菜 顾客 多点 选择 ^ _ ^ ', ' 同事 阅兵 小长假 美好 广场 吃的 说是 好吃 下班 直奔 旗舰店 想着 肯定 好吃 点菜 妹纸 讲 不出 道道 问 好吃 说 没吃过 上个 点评 评价点 上菜 搞不清 桌球 错了 想着 价位 比某 底捞 便宜 算了 人也 不容易 锅底 开吃 刚涮 根干 吧吧 鸭肠 妹纸 加汤 可乐 来瓶 茶杯 喝了 上了 三 玻璃杯 华丽丽 的囧 饮料杯 干嘛 早上 服务 无力吐槽 菜品 鸭肠 挂 架子 上的 看着 甩 洋气 表面 干 一夹 长条 缠 架子 自己动手 放 盘子 里 夹 琥珀 肉 口感 还能 吃到 筋 原谅我 牙口 不好 鲜毛肚 也是醉了 黑黑白白 都有 安慰 极品毛肚 求 新鲜 就好 天下 宽粉 味道还行 QQ 弹弹 极品 五花肉 真的很 极品 白白的 肥肉 无从 下口 首相 丸子 姜 喜欢 吃姜 这是 不愉快 拔

In [5]:
# 读取 Y
# 标签
train_y = train.label.values+2
valid_y = valid.label.values+2

In [6]:
n_features = 1000

tfidf_vectorizer = TfidfVectorizer(
                                        max_df=0.95, 
                                        min_df=2,
                                        max_features=n_features,
                                   )
corpus = train_x + valid_x
print(len(corpus))

t0 = time()
tfidf = tfidf_vectorizer.fit(corpus)

120000


In [7]:
print("done in %0.3fs." % (time() - t0))
train_x = tfidf.transform(train_x)
valid_x = tfidf.transform(valid_x)

done in 8.366s.


In [8]:
print(train_x.shape)
print(valid_x.shape)

(105000, 1000)
(15000, 1000)


In [9]:
# 标准化
scaler = StandardScaler(with_mean=False)
train_x = scaler.fit_transform(train_x)
valid_x = scaler.fit_transform(valid_x)

In [12]:
# C =  50.0/105000 约等于 0.0005
clf = LogisticRegression(
                            penalty = 'elasticnet',
                            tol = 0.1,
                            C = 50.0/105000,
                            class_weight = 'balanced',
                            random_state = 2020,
                            solver = 'saga',
                            max_iter=100,
                            multi_class = 'multinomial',
                            verbose = 1,
                            n_jobs = -1,
                            l1_ratio = 0.5  
                        )

t0 = time()
clf.fit(train_x, train_y)

print("done in %0.3fs." % (time() - t0))
#convergence after 6 epochs took 1 seconds

[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 30 concurrent workers.


convergence after 6 epochs took 1 seconds
done in 1.128s.


[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed:    1.1s finished


In [13]:
predict_y = clf.predict(valid_x)
predict_prob_y = clf.predict_proba(valid_x)

# 评价指标值
accuracy, precision_avg, recall_avg, f1_score_avg = eval_p_r_f1(valid_y, predict_y)

print('accuracy：%0.5f' % accuracy)
print('precision_avg：%0.5f' % precision_avg)
print('recall_avg：%0.5f' % recall_avg)
print('f1_score_avg：%0.5f' % f1_score_avg)

accuracy：0.69727
precision_avg：0.81552
recall_avg：0.69727
f1_score_avg：0.73461




In [18]:
# C =  5.0* 50.0/105000
clf = LogisticRegression(
                            penalty = 'elasticnet',
                            tol = 0.1,
                            C = 5.0* 50.0/105000,
                            class_weight = 'balanced',
                            random_state = 2020,
                            solver = 'saga',
                            # max_iter=100, # C =  10.0* 50.0/105000, max_iter reached after 41 seconds
                            # max_iter= 1000, # C =  10.0* 50.0/105000, max_iter reached after 404 seconds
                            max_iter= 10000, 
                            multi_class = 'multinomial',
                            verbose = 1,
                            n_jobs = -1,
                            l1_ratio = 0.5  
                        )

t0 = time()
clf.fit(train_x, train_y)

print("done in %0.3fs." % (time() - t0))

# convergence after 4960 epochs took 1817 seconds

[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 30 concurrent workers.


convergence after 4960 epochs took 1817 seconds
done in 1816.978s.


[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed: 30.3min finished


In [19]:
predict_y = clf.predict(valid_x)
predict_prob_y = clf.predict_proba(valid_x)

# 评价指标值
accuracy, precision_avg, recall_avg, f1_score_avg = eval_p_r_f1(valid_y, predict_y)

print('accuracy：%0.5f' % accuracy)
print('precision_avg：%0.5f' % precision_avg)
print('recall_avg：%0.5f' % recall_avg)
print('f1_score_avg：%0.5f' % f1_score_avg)

# 结果：过拟合了

accuracy：0.66113
precision_avg：0.81871
recall_avg：0.66113
f1_score_avg：0.71695


In [21]:
# C =  2.0* 50.0/105000

clf = LogisticRegression(
                            penalty = 'elasticnet',
                            tol = 0.1,
                            C = 2.0* 50.0/105000,
                            class_weight = 'balanced',
                            random_state = 2020,
                            solver = 'saga',
                            # max_iter=100, # C =  10.0* 50.0/105000, max_iter reached after 41 seconds
                            # max_iter= 1000, # C =  10.0* 50.0/105000, max_iter reached after 404 seconds
                            max_iter= 10000, 
                            multi_class = 'multinomial',
                            verbose = 1,
                            n_jobs = -1,
                            l1_ratio = 0.5  
                        )

t0 = time()
clf.fit(train_x, train_y)

print("done in %0.3fs." % (time() - t0))


[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 30 concurrent workers.


convergence after 10 epochs took 3 seconds
done in 2.628s.


[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed:    2.6s finished


In [22]:
predict_y = clf.predict(valid_x)
predict_prob_y = clf.predict_proba(valid_x)

# 评价指标值
accuracy, precision_avg, recall_avg, f1_score_avg = eval_p_r_f1(valid_y, predict_y)

print('accuracy：%0.5f' % accuracy)
print('precision_avg：%0.5f' % precision_avg)
print('recall_avg：%0.5f' % recall_avg)
print('f1_score_avg：%0.5f' % f1_score_avg)


accuracy：0.69293
precision_avg：0.81835
recall_avg：0.69293
f1_score_avg：0.73554


In [23]:
# C =  0.5* 50.0/105000

clf = LogisticRegression(
                            penalty = 'elasticnet',
                            tol = 0.1,
                            C = 0.5* 50.0/105000,
                            class_weight = 'balanced',
                            random_state = 2020,
                            solver = 'saga',
                            # max_iter=100, # C =  10.0* 50.0/105000, max_iter reached after 41 seconds
                            # max_iter= 1000, # C =  10.0* 50.0/105000, max_iter reached after 404 seconds
                            max_iter= 10000, 
                            multi_class = 'multinomial',
                            verbose = 1,
                            n_jobs = -1,
                            l1_ratio = 0.5  
                        )

t0 = time()
clf.fit(train_x, train_y)

print("done in %0.3fs." % (time() - t0))


[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 30 concurrent workers.


convergence after 6 epochs took 1 seconds
done in 1.018s.


[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed:    1.0s finished


In [24]:
predict_y = clf.predict(valid_x)
predict_prob_y = clf.predict_proba(valid_x)

# 评价指标值
accuracy, precision_avg, recall_avg, f1_score_avg = eval_p_r_f1(valid_y, predict_y)

print('accuracy：%0.5f' % accuracy)
print('precision_avg：%0.5f' % precision_avg)
print('recall_avg：%0.5f' % recall_avg)
print('f1_score_avg：%0.5f' % f1_score_avg)


accuracy：0.69367
precision_avg：0.81274
recall_avg：0.69367
f1_score_avg：0.72626


In [25]:
# C =  10* 50.0/105000

clf = LogisticRegression(
                            penalty = 'elasticnet',
                            tol = 0.1,
                            C = 10 * 50.0/105000,
                            class_weight = 'balanced',
                            random_state = 2020,
                            solver = 'saga',
                            # max_iter=100, # C =  10.0* 50.0/105000, max_iter reached after 41 seconds
                            # max_iter= 1000, # C =  10.0* 50.0/105000, max_iter reached after 404 seconds
                            max_iter= 10000, # max_iter reached after 4056 seconds
                            multi_class = 'multinomial',
                            verbose = 1,
                            n_jobs = -1,
                            l1_ratio = 0.5  
                        )

t0 = time()
clf.fit(train_x, train_y)

print("done in %0.3fs." % (time() - t0))


[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 30 concurrent workers.


max_iter reached after 4056 seconds
done in 4055.830s.


[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed: 67.6min finished


In [26]:
predict_y = clf.predict(valid_x)
predict_prob_y = clf.predict_proba(valid_x)

# 评价指标值
accuracy, precision_avg, recall_avg, f1_score_avg = eval_p_r_f1(valid_y, predict_y)

print('accuracy：%0.5f' % accuracy)
print('precision_avg：%0.5f' % precision_avg)
print('recall_avg：%0.5f' % recall_avg)
print('f1_score_avg：%0.5f' % f1_score_avg)

#过拟合

accuracy：0.60940
precision_avg：0.77741
recall_avg：0.60940
f1_score_avg：0.67423


In [27]:
C_range = [0.1*50.0/105000, 50.0/105000, 10*50.0/105000] 

tol_range = [0.1, 0.01, 0.001]

In [28]:
for C in C_range:
    for tol in tol_range:
        print('C = %f' % C)
        print('tol = %f' % tol)
        clf = LogisticRegression(
                                    penalty = 'elasticnet',
                                    tol = tol,
                                    C = C,
                                    class_weight = 'balanced',
                                    random_state = 2020,
                                    solver = 'saga',
                                    # max_iter=100, # C =  10.0* 50.0/105000, max_iter reached after 41 seconds
                                    # max_iter= 1000, # C =  10.0* 50.0/105000, max_iter reached after 404 seconds
                                    max_iter= 10000,
                                    multi_class = 'multinomial',
                                    verbose = 1,
                                    n_jobs = -1,
                                    l1_ratio = 0.5  
                                )
        t0 = time()
        clf.fit(train_x, train_y)
        print("done in %0.3fs." % (time() - t0))
        predict_y = clf.predict(valid_x)
        # predict_prob_y = clf.predict_proba(valid_x)
        # 评价指标值
        accuracy, precision_avg, recall_avg, f1_score_avg = eval_p_r_f1(valid_y, predict_y)
        print('accuracy：%0.5f' % accuracy)
        print('precision_avg: %0.5f' % precision_avg)
        print('recall_avg: %0.5f' % recall_avg)
        print('f1_score_avg: %0.5f' % f1_score_avg)
        print('>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>')

C = 0.000048
tol = 0.100000


[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 30 concurrent workers.


convergence after 8 epochs took 2 seconds
done in 1.220s.
accuracy：0.61440
precision_avg: 0.73977
recall_avg: 0.61440
f1_score_avg: 0.58454
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
C = 0.000048
tol = 0.010000


[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed:    1.2s finished
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 30 concurrent workers.


convergence after 10 epochs took 1 seconds
done in 1.521s.
accuracy：0.61447
precision_avg: 0.73985
recall_avg: 0.61447
f1_score_avg: 0.58461
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
C = 0.000048
tol = 0.001000


[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed:    1.4s finished
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 30 concurrent workers.


convergence after 13 epochs took 2 seconds
done in 1.822s.
accuracy：0.61447
precision_avg: 0.73985
recall_avg: 0.61447
f1_score_avg: 0.58461
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
C = 0.000476
tol = 0.100000


[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed:    1.8s finished
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 30 concurrent workers.


convergence after 6 epochs took 1 seconds
done in 1.119s.
accuracy：0.69727
precision_avg: 0.81552
recall_avg: 0.69727
f1_score_avg: 0.73461
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
C = 0.000476
tol = 0.010000


[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed:    1.1s finished
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 30 concurrent workers.


convergence after 13 epochs took 3 seconds
done in 2.835s.
accuracy：0.69473
precision_avg: 0.81923
recall_avg: 0.69473
f1_score_avg: 0.73298
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
C = 0.000476
tol = 0.001000


[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed:    2.8s finished
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 30 concurrent workers.


convergence after 18 epochs took 4 seconds
done in 3.934s.
accuracy：0.69487
precision_avg: 0.81936
recall_avg: 0.69487
f1_score_avg: 0.73302
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
C = 0.004762
tol = 0.100000


[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed:    3.9s finished
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 30 concurrent workers.


max_iter reached after 4076 seconds
done in 4076.234s.
accuracy：0.60940
precision_avg: 0.77741
recall_avg: 0.60940
f1_score_avg: 0.67423
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
C = 0.004762
tol = 0.010000


[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed: 67.9min finished
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 30 concurrent workers.


max_iter reached after 4096 seconds
done in 4095.264s.
accuracy：0.60940
precision_avg: 0.77741
recall_avg: 0.60940
f1_score_avg: 0.67423
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
C = 0.004762
tol = 0.001000


[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed: 68.3min finished
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 30 concurrent workers.


max_iter reached after 4052 seconds
done in 4052.821s.
accuracy：0.60940
precision_avg: 0.77741
recall_avg: 0.60940
f1_score_avg: 0.67423
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>


[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed: 67.5min finished


In [10]:
clf = LogisticRegression(
                            penalty = 'elasticnet',
                            tol = 0.1, #location上确定的最优参数
                            C = 50.0/105000, #location上确定的最优参数
                            class_weight = 'balanced',
                            random_state = 2020,
                            solver = 'saga',
                            # max_iter=100, # C =  10.0* 50.0/105000, max_iter reached after 41 seconds
                            # max_iter= 1000, # C =  10.0* 50.0/105000, max_iter reached after 404 seconds
                            max_iter= 10000,
                            multi_class = 'multinomial',
                            verbose = 1,
                            n_jobs = -1,
                            l1_ratio = 0.5  
                        )
t0 = time()
clf.fit(train_x, train_y)
print("done in %0.3fs." % (time() - t0))
predict_y = clf.predict(valid_x)
# predict_prob_y = clf.predict_proba(valid_x)
# 评价指标值
accuracy, precision_avg, recall_avg, f1_score_avg = eval_p_r_f1(valid_y, predict_y)
print('accuracy：%0.5f' % accuracy)
print('precision_avg: %0.5f' % precision_avg)
print('recall_avg: %0.5f' % recall_avg)
print('f1_score_avg: %0.5f' % f1_score_avg)
print('>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>')

[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 10 concurrent workers.


convergence after 2 epochs took 1 seconds
done in 0.419s.
num_0: 4482
num_1: 1790
num_2: 2178
num_3: 6550
total: 15000
accuracy：0.59033
precision_avg: 0.61867
recall_avg: 0.59033
f1_score_avg: 0.58655
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>


[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed:    0.4s finished


In [18]:
clf = LogisticRegression(
                            penalty = 'elasticnet',
                            tol = 0.1, #location上确定的最优参数
                            C = 50.0/105000, #location上确定的最优参数
                            class_weight = 'balanced',
                            random_state = 2020,
                            solver = 'saga',
                            # max_iter=100, # C =  10.0* 50.0/105000, max_iter reached after 41 seconds
                            # max_iter= 1000, # C =  10.0* 50.0/105000, max_iter reached after 404 seconds
                            max_iter= 10000,
                            multi_class = 'multinomial',
                            verbose = 1,
                            n_jobs = -1,
                            l1_ratio = 0.5  
                        )
t0 = time()
clf.fit(train_x, train_y)
print("done in %0.3fs." % (time() - t0))
predict_y = clf.predict(valid_x)
# predict_prob_y = clf.predict_proba(valid_x)
# 评价指标值
accuracy, precision_avg, recall_avg, f1_score_avg = eval_p_r_f1(valid_y, predict_y)
print('accuracy：%0.5f' % accuracy)
print('precision_avg: %0.5f' % precision_avg)
print('recall_avg: %0.5f' % recall_avg)
print('f1_score_avg: %0.5f' % f1_score_avg)
print('>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>')

[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 10 concurrent workers.


convergence after 3 epochs took 0 seconds
done in 0.615s.
num_0: 4431
num_1: 1739
num_2: 3417
num_3: 5413
total: 15000
accuracy：0.57213
precision_avg: 0.59960
recall_avg: 0.57213
f1_score_avg: 0.56123
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>


[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed:    0.5s finished


In [26]:
clf = LogisticRegression(
                            penalty = 'elasticnet',
                            tol = 0.1, #location上确定的最优参数
                            C = 50.0/105000, #location上确定的最优参数
                            class_weight = 'balanced',
                            random_state = 2020,
                            solver = 'saga',
                            # max_iter=100, # C =  10.0* 50.0/105000, max_iter reached after 41 seconds
                            # max_iter= 1000, # C =  10.0* 50.0/105000, max_iter reached after 404 seconds
                            max_iter= 10000,
                            multi_class = 'multinomial',
                            verbose = 1,
                            n_jobs = -1,
                            l1_ratio = 0.5  
                        )
t0 = time()
clf.fit(train_x, train_y)
print("done in %0.3fs." % (time() - t0))
predict_y = clf.predict(valid_x)
# predict_prob_y = clf.predict_proba(valid_x)
# 评价指标值
accuracy, precision_avg, recall_avg, f1_score_avg = eval_p_r_f1(valid_y, predict_y)
print('accuracy：%0.5f' % accuracy)
print('precision_avg: %0.5f' % precision_avg)
print('recall_avg: %0.5f' % recall_avg)
print('f1_score_avg: %0.5f' % f1_score_avg)
print('>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>')

[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 10 concurrent workers.


convergence after 2 epochs took 1 seconds
done in 0.414s.
num_0: 5400
num_1: 1214
num_2: 1419
num_3: 6967
total: 15000
accuracy：0.61880
precision_avg: 0.66712
recall_avg: 0.61880
f1_score_avg: 0.62599
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>


[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed:    0.4s finished


In [34]:
clf = LogisticRegression(
                            penalty = 'elasticnet',
                            tol = 0.1, #location上确定的最优参数
                            C = 50.0/105000, #location上确定的最优参数
                            class_weight = 'balanced',
                            random_state = 2020,
                            solver = 'saga',
                            # max_iter=100, # C =  10.0* 50.0/105000, max_iter reached after 41 seconds
                            # max_iter= 1000, # C =  10.0* 50.0/105000, max_iter reached after 404 seconds
                            max_iter= 10000,
                            multi_class = 'multinomial',
                            verbose = 1,
                            n_jobs = -1,
                            l1_ratio = 0.5  
                        )
t0 = time()
clf.fit(train_x, train_y)
print("done in %0.3fs." % (time() - t0))
predict_y = clf.predict(valid_x)
# predict_prob_y = clf.predict_proba(valid_x)
# 评价指标值
accuracy, precision_avg, recall_avg, f1_score_avg = eval_p_r_f1(valid_y, predict_y)
print('accuracy：%0.5f' % accuracy)
print('precision_avg: %0.5f' % precision_avg)
print('recall_avg: %0.5f' % recall_avg)
print('f1_score_avg: %0.5f' % f1_score_avg)
print('>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>')

[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 10 concurrent workers.


convergence after 4 epochs took 1 seconds
done in 0.816s.
num_0: 475
num_1: 1516
num_2: 3251
num_3: 9758
total: 15000
accuracy：0.54213
precision_avg: 0.66600
recall_avg: 0.54213
f1_score_avg: 0.58105
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>


[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed:    0.8s finished


In [10]:
C_range = [0.1*50.0/105000, 50.0/105000, 10*50.0/105000] 
tol_range = [0.1, 0.01, 0.001]

for C in C_range:
    for tol in tol_range:
        print('C = %f' % C)
        print('tol = %f' % tol)
        clf = LogisticRegression(
                                    penalty = 'elasticnet',
                                    tol = tol,
                                    C = C,
                                    class_weight = 'balanced',
                                    random_state = 2020,
                                    solver = 'saga',
                                    # max_iter=100, # C =  10.0* 50.0/105000, max_iter reached after 41 seconds
                                    # max_iter= 1000, # C =  10.0* 50.0/105000, max_iter reached after 404 seconds
                                    max_iter= 10000,
                                    multi_class = 'multinomial',
                                    verbose = 1,
                                    n_jobs = -1,
                                    l1_ratio = 0.5  
                                )
        t0 = time()
        clf.fit(train_x, train_y)
        print("done in %0.3fs." % (time() - t0))
        predict_y = clf.predict(valid_x)
        # predict_prob_y = clf.predict_proba(valid_x)
        # 评价指标值
        accuracy, precision_avg, recall_avg, f1_score_avg = eval_p_r_f1(valid_y, predict_y)
        print('accuracy：%0.5f' % accuracy)
        print('precision_avg: %0.5f' % precision_avg)
        print('recall_avg: %0.5f' % recall_avg)
        print('f1_score_avg: %0.5f' % f1_score_avg)
        print('>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>')

C = 0.000048
tol = 0.100000


[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 20 concurrent workers.


convergence after 5 epochs took 1 seconds
done in 0.823s.
num_0: 4482
num_1: 1790
num_2: 2178
num_3: 6550
total: 15000
accuracy：0.42520
precision_avg: 0.45900
recall_avg: 0.42520
f1_score_avg: 0.36104
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
C = 0.000048
tol = 0.010000


[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed:    0.8s finished
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 20 concurrent workers.


convergence after 8 epochs took 1 seconds
done in 1.220s.
num_0: 4482
num_1: 1790
num_2: 2178
num_3: 6550
total: 15000
accuracy：0.35740
precision_avg: 0.19123
recall_avg: 0.35740
f1_score_avg: 0.24418
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
C = 0.000048
tol = 0.001000


[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed:    1.1s finished
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 20 concurrent workers.


convergence after 13 epochs took 2 seconds
done in 1.922s.
num_0: 4482
num_1: 1790
num_2: 2178
num_3: 6550
total: 15000
accuracy：0.41387
precision_avg: 0.46189
recall_avg: 0.41387
f1_score_avg: 0.34336
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
C = 0.000476
tol = 0.100000


[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed:    1.9s finished
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 20 concurrent workers.


convergence after 2 epochs took 1 seconds
done in 0.418s.
num_0: 4482
num_1: 1790
num_2: 2178
num_3: 6550
total: 15000
accuracy：0.59033
precision_avg: 0.61867
recall_avg: 0.59033
f1_score_avg: 0.58655
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
C = 0.000476
tol = 0.010000


[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed:    0.4s finished
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 20 concurrent workers.


convergence after 7 epochs took 1 seconds
done in 1.421s.
num_0: 4482
num_1: 1790
num_2: 2178
num_3: 6550
total: 15000
accuracy：0.57027
precision_avg: 0.62168
recall_avg: 0.57027
f1_score_avg: 0.56943
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
C = 0.000476
tol = 0.001000


[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed:    1.3s finished
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 20 concurrent workers.


convergence after 10 epochs took 2 seconds
done in 2.126s.
num_0: 4482
num_1: 1790
num_2: 2178
num_3: 6550
total: 15000
accuracy：0.57000
precision_avg: 0.62177
recall_avg: 0.57000
f1_score_avg: 0.56923
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
C = 0.004762
tol = 0.100000


[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed:    2.1s finished
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 20 concurrent workers.


convergence after 2 epochs took 1 seconds
done in 1.121s.
num_0: 4482
num_1: 1790
num_2: 2178
num_3: 6550
total: 15000
accuracy：0.59953
precision_avg: 0.62679
recall_avg: 0.59953
f1_score_avg: 0.60276
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
C = 0.004762
tol = 0.010000


[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed:    1.0s finished
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 20 concurrent workers.


convergence after 13 epochs took 11 seconds
done in 10.158s.
num_0: 4482
num_1: 1790
num_2: 2178
num_3: 6550
total: 15000
accuracy：0.61033
precision_avg: 0.64328
recall_avg: 0.61033
f1_score_avg: 0.61217
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
C = 0.004762
tol = 0.001000


[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed:   10.1s finished
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 20 concurrent workers.


convergence after 17 epochs took 16 seconds
done in 16.391s.
num_0: 4482
num_1: 1790
num_2: 2178
num_3: 6550
total: 15000
accuracy：0.61013
precision_avg: 0.64313
recall_avg: 0.61013
f1_score_avg: 0.61198
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>


[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed:   16.4s finished


In [11]:
# 上面的最优参数取在边界，所以需要进一步确定
C_range = [100*50.0/105000] 
tol_range = [0.1, 0.01, 0.001]

for C in C_range:
    for tol in tol_range:
        print('C = %f' % C)
        print('tol = %f' % tol)
        clf = LogisticRegression(
                                    penalty = 'elasticnet',
                                    tol = tol,
                                    C = C,
                                    class_weight = 'balanced',
                                    random_state = 2020,
                                    solver = 'saga',
                                    # max_iter=100, # C =  10.0* 50.0/105000, max_iter reached after 41 seconds
                                    # max_iter= 1000, # C =  10.0* 50.0/105000, max_iter reached after 404 seconds
                                    max_iter= 10000,
                                    multi_class = 'multinomial',
                                    verbose = 1,
                                    n_jobs = -1,
                                    l1_ratio = 0.5  
                                )
        t0 = time()
        clf.fit(train_x, train_y)
        print("done in %0.3fs." % (time() - t0))
        predict_y = clf.predict(valid_x)
        # predict_prob_y = clf.predict_proba(valid_x)
        # 评价指标值
        accuracy, precision_avg, recall_avg, f1_score_avg = eval_p_r_f1(valid_y, predict_y)
        print('accuracy：%0.5f' % accuracy)
        print('precision_avg: %0.5f' % precision_avg)
        print('recall_avg: %0.5f' % recall_avg)
        print('f1_score_avg: %0.5f' % f1_score_avg)
        print('>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>')

C = 0.047619
tol = 0.100000


[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 20 concurrent workers.


convergence after 3 epochs took 5 seconds
done in 5.341s.
num_0: 4482
num_1: 1790
num_2: 2178
num_3: 6550
total: 15000
accuracy：0.58907
precision_avg: 0.63539
recall_avg: 0.58907
f1_score_avg: 0.59345
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
C = 0.047619
tol = 0.010000


[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed:    5.3s finished
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 20 concurrent workers.


convergence after 10 epochs took 12 seconds
done in 11.866s.
num_0: 4482
num_1: 1790
num_2: 2178
num_3: 6550
total: 15000
accuracy：0.61000
precision_avg: 0.64311
recall_avg: 0.61000
f1_score_avg: 0.61302
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
C = 0.047619
tol = 0.001000


[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed:   11.8s finished
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 20 concurrent workers.


convergence after 110 epochs took 234 seconds
done in 234.410s.
num_0: 4482
num_1: 1790
num_2: 2178
num_3: 6550
total: 15000
accuracy：0.61007
precision_avg: 0.64294
recall_avg: 0.61007
f1_score_avg: 0.61298
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>


[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed:  3.9min finished


In [12]:
# 上面的最优参数取在边界，所以需要进一步确定
C_range = [1000*50.0/105000] 
tol_range = [0.1, 0.01, 0.001]

for C in C_range:
    for tol in tol_range:
        print('C = %f' % C)
        print('tol = %f' % tol)
        clf = LogisticRegression(
                                    penalty = 'elasticnet',
                                    tol = tol,
                                    C = C,
                                    class_weight = 'balanced',
                                    random_state = 2020,
                                    solver = 'saga',
                                    # max_iter=100, # C =  10.0* 50.0/105000, max_iter reached after 41 seconds
                                    # max_iter= 1000, # C =  10.0* 50.0/105000, max_iter reached after 404 seconds
                                    max_iter= 10000,
                                    multi_class = 'multinomial',
                                    verbose = 1,
                                    n_jobs = -1,
                                    l1_ratio = 0.5  
                                )
        t0 = time()
        clf.fit(train_x, train_y)
        print("done in %0.3fs." % (time() - t0))
        predict_y = clf.predict(valid_x)
        # predict_prob_y = clf.predict_proba(valid_x)
        # 评价指标值
        accuracy, precision_avg, recall_avg, f1_score_avg = eval_p_r_f1(valid_y, predict_y)
        print('accuracy：%0.5f' % accuracy)
        print('precision_avg: %0.5f' % precision_avg)
        print('recall_avg: %0.5f' % recall_avg)
        print('f1_score_avg: %0.5f' % f1_score_avg)
        print('>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>')

C = 0.476190
tol = 0.100000


[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 20 concurrent workers.


convergence after 3 epochs took 8 seconds
done in 7.761s.
num_0: 4482
num_1: 1790
num_2: 2178
num_3: 6550
total: 15000
accuracy：0.58673
precision_avg: 0.63434
recall_avg: 0.58673
f1_score_avg: 0.59162
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
C = 0.476190
tol = 0.010000


[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed:    7.7s finished
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 20 concurrent workers.


convergence after 10 epochs took 21 seconds
done in 21.007s.
num_0: 4482
num_1: 1790
num_2: 2178
num_3: 6550
total: 15000
accuracy：0.60953
precision_avg: 0.64224
recall_avg: 0.60953
f1_score_avg: 0.61255
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
C = 0.476190
tol = 0.001000


[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed:   21.0s finished
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 20 concurrent workers.


convergence after 17 epochs took 30 seconds
done in 30.740s.
num_0: 4482
num_1: 1790
num_2: 2178
num_3: 6550
total: 15000
accuracy：0.61000
precision_avg: 0.64276
recall_avg: 0.61000
f1_score_avg: 0.61307
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>


[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed:   30.7s finished


In [13]:
# 上面的最优参数取在边界，所以需要进一步确定

C_range = [1000*50.0/105000] 

tol_range = [0.0001, 0.00001]

for C in C_range:
    for tol in tol_range:
        print('C = %f' % C)
        print('tol = %f' % tol)
        clf = LogisticRegression(
                                    penalty = 'elasticnet',
                                    tol = tol,
                                    C = C,
                                    class_weight = 'balanced',
                                    random_state = 2020,
                                    solver = 'saga',
                                    # max_iter=100, # C =  10.0* 50.0/105000, max_iter reached after 41 seconds
                                    # max_iter= 1000, # C =  10.0* 50.0/105000, max_iter reached after 404 seconds
                                    max_iter= 10000,
                                    multi_class = 'multinomial',
                                    verbose = 1,
                                    n_jobs = -1,
                                    l1_ratio = 0.5  
                                )
        t0 = time()
        clf.fit(train_x, train_y)
        print("done in %0.3fs." % (time() - t0))
        predict_y = clf.predict(valid_x)
        # predict_prob_y = clf.predict_proba(valid_x)
        # 评价指标值
        accuracy, precision_avg, recall_avg, f1_score_avg = eval_p_r_f1(valid_y, predict_y)
        print('accuracy：%0.5f' % accuracy)
        print('precision_avg: %0.5f' % precision_avg)
        print('recall_avg: %0.5f' % recall_avg)
        print('f1_score_avg: %0.5f' % f1_score_avg)
        print('>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>')

C = 0.476190
tol = 0.000100


[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 20 concurrent workers.


convergence after 1069 epochs took 2432 seconds
done in 2432.358s.
num_0: 4482
num_1: 1790
num_2: 2178
num_3: 6550
total: 15000
accuracy：0.60980
precision_avg: 0.64332
recall_avg: 0.60980
f1_score_avg: 0.61283
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
C = 0.476190
tol = 0.000010


[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed: 40.5min finished
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 20 concurrent workers.


convergence after 1628 epochs took 3784 seconds
done in 3783.575s.
num_0: 4482
num_1: 1790
num_2: 2178
num_3: 6550
total: 15000
accuracy：0.60993
precision_avg: 0.64339
recall_avg: 0.60993
f1_score_avg: 0.61293
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>


[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed: 63.1min finished


In [14]:
# 上面的最优参数取在边界，所以需要进一步确定
C_range = [10000*50.0/105000] 
tol_range = [0.1, 0.01, 0.001, 0.0001, 0.00001]

for C in C_range:
    for tol in tol_range:
        print('C = %f' % C)
        print('tol = %f' % tol)
        clf = LogisticRegression(
                                    penalty = 'elasticnet',
                                    tol = tol,
                                    C = C,
                                    class_weight = 'balanced',
                                    random_state = 2020,
                                    solver = 'saga',
                                    # max_iter=100, # C =  10.0* 50.0/105000, max_iter reached after 41 seconds
                                    # max_iter= 1000, # C =  10.0* 50.0/105000, max_iter reached after 404 seconds
                                    max_iter= 10000,
                                    multi_class = 'multinomial',
                                    verbose = 1,
                                    n_jobs = -1,
                                    l1_ratio = 0.5  
                                )
        t0 = time()
        clf.fit(train_x, train_y)
        print("done in %0.3fs." % (time() - t0))
        predict_y = clf.predict(valid_x)
        # predict_prob_y = clf.predict_proba(valid_x)
        # 评价指标值
        accuracy, precision_avg, recall_avg, f1_score_avg = eval_p_r_f1(valid_y, predict_y)
        print('accuracy：%0.5f' % accuracy)
        print('precision_avg: %0.5f' % precision_avg)
        print('recall_avg: %0.5f' % recall_avg)
        print('f1_score_avg: %0.5f' % f1_score_avg)
        print('>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>')

C = 4.761905
tol = 0.100000


[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 20 concurrent workers.


convergence after 3 epochs took 8 seconds
done in 7.954s.
num_0: 4482
num_1: 1790
num_2: 2178
num_3: 6550
total: 15000
accuracy：0.58667
precision_avg: 0.63444
recall_avg: 0.58667
f1_score_avg: 0.59159
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
C = 4.761905
tol = 0.010000


[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed:    7.9s finished
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 20 concurrent workers.


convergence after 10 epochs took 26 seconds
done in 25.935s.
num_0: 4482
num_1: 1790
num_2: 2178
num_3: 6550
total: 15000
accuracy：0.60987
precision_avg: 0.64246
recall_avg: 0.60987
f1_score_avg: 0.61291
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
C = 4.761905
tol = 0.001000


[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed:   25.8s finished
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 20 concurrent workers.


convergence after 17 epochs took 39 seconds
done in 39.567s.
num_0: 4482
num_1: 1790
num_2: 2178
num_3: 6550
total: 15000
accuracy：0.60980
precision_avg: 0.64253
recall_avg: 0.60980
f1_score_avg: 0.61290
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
C = 4.761905
tol = 0.000100


[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed:   39.5s finished
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 20 concurrent workers.


convergence after 39 epochs took 68 seconds
done in 68.783s.
num_0: 4482
num_1: 1790
num_2: 2178
num_3: 6550
total: 15000
accuracy：0.60980
precision_avg: 0.64254
recall_avg: 0.60980
f1_score_avg: 0.61288
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
C = 4.761905
tol = 0.000010


[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed:  1.1min finished
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 20 concurrent workers.


max_iter reached after 23810 seconds
done in 23809.849s.
num_0: 4482
num_1: 1790
num_2: 2178
num_3: 6550
total: 15000
accuracy：0.60987
precision_avg: 0.64340
recall_avg: 0.60987
f1_score_avg: 0.61291
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>


[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed: 396.8min finished


In [10]:
C_range = [0.1*50.0/105000, 50.0/105000, 10*50.0/105000, 100*50.0/105000, 1000*50.0/105000, 10000*50.0/105000] 

tol_range = [0.1, 0.01, 0.001, 0.0001, 0.00001]

for C in C_range:
    for tol in tol_range:
        print('C = %f' % C)
        print('tol = %f' % tol)
        clf = LogisticRegression(
                                    penalty = 'elasticnet',
                                    tol = tol,
                                    C = C,
                                    class_weight = 'balanced',
                                    random_state = 2020,
                                    solver = 'saga',
                                    # max_iter=100, # C =  10.0* 50.0/105000, max_iter reached after 41 seconds
                                    # max_iter= 1000, # C =  10.0* 50.0/105000, max_iter reached after 404 seconds
                                    max_iter= 10000,
                                    multi_class = 'multinomial',
                                    verbose = 1,
                                    n_jobs = -1,
                                    l1_ratio = 0.5  
                                )
        t0 = time()
        clf.fit(train_x, train_y)
        print("done in %0.3fs." % (time() - t0))
        predict_y = clf.predict(valid_x)
        # predict_prob_y = clf.predict_proba(valid_x)
        # 评价指标值
        accuracy, precision_avg, recall_avg, f1_score_avg = eval_p_r_f1(valid_y, predict_y)
        print('accuracy：%0.5f' % accuracy)
        print('precision_avg: %0.5f' % precision_avg)
        print('recall_avg: %0.5f' % recall_avg)
        print('f1_score_avg: %0.5f' % f1_score_avg)
        print('>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>')

C = 0.000048
tol = 0.100000


[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 20 concurrent workers.


convergence after 8 epochs took 1 seconds
done in 1.324s.
num_0: 4431
num_1: 1739
num_2: 3417
num_3: 5413
total: 15000
accuracy：0.37840
precision_avg: 0.46562
recall_avg: 0.37840
f1_score_avg: 0.27778
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
C = 0.000048
tol = 0.010000


[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed:    1.2s finished
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 20 concurrent workers.


convergence after 9 epochs took 1 seconds
done in 1.321s.
num_0: 4431
num_1: 1739
num_2: 3417
num_3: 5413
total: 15000
accuracy：0.37847
precision_avg: 0.46606
recall_avg: 0.37847
f1_score_avg: 0.27784
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
C = 0.000048
tol = 0.001000


[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed:    1.2s finished
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 20 concurrent workers.


convergence after 14 epochs took 2 seconds
done in 1.924s.
num_0: 4431
num_1: 1739
num_2: 3417
num_3: 5413
total: 15000
accuracy：0.37880
precision_avg: 0.46556
recall_avg: 0.37880
f1_score_avg: 0.27843
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
C = 0.000048
tol = 0.000100


[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed:    1.9s finished
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 20 concurrent workers.


convergence after 16 epochs took 2 seconds
done in 2.225s.
num_0: 4431
num_1: 1739
num_2: 3417
num_3: 5413
total: 15000
accuracy：0.37880
precision_avg: 0.46556
recall_avg: 0.37880
f1_score_avg: 0.27843
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
C = 0.000048
tol = 0.000010


[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed:    2.1s finished
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 20 concurrent workers.


convergence after 105 epochs took 14 seconds
done in 14.081s.
num_0: 4431
num_1: 1739
num_2: 3417
num_3: 5413
total: 15000
accuracy：0.37880
precision_avg: 0.46518
recall_avg: 0.37880
f1_score_avg: 0.27843
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
C = 0.000476
tol = 0.100000


[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed:   14.0s finished
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 20 concurrent workers.


convergence after 3 epochs took 0 seconds
done in 0.518s.
num_0: 4431
num_1: 1739
num_2: 3417
num_3: 5413
total: 15000
accuracy：0.57213
precision_avg: 0.59960
recall_avg: 0.57213
f1_score_avg: 0.56123
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
C = 0.000476
tol = 0.010000


[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed:    0.5s finished
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 20 concurrent workers.


convergence after 8 epochs took 1 seconds
done in 1.421s.
num_0: 4431
num_1: 1739
num_2: 3417
num_3: 5413
total: 15000
accuracy：0.57147
precision_avg: 0.60305
recall_avg: 0.57147
f1_score_avg: 0.56159
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
C = 0.000476
tol = 0.001000


[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed:    1.4s finished
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 20 concurrent workers.


convergence after 12 epochs took 2 seconds
done in 2.425s.
num_0: 4431
num_1: 1739
num_2: 3417
num_3: 5413
total: 15000
accuracy：0.57093
precision_avg: 0.60261
recall_avg: 0.57093
f1_score_avg: 0.56103
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
C = 0.000476
tol = 0.000100


[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed:    2.3s finished
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 20 concurrent workers.


convergence after 141 epochs took 32 seconds
done in 32.160s.
num_0: 4431
num_1: 1739
num_2: 3417
num_3: 5413
total: 15000
accuracy：0.56867
precision_avg: 0.60073
recall_avg: 0.56867
f1_score_avg: 0.55839
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
C = 0.000476
tol = 0.000010


[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed:   32.1s finished
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 20 concurrent workers.


convergence after 419 epochs took 98 seconds
done in 97.922s.
num_0: 4431
num_1: 1739
num_2: 3417
num_3: 5413
total: 15000
accuracy：0.56820
precision_avg: 0.60059
recall_avg: 0.56820
f1_score_avg: 0.55789
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
C = 0.004762
tol = 0.100000


[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed:  1.6min finished
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 20 concurrent workers.


convergence after 3 epochs took 1 seconds
done in 1.120s.
num_0: 4431
num_1: 1739
num_2: 3417
num_3: 5413
total: 15000
accuracy：0.57707
precision_avg: 0.60135
recall_avg: 0.57707
f1_score_avg: 0.57303
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
C = 0.004762
tol = 0.010000


[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed:    1.1s finished
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 20 concurrent workers.


convergence after 8 epochs took 4 seconds
done in 3.629s.
num_0: 4431
num_1: 1739
num_2: 3417
num_3: 5413
total: 15000
accuracy：0.58667
precision_avg: 0.60875
recall_avg: 0.58667
f1_score_avg: 0.58266
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
C = 0.004762
tol = 0.001000


[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed:    3.5s finished
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 20 concurrent workers.


convergence after 15 epochs took 13 seconds
done in 12.568s.
num_0: 4431
num_1: 1739
num_2: 3417
num_3: 5413
total: 15000
accuracy：0.58807
precision_avg: 0.60957
recall_avg: 0.58807
f1_score_avg: 0.58396
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
C = 0.004762
tol = 0.000100


[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed:   12.5s finished
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 20 concurrent workers.


convergence after 66 epochs took 90 seconds
done in 90.577s.
num_0: 4431
num_1: 1739
num_2: 3417
num_3: 5413
total: 15000
accuracy：0.58793
precision_avg: 0.60947
recall_avg: 0.58793
f1_score_avg: 0.58377
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
C = 0.004762
tol = 0.000010


[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed:  1.5min finished
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 20 concurrent workers.


convergence after 613 epochs took 922 seconds
done in 922.088s.
num_0: 4431
num_1: 1739
num_2: 3417
num_3: 5413
total: 15000
accuracy：0.58813
precision_avg: 0.60966
recall_avg: 0.58813
f1_score_avg: 0.58400
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
C = 0.047619
tol = 0.100000


[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed: 15.4min finished
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 20 concurrent workers.


convergence after 3 epochs took 5 seconds
done in 5.036s.
num_0: 4431
num_1: 1739
num_2: 3417
num_3: 5413
total: 15000
accuracy：0.56820
precision_avg: 0.59316
recall_avg: 0.56820
f1_score_avg: 0.56649
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
C = 0.047619
tol = 0.010000


[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed:    5.0s finished
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 20 concurrent workers.


convergence after 10 epochs took 12 seconds
done in 11.360s.
num_0: 4431
num_1: 1739
num_2: 3417
num_3: 5413
total: 15000
accuracy：0.58420
precision_avg: 0.60419
recall_avg: 0.58420
f1_score_avg: 0.58145
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
C = 0.047619
tol = 0.001000


[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed:   11.3s finished
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 20 concurrent workers.


convergence after 35 epochs took 63 seconds
done in 63.646s.
num_0: 4431
num_1: 1739
num_2: 3417
num_3: 5413
total: 15000
accuracy：0.58480
precision_avg: 0.60520
recall_avg: 0.58480
f1_score_avg: 0.58223
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
C = 0.047619
tol = 0.000100


[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed:  1.1min finished
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 20 concurrent workers.


convergence after 62 epochs took 124 seconds
done in 124.642s.
num_0: 4431
num_1: 1739
num_2: 3417
num_3: 5413
total: 15000
accuracy：0.58493
precision_avg: 0.60538
recall_avg: 0.58493
f1_score_avg: 0.58233
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
C = 0.047619
tol = 0.000010


[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed:  2.1min finished
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 20 concurrent workers.


convergence after 757 epochs took 1728 seconds
done in 1728.425s.
num_0: 4431
num_1: 1739
num_2: 3417
num_3: 5413
total: 15000
accuracy：0.58587
precision_avg: 0.60555
recall_avg: 0.58587
f1_score_avg: 0.58339
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
C = 0.476190
tol = 0.100000


[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed: 28.8min finished
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 20 concurrent workers.


convergence after 3 epochs took 8 seconds
done in 7.950s.
num_0: 4431
num_1: 1739
num_2: 3417
num_3: 5413
total: 15000
accuracy：0.56660
precision_avg: 0.59159
recall_avg: 0.56660
f1_score_avg: 0.56507
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
C = 0.476190
tol = 0.010000


[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed:    7.9s finished
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 20 concurrent workers.


convergence after 10 epochs took 20 seconds
done in 20.607s.
num_0: 4431
num_1: 1739
num_2: 3417
num_3: 5413
total: 15000
accuracy：0.58347
precision_avg: 0.60335
recall_avg: 0.58347
f1_score_avg: 0.58088
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
C = 0.476190
tol = 0.001000


[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed:   20.5s finished
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 20 concurrent workers.


convergence after 17 epochs took 32 seconds
done in 31.340s.
num_0: 4431
num_1: 1739
num_2: 3417
num_3: 5413
total: 15000
accuracy：0.58380
precision_avg: 0.60426
recall_avg: 0.58380
f1_score_avg: 0.58151
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
C = 0.476190
tol = 0.000100


[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed:   31.3s finished
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 20 concurrent workers.


convergence after 339 epochs took 705 seconds
done in 705.285s.
num_0: 4431
num_1: 1739
num_2: 3417
num_3: 5413
total: 15000
accuracy：0.58553
precision_avg: 0.60530
recall_avg: 0.58553
f1_score_avg: 0.58303
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
C = 0.476190
tol = 0.000010


[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed: 11.8min finished
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 20 concurrent workers.


convergence after 712 epochs took 1566 seconds
done in 1565.960s.
num_0: 4431
num_1: 1739
num_2: 3417
num_3: 5413
total: 15000
accuracy：0.58580
precision_avg: 0.60527
recall_avg: 0.58580
f1_score_avg: 0.58339
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
C = 4.761905
tol = 0.100000


[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed: 26.1min finished
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 20 concurrent workers.


convergence after 3 epochs took 8 seconds
done in 8.057s.
num_0: 4431
num_1: 1739
num_2: 3417
num_3: 5413
total: 15000
accuracy：0.56660
precision_avg: 0.59144
recall_avg: 0.56660
f1_score_avg: 0.56503
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
C = 4.761905
tol = 0.010000


[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed:    8.0s finished
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 20 concurrent workers.


convergence after 10 epochs took 26 seconds
done in 25.931s.
num_0: 4431
num_1: 1739
num_2: 3417
num_3: 5413
total: 15000
accuracy：0.58333
precision_avg: 0.60317
recall_avg: 0.58333
f1_score_avg: 0.58079
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
C = 4.761905
tol = 0.001000


[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed:   25.9s finished
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 20 concurrent workers.


convergence after 17 epochs took 37 seconds
done in 37.359s.
num_0: 4431
num_1: 1739
num_2: 3417
num_3: 5413
total: 15000
accuracy：0.58347
precision_avg: 0.60415
recall_avg: 0.58347
f1_score_avg: 0.58129
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
C = 4.761905
tol = 0.000100


[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed:   37.3s finished
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 20 concurrent workers.


convergence after 126 epochs took 198 seconds
done in 197.549s.
num_0: 4431
num_1: 1739
num_2: 3417
num_3: 5413
total: 15000
accuracy：0.58433
precision_avg: 0.60421
recall_avg: 0.58433
f1_score_avg: 0.58183
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
C = 4.761905
tol = 0.000010


[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed:  3.3min finished
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 20 concurrent workers.


convergence after 3439 epochs took 7789 seconds
done in 7789.094s.
num_0: 4431
num_1: 1739
num_2: 3417
num_3: 5413
total: 15000
accuracy：0.58553
precision_avg: 0.60537
recall_avg: 0.58553
f1_score_avg: 0.58336
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>


[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed: 129.8min finished


In [11]:
# 最优值取在边界，需进一步确定
C_range = [10*50.0/105000] 
tol_range = [0.000001, 0.0000001]

for C in C_range:
    for tol in tol_range:
        print('C = %f' % C)
        print('tol = %f' % tol)
        clf = LogisticRegression(
                                    penalty = 'elasticnet',
                                    tol = tol,
                                    C = C,
                                    class_weight = 'balanced',
                                    random_state = 2020,
                                    solver = 'saga',
                                    # max_iter=100, # C =  10.0* 50.0/105000, max_iter reached after 41 seconds
                                    # max_iter= 1000, # C =  10.0* 50.0/105000, max_iter reached after 404 seconds
                                    max_iter= 10000,
                                    multi_class = 'multinomial',
                                    verbose = 1,
                                    n_jobs = -1,
                                    l1_ratio = 0.5  
                                )
        t0 = time( )
        clf.fit(train_x, train_y)
        print("done in %0.3fs." % (time() - t0))
        predict_y = clf.predict( valid_x)
        # predict_prob_y = clf.predict_proba(valid_x)
        # 评价指标值
        accuracy, precision_avg, recall_avg, f1_score_avg = eval_p_r_f1( valid_y, predict_y )
        print('accuracy：%0.5f' % accuracy)
        print('precision_avg: %0.5f' % precision_avg)
        print('recall_avg: %0.5f' % recall_avg)
        print('f1_score_avg: %0.5f' % f1_score_avg)
        print('>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>')

C = 0.004762
tol = 0.000001


[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 20 concurrent workers.


convergence after 1151 epochs took 1739 seconds
done in 1738.441s.
num_0: 4431
num_1: 1739
num_2: 3417
num_3: 5413
total: 15000
accuracy：0.58787
precision_avg: 0.60932
recall_avg: 0.58787
f1_score_avg: 0.58376
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
C = 0.004762
tol = 0.000000


[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed: 29.0min finished
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 20 concurrent workers.


convergence after 1698 epochs took 2577 seconds
done in 2577.215s.
num_0: 4431
num_1: 1739
num_2: 3417
num_3: 5413
total: 15000
accuracy：0.58800
precision_avg: 0.60941
recall_avg: 0.58800
f1_score_avg: 0.58387
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>


[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed: 43.0min finished


In [10]:
C_range = [0.1*50.0/105000, 50.0/105000, 10*50.0/105000, 100*50.0/105000, 1000*50.0/105000, 10000*50.0/105000] 

tol_range = [0.1, 0.01, 0.001, 0.0001, 0.00001]

for C in C_range:
    for tol in tol_range:
        print('C = %f' % C)
        print('tol = %f' % tol)
        clf = LogisticRegression(
                                    penalty = 'elasticnet',
                                    tol = tol,
                                    C = C,
                                    class_weight = 'balanced',
                                    random_state = 2020,
                                    solver = 'saga',
                                    # max_iter=100, # C =  10.0* 50.0/105000, max_iter reached after 41 seconds
                                    # max_iter= 1000, # C =  10.0* 50.0/105000, max_iter reached after 404 seconds
                                    max_iter= 10000,
                                    multi_class = 'multinomial',
                                    verbose = 1,
                                    n_jobs = -1,
                                    l1_ratio = 0.5  
                                )
        t0 = time()
        clf.fit(train_x, train_y)
        print("done in %0.3fs." % (time() - t0))
        predict_y = clf.predict(valid_x)
        # predict_prob_y = clf.predict_proba(valid_x)
        # 评价指标值
        accuracy, precision_avg, recall_avg, f1_score_avg = eval_p_r_f1(valid_y, predict_y)
        print('accuracy：%0.5f' % accuracy)
        print('precision_avg: %0.5f' % precision_avg)
        print('recall_avg: %0.5f' % recall_avg)
        print('f1_score_avg: %0.5f' % f1_score_avg)
        print('>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>')

C = 0.000048
tol = 0.100000


[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 20 concurrent workers.


convergence after 5 epochs took 1 seconds
done in 0.825s.
num_0: 5400
num_1: 1214
num_2: 1419
num_3: 6967
total: 15000
accuracy：0.52853
precision_avg: 0.51055
recall_avg: 0.52853
f1_score_avg: 0.46027
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
C = 0.000048
tol = 0.010000


[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed:    0.8s finished
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 20 concurrent workers.


convergence after 8 epochs took 1 seconds
done in 1.220s.
num_0: 5400
num_1: 1214
num_2: 1419
num_3: 6967
total: 15000
accuracy：0.53240
precision_avg: 0.50349
recall_avg: 0.53240
f1_score_avg: 0.46330
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
C = 0.000048
tol = 0.001000


[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed:    1.1s finished
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 20 concurrent workers.


convergence after 9 epochs took 1 seconds
done in 1.320s.
num_0: 5400
num_1: 1214
num_2: 1419
num_3: 6967
total: 15000
accuracy：0.53240
precision_avg: 0.50349
recall_avg: 0.53240
f1_score_avg: 0.46330
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
C = 0.000048
tol = 0.000100


[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed:    1.3s finished
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 20 concurrent workers.


convergence after 9 epochs took 1 seconds
done in 1.319s.
num_0: 5400
num_1: 1214
num_2: 1419
num_3: 6967
total: 15000
accuracy：0.53240
precision_avg: 0.50349
recall_avg: 0.53240
f1_score_avg: 0.46330
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
C = 0.000048
tol = 0.000010


[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed:    1.3s finished
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 20 concurrent workers.


convergence after 9 epochs took 1 seconds
done in 1.219s.
num_0: 5400
num_1: 1214
num_2: 1419
num_3: 6967
total: 15000
accuracy：0.53240
precision_avg: 0.50349
recall_avg: 0.53240
f1_score_avg: 0.46330
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
C = 0.000476
tol = 0.100000


[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed:    1.2s finished
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 20 concurrent workers.


convergence after 2 epochs took 0 seconds
done in 0.414s.
num_0: 5400
num_1: 1214
num_2: 1419
num_3: 6967
total: 15000
accuracy：0.61880
precision_avg: 0.66712
recall_avg: 0.61880
f1_score_avg: 0.62599
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
C = 0.000476
tol = 0.010000


[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed:    0.4s finished
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 20 concurrent workers.


convergence after 8 epochs took 1 seconds
done in 1.619s.
num_0: 5400
num_1: 1214
num_2: 1419
num_3: 6967
total: 15000
accuracy：0.60007
precision_avg: 0.67287
recall_avg: 0.60007
f1_score_avg: 0.60869
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
C = 0.000476
tol = 0.001000


[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed:    1.5s finished
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 20 concurrent workers.


convergence after 12 epochs took 3 seconds
done in 2.823s.
num_0: 5400
num_1: 1214
num_2: 1419
num_3: 6967
total: 15000
accuracy：0.60020
precision_avg: 0.67301
recall_avg: 0.60020
f1_score_avg: 0.60880
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
C = 0.000476
tol = 0.000100


[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed:    2.7s finished
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 20 concurrent workers.


convergence after 111 epochs took 32 seconds
done in 32.242s.
num_0: 5400
num_1: 1214
num_2: 1419
num_3: 6967
total: 15000
accuracy：0.59953
precision_avg: 0.67247
recall_avg: 0.59953
f1_score_avg: 0.60721
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
C = 0.000476
tol = 0.000010


[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed:   32.2s finished
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 20 concurrent workers.


convergence after 388 epochs took 115 seconds
done in 114.694s.
num_0: 5400
num_1: 1214
num_2: 1419
num_3: 6967
total: 15000
accuracy：0.59787
precision_avg: 0.67270
recall_avg: 0.59787
f1_score_avg: 0.60545
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
C = 0.004762
tol = 0.100000


[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed:  1.9min finished
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 20 concurrent workers.


convergence after 3 epochs took 1 seconds
done in 1.422s.
num_0: 5400
num_1: 1214
num_2: 1419
num_3: 6967
total: 15000
accuracy：0.59367
precision_avg: 0.68083
recall_avg: 0.59367
f1_score_avg: 0.62069
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
C = 0.004762
tol = 0.010000


[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed:    1.3s finished
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 20 concurrent workers.


convergence after 12 epochs took 8 seconds
done in 8.039s.
num_0: 5400
num_1: 1214
num_2: 1419
num_3: 6967
total: 15000
accuracy：0.60840
precision_avg: 0.68430
recall_avg: 0.60840
f1_score_avg: 0.62863
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
C = 0.004762
tol = 0.001000


[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed:    8.0s finished
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 20 concurrent workers.


convergence after 15 epochs took 12 seconds
done in 12.567s.
num_0: 5400
num_1: 1214
num_2: 1419
num_3: 6967
total: 15000
accuracy：0.60767
precision_avg: 0.68381
recall_avg: 0.60767
f1_score_avg: 0.62797
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
C = 0.004762
tol = 0.000100


[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed:   12.5s finished
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 20 concurrent workers.


convergence after 38 epochs took 50 seconds
done in 49.983s.
num_0: 5400
num_1: 1214
num_2: 1419
num_3: 6967
total: 15000
accuracy：0.60793
precision_avg: 0.68404
recall_avg: 0.60793
f1_score_avg: 0.62805
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
C = 0.004762
tol = 0.000010


[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed:   49.9s finished
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 20 concurrent workers.


convergence after 576 epochs took 908 seconds
done in 907.693s.
num_0: 5400
num_1: 1214
num_2: 1419
num_3: 6967
total: 15000
accuracy：0.60767
precision_avg: 0.68469
recall_avg: 0.60767
f1_score_avg: 0.62757
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
C = 0.047619
tol = 0.100000


[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed: 15.1min finished
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 20 concurrent workers.


convergence after 3 epochs took 6 seconds
done in 6.040s.
num_0: 5400
num_1: 1214
num_2: 1419
num_3: 6967
total: 15000
accuracy：0.56807
precision_avg: 0.66871
recall_avg: 0.56807
f1_score_avg: 0.60115
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
C = 0.047619
tol = 0.010000


[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed:    5.9s finished
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 20 concurrent workers.


convergence after 12 epochs took 15 seconds
done in 14.683s.
num_0: 5400
num_1: 1214
num_2: 1419
num_3: 6967
total: 15000
accuracy：0.60507
precision_avg: 0.68322
recall_avg: 0.60507
f1_score_avg: 0.62768
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
C = 0.047619
tol = 0.001000


[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed:   14.7s finished
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 20 concurrent workers.


convergence after 102 epochs took 212 seconds
done in 212.561s.
num_0: 5400
num_1: 1214
num_2: 1419
num_3: 6967
total: 15000
accuracy：0.60553
precision_avg: 0.68424
recall_avg: 0.60553
f1_score_avg: 0.62794
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
C = 0.047619
tol = 0.000100


[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed:  3.5min finished
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 20 concurrent workers.


convergence after 104 epochs took 218 seconds
done in 217.780s.
num_0: 5400
num_1: 1214
num_2: 1419
num_3: 6967
total: 15000
accuracy：0.60553
precision_avg: 0.68423
recall_avg: 0.60553
f1_score_avg: 0.62794
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
C = 0.047619
tol = 0.000010


[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed:  3.6min finished
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 20 concurrent workers.


convergence after 620 epochs took 1412 seconds
done in 1411.707s.
num_0: 5400
num_1: 1214
num_2: 1419
num_3: 6967
total: 15000
accuracy：0.60420
precision_avg: 0.68336
recall_avg: 0.60420
f1_score_avg: 0.62682
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
C = 0.476190
tol = 0.100000


[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed: 23.5min finished
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 20 concurrent workers.


convergence after 3 epochs took 8 seconds
done in 8.044s.
num_0: 5400
num_1: 1214
num_2: 1419
num_3: 6967
total: 15000
accuracy：0.56467
precision_avg: 0.66808
recall_avg: 0.56467
f1_score_avg: 0.59889
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
C = 0.476190
tol = 0.010000


[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed:    8.0s finished
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 20 concurrent workers.


convergence after 13 epochs took 28 seconds
done in 28.420s.
num_0: 5400
num_1: 1214
num_2: 1419
num_3: 6967
total: 15000
accuracy：0.60327
precision_avg: 0.68458
recall_avg: 0.60327
f1_score_avg: 0.62668
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
C = 0.476190
tol = 0.001000


[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed:   28.4s finished
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 20 concurrent workers.


convergence after 25 epochs took 45 seconds
done in 45.689s.
num_0: 5400
num_1: 1214
num_2: 1419
num_3: 6967
total: 15000
accuracy：0.60440
precision_avg: 0.68397
recall_avg: 0.60440
f1_score_avg: 0.62738
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
C = 0.476190
tol = 0.000100


[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed:   45.6s finished
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 20 concurrent workers.


convergence after 958 epochs took 2170 seconds
done in 2169.847s.
num_0: 5400
num_1: 1214
num_2: 1419
num_3: 6967
total: 15000
accuracy：0.60260
precision_avg: 0.68230
recall_avg: 0.60260
f1_score_avg: 0.62556
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
C = 0.476190
tol = 0.000010


[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed: 36.2min finished
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 20 concurrent workers.


convergence after 959 epochs took 2177 seconds
done in 2177.085s.
num_0: 5400
num_1: 1214
num_2: 1419
num_3: 6967
total: 15000
accuracy：0.60260
precision_avg: 0.68230
recall_avg: 0.60260
f1_score_avg: 0.62556
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
C = 4.761905
tol = 0.100000


[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed: 36.3min finished
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 20 concurrent workers.


convergence after 3 epochs took 8 seconds
done in 8.251s.
num_0: 5400
num_1: 1214
num_2: 1419
num_3: 6967
total: 15000
accuracy：0.56440
precision_avg: 0.66778
recall_avg: 0.56440
f1_score_avg: 0.59867
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
C = 4.761905
tol = 0.010000


[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed:    8.2s finished
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 20 concurrent workers.


convergence after 13 epochs took 34 seconds
done in 34.457s.
num_0: 5400
num_1: 1214
num_2: 1419
num_3: 6967
total: 15000
accuracy：0.60320
precision_avg: 0.68459
recall_avg: 0.60320
f1_score_avg: 0.62666
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
C = 4.761905
tol = 0.001000


[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed:   34.4s finished
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 20 concurrent workers.


convergence after 24 epochs took 57 seconds
done in 56.959s.
num_0: 5400
num_1: 1214
num_2: 1419
num_3: 6967
total: 15000
accuracy：0.60453
precision_avg: 0.68419
recall_avg: 0.60453
f1_score_avg: 0.62754
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
C = 4.761905
tol = 0.000100


[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed:   56.9s finished
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 20 concurrent workers.


convergence after 76 epochs took 131 seconds
done in 130.969s.
num_0: 5400
num_1: 1214
num_2: 1419
num_3: 6967
total: 15000
accuracy：0.60393
precision_avg: 0.68337
recall_avg: 0.60393
f1_score_avg: 0.62685
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
C = 4.761905
tol = 0.000010


[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed:  2.2min finished
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 20 concurrent workers.


convergence after 9530 epochs took 22740 seconds
done in 22740.453s.
num_0: 5400
num_1: 1214
num_2: 1419
num_3: 6967
total: 15000
accuracy：0.60213
precision_avg: 0.68186
recall_avg: 0.60213
f1_score_avg: 0.62515
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>


[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed: 379.0min finished


In [None]:
C_range = [0.1*50.0/105000, 50.0/105000, 10*50.0/105000, 100*50.0/105000, 1000*50.0/105000, 10000*50.0/105000] 

tol_range = [0.1, 0.01, 0.001, 0.0001, 0.00001]

for C in C_range:
    for tol in tol_range:
        print('C = %f' % C)
        print('tol = %f' % tol)
        clf = LogisticRegression(
                                    penalty = 'elasticnet',
                                    tol = tol,
                                    C = C,
                                    class_weight = 'balanced',
                                    random_state = 2020,
                                    solver = 'saga',
                                    # max_iter=100, # C =  10.0* 50.0/105000, max_iter reached after 41 seconds
                                    # max_iter= 1000, # C =  10.0* 50.0/105000, max_iter reached after 404 seconds
                                    max_iter= 10000,
                                    multi_class = 'multinomial',
                                    verbose = 1,
                                    n_jobs = -1,
                                    l1_ratio = 0.5  
                                )
        t0 = time()
        clf.fit(train_x, train_y)
        print("done in %0.3fs." % (time() - t0))
        predict_y = clf.predict(valid_x)
        # predict_prob_y = clf.predict_proba(valid_x)
        # 评价指标值
        accuracy, precision_avg, recall_avg, f1_score_avg = eval_p_r_f1(valid_y, predict_y)
        print('accuracy：%0.5f' % accuracy)
        print('precision_avg: %0.5f' % precision_avg)
        print('recall_avg: %0.5f' % recall_avg)
        print('f1_score_avg: %0.5f' % f1_score_avg)
        print('>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>')

C = 0.000048
tol = 0.100000


[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 20 concurrent workers.


convergence after 4 epochs took 1 seconds
done in 0.725s.
num_0: 475
num_1: 1516
num_2: 3251
num_3: 9758
total: 15000
accuracy：0.22573
precision_avg: 0.51817
recall_avg: 0.22573
f1_score_avg: 0.26535
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
C = 0.000048
tol = 0.010000


[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed:    0.6s finished
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 20 concurrent workers.


convergence after 8 epochs took 1 seconds
done in 1.220s.
num_0: 475
num_1: 1516
num_2: 3251
num_3: 9758
total: 15000
accuracy：0.34533
precision_avg: 0.47903
recall_avg: 0.34533
f1_score_avg: 0.37995
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
C = 0.000048
tol = 0.001000


[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed:    1.2s finished
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 20 concurrent workers.


convergence after 12 epochs took 2 seconds
done in 1.821s.
num_0: 475
num_1: 1516
num_2: 3251
num_3: 9758
total: 15000
accuracy：0.34513
precision_avg: 0.47900
recall_avg: 0.34513
f1_score_avg: 0.37980
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
C = 0.000048
tol = 0.000100


[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed:    1.7s finished
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 20 concurrent workers.


convergence after 15 epochs took 2 seconds
done in 2.124s.
num_0: 475
num_1: 1516
num_2: 3251
num_3: 9758
total: 15000
accuracy：0.34513
precision_avg: 0.47916
recall_avg: 0.34513
f1_score_avg: 0.37982
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
C = 0.000048
tol = 0.000010


[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed:    2.1s finished
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 20 concurrent workers.


convergence after 283 epochs took 38 seconds
done in 38.063s.
num_0: 475
num_1: 1516
num_2: 3251
num_3: 9758
total: 15000
accuracy：0.34353
precision_avg: 0.47925
recall_avg: 0.34353
f1_score_avg: 0.37870
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
C = 0.000476
tol = 0.100000


[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed:   38.0s finished
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 20 concurrent workers.


convergence after 4 epochs took 0 seconds
done in 0.716s.
num_0: 475
num_1: 1516
num_2: 3251
num_3: 9758
total: 15000
accuracy：0.54213
precision_avg: 0.66600
recall_avg: 0.54213
f1_score_avg: 0.58105
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
C = 0.000476
tol = 0.010000


[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed:    0.7s finished
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 20 concurrent workers.


convergence after 10 epochs took 3 seconds
done in 2.725s.
num_0: 475
num_1: 1516
num_2: 3251
num_3: 9758
total: 15000
accuracy：0.53720
precision_avg: 0.66685
recall_avg: 0.53720
f1_score_avg: 0.57783
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
C = 0.000476
tol = 0.001000


[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed:    2.6s finished
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 20 concurrent workers.


convergence after 14 epochs took 4 seconds
done in 4.334s.
num_0: 475
num_1: 1516
num_2: 3251
num_3: 9758
total: 15000
accuracy：0.53667
precision_avg: 0.66718
recall_avg: 0.53667
f1_score_avg: 0.57749
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
C = 0.000476
tol = 0.000100


[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed:    4.2s finished
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 20 concurrent workers.


convergence after 159 epochs took 64 seconds
done in 63.847s.
num_0: 475
num_1: 1516
num_2: 3251
num_3: 9758
total: 15000
accuracy：0.53200
precision_avg: 0.66705
recall_avg: 0.53200
f1_score_avg: 0.57422
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
C = 0.000476
tol = 0.000010


[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed:  1.1min finished
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 20 concurrent workers.


convergence after 515 epochs took 210 seconds
done in 209.378s.
num_0: 475
num_1: 1516
num_2: 3251
num_3: 9758
total: 15000
accuracy：0.53013
precision_avg: 0.66810
recall_avg: 0.53013
f1_score_avg: 0.57318
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
C = 0.004762
tol = 0.100000


[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed:  3.5min finished
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 20 concurrent workers.


convergence after 2554 epochs took 1448 seconds
done in 1448.637s.
num_0: 475
num_1: 1516
num_2: 3251
num_3: 9758
total: 15000
accuracy：0.56560
precision_avg: 0.68516
recall_avg: 0.56560
f1_score_avg: 0.60143
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
C = 0.004762
tol = 0.010000


[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed: 24.1min finished
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 20 concurrent workers.


max_iter reached after 5701 seconds
done in 5700.652s.
num_0: 475
num_1: 1516
num_2: 3251
num_3: 9758
total: 15000
accuracy：0.58533
precision_avg: 0.68151
recall_avg: 0.58533
f1_score_avg: 0.61591
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
C = 0.004762
tol = 0.001000


[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed: 95.0min finished
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 20 concurrent workers.


max_iter reached after 5729 seconds
done in 5729.456s.
num_0: 475
num_1: 1516
num_2: 3251
num_3: 9758
total: 15000
accuracy：0.58533
precision_avg: 0.68151
recall_avg: 0.58533
f1_score_avg: 0.61591
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
C = 0.004762
tol = 0.000100


[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed: 95.5min finished
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 20 concurrent workers.


max_iter reached after 5720 seconds
done in 5719.995s.
num_0: 475
num_1: 1516
num_2: 3251
num_3: 9758
total: 15000
accuracy：0.58533
precision_avg: 0.68151
recall_avg: 0.58533
f1_score_avg: 0.61591
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
C = 0.004762
tol = 0.000010


[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed: 95.3min finished
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 20 concurrent workers.


max_iter reached after 5620 seconds
done in 5620.695s.
num_0: 475
num_1: 1516
num_2: 3251
num_3: 9758
total: 15000
accuracy：0.58533
precision_avg: 0.68151
recall_avg: 0.58533
f1_score_avg: 0.61591
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
C = 0.047619
tol = 0.100000


[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed: 93.7min finished
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 20 concurrent workers.


max_iter reached after 19137 seconds
done in 19136.864s.
num_0: 475
num_1: 1516
num_2: 3251
num_3: 9758
total: 15000
accuracy：0.54240
precision_avg: 0.67081
recall_avg: 0.54240
f1_score_avg: 0.57871
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
C = 0.047619
tol = 0.010000


[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed: 318.9min finished
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 20 concurrent workers.


max_iter reached after 19136 seconds
done in 19136.253s.
num_0: 475
num_1: 1516
num_2: 3251
num_3: 9758
total: 15000
accuracy：0.54240
precision_avg: 0.67081
recall_avg: 0.54240
f1_score_avg: 0.57871
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
C = 0.047619
tol = 0.001000


[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed: 318.9min finished
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 20 concurrent workers.


max_iter reached after 19152 seconds
done in 19152.993s.
num_0: 475
num_1: 1516
num_2: 3251
num_3: 9758
total: 15000
accuracy：0.54240
precision_avg: 0.67081
recall_avg: 0.54240
f1_score_avg: 0.57871
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
C = 0.047619
tol = 0.000100


[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed: 319.2min finished
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 20 concurrent workers.


max_iter reached after 19217 seconds
done in 19217.210s.
num_0: 475
num_1: 1516
num_2: 3251
num_3: 9758
total: 15000
accuracy：0.54240
precision_avg: 0.67081
recall_avg: 0.54240
f1_score_avg: 0.57871
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
C = 0.047619
tol = 0.000010


[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed: 320.3min finished
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 20 concurrent workers.


In [10]:
C_range = [100*50.0/105000, 1000*50.0/105000, 10000*50.0/105000] 

tol_range = [0.1, 0.01, 0.001, 0.0001, 0.00001]

for C in C_range:
    for tol in tol_range:
        print('C = %f' % C)
        print('tol = %f' % tol)
        clf = LogisticRegression(
                                    penalty = 'elasticnet',
                                    tol = tol,
                                    C = C,
                                    class_weight = 'balanced',
                                    random_state = 2020,
                                    solver = 'saga',
                                    # max_iter=100, # C =  10.0* 50.0/105000, max_iter reached after 41 seconds
                                    # max_iter= 1000, # C =  10.0* 50.0/105000, max_iter reached after 404 seconds
                                    max_iter= 10000,
                                    multi_class = 'multinomial',
                                    verbose = 1,
                                    n_jobs = -1,
                                    l1_ratio = 0.5  
                                )
        t0 = time()
        clf.fit(train_x, train_y)
        print("done in %0.3fs." % (time() - t0))
        predict_y = clf.predict(valid_x)
        # predict_prob_y = clf.predict_proba(valid_x)
        # 评价指标值
        accuracy, precision_avg, recall_avg, f1_score_avg = eval_p_r_f1(valid_y, predict_y)
        print('accuracy：%0.5f' % accuracy)
        print('precision_avg: %0.5f' % precision_avg)
        print('recall_avg: %0.5f' % recall_avg)
        print('f1_score_avg: %0.5f' % f1_score_avg)
        print('>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>')

C = 0.047619
tol = 0.100000


[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 10 concurrent workers.


max_iter reached after 21861 seconds
done in 21861.367s.
num_0: 475
num_1: 1516
num_2: 3251
num_3: 9758
total: 15000
accuracy：0.54240
precision_avg: 0.67081
recall_avg: 0.54240
f1_score_avg: 0.57871
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
C = 0.047619
tol = 0.010000


[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed: 364.4min finished
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 10 concurrent workers.


max_iter reached after 21835 seconds
done in 21835.335s.
num_0: 475
num_1: 1516
num_2: 3251
num_3: 9758
total: 15000
accuracy：0.54240
precision_avg: 0.67081
recall_avg: 0.54240
f1_score_avg: 0.57871
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
C = 0.047619
tol = 0.001000


[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed: 363.9min finished
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 10 concurrent workers.


max_iter reached after 21830 seconds
done in 21830.636s.
num_0: 475
num_1: 1516
num_2: 3251
num_3: 9758
total: 15000
accuracy：0.54240
precision_avg: 0.67081
recall_avg: 0.54240
f1_score_avg: 0.57871
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
C = 0.047619
tol = 0.000100


[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed: 363.8min finished
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 10 concurrent workers.


max_iter reached after 21821 seconds
done in 21820.354s.
num_0: 475
num_1: 1516
num_2: 3251
num_3: 9758
total: 15000
accuracy：0.54240
precision_avg: 0.67081
recall_avg: 0.54240
f1_score_avg: 0.57871
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
C = 0.047619
tol = 0.000010


[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed: 363.7min finished
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 10 concurrent workers.


max_iter reached after 21819 seconds
done in 21819.508s.
num_0: 475
num_1: 1516
num_2: 3251
num_3: 9758
total: 15000
accuracy：0.54240
precision_avg: 0.67081
recall_avg: 0.54240
f1_score_avg: 0.57871
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
C = 0.476190
tol = 0.100000


[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed: 363.7min finished
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 10 concurrent workers.


max_iter reached after 29574 seconds
done in 29574.228s.
num_0: 475
num_1: 1516
num_2: 3251
num_3: 9758
total: 15000
accuracy：0.51813
precision_avg: 0.65924
recall_avg: 0.51813
f1_score_avg: 0.55698
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
C = 0.476190
tol = 0.010000


[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed: 492.9min finished
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 10 concurrent workers.


max_iter reached after 29590 seconds
done in 29589.787s.
num_0: 475
num_1: 1516
num_2: 3251
num_3: 9758
total: 15000
accuracy：0.51813
precision_avg: 0.65924
recall_avg: 0.51813
f1_score_avg: 0.55698
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
C = 0.476190
tol = 0.001000


[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed: 493.2min finished
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 10 concurrent workers.


max_iter reached after 29613 seconds
done in 29612.414s.
num_0: 475
num_1: 1516
num_2: 3251
num_3: 9758
total: 15000
accuracy：0.51813
precision_avg: 0.65924
recall_avg: 0.51813
f1_score_avg: 0.55698
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
C = 0.476190
tol = 0.000100


[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed: 493.5min finished
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 10 concurrent workers.


max_iter reached after 29616 seconds
done in 29615.912s.
num_0: 475
num_1: 1516
num_2: 3251
num_3: 9758
total: 15000
accuracy：0.51813
precision_avg: 0.65924
recall_avg: 0.51813
f1_score_avg: 0.55698
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
C = 0.476190
tol = 0.000010


[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed: 493.6min finished
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 10 concurrent workers.


max_iter reached after 29569 seconds
done in 29569.819s.
num_0: 475
num_1: 1516
num_2: 3251
num_3: 9758
total: 15000
accuracy：0.51813
precision_avg: 0.65924
recall_avg: 0.51813
f1_score_avg: 0.55698
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
C = 4.761905
tol = 0.100000


[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed: 492.8min finished
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 10 concurrent workers.


max_iter reached after 30367 seconds
done in 30367.371s.
num_0: 475
num_1: 1516
num_2: 3251
num_3: 9758
total: 15000
accuracy：0.55113
precision_avg: 0.66010
recall_avg: 0.55113
f1_score_avg: 0.58416
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
C = 4.761905
tol = 0.010000


[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed: 506.1min finished
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 10 concurrent workers.


max_iter reached after 30415 seconds
done in 30415.569s.
num_0: 475
num_1: 1516
num_2: 3251
num_3: 9758
total: 15000
accuracy：0.55113
precision_avg: 0.66010
recall_avg: 0.55113
f1_score_avg: 0.58416
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
C = 4.761905
tol = 0.001000


[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed: 506.9min finished
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 10 concurrent workers.


max_iter reached after 30394 seconds
done in 30394.194s.
num_0: 475
num_1: 1516
num_2: 3251
num_3: 9758
total: 15000
accuracy：0.55113
precision_avg: 0.66010
recall_avg: 0.55113
f1_score_avg: 0.58416
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
C = 4.761905
tol = 0.000100


[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed: 506.6min finished
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 10 concurrent workers.


max_iter reached after 30383 seconds
done in 30383.028s.
num_0: 475
num_1: 1516
num_2: 3251
num_3: 9758
total: 15000
accuracy：0.55113
precision_avg: 0.66010
recall_avg: 0.55113
f1_score_avg: 0.58416
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
C = 4.761905
tol = 0.000010


[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed: 506.4min finished
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 10 concurrent workers.


max_iter reached after 30443 seconds
done in 30443.070s.
num_0: 475
num_1: 1516
num_2: 3251
num_3: 9758
total: 15000
accuracy：0.55113
precision_avg: 0.66010
recall_avg: 0.55113
f1_score_avg: 0.58416
>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>


[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed: 507.4min finished
