-
Notifications
You must be signed in to change notification settings - Fork 5
/
sklearn_gcv.py
118 lines (94 loc) · 3.26 KB
/
sklearn_gcv.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
# @Time : 2018/11/18 18:46
# @Author : Spareribs
# @File : sklearn_gcv.py
# @Software: PyCharm
"""
import pickle
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn_config import features_path, clfs, status_vali
# Suppress all warnings
import warnings
warnings.filterwarnings('ignore')
"""=====================================================================================================================
1 Load the data
"""
# Load the pickled object at features_path and unpack it into the two
# values passed to every grid search below (x_train, y_train).
# NOTE(review): pickle.load can execute arbitrary code while unpickling;
# only point features_path at a file produced by a trusted source.
# The context manager closes the file even if unpickling raises.
with open(features_path, 'rb') as data_fp:
    x_train, y_train = pickle.load(data_fp)
"""=====================================================================================================================
2 Logistic regression - use grid search to find the best parameters
"""
# Grid search over the "lr" estimator from clfs, scored by F1.
# The grid below has 4 * 2 * 3 = 24 parameter combinations.
# NOTE(review): presumably clfs["lr"] is a LogisticRegression; 'l1' is only
# accepted by some of its solvers -- confirm the configured solver supports it.
# NOTE(review): warnings are suppressed globally earlier in this file, so any
# warning raised for a failed candidate fit would not be shown -- verify.
clf_name = "lr"
clf = clfs[clf_name]
param_grid = dict(
    C=[0.05, 0.1, 0.5, 1.5],
    penalty=['l1', 'l2'],
    tol=[1e-4, 1e-5, 1e-6],
)
# fit() returns the search object itself, so build and fit in one expression.
grid = GridSearchCV(
    estimator=clf,
    param_grid=param_grid,
    scoring='f1',
).fit(x_train, y_train)
print("最优参数:{0}".format(grid.best_params_))
print("最好的分数{0}".format(grid.best_score_))
"""=====================================================================================================================
3 SVM - use grid search to find the best parameters
"""
# Grid search over the "svm" estimator from clfs, scored by F1.
# Only C varies (4 candidates); penalty and dual are pinned to one value each.
# NOTE(review): the penalty/dual keys suggest a LinearSVC-style estimator --
# confirm against sklearn_config.
clf_name = "svm"
clf = clfs[clf_name]
param_grid = dict(
    C=[0.05, 0.1, 0.5, 1.5],
    penalty=['l2'],
    dual=[True],
)
# fit() returns the search object itself, so build and fit in one expression.
grid = GridSearchCV(
    estimator=clf,
    param_grid=param_grid,
    scoring='f1',
).fit(x_train, y_train)
print("最优参数:{0}".format(grid.best_params_))
print("最好的分数{0}".format(grid.best_score_))
"""=====================================================================================================================
4 Random forest - use grid search to find the best parameters
"""
# Grid search over the "rf" estimator from clfs, scored by F1.
# n_estimators covers 10, 20, ..., 70 and max_depth covers 5..9, giving
# 1 * 7 * 5 * 2 = 70 parameter combinations.
clf_name = "rf"
clf = clfs[clf_name]
param_grid = dict(
    criterion=['gini'],
    n_estimators=range(10, 71, 10),
    max_depth=range(5, 10),
    class_weight=['balanced', None],
)
# fit() returns the search object itself, so build and fit in one expression.
grid = GridSearchCV(
    estimator=clf,
    param_grid=param_grid,
    scoring='f1',
).fit(x_train, y_train)
print("最优参数:{0}".format(grid.best_params_))
print("最好的分数{0}".format(grid.best_score_))
"""=====================================================================================================================
5 xgboost - use grid search to find the best parameters
"""
# Grid search over the "xgb" estimator from clfs, scored by F1.
# The grid below has 4 * 3 * 3 = 36 parameter combinations.
clf_name = "xgb"
clf = clfs[clf_name]
# 'learning_rate' used to be listed twice in this literal; a repeated key in
# a dict display silently keeps only the last value, so the duplicate was
# dropped. Both entries held the same list, so the grid is unchanged.
param_grid = {
    'max_depth': [3, 10, 30, 50],
    'learning_rate': [0.05, 0.1, 0.16],
    'n_estimators': [50, 100, 500],
}
grid = GridSearchCV(clf, param_grid, scoring='f1')
grid.fit(x_train, y_train)
print("最优参数:{0}".format(grid.best_params_))
print("最好的分数{0}".format(grid.best_score_))
"""=====================================================================================================================
6 lightgbm - use grid search to find the best parameters
"""
# Grid search over the "lgb" estimator from clfs, scored by F1.
# boosting_type is pinned; the other four keys have three candidates each,
# giving 3 ** 4 = 81 parameter combinations.
clf_name = "lgb"
clf = clfs[clf_name]
param_grid = dict(
    boosting_type=['gbdt'],
    max_depth=[5, 10, 15],
    learning_rate=[0.01, 0.05, 0.1],
    num_leaves=[30, 90, 120],
    n_estimators=[100, 200, 250],
)
# fit() returns the search object itself, so build and fit in one expression.
grid = GridSearchCV(
    estimator=clf,
    param_grid=param_grid,
    scoring='f1',
).fit(x_train, y_train)
print("最优参数:{0}".format(grid.best_params_))
print("最好的分数{0}".format(grid.best_score_))