## 导入包

In [1]:
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from my_tools import *

In [2]:
import warnings
warnings.filterwarnings("ignore")

In [3]:
# Read the feature table and the table that is later used as the target (y)
# from the two Excel exports in the working directory.
jibing = pd.read_excel(io="./jibing_feature_final.xlsx")
jibing_res = pd.read_excel(io="./jibing_feature_res_final.xlsx")

In [4]:
# Preview the first five rows of the feature table.
jibing.head(n=5)

Unnamed: 0,左右,是否外伤,症状持续时间,明显夜间痛,年龄,高血压,高血脂,2型糖尿病,吸烟与否,饮酒与否,...,腺苷脱氨酶ADA,果糖胺,肌酸激酶,α-L-盐藻糖苷酶,乳酸,淀粉酶,同型半胱氨酸,铁,总铁结合力,血型
0,1,0,3,1,62,1,0,0,0,0,...,9.0,2.12,135.0,24.0,1.5,55.0,10.8,35.5,49.7,3
1,0,0,3,0,54,0,0,0,1,0,...,8.0,1.96,60.0,32.0,1.8,71.0,13.5,15.7,58.5,3
2,1,1,3,0,70,1,0,0,1,1,...,12.0,1.82,123.0,26.0,3.1,60.0,13.3,9.1,52.8,0
3,1,0,4,1,60,0,0,0,1,1,...,10.0,2.1,93.0,30.0,1.6,46.0,18.6,23.1,56.5,0
4,1,1,3,0,76,1,0,0,0,0,...,13.0,2.44,82.0,19.0,1.8,95.0,16.5,19.0,50.8,3


In [5]:
# Preview the first five rows of the target table.
jibing_res.head(n=5)

Unnamed: 0,结果
0,1
1,0
2,1
3,0
4,0


### 归一化

In [6]:
# Pass the feature table through the helper `guiyihua` (not defined in this
# notebook; presumably provided by the star import from my_tools).
# NOTE(review): the section heading calls this step normalization, but the
# helper's exact behaviour is not visible here -- confirm.
# The result overwrites `jibing`, so re-running this cell feeds already
# transformed data back into the helper.
jibing = guiyihua(jibing)

### 标准化

In [7]:
# Pass the feature table through the helper `biaozhunhua` (not defined in this
# notebook; presumably provided by the star import from my_tools).
# NOTE(review): the section heading calls this step standardization; the
# helper's exact behaviour is not visible here -- confirm.
# NOTE(review): in the first-row output shown below, the binary column
# "饮酒与否" comes out as -0.450418 while the neighbouring binary columns keep
# their 0/1 values -- check the helper's column selection.
# The result overwrites `jibing`, so this cell is not idempotent on re-run.
jibing = biaozhunhua(jibing)

In [8]:
# Show every column of the first row to inspect the transformed values.
jibing.iloc[0, :]

左右                1.000000
是否外伤              0.000000
症状持续时间            3.000000
明显夜间痛             1.000000
年龄                0.180789
高血压               1.000000
高血脂               0.000000
2型糖尿病             0.000000
吸烟与否              0.000000
饮酒与否             -0.450418
红细胞计数*10^12/L     0.050503
血红蛋白              1.398995
红细胞压积             0.597193
血小板计数            -1.466795
血小板压积            -0.051550
总蛋白g/L           -0.809788
白蛋白g/L           -0.540361
球蛋白g/L           -0.615103
白球比               0.049687
AST天门冬氨酸氨基转移酶     0.277485
碱性磷酸酶             0.930419
谷氨酸转肽酶            0.111575
AST:ALT          -0.127025
总胆红素              2.485844
直接胆红素             0.985923
间接胆红素             2.507003
钾                 0.295217
钠                -0.182696
氯                 0.072913
钙                -0.360614
磷                -2.187720
镁                 0.275316
葡萄糖              -0.506560
肌酐                1.817830
尿素                2.326748
尿酸                0.242501
甘油三酯             -0.187264
总

### 要调的参数
- penalty：惩罚项，可选 L1 正则化或 L2 正则化。
L2 正则化通常会使模型的参数值变小，从而使决策边界更平滑，也有助于防止过拟合，所以这里选择 L2 正则化。
- C：正则化系数的倒数，C 越小，正则化越强。
- solver：优化算法。对于 L2 正则化，可以选择 newton-cg、lbfgs、saga：

  saga:随机平均梯度下降
  
  newton-cg:类似于牛顿法
  
  lbfgs:一种拟牛顿法（有限内存 BFGS），利用梯度信息近似 Hessian 矩阵

### 确定用于网格搜索的字典

In [9]:
# Search space for the grid search: candidate values for the inverse
# regularization strength C and the solvers to compare.
param_grid = dict(
    C=[0.001, 0.01, 0.1, 1, 10, 100],
    solver=['newton-cg', 'saga', 'lbfgs'],
)

### 创建模型并进行网格搜索

In [10]:
# Base estimator with a fixed seed; the grid search evaluates every
# combination in `param_grid` with 5-fold cross-validation.
logistic_regression = LogisticRegression(random_state=30)
grid_search = GridSearchCV(logistic_regression, param_grid, cv=5)

In [11]:
# Run the cross-validated search over all parameter combinations.
# `jibing_res` is a single-column DataFrame; scikit-learn expects a 1-D target
# and otherwise emits a DataConversionWarning, so flatten it explicitly.
grid_search.fit(jibing, jibing_res.values.ravel())

GridSearchCV(cv=5, estimator=LogisticRegression(random_state=30),
             param_grid={'C': [0.001, 0.01, 0.1, 1, 10, 100],
                         'solver': ['newton-cg', 'saga', 'lbfgs']})

### 获取最佳参数
#### 网格搜索选出的求解器为 newton-cg（注意：当多个参数组合得分相同时，GridSearchCV 返回排在最前的组合，因此仅凭该结果不足以说明牛顿法更适合这个模型）

In [12]:
# Pull the winning hyper-parameters out of the fitted search and report them.
best_params = grid_search.best_params_
c, solver = best_params['C'], best_params['solver']
print("c={},solver={}".format(c, solver))

c=0.001,solver=newton-cg


In [13]:
# Build a fresh classifier configured with the hyper-parameters selected above.
clf = LogisticRegression(random_state=30, C=c, solver=solver)

In [14]:
# Hold out 20% of the rows for evaluation. The split is seeded so the reported
# metrics are reproducible after a kernel restart.
# NOTE(review): C and solver were selected by a grid search fitted on the full
# dataset, and the features were rescaled before this split, so the held-out
# rows have already influenced the model; the scores below are likely
# optimistic. Splitting before scaling and tuning would avoid this.
Xtrain, Xtest, Ytrain, Ytest = train_test_split(
    jibing, jibing_res, test_size=0.2, random_state=30
)

# Flatten the single-column target to the 1-D shape scikit-learn expects.
clf.fit(Xtrain, Ytrain.values.ravel())
y_pre = clf.predict(Xtest)

# `res_metrics` is a project helper; the output below shows it returning
# precision, recall and f1.
metrics_ = res_metrics(Ytest, y_pre)
metrics_

{'precision': 0.8427672955974843, 'recall': 1.0, 'f1': 0.9146757679180888}