In [36]:
from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression,SGDRegressor,Ridge,RidgeCV
from sklearn.metrics import mean_squared_error
from sklearn.externals import joblib

# 正规方程

In [2]:
# 1. Load the data
# NOTE(review): load_boston is not shipped by newer scikit-learn releases —
# confirm the scikit-learn version this notebook is pinned to.
data = load_boston()

In [4]:
# 2. Basic data processing
#    1. Hold out 20% of the samples as the test set; the fixed seed keeps the
#       split identical between runs.
x_train, x_test, y_train, y_test = train_test_split(
    data.data,
    data.target,
    test_size=0.2,
    random_state=22,
)

In [6]:
# 3. Feature engineering
#    1. Standardize the features. The scaler is fitted on the training set
#       only and then reused, unchanged, on the test set.
# Note: x_train / x_test are overwritten with their standardized versions.
transfer = StandardScaler()
x_train = transfer.fit_transform(X=x_train)
x_test = transfer.transform(X=x_test)

In [7]:
# 4. Build the model
#    1. Normal-equation approach: plain LinearRegression with default settings
estimator = LinearRegression()

In [8]:
# Train on the standardized training set; the fitted estimator is the value
# of the last expression, so the notebook displays its repr.
estimator.fit(X=x_train, y=y_train)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)

In [10]:
# 5. Model evaluation
#    Predict on the test set, then report the mean squared error together
#    with the fitted coefficients and intercept.
pre = estimator.predict(X=x_test)
print("均方误差\n", mean_squared_error(y_true=y_test, y_pred=pre))
print("回归系数\n", estimator.coef_)
print("偏置 ", estimator.intercept_)

20.765767538052206

# 梯度下降法



In [23]:
# 1. Load the data
data = load_boston()
# 2. Basic data processing
#    1. Hold out 20% of the samples as the test set (seeded split)
x_train, x_test, y_train, y_test = train_test_split(
    data.data, data.target, test_size=0.2, random_state=22
)
# 3. Feature engineering
#    1. Standardize the features: fit the scaler on the training set only and
#       reuse it for the test set
transfer = StandardScaler()
x_train = transfer.fit_transform(x_train)
x_test = transfer.transform(x_test)
# 4. Build the model
#    1. Gradient descent: SGDRegressor with a constant learning rate of 0.001.
#       SGDRegressor shuffles the training data, so random_state is fixed here;
#       without it the reported error and coefficients differ from run to run.
estimator = SGDRegressor(
    max_iter=1000,
    learning_rate="constant",
    eta0=0.001,
    random_state=22,
)
estimator.fit(x_train, y_train)
# 5. Model evaluation
#    Mean squared error on the test set, then the fitted coefficients and intercept
pre = estimator.predict(x_test)
print("均方误差\n", mean_squared_error(y_test, pre))
print("回归系数\n", estimator.coef_)
print("偏置 ", estimator.intercept_)

均方误差
 20.868134059323125
回归系数
 [-0.71076945  1.12798929 -0.13690293  0.85464758 -2.02480284  2.71818672
 -0.14310194 -3.38051822  2.5625931  -1.67712958 -1.66926933  0.91772807
 -3.78877832]
偏置  [22.57126489]


# 岭回归

In [31]:
# 1. Load the data
data = load_boston()
# 2. Basic data processing
#    1. Hold out 20% of the samples as the test set (seeded split)
x_train, x_test, y_train, y_test = train_test_split(
    data.data,
    data.target,
    test_size=0.2,
    random_state=22,
)
# 3. Feature engineering
#    1. Standardize the features: fit on the training set, reuse on the test set
transfer = StandardScaler()
x_train = transfer.fit_transform(X=x_train)
x_test = transfer.transform(X=x_test)
# 4. Build the model
#    1. Ridge regression with a fixed regularization strength (alpha=100)
estimator = Ridge(alpha=100)
estimator.fit(X=x_train, y=y_train)
# 5. Model evaluation
#    Mean squared error on the test set, then the fitted coefficients and intercept
pre = estimator.predict(X=x_test)
print("均方误差\n", mean_squared_error(y_true=y_test, y_pred=pre))
print("回归系数\n", estimator.coef_)
print("偏置 ", estimator.intercept_)

均方误差
 22.717204695248682
回归系数
 [-0.44470202  0.50601476 -0.53723366  0.86805726 -0.85263804  2.73443404
 -0.3065462  -1.60265073  0.67087182 -0.38676485 -1.32172721  0.79633395
 -2.94114521]
偏置  22.579702970297042


## 交叉验证

In [34]:
# 1. Load the data
data = load_boston()
# 2. Basic data processing
#    1. Hold out 20% of the samples as the test set (seeded split)
x_train, x_test, y_train, y_test = train_test_split(
    data.data,
    data.target,
    test_size=0.2,
    random_state=22,
)
# 3. Feature engineering
#    1. Standardize the features: fit on the training set, reuse on the test set
transfer = StandardScaler()
x_train = transfer.fit_transform(X=x_train)
x_test = transfer.transform(X=x_test)
# 4. Build the model
#    1. Ridge regression with cross-validation over the candidate alphas;
#       the selected value is exposed as estimator.alpha_ after fitting
estimator = RidgeCV(alphas=(0.1, 10, 20, 100))
estimator.fit(X=x_train, y=y_train)
# 5. Model evaluation
#    Mean squared error on the test set, then the fitted coefficients,
#    intercept and the chosen regularization strength
pre = estimator.predict(X=x_test)
print("均方误差\n", mean_squared_error(y_true=y_test, y_pred=pre))
print("回归系数\n", estimator.coef_)
print("偏置 ", estimator.intercept_)
print("最优正则化力度\n", estimator.alpha_)

均方误差
 21.033604774721336
回归系数
 [-0.61990598  0.94631264 -0.31665966  0.88594399 -1.74413196  2.79010443
 -0.20188216 -3.01252177  1.94493073 -1.14271415 -1.61041215  0.91048372
 -3.67949124]
偏置  22.579702970297042
最优正则化力度
 10.0


#  模型保存和加载

## 模型保存

In [37]:

# 1. Load the data
data = load_boston()
# 2. Basic data processing
#    1. Hold out 20% of the samples as the test set (seeded split)
x_train, x_test, y_train, y_test = train_test_split(
    data.data,
    data.target,
    test_size=0.2,
    random_state=22,
)
# 3. Feature engineering
#    1. Standardize the features: fit on the training set, reuse on the test set
transfer = StandardScaler()
x_train = transfer.fit_transform(X=x_train)
x_test = transfer.transform(X=x_test)
# 4. Build the model
#    1. Ridge regression with cross-validation over the candidate alphas
estimator = RidgeCV(alphas=(0.1, 10, 20, 100))
estimator.fit(X=x_train, y=y_train)
# Persist the fitted model to disk.
# Only the estimator is written; the scaler `transfer` is not saved, so whoever
# reloads the file must feed it inputs standardized the same way.
joblib.dump(value=estimator, filename="./python28.pkl")
# 5. Model evaluation
#    Mean squared error on the test set, then the fitted coefficients,
#    intercept and the chosen regularization strength
pre = estimator.predict(X=x_test)
print("均方误差\n", mean_squared_error(y_true=y_test, y_pred=pre))
print("回归系数\n", estimator.coef_)
print("偏置 ", estimator.intercept_)
print("最优正则化力度\n", estimator.alpha_)

均方误差
 21.033604774721336
回归系数
 [-0.61990598  0.94631264 -0.31665966  0.88594399 -1.74413196  2.79010443
 -0.20188216 -3.01252177  1.94493073 -1.14271415 -1.61041215  0.91048372
 -3.67949124]
偏置  22.579702970297042
最优正则化力度
 10.0


## 模型加载

In [38]:
# Reload the model from the pickle file written by joblib.dump in the save cell.
estimator1 = joblib.load(filename='./python28.pkl')

In [40]:
# Predict with the reloaded model. x_test is the already-standardized test set
# left in the kernel by the save cell; it is not recomputed here.
pre = estimator1.predict(X=x_test)

In [41]:
# Display the coefficients of the reloaded model.
estimator1.coef_

array([-0.61990598,  0.94631264, -0.31665966,  0.88594399, -1.74413196,
        2.79010443, -0.20188216, -3.01252177,  1.94493073, -1.14271415,
       -1.61041215,  0.91048372, -3.67949124])

In [42]:
# Display the intercept of the reloaded model.
estimator1.intercept_

22.579702970297042

In [43]:
# Mean squared error of the reloaded model's predictions on the test set.
mean_squared_error(y_true=y_test, y_pred=pre)

21.033604774721336