### 分类：
    1.批量梯度下降（BGD）：每次使用全量的训练集样本来更新模型参数
    2.随机梯度下降（SGD）：每次从训练集中随机选择一个样本来进行迭代
    3.Mini-batch梯度下降算法：次更新从训练集中随机选择b,b<m个样本进行学习

In [22]:
# 构建一个数据检验函数，验证样本特征和目标值是否一样，
# 每个样本的特征值是否一样，目标值是否单一
def checkdate(X, Y):
    """
    检查数据是否规范
        X: 特征值, list object
        Y: 目标值, list object
    """
    if len(X) != len(Y):
        raise Exception('数据异常。')
    else:
        l = len(X[0])
        for m in X:
            if len(m) != l:
                raise Exception('数据异常。')
            if len(Y[0]) != 1:     # 保证目标值数量单一
                raise Exception('数据异常。') 

def calcdiffe(x, y, a):
    """
    计算差值，实际结果与预测结果间的差
        x: 某个样本的特征值，向量
        y: 样本对应得目标值，
    """
    lx = len(x)
    la = len(a)
    if lx == la:   # 样本的长度和theta的长度一样
        result = 0
        for i in range(lx):
            result += x[i] * a[i]  # θ1*x1 + θ2*x2 + ```
        return y - result
    elif lx + 1 == la:    # 有常数项
        result = 0
        for i in range(lx):
            result += x[i] * a[i]
        result += 1 * a[lx]    
        return y - result
    else:
        raise Exception('参数错误')

        
def fit(X, Y, alphas, threshold=1e-6, maxIter=200, addConstant=True):
    """
    梯度下降模型
        X: 特征值,2D
        Y: 目标值,2D
        threshold: 阈值,达到该阈值时,停止迭代
    return: 
    """
    import math
    import numpy as np
    # 校验 X Y
    checkdate(X, Y)
    # 构建模型
    l = len(alphas)     # 学习率的长度
    m = len(Y)          # 目标值的长度
    n = len(X[0]) + 1 if addConstant else len(X[0])    # 特征值的长度
    B = [True for i in range(l)]     # 空值最优模型
    J = [np.nan for i in range(l)]     # loss的函数值
    
    # 计算迭代后与迭代前的差异值
    # 初始化theta值，初始值均为0，样本的长度等于theta的长度, 一个特征值对应一个theta, 
    # a 求解的系数，二维数组，m*n  m是样本中的特征值的长度， n是本次测试的学习率的长度
    a = [[0 for i in range(n)] for j in range(l)]   
    # 开始计算, 最多迭代200次
    for times in range(maxIter):
        for i in range(l):    # 如果已确定最优解，则退出本次循环
            if not B[i]:
                continue
            
            ta = a[i]   # theta的初始值
            for j in range(n):       # 样本的特征值
                alpha = alphas[i]    # 取出一个学习率
                ts = 0
                for k in range(m):   # 样本的目标值
                    if j == n-1 and addConstant:     # 有常数项
                        ts += alpha * calcdiffe(X[k], Y[k][0], a[i]) * 1
                    else:
                        ts += alpha * calcdiffe(X[k], Y[k][0], a[i]) * X[k][j]
                t = ta[j] + ts   # 学习后的theta值
                ta[j] = t      #
                
            flag = True
            js = 0
            for k in range(m):
                js += math.pow(calcdiffe(X[k], Y[k][0], a[i]), 2) + a[i][j]  # 梯度下降求解公式
                # 求每个样本的差值的平方 + theta的累加值值
                if js > J[i]:
                    flag = False
                    break
            
            if flag:
                J[i] = js
                for j in range(n):
                    a[i][j] = ta[j]    # 更新 theta 的值
            else:
                B[i] = False
                
            r = [0 for j in J if j < threshold]   # 求解结果，如果小于阈值，则退出循环
            if len(r) > 0:
                break
            
            r = [0 for b in B if not b]
            if len(r) > 0:
                break
                
    min_a = a[0]    # 找到最小的系数theta
    min_j = J[0]
    min_alpha = alphas[0]
    for i in range(l):
        if J[i] < min_j:
            min_j = J[i]
            min_a = a[i]
            min_alpha = alphas[i]
            
    print('最优的alpha值是：', min_alpha)
    return min_a

def predict(X, a):
    """
    预测结果
    """
    Y = []
    n = len(a) - 1
    for x in X:
        result = 0
        for i in range(n):
            result += x[i] * a[i]   # 求所有样本特征乘以系数的累加和
        result += a[n]     # 加上常数项
        Y.append(result)
    return Y


# 计算实际值和预测值之间的相关性
def calcRScore(y, py):
    if len(y) != len(py):
        raise Exception("参数异常")
    import math 
    import numpy as np
    avgy = np.average(y)
    m = len(y)
    rss = 0.0
    tss = 0
    for i in range(m):
        rss += math.pow(y[i] - py[i], 2)
        tss += math.pow(y[i] - avgy, 2)
    r = 1.0 - 1.0 * rss / tss
    return r        

In [10]:
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import pandas as pd
import warnings
import sklearn
from sklearn.linear_model import LinearRegression,Ridge, LassoCV, RidgeCV, ElasticNetCV
from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import Pipeline
# from sklearn.linear_model.coordinate_descent import ConvergenceWarning

In [11]:
## 设置字符集，防止中文乱码
mpl.rcParams['font.sans-serif']=[u'simHei']
mpl.rcParams['axes.unicode_minus']=False

In [15]:
np.random.seed(0)
np.set_printoptions(linewidth=1000, suppress=True)
N = 10
x = np.linspace(0, 6, N) + np.random.randn(N)
y = 1.8*x**3 + x**2 - 14*x - 7 + np.random.randn(N)
x.shape = -1, 1
y.shape = -1, 1
y

array([[-18.55967185],
       [-17.1576482 ],
       [-11.01504472],
       [ 89.02611774],
       [118.31991291],
       [-10.55888129],
       [168.02519061],
       [115.67331998],
       [204.96267299],
       [417.70060165]])

In [23]:
plt.figure(figsize=(12,6), facecolor='w')

## 模拟数据产生
x_hat = np.linspace(x.min(), x.max(), num=100)
x_hat.shape = -1,1

## 线性模型
model = LinearRegression()
model.fit(x,y)
y_hat = model.predict(x_hat)
s1 = calcRScore(y, model.predict(x))
print(model.score(x,y)) ## 自带R^2输出
print("模块自带实现===============")
print("参数列表:", model.coef_)
print("截距:", model.intercept_)


## 自模型
ma = fit(x,y,np.logspace(-4,-2,100))
y_hat2 = predict(x_hat, ma)
s2 = calcRScore(y, predict(x,ma))
print ("自定义实现模型=============")
print ("参数列表:", ma)

0.8374376988248429
参数列表: [[72.0576022]]
截距: [-163.71132966]
最优的alpha值是： 0.01
参数列表: [70.87936393633888, -158.4997458365991]


<Figure size 864x432 with 0 Axes>