In [1]:
from scipy.linalg import hilbert

# Build the order-10 Hilbert matrix (entry [i, j] equals 1 / (i + j + 1));
# leaving `x` as the cell's last expression displays it.
x = hilbert(n=10)
x

array([[1.        , 0.5       , 0.33333333, 0.25      , 0.2       ,
        0.16666667, 0.14285714, 0.125     , 0.11111111, 0.1       ],
       [0.5       , 0.33333333, 0.25      , 0.2       , 0.16666667,
        0.14285714, 0.125     , 0.11111111, 0.1       , 0.09090909],
       [0.33333333, 0.25      , 0.2       , 0.16666667, 0.14285714,
        0.125     , 0.11111111, 0.1       , 0.09090909, 0.08333333],
       [0.25      , 0.2       , 0.16666667, 0.14285714, 0.125     ,
        0.11111111, 0.1       , 0.09090909, 0.08333333, 0.07692308],
       [0.2       , 0.16666667, 0.14285714, 0.125     , 0.11111111,
        0.1       , 0.09090909, 0.08333333, 0.07692308, 0.07142857],
       [0.16666667, 0.14285714, 0.125     , 0.11111111, 0.1       ,
        0.09090909, 0.08333333, 0.07692308, 0.07142857, 0.06666667],
       [0.14285714, 0.125     , 0.11111111, 0.1       , 0.09090909,
        0.08333333, 0.07692308, 0.07142857, 0.06666667, 0.0625    ],
       [0.125     , 0.11111111, 0.1      

In [3]:
import numpy as np

# Determinant of the Gram matrix X^T X, computed on plain ndarrays with
# np.dot instead of the np.matrix class (which NumPy no longer recommends).
# The determinant is close to 0, i.e. X^T X is numerically singular.
# NOTE(review): only the magnitude matters here; the exact digits are
# presumably dominated by floating-point round-off and may differ between
# environments.
np.linalg.det(np.dot(x.T, x))

9.147819724891032e-91

In [4]:
import pandas as pd

# Label the ten columns x1..x10, then show their pairwise correlation matrix.
column_labels = ["x{}".format(idx) for idx in range(1, 11)]
pd.DataFrame(x, columns=column_labels).corr()
# The coefficients are all close to 1: every pair of columns is highly
# correlated.

Unnamed: 0,x1,x2,x3,x4,x5,x6,x7,x8,x9,x10
x1,1.0,0.985344,0.965392,0.948277,0.93423,0.922665,0.913025,0.904883,0.897921,0.891902
x2,0.985344,1.0,0.995632,0.988183,0.98072,0.973927,0.967905,0.962598,0.957918,0.953774
x3,0.965392,0.995632,1.0,0.99816,0.994616,0.990719,0.986928,0.983393,0.980155,0.977207
x4,0.948277,0.988183,0.99816,1.0,0.999065,0.99712,0.994845,0.992525,0.990281,0.988163
x5,0.93423,0.98072,0.994616,0.999065,1.0,0.999465,0.998294,0.99686,0.995346,0.993839
x6,0.922665,0.973927,0.990719,0.99712,0.999465,1.0,0.999669,0.998914,0.997959,0.996922
x7,0.913025,0.967905,0.986928,0.994845,0.998294,0.999669,1.0,0.999782,0.999271,0.998608
x8,0.904883,0.962598,0.983393,0.992525,0.99686,0.998914,0.999782,1.0,0.99985,0.999491
x9,0.897921,0.957918,0.980155,0.990281,0.995346,0.997959,0.999271,0.99985,1.0,0.999893
x10,0.891902,0.953774,0.977207,0.988163,0.993839,0.996922,0.998608,0.999491,0.999893,1.0


In [6]:
# Fabricate a linear function y = Xw whose true coefficients w are known.

from scipy.optimize import leastsq

x = hilbert(10)  # 10x10 Hilbert matrix
np.random.seed(10)  # fixed seed so the same random numbers are drawn each run
w = np.random.randint(2, 10, 10)  # ten random integer coefficients in [2, 10)

# y = Xw. On plain ndarrays np.dot returns the 1-D result vector directly,
# so no np.matrix wrapping (a class NumPy no longer recommends) or
# transpose/flatten step is needed.
y = np.dot(x, w)

print("实际参数 w: ", w)
print("实际函数值 y: ", y)

实际参数 w:  [3 7 6 9 2 3 5 6 3 7]
实际函数值 y:  [14.14761905 10.1232684   8.12233045  6.8529637   5.95634643  5.28188478
  4.75274309  4.32480306  3.97061256  3.67205737]


In [15]:
# Fit the fabricated data with least squares; the result ends up far from
# the actual coefficients.


def func(p, x):
    """Linear model: the product of the matrix ``x`` and the parameters ``p``."""
    return np.dot(x, p)


def err_func(p, x, y):
    """Residual of the model prediction ``func(p, x)`` against the targets ``y``."""
    return func(p, x) - y


# randint's upper bound is exclusive, so sampling from [1, 2) always gives 1:
# every parameter is initialised to 1.
p_init = np.random.randint(1, 2, 10)

# Least-squares solve; element 0 of the returned tuple is the fitted vector.
parameters = leastsq(err_func, p_init, args=(x, y))
print("拟合参数 w: ", parameters[0])
# The fitted parameters differ greatly from the actual ones.

拟合参数 w:  [  3.00317552   6.84694891   7.80922602   0.11787948  23.84530017
 -26.56612176  30.04685297 -11.87888984  14.59294259   3.18066383]


(array([  3.00317552,   6.84694891,   7.80922602,   0.11787948,
         23.84530017, -26.56612176,  30.04685297, -11.87888984,
         14.59294259,   3.18066383]),
 2)