# 预测鲍鱼年龄

In [1]:
import numpy as np

In [2]:
def loadDataSet(fileName):
    numFeat = len(open(fileName).readline().split('\t'))-1
    dataMat = [];
    labelMat = []
    fr = open(fileName)
    for line in fr.readlines():
        lineArr = []
        curLine = line.strip().split('\t')
        for i in range(numFeat):
            lineArr.append(float(curLine[i]))
        dataMat.append(lineArr)
        labelMat.append(float(curLine[-1]))
    return dataMat, labelMat

def standRegres(xArr, yArr):
    xMat = np.mat(xArr)
    yMat = np.mat(yArr).T
    xTx = xMat.T*xMat
    if np.linalg.det(xTx) == 0.0:
        print("The matrix is singular, cannot do inverse")
        return 
    ws = xTx.T * (xMat.T * yMat)
    return ws

In [3]:
def rssError(yArr, yHatArr):
    return((yArr-yHatArr)**2).sum()


In [4]:
abX, abY = loadDataSet('abalone.txt')
ws = standRegres(abX[0:99], abY[0:99])
yHat = np.mat(abX[100:199]) * ws
rssError(abY[100:199], yHat.T.A)

4414335119580.887

# 局部加权线性回归（Locally Weighted Linear Regression）

In [5]:
def lwlr(testPoint, xArr, yArr, k=1.0):
    xMat = np.mat(xArr)
    yMat = np.mat(yArr).T
    m = np.shape(xMat)[0]
    weights = np.mat(np.eye((m))) #生成对角矩阵
    for j in range(m):
        diffMat = testPoint- xMat[j,:]
        weights[j, j] = np.exp(diffMat*diffMat.T/(-2.0*k**2)) #权重大小以指数级衰减
    xTx = xMat.T * (weights*xMat)
    if np.linalg.det(xTx) == 0.0:
        print("This matrix is singular, cannot do inverse")
        return 
    ws = xTx.T*(xMat.T*(weights*yMat))
    return testPoint * ws 

def lwlrTest(testArr, xArr, yArr, k):
    m = np.shape(testArr)[0]
    yHat = np.zeros(m)
    for i in range(m):
        yHat[i] = lwlr(testArr[i], xArr, yArr, k)
    return yHat

In [6]:
yHat01 = lwlrTest(abX[100:199], abX[0:99], abY[0:99], 0.1)
rssError(abY[100:199], yHat01.T)

533346214.0812403

In [7]:
yHat1 = lwlrTest(abX[100:199], abX[0:99], abY[0:99], 1)
rssError(abY[100:199], yHat1.T)

814020454141.4115

In [8]:
yHat10 = lwlrTest(abX[100:199], abX[0:99], abY[0:99], 10)
rssError(abY[100:199], yHat10.T)

4248895733295.944