### Curve fitting

The way this lecture connects `model error` to `Least Squares` is very revealing.

1. Assume the model has several features, and the error of each feature is uniformly or triangularly distributed within some range. For every measurement, assume the overall model error is the sum of all of these independent error sources; then, by the central limit theorem, the overall model error is approximately normally distributed. (Refer to [quiz problem 3](https://courses.edx.org/courses/course-v1:MITx+6.00.2x_4+3T2015/courseware/fe76f342c0a34327848a80d87c13cf4a/0a78020641cf4a81aac6b61584be7742/) )
2. The goal of fitting the observations is then to maximize the likelihood of this normally distributed error.

$$ \max \prod _{i} L_{err}(predict_i - obs_i) = \max \prod _{i} \frac {1} {\sqrt {2\pi} \sigma} e ^ {- \frac {(predict_i - obs_i)^2} {2 {\sigma}^2} } $$

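Equivalently, take the logarithm of the likelihood and minimize its negative: $$ \min - \sum _{i} \ln \left( \frac {1} {\sqrt {2\pi} \sigma} e ^ {- \frac {(predict_i - obs_i)^2} {2 {\sigma}^2} } \right) $$ Because $\sigma$ is a constant, this is the same as minimizing $\sum _{i} (predict_i - obs_i)^2$, which is exactly the least-squares criterion.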

In [10]:
# L7 PROBLEM 2  
import pylab, random

def testTriangularErrors(ntrials=10000,npts=100):
    """Plot a histogram of sums of triangularly distributed random values.

    Each trial adds up npts draws from random.triangular(-1, 1); the ntrials
    resulting sums are drawn as a 50-bin histogram on the current pylab
    figure.  Nothing is returned, and calling pylab.show() is left to the
    caller.

    ntrials: number of sums to generate (default 10000)
    npts: number of random draws added together for each sum (default 100)
    """
    results = [sum(random.triangular(-1, 1) for _ in range(npts))
               for _ in range(ntrials)]
    pylab.hist(results, bins=50)
    # Build the title from the arguments so that it stays accurate when the
    # function is called with non-default values.
    pylab.title('Sum of {0} random points -- Triangular PDF ({1:,} trials)'
                .format(npts, ntrials))
    pylab.xlabel('Sum')
    pylab.ylabel('Number of trials')

def testUniformErrors(ntrials=10000,npts=100):
    """Plot a histogram of sums of uniformly distributed random values.

    Each trial adds up npts draws from random.uniform(-1, 1); the ntrials
    resulting sums are drawn as a 50-bin histogram on the current pylab
    figure.  Nothing is returned, and calling pylab.show() is left to the
    caller.

    ntrials: number of sums to generate (default 10000)
    npts: number of random draws added together for each sum (default 100)
    """
    results = [sum(random.uniform(-1, 1) for _ in range(npts))
               for _ in range(ntrials)]
    pylab.hist(results, bins=50)
    # Build the title from the arguments so that it stays accurate when the
    # function is called with non-default values.
    pylab.title('Sum of {0} random points -- Uniform PDF ({1:,} trials)'
                .format(npts, ntrials))
    pylab.xlabel('Sum')
    pylab.ylabel('Number of trials')
    
# Draw the triangular-sum histogram on figure 1 and the uniform-sum
# histogram on figure 2, then display both figures.
pylab.figure(1)
testTriangularErrors()
pylab.figure(2)
testUniformErrors()
pylab.show()


In [14]:
def getData(fileName):
    """Read a two-column, whitespace-separated data file.

    The first line is treated as a header and skipped.  Every following
    non-blank line must hold two numbers: a distance followed by a mass.

    fileName: path of the file to read
    returns: a tuple (masses, distances) of two lists of floats; note that
             the masses come first even though they are the second column
    raises: ValueError if a non-blank line does not hold exactly two numbers
    """
    distances = []
    masses = []
    # 'with' closes the file even if a malformed line raises below.
    with open(fileName, 'r') as dataFile:
        dataFile.readline()  # discard the header line
        for line in dataFile:
            fields = line.split()
            if not fields:
                continue  # tolerate blank lines, e.g. a trailing newline
            d, m = fields
            distances.append(float(d))
            masses.append(float(m))
    return (masses, distances)

def rSquare(measured, estimated):
    """Return the coefficient of determination (R^2) of a set of predictions.

       measured: one dimensional array of measured values
       estimated: one dimensional array of predicted values
       returns: 1 minus the ratio of the summed squared prediction errors
                to the summed squared deviations of measured from its mean"""
    residuals = estimated - measured
    errorSum = (residuals**2).sum()
    count = float(len(measured))
    deviations = measured.sum()/count - measured
    varianceSum = (deviations**2).sum()
    return 1 - errorSum/varianceSum

def fitData(fileName):
    """Plot the measurements in fileName together with a least-squares line.

    The masses returned by getData are converted to forces (F = m * 9.81)
    and plotted against the measured distances.  A degree-1 polynomial is
    fitted with pylab.polyfit, its R^2 value is printed, and the fitted line
    is drawn on the current figure with k = 1/slope shown in the legend.
    Calling pylab.show() is left to the caller.

    fileName: path of a data file in the format getData expects
    """
    xVals, yVals = getData(fileName)
    xVals = pylab.array(xVals)*9.81  # convert mass to force (F = mg)
    yVals = pylab.array(yVals)
    pylab.plot(xVals, yVals, 'bo', label = 'Measured points')
    pylab.title('Measured Displacement of Spring')
    pylab.xlabel('Force (Newtons)')
    pylab.ylabel('Distance (meters)')
    a,b = pylab.polyfit(xVals, yVals, 1)  # fit y = ax + b
    # use line equation to graph predicted values
    estYVals = a*xVals + b
    k = 1/a  # the line gives distance per unit force, so k is its inverse
    # Parenthesized so this line is valid on both Python 2 and Python 3.
    print(rSquare(yVals, estYVals))
    pylab.plot(xVals, estYVals, label = 'Linear fit, k = '
               + str(round(k, 5)))
    pylab.legend(loc = 'best')

# Fit and plot every measurement in springData.txt.
fitData('springData.txt')
pylab.show()

def fitData3(fileName, numDropped=6):
    """Like fitData, but leave the last numDropped measurements out.

    The masses returned by getData are converted to forces (F = m * 9.81)
    and plotted against the measured distances, ignoring the final
    numDropped rows of the file.  A degree-1 polynomial is fitted with
    pylab.polyfit, its R^2 value is printed, and the fitted line is drawn on
    the current figure with k = 1/slope shown in the legend.  Calling
    pylab.show() is left to the caller.

    fileName: path of a data file in the format getData expects
    numDropped: how many trailing data points to exclude (default 6)
    """
    xVals, yVals = getData(fileName)
    # Slice by an explicit end index: a negative-stop slice such as [:-0]
    # would yield an empty list instead of keeping every point.
    keep = max(len(xVals) - numDropped, 0)
    xVals = pylab.array(xVals[:keep])*9.81  # convert mass to force (F = mg)
    yVals = pylab.array(yVals[:keep])
    pylab.plot(xVals, yVals, 'bo', label = 'Measured points')
    pylab.title('Measured Displacement of Spring')
    pylab.xlabel('Force (Newtons)')
    pylab.ylabel('Distance (meters)')
    a,b = pylab.polyfit(xVals, yVals, 1)  # fit y = ax + b
    # use line equation to graph predicted values
    estYVals = a*xVals + b
    k = 1/a  # the line gives distance per unit force, so k is its inverse
    # Parenthesized so this line is valid on both Python 2 and Python 3.
    print(rSquare(yVals, estYVals))
    pylab.plot(xVals, estYVals, label = 'Linear fit, k = '
               + str(round(k, 5)))
    pylab.legend(loc = 'best')

# Repeat the fit with the last six measurements left out.
fitData3('springData.txt')
pylab.show()

0.88151239836
0.953884467286
