In [140]:
from numpy import *
from Tkinter import *
from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg
from matplotlib.figure import Figure

In [56]:
def loadDataSet(fileName):
    """Parse a tab-delimited text file of floats into a list of rows.

    Every non-blank line is split on tab characters and each field is
    converted with ``float``. The rest of this notebook treats the last
    column of each row as the target value.

    Args:
        fileName: path of the text file to read.

    Returns:
        A list of lists of floats, one inner list per non-blank line.

    Raises:
        ValueError: if a field cannot be converted to ``float``.
    """
    dataMat = []
    # ``with`` closes the handle even when a field fails to parse.
    with open(fileName) as fr:
        for line in fr:
            stripped = line.strip()
            if not stripped:
                # A blank line (typically a trailing newline at end of file)
                # would otherwise crash on float('').
                continue
            dataMat.append([float(field) for field in stripped.split('\t')])
    return dataMat

def binSplitDataSet(dataSet, feature, value):
    """Partition the rows of dataSet on one feature column.

    Args:
        dataSet: 2-D NumPy matrix/array of samples.
        feature: index of the column to compare.
        value: threshold for the comparison.

    Returns:
        A pair ``(above, atOrBelow)``: the rows whose ``feature`` entry is
        strictly greater than ``value``, then the rows whose entry is less
        than or equal to it.
    """
    column = dataSet[:, feature]
    # nonzero(...)[0] yields the row indices where the comparison holds.
    aboveRows = nonzero(column > value)[0]
    atOrBelowRows = nonzero(column <= value)[0]
    return dataSet[aboveRows, :], dataSet[atOrBelowRows, :]

In [57]:
testMat = mat(eye(4))

In [58]:
testMat

matrix([[1., 0., 0., 0.],
        [0., 1., 0., 0.],
        [0., 0., 1., 0.],
        [0., 0., 0., 1.]])

In [59]:
mat0, mat1= binSplitDataSet(testMat,1,0.5)

In [60]:
mat0

matrix([[0., 1., 0., 0.]])

In [61]:
mat1

matrix([[1., 0., 0., 0.],
        [0., 0., 1., 0.],
        [0., 0., 0., 1.]])

In [62]:
myDat = loadDataSet('ex00.txt')
myMat = mat(myDat)
myMat

matrix([[ 3.609800e-02,  1.550960e-01],
        [ 9.933490e-01,  1.077553e+00],
        [ 5.308970e-01,  8.934620e-01],
        [ 7.123860e-01,  5.648580e-01],
        [ 3.435540e-01, -3.717000e-01],
        [ 9.801600e-02, -3.327600e-01],
        [ 6.911150e-01,  8.343910e-01],
        [ 9.135800e-02,  9.993500e-02],
        [ 7.270980e-01,  1.000567e+00],
        [ 9.519490e-01,  9.452550e-01],
        [ 7.685960e-01,  7.602190e-01],
        [ 5.413140e-01,  8.937480e-01],
        [ 1.463660e-01,  3.428300e-02],
        [ 6.731950e-01,  9.150770e-01],
        [ 1.835100e-01,  1.848430e-01],
        [ 3.395630e-01,  2.067830e-01],
        [ 5.179210e-01,  1.493586e+00],
        [ 7.037550e-01,  1.101678e+00],
        [ 8.307000e-03,  6.997600e-02],
        [ 2.439090e-01, -2.946700e-02],
        [ 3.069640e-01, -1.773210e-01],
        [ 3.649200e-02,  4.081550e-01],
        [ 2.955110e-01,  2.882000e-03],
        [ 8.375220e-01,  1.229373e+00],
        [ 2.020540e-01, -8.774400e-02],


In [63]:
shape(myMat)

(200L, 2L)

In [65]:
def regLeaf(dataSet):
    """Leaf value of a regression tree: the mean of the last column."""
    targets = dataSet[:, -1]
    return mean(targets)

def regErr(dataSet):
    """Total squared error of the last column around its mean.

    Computed as the variance of the last column multiplied by the number
    of rows in dataSet.
    """
    targetVariance = var(dataSet[:, -1])
    rowCount = shape(dataSet)[0]
    return targetVariance * rowCount

def linearSolve(dataSet):   #helper function used in two places
    """Fit an ordinary-least-squares linear model to dataSet.

    The last column of dataSet is the target Y; the remaining columns are
    the features. X is built with a constant 1 in column 0 followed by the
    feature columns.

    Args:
        dataSet: 2-D matrix (or anything ``asmatrix`` accepts) whose last
            column is the target.

    Returns:
        Tuple ``(ws, X, Y)``: the coefficient column vector, the design
        matrix with the leading column of ones, and the target column.

    Raises:
        NameError: if the determinant of ``X.T * X`` is exactly 0.0, i.e.
            the normal equations cannot be inverted.
    """
    # asmatrix is a no-copy view for matrix input and lets plain arrays work;
    # it is used instead of the `mat` alias, which NumPy 2.0 removed.
    dataSet = asmatrix(dataSet)
    m, n = shape(dataSet)
    X = asmatrix(ones((m, n)))          # column 0 stays 1 (intercept term)
    X[:, 1:n] = dataSet[:, 0:n-1]       # copy the feature columns
    Y = dataSet[:, -1]                  # and strip out Y
    xTx = X.T * X
    if linalg.det(xTx) == 0.0:
        raise NameError('This matrix is singular, cannot do inverse,\n'
                        'try increasing the second value of ops')
    ws = xTx.I * (X.T * Y)
    return ws, X, Y

def modelLeaf(dataSet):
    """Leaf value of a model tree: the coefficients linearSolve fits to dataSet."""
    coefficients, _designMatrix, _targets = linearSolve(dataSet)
    return coefficients

def modelErr(dataSet):
    """Sum of squared residuals of the linear model linearSolve fits to dataSet."""
    coefficients, X, Y = linearSolve(dataSet)
    residuals = Y - X * coefficients
    return sum(power(residuals, 2))

def chooseBestSplit(dataSet, leafType=regLeaf, errType=regErr, ops=(1,4)):
    """Find the (feature, value) split that minimises the summed error.

    Args:
        dataSet: NumPy matrix; the last column is the target.
        leafType: callable building a leaf value from a data set.
        errType: callable returning the error of a data set.
        ops: pair ``(tolS, tolN)`` - the minimum error reduction required
            to split, and the minimum number of rows on each side.

    Returns:
        ``(featureIndex, splitValue)`` for the best split, or
        ``(None, leafType(dataSet))`` when a stop condition is hit.
    """
    minGain = ops[0]
    minRows = ops[1]

    # exit cond 1: all target values are identical, nothing to split
    distinctTargets = set(dataSet[:, -1].T.tolist()[0])
    if len(distinctTargets) == 1:
        return None, leafType(dataSet)

    rowCount, colCount = shape(dataSet)
    # error of the unsplit data set; candidate splits are judged against it
    baseErr = errType(dataSet)
    lowestErr = inf
    chosenFeat = 0
    chosenVal = 0
    for feat in range(colCount - 1):
        candidates = set(dataSet[:, feat].T.tolist()[0])
        for candidate in candidates:
            upper, lower = binSplitDataSet(dataSet, feat, candidate)
            # skip candidates that leave either side with too few rows
            if shape(upper)[0] < minRows or shape(lower)[0] < minRows:
                continue
            splitErr = errType(upper) + errType(lower)
            if splitErr < lowestErr:
                chosenFeat, chosenVal, lowestErr = feat, candidate, splitErr

    # exit cond 2: the improvement is below the threshold
    if (baseErr - lowestErr) < minGain:
        return None, leafType(dataSet)

    # exit cond 3: the chosen split still leaves a side that is too small
    upper, lower = binSplitDataSet(dataSet, chosenFeat, chosenVal)
    if shape(upper)[0] < minRows or shape(lower)[0] < minRows:
        return None, leafType(dataSet)

    # best feature to split on and the value used for that split
    return chosenFeat, chosenVal

def createTree(dataSet, leafType=regLeaf, errType=regErr, ops=(1,4)):
    """Recursively build a tree over dataSet.

    Args:
        dataSet: NumPy matrix (array filtering is used); last column is
            the target.
        leafType: callable building a leaf value from a data set.
        errType: callable returning the error of a data set.
        ops: ``(tolS, tolN)`` stop parameters passed to chooseBestSplit.

    Returns:
        Either a leaf value (when chooseBestSplit reports a stop condition)
        or a dict with keys ``'spInd'``, ``'spVal'``, ``'left'`` and
        ``'right'``; ``'left'`` holds the rows with feature value greater
        than ``spVal`` and ``'right'`` the remaining rows.
    """
    feat, val = chooseBestSplit(dataSet, leafType, errType, ops)
    # Identity test: feature index 0 is a valid split and must not be
    # confused with "no split".
    if feat is None:
        return val
    lSet, rSet = binSplitDataSet(dataSet, feat, val)
    return {
        'spInd': feat,
        'spVal': val,
        'left': createTree(lSet, leafType, errType, ops),
        'right': createTree(rSet, leafType, errType, ops),
    }

In [66]:
createTree(myMat)

{'left': 1.0180967672413792,
 'right': -0.04465028571428572,
 'spInd': 0,
 'spVal': 0.48813}

In [67]:
myDat1 = loadDataSet('ex0.txt')

In [68]:
myMat1 = mat(myDat1)

In [70]:
myMat1

matrix([[1.      , 0.067732, 3.176513],
        [1.      , 0.42781 , 3.816464],
        [1.      , 0.995731, 4.550095],
        [1.      , 0.738336, 4.256571],
        [1.      , 0.981083, 4.560815],
        [1.      , 0.526171, 3.929515],
        [1.      , 0.378887, 3.52617 ],
        [1.      , 0.033859, 3.156393],
        [1.      , 0.132791, 3.110301],
        [1.      , 0.138306, 3.149813],
        [1.      , 0.247809, 3.476346],
        [1.      , 0.64827 , 4.119688],
        [1.      , 0.731209, 4.282233],
        [1.      , 0.236833, 3.486582],
        [1.      , 0.969788, 4.655492],
        [1.      , 0.607492, 3.965162],
        [1.      , 0.358622, 3.5149  ],
        [1.      , 0.147846, 3.125947],
        [1.      , 0.63782 , 4.094115],
        [1.      , 0.230372, 3.476039],
        [1.      , 0.070237, 3.21061 ],
        [1.      , 0.067154, 3.190612],
        [1.      , 0.925577, 4.631504],
        [1.      , 0.717733, 4.29589 ],
        [1.      , 0.015371, 3.085028],


In [71]:
createTree(myMat1)

{'left': {'left': 4.581648499999999,
  'right': {'left': 4.233747156250001,
   'right': 3.912047575757576,
   'spInd': 1,
   'spVal': 0.621523},
  'spInd': 1,
  'spVal': 0.808177},
 'right': {'left': 3.563709000000001,
  'right': 3.1889351956521743,
  'spInd': 1,
  'spVal': 0.212575},
 'spInd': 1,
 'spVal': 0.441815}

In [72]:
createTree(myMat,ops=(0,1))

{'left': {'left': {'left': {'left': {'left': {'left': {'left': {'left': {'left': {'left': {'left': {'left': {'left': {'left': {'left': {'left': 1.035533,
                'right': 1.077553,
                'spInd': 0,
                'spVal': 0.993349},
               'right': {'left': 0.744207,
                'right': 1.069062,
                'spInd': 0,
                'spVal': 0.988852},
               'spInd': 0,
               'spVal': 0.989888},
              'right': 1.227946,
              'spInd': 0,
              'spVal': 0.985425},
             'right': {'left': {'left': 0.862911,
               'right': 0.673579,
               'spInd': 0,
               'spVal': 0.975022},
              'right': {'left': {'left': 1.06469,
                'right': {'left': 0.945255,
                 'right': 1.022906,
                 'spInd': 0,
                 'spVal': 0.950153},
                'spInd': 0,
                'spVal': 0.951949},
               'right': {'left': 0.631862,
 

In [73]:
myDat2 = loadDataSet('ex2.txt')

In [74]:
myMat2 = mat(myDat2)

In [75]:
myMat2

matrix([[ 2.28628000e-01, -2.26627300e+00],
        [ 9.65969000e-01,  1.12386764e+02],
        [ 3.42761000e-01, -3.15848550e+01],
        [ 9.01444000e-01,  8.73006250e+01],
        [ 5.85413000e-01,  1.25295113e+02],
        [ 3.34900000e-01,  1.89766500e+01],
        [ 7.69043000e-01,  6.40419410e+01],
        [ 2.97107000e-01, -1.79837700e+00],
        [ 9.01421000e-01,  1.00133819e+02],
        [ 1.76523000e-01,  9.46348000e-01],
        [ 7.10234000e-01,  1.08553919e+02],
        [ 9.81980000e-01,  8.63996370e+01],
        [ 8.58730000e-02, -1.01371040e+01],
        [ 5.37834000e-01,  9.09955360e+01],
        [ 8.06158000e-01,  6.28776980e+01],
        [ 7.08890000e-01,  1.35416767e+02],
        [ 7.87755000e-01,  1.18642009e+02],
        [ 4.63241000e-01,  1.71710570e+01],
        [ 3.00318000e-01, -1.80513180e+01],
        [ 8.15215000e-01,  1.18319942e+02],
        [ 1.39880000e-01,  7.33678400e+00],
        [ 6.83730000e-02, -1.51608360e+01],
        [ 4.57563000e-01, -3.404

In [76]:
createTree(myMat2)

{'left': {'left': {'left': {'left': 105.24862350000001,
    'right': 112.42895575000001,
    'spInd': 0,
    'spVal': 0.958512},
   'right': {'left': {'left': {'left': {'left': 87.3103875,
       'right': {'left': {'left': 96.452867,
         'right': {'left': 104.825409,
          'right': {'left': 95.181793,
           'right': 102.25234449999999,
           'spInd': 0,
           'spVal': 0.872883},
          'spInd': 0,
          'spVal': 0.892999},
         'spInd': 0,
         'spVal': 0.910975},
        'right': 95.27584316666666,
        'spInd': 0,
        'spVal': 0.85497},
       'spInd': 0,
       'spVal': 0.944221},
      'right': {'left': 81.110152,
       'right': 88.78449880000001,
       'spInd': 0,
       'spVal': 0.811602},
      'spInd': 0,
      'spVal': 0.833026},
     'right': 102.35780185714285,
     'spInd': 0,
     'spVal': 0.790312},
    'right': 78.08564325,
    'spInd': 0,
    'spVal': 0.759504},
   'spInd': 0,
   'spVal': 0.952833},
  'right': {'left': {'l

In [77]:
createTree(myMat2,ops=(10000,4))

{'left': 101.35815937735848,
 'right': -2.637719329787234,
 'spInd': 0,
 'spVal': 0.499171}

In [78]:
def isTree(obj):
    """Return True when obj is an internal tree node (a dict), else False.

    ``isinstance`` is used so that dict subclasses (e.g. OrderedDict) are
    recognised as nodes too; leaves are anything that is not a dict.
    """
    return isinstance(obj, dict)

def getMean(tree):
    """Collapse a tree to a single value, modifying it in place.

    Each subtree is replaced by its own collapsed value (right branch
    first, then left); the result is the average of the two branch values.
    """
    for side in ('right', 'left'):
        if isTree(tree[side]):
            tree[side] = getMean(tree[side])
    return (tree['left'] + tree['right']) / 2.0
    
def prune(tree, testData):
    """Post-prune tree against testData, merging leaf pairs that do not help.

    The tree is modified in place. Returns the (possibly modified) tree, or
    a single value when the node was collapsed or its two leaves were merged.
    Prints "merging" for each merge performed.
    """
    # no test data reached this node: collapse the whole subtree
    if shape(testData)[0] == 0:
        return getMean(tree)

    leftIsTree = isTree(tree['left'])
    rightIsTree = isTree(tree['right'])
    if leftIsTree or rightIsTree:
        # route the test rows down the same split the tree uses
        lSet, rSet = binSplitDataSet(testData, tree['spInd'], tree['spVal'])
        if leftIsTree:
            tree['left'] = prune(tree['left'], lSet)
        if rightIsTree:
            tree['right'] = prune(tree['right'], rSet)

    # merging is only considered once both children are leaves
    if isTree(tree['left']) or isTree(tree['right']):
        return tree

    lSet, rSet = binSplitDataSet(testData, tree['spInd'], tree['spVal'])
    leftErr = sum(power(lSet[:,-1] - tree['left'], 2))
    rightErr = sum(power(rSet[:,-1] - tree['right'], 2))
    errorNoMerge = leftErr + rightErr
    treeMean = (tree['left'] + tree['right']) / 2.0
    errorMerge = sum(power(testData[:,-1] - treeMean, 2))
    if errorMerge < errorNoMerge:
        print("merging")
        return treeMean
    return tree

In [79]:
myTree = createTree(myMat2, ops=(0,1))

In [80]:
myDatTest = loadDataSet('ex2test.txt')

In [81]:
myMat2Test = mat(myDatTest)

In [82]:
myMat2Test

matrix([[ 4.21862000e-01,  1.08302410e+01],
        [ 1.05349000e-01, -2.24161100e+00],
        [ 1.55196000e-01,  2.18729760e+01],
        [ 1.61152000e-01,  2.01541800e+00],
        [ 3.82632000e-01, -3.87789790e+01],
        [ 1.77100000e-02,  2.01091130e+01],
        [ 1.29656000e-01,  1.52668870e+01],
        [ 6.13926000e-01,  1.11900063e+02],
        [ 4.09277000e-01,  1.87473100e+00],
        [ 8.07556000e-01,  1.11223754e+02],
        [ 5.93722000e-01,  1.33835486e+02],
        [ 9.53239000e-01,  1.10465070e+02],
        [ 2.57402000e-01,  1.53328990e+01],
        [ 6.45385000e-01,  9.39830540e+01],
        [ 5.63460000e-01,  9.36452770e+01],
        [ 4.08338000e-01, -3.07198780e+01],
        [ 8.74394000e-01,  9.18735050e+01],
        [ 2.63805000e-01, -1.92752000e-01],
        [ 4.11198000e-01,  1.07511180e+01],
        [ 4.49884000e-01,  9.21190100e+00],
        [ 6.46315000e-01,  1.13533660e+02],
        [ 6.73718000e-01,  1.25135638e+02],
        [ 8.05148000e-01,  1.133

In [83]:
prune(myTree, myMat2Test)

merging
merging
merging
merging
merging
merging
merging
merging
merging
merging
merging
merging
merging
merging
merging
merging
merging
merging
merging
merging
merging
merging
merging
merging
merging
merging
merging
merging
merging
merging
merging
merging
merging
merging
merging
merging
merging
merging
merging
merging
merging
merging
merging
merging


{'left': {'left': {'left': {'left': 92.5239915,
    'right': {'left': {'left': {'left': 112.386764,
       'right': 123.559747,
       'spInd': 0,
       'spVal': 0.960398},
      'right': 135.837013,
      'spInd': 0,
      'spVal': 0.958512},
     'right': 111.2013225,
     'spInd': 0,
     'spVal': 0.956951},
    'spInd': 0,
    'spVal': 0.965969},
   'right': {'left': {'left': {'left': {'left': {'left': {'left': {'left': {'left': {'left': {'left': {'left': 96.41885225,
              'right': 69.318649,
              'spInd': 0,
              'spVal': 0.948822},
             'right': {'left': {'left': 110.03503850000001,
               'right': {'left': 65.548418,
                'right': {'left': 115.753994,
                 'right': {'left': {'left': 94.3961145,
                   'right': 85.005351,
                   'spInd': 0,
                   'spVal': 0.912161},
                  'right': {'left': {'left': 106.814667,
                    'right': 118.513475,
               

In [97]:
def linearSolve(dataSet):
    """Fit an ordinary-least-squares linear model to dataSet.

    This cell redefines the linearSolve from the earlier cell and shadows it.
    The last column of dataSet is the target Y; X holds a constant 1 in
    column 0 followed by the remaining (feature) columns.

    Args:
        dataSet: 2-D matrix (or anything ``asmatrix`` accepts) whose last
            column is the target.

    Returns:
        Tuple ``(ws, X, Y)``: coefficient column vector, design matrix and
        target column.

    Raises:
        NameError: if the determinant of ``X.T * X`` is exactly 0.0.
    """
    # asmatrix is a no-copy view for matrix input and lets plain arrays work;
    # it is used instead of the `mat` alias, which NumPy 2.0 removed.
    dataSet = asmatrix(dataSet)
    m, n = shape(dataSet)
    X = asmatrix(ones((m, n)))          # column 0 stays 1 (intercept term)
    X[:, 1:n] = dataSet[:, 0:n-1]
    Y = dataSet[:, -1]
    xTx = X.T * X
    if linalg.det(xTx) == 0.0:
        # The exception must be on the same statement as `raise`; a bare
        # `raise` here would fail with "No active exception to reraise".
        raise NameError('This matrix is singular, cannot do inverse, try increasing the second value of ops')
    ws = xTx.I * (X.T * Y)
    return ws, X, Y
    
def modelLeaf(dataSet):
    """Return the linear-model coefficients that linearSolve fits to dataSet."""
    fitted = linearSolve(dataSet)
    coefficients, _X, _Y = fitted
    return coefficients

def modelErr(dataSet):
    """Return the summed squared difference between Y and the linear fit X * ws."""
    coefficients, X, Y = linearSolve(dataSet)
    predicted = X * coefficients
    squaredResiduals = power(Y - predicted, 2)
    return sum(squaredResiduals)

In [99]:
myMat2 = mat(loadDataSet('exp2.txt'))

In [86]:
myMat2

matrix([[7.0670000e-02, 3.4708290e+00],
        [5.3407600e-01, 6.3771320e+00],
        [7.4722100e-01, 8.9494070e+00],
        [6.6897000e-01, 8.0340810e+00],
        [5.8608200e-01, 6.9977210e+00],
        [7.6496200e-01, 9.3181100e+00],
        [6.5812500e-01, 7.8803330e+00],
        [3.4673400e-01, 4.2133590e+00],
        [3.1396700e-01, 3.7624960e+00],
        [6.0141800e-01, 7.1888050e+00],
        [4.0439600e-01, 4.8934030e+00],
        [1.5434500e-01, 3.6831750e+00],
        [9.8406100e-01, 1.1712928e+01],
        [5.9751400e-01, 7.1466940e+00],
        [5.1440000e-03, 3.3331500e+00],
        [1.4229500e-01, 3.7436810e+00],
        [2.8000700e-01, 3.7373760e+00],
        [5.4200800e-01, 6.4942750e+00],
        [4.6678100e-01, 5.5322550e+00],
        [7.0697000e-01, 8.4767180e+00],
        [1.9103800e-01, 3.6739210e+00],
        [7.5659100e-01, 9.1767220e+00],
        [9.1287900e-01, 1.0850358e+01],
        [5.2470100e-01, 6.0674440e+00],
        [3.0609000e-01, 3.6811480e+00],


In [100]:
createTree(myMat2, modelLeaf, modelErr,(1,10))

{'left': matrix([[1.69855694e-03],
         [1.19647739e+01]]), 'right': matrix([[3.46877936],
         [1.18521743]]), 'spInd': 0, 'spVal': 0.285477}

In [112]:
def regTreeEval(model, inDat):
    """Evaluate a regression-tree leaf: the leaf value itself, as a float.

    inDat is accepted only so the signature matches modelTreeEval; it is
    not used.
    """
    prediction = float(model)
    return prediction

def modelTreeEval(model, inDat):
    """Evaluate a model-tree leaf on one sample.

    Args:
        model: coefficient column vector; row 0 is the intercept.
        inDat: 1 x n matrix holding the sample's feature values.

    Returns:
        The prediction ``[1, inDat] * model`` as a Python float.
    """
    n = shape(inDat)[1]
    # asmatrix instead of the `mat` alias, which NumPy 2.0 removed.
    X = asmatrix(ones((1, n + 1)))      # leading 1 multiplies the intercept
    X[:, 1:n + 1] = inDat
    # .item() extracts the single element explicitly; calling float() directly
    # on a 1x1 matrix is deprecated in recent NumPy releases.
    return float((X * model).item())

def treeForeCast(tree, inData, modelEval=regTreeEval):
    """Predict the target for one sample by walking the tree.

    Args:
        tree: a tree dict (keys 'spInd', 'spVal', 'left', 'right') or a leaf.
        inData: the sample's feature values (1 x n matrix or 1-D sequence).
        modelEval: callable ``(leaf, inData) -> float`` used at the leaf.

    Returns:
        The value modelEval produces for the leaf the sample falls into.
    """
    if not isTree(tree):
        return modelEval(tree, inData)
    # Flatten first so the split index selects a feature; indexing a 1 x n
    # matrix with a single integer would select a whole row instead.
    featVal = asarray(inData).ravel()[tree['spInd']]
    # Values above the split go left, matching binSplitDataSet's ordering.
    branch = tree['left'] if featVal > tree['spVal'] else tree['right']
    # The leaf case is handled by the guard at the top of the recursive call.
    return treeForeCast(branch, inData, modelEval)

def createForeCast(tree, testData, modelEval=regTreeEval):
    """Predict every row of testData with treeForeCast.

    Returns an m x 1 matrix of predictions, where m is ``len(testData)``.
    """
    rowCount = len(testData)
    forecasts = mat(zeros((rowCount, 1)))
    rowIdx = 0
    while rowIdx < rowCount:
        sample = mat(testData[rowIdx])    # each row is passed on as a matrix
        forecasts[rowIdx, 0] = treeForeCast(tree, sample, modelEval)
        rowIdx += 1
    return forecasts

In [114]:
trainMat = mat(loadDataSet('bikeSpeedVsIq_train.txt'))
testMat = mat(loadDataSet('bikeSpeedVsIq_test.txt'))

In [109]:
myTree = createTree(trainMat, ops=(1,20))
myTree

{'left': {'left': {'left': 168.34161286956524,
   'right': 157.0484078846154,
   'spInd': 0,
   'spVal': 20.0},
  'right': {'left': 141.06067981481482,
   'right': 122.90893026923078,
   'spInd': 0,
   'spVal': 14.0},
  'spInd': 0,
  'spVal': 17.0},
 'right': {'left': 94.7066578125,
  'right': {'left': 69.02117757692308,
   'right': 50.94683665,
   'spInd': 0,
   'spVal': 5.0},
  'spInd': 0,
  'spVal': 7.0},
 'spInd': 0,
 'spVal': 10.0}

In [116]:
yHat = createForeCast(myTree, testMat[:,0])
yHat

matrix([[122.90893027],
        [157.04840788],
        [122.90893027],
        [141.06067981],
        [ 50.94683665],
        [ 69.02117758],
        [ 50.94683665],
        [157.04840788],
        [122.90893027],
        [168.34161287],
        [ 50.94683665],
        [ 50.94683665],
        [122.90893027],
        [ 50.94683665],
        [ 69.02117758],
        [ 69.02117758],
        [168.34161287],
        [157.04840788],
        [ 94.70665781],
        [157.04840788],
        [ 50.94683665],
        [141.06067981],
        [ 50.94683665],
        [ 94.70665781],
        [ 69.02117758],
        [122.90893027],
        [122.90893027],
        [ 50.94683665],
        [ 50.94683665],
        [141.06067981],
        [157.04840788],
        [ 94.70665781],
        [157.04840788],
        [122.90893027],
        [ 50.94683665],
        [157.04840788],
        [157.04840788],
        [168.34161287],
        [141.06067981],
        [141.06067981],
        [157.04840788],
        [157.048

In [117]:
corrcoef(yHat, testMat[:,1],rowvar=0)[0,1]

0.9640852318222145

In [120]:
myTree = createTree(trainMat, modelLeaf, modelErr,(1,20))
myTree

{'left': {'left': {'left': {'left': matrix([[47.58621512],
            [ 5.51066299]]), 'right': matrix([[37.54851927],
            [ 6.23298637]]), 'spInd': 0, 'spVal': 20.0},
   'right': matrix([[43.41251481],
           [ 6.37966738]]),
   'spInd': 0,
   'spVal': 16.0},
  'right': {'left': matrix([[-2.87684083],
           [10.20804482]]), 'right': {'left': matrix([[-11.84548851],
            [ 12.12382261]]), 'right': matrix([[-17.21714265],
            [ 13.72153115]]), 'spInd': 0, 'spVal': 6.0}, 'spInd': 0, 'spVal': 9.0},
  'spInd': 0,
  'spVal': 12.0},
 'right': matrix([[ 68.87014372],
         [-11.78556471]]),
 'spInd': 0,
 'spVal': 4.0}

In [121]:
yHat = createForeCast(myTree, testMat[:,0], modelTreeEval)
yHat

matrix([[119.61969695],
        [155.97526025],
        [119.61969695],
        [139.1075255 ],
        [ 45.2990143 ],
        [ 65.11204425],
        [ 33.5134496 ],
        [162.20824662],
        [109.41165214],
        [163.31013796],
        [ 33.5134496 ],
        [ 33.5134496 ],
        [126.34819074],
        [ 57.08457901],
        [ 73.02126975],
        [ 73.02126975],
        [174.33146395],
        [149.74227389],
        [ 97.26891497],
        [149.74227389],
        [ 68.87014372],
        [139.1075255 ],
        [ 33.5134496 ],
        [ 85.14509236],
        [ 73.02126975],
        [109.41165214],
        [126.34819074],
        [ 51.3905131 ],
        [ 33.5134496 ],
        [139.1075255 ],
        [155.97526025],
        [ 97.26891497],
        [162.20824662],
        [109.41165214],
        [ 21.72788489],
        [162.20824662],
        [149.74227389],
        [168.82080096],
        [145.48719288],
        [143.50928752],
        [155.97526025],
        [155.975

In [122]:
corrcoef(yHat, testMat[:,1],rowvar=0)[0,1]

0.9760412191380623

In [126]:
ws,X,Y = linearSolve(trainMat)

In [127]:
ws

matrix([[37.58916794],
        [ 6.18978355]])

In [128]:
X

matrix([[ 1.,  3.],
        [ 1., 23.],
        [ 1.,  0.],
        [ 1.,  6.],
        [ 1., 15.],
        [ 1., 17.],
        [ 1., 12.],
        [ 1.,  8.],
        [ 1.,  9.],
        [ 1.,  7.],
        [ 1.,  8.],
        [ 1.,  1.],
        [ 1., 16.],
        [ 1.,  9.],
        [ 1., 14.],
        [ 1., 15.],
        [ 1., 17.],
        [ 1., 19.],
        [ 1., 21.],
        [ 1., 21.],
        [ 1.,  3.],
        [ 1.,  6.],
        [ 1.,  4.],
        [ 1.,  5.],
        [ 1.,  0.],
        [ 1.,  5.],
        [ 1.,  1.],
        [ 1., 14.],
        [ 1., 21.],
        [ 1.,  5.],
        [ 1.,  5.],
        [ 1.,  9.],
        [ 1., 22.],
        [ 1., 17.],
        [ 1.,  9.],
        [ 1.,  1.],
        [ 1.,  9.],
        [ 1., 16.],
        [ 1., 18.],
        [ 1.,  3.],
        [ 1., 16.],
        [ 1.,  2.],
        [ 1.,  6.],
        [ 1., 20.],
        [ 1., 12.],
        [ 1.,  6.],
        [ 1., 16.],
        [ 1., 15.],
        [ 1., 19.],
        [ 1., 17.],


In [129]:
Y

matrix([[ 46.852122],
        [178.676107],
        [ 86.154024],
        [ 68.707614],
        [139.737693],
        [141.988903],
        [ 94.477135],
        [ 86.083788],
        [ 97.265824],
        [ 80.400027],
        [ 83.414554],
        [ 52.525471],
        [127.060008],
        [101.639269],
        [146.41268 ],
        [144.157101],
        [152.69991 ],
        [136.669023],
        [166.971736],
        [165.467251],
        [ 38.455193],
        [ 75.557721],
        [ 22.171763],
        [ 50.321915],
        [ 74.412428],
        [ 42.052392],
        [ 42.489057],
        [139.185416],
        [140.713725],
        [ 63.222944],
        [ 56.294626],
        [ 91.674826],
        [173.497655],
        [152.692482],
        [113.920633],
        [ 51.552411],
        [100.075315],
        [137.803868],
        [135.925777],
        [ 45.550762],
        [149.933224],
        [ 27.914173],
        [ 62.103546],
        [173.942381],
        [119.200505],
        [ 

In [130]:
# Baseline: overwrite yHat in place with the prediction of the single linear
# model (ws from linearSolve on trainMat) for each row of testMat.
for rowIdx in range(shape(testMat)[0]):
    intercept = ws[0,0]
    slope = ws[1,0]
    yHat[rowIdx] = testMat[rowIdx,0]*slope+intercept

In [131]:
corrcoef(yHat, testMat[:,1],rowvar=0)[0,1]

0.9434684235674762

In [133]:
# Minimal Tkinter smoke test: one window containing a single text label.
root = Tk()
myLabel = Label(root, text="Hello World")
myLabel.grid()      # place the label using the grid geometry manager
root.mainloop()     # hand control to the Tk event loop

In [145]:
def reDraw(tolS,tolN):
    """Rebuild the tree with the given tolerances and redraw the figure.

    Uses state stored as attributes on the function itself: ``reDraw.f``
    (figure), ``reDraw.canvas``, ``reDraw.rawDat`` (training data) and
    ``reDraw.testDat`` (x values to predict); sets ``reDraw.a`` (the axes).
    When the "Model Tree" checkbox variable ``chkBtnVar`` is set a model
    tree is built, otherwise a regression tree.

    Args:
        tolS: minimum error reduction required for a split.
        tolN: minimum number of rows on each side of a split.
    """
    reDraw.f.clf()        # clear the figure
    reDraw.a = reDraw.f.add_subplot(111)
    if chkBtnVar.get():
        if tolN < 2: tolN = 2
        # modelLeaf / modelErr / modelTreeEval are defined in this notebook;
        # no `regTrees` module is imported here, so they are called directly.
        myTree = createTree(reDraw.rawDat, modelLeaf, modelErr, (tolS,tolN))
        yHat = createForeCast(myTree, reDraw.testDat, modelTreeEval)
    else:
        myTree = createTree(reDraw.rawDat, ops=(tolS,tolN))
        yHat = createForeCast(myTree, reDraw.testDat)
    reDraw.a.scatter(reDraw.rawDat[:,0].flatten().A[0], reDraw.rawDat[:,1].flatten().A[0], s=5) #use scatter for data set
    reDraw.a.plot(reDraw.testDat, yHat, linewidth=2.0) #use plot for yHat
    # draw() is used because FigureCanvasTkAgg.show() was deprecated and later
    # removed from Matplotlib.
    reDraw.canvas.draw()
    
def getInputs():
    """Read tolN and tolS from the two Entry widgets.

    If a field does not parse, a message is printed, the default (10 for
    tolN, 1.0 for tolS) is used and the Entry is reset to that default.

    Returns:
        Tuple ``(tolN, tolS)`` - note the order: tolN first.
    """
    # Only a failed number conversion is expected here; anything else
    # (e.g. a widget error) should surface instead of being swallowed.
    try:
        tolN = int(tolNentry.get())
    except ValueError:
        tolN = 10
        print("enter Integer for tolN")
        tolNentry.delete(0, END)
        tolNentry.insert(0,'10')
    try:
        tolS = float(tolSentry.get())
    except ValueError:
        tolS = 1.0
        print("enter Float for tolS")
        tolSentry.delete(0, END)
        tolSentry.insert(0,'1.0')
    return tolN,tolS

def drawNewTree():
    """Button callback: read the Entry values and redraw the tree fit."""
    inputs = getInputs()    # (tolN, tolS) from the Entry boxes
    minRows = inputs[0]
    minGain = inputs[1]
    reDraw(minGain, minRows)
    
# Build the Tk window: matplotlib canvas on row 0, controls on rows 1-3.
root=Tk()

reDraw.f = Figure(figsize=(5,4), dpi=100) #create canvas
reDraw.canvas = FigureCanvasTkAgg(reDraw.f, master=root)
# draw() is used because FigureCanvasTkAgg.show() was deprecated and later
# removed from Matplotlib.
reDraw.canvas.draw()
reDraw.canvas.get_tk_widget().grid(row=0, columnspan=3)

Label(root, text="tolN").grid(row=1, column=0)
tolNentry = Entry(root)
tolNentry.grid(row=1, column=1)
tolNentry.insert(0,'10')
Label(root, text="tolS").grid(row=2, column=0)
tolSentry = Entry(root)
tolSentry.grid(row=2, column=1)
tolSentry.insert(0,'1.0')
Button(root, text="ReDraw", command=drawNewTree).grid(row=1, column=2, rowspan=3)
chkBtnVar = IntVar()
chkBtn = Checkbutton(root, text="Model Tree", variable = chkBtnVar)
chkBtn.grid(row=3, column=0, columnspan=2)

reDraw.rawDat = mat(loadDataSet('sine.txt'))
# .min()/.max() return plain scalars for arange; the builtin min/max would
# iterate the matrix column row by row and hand back a 1x1 matrix.
reDraw.testDat = arange(reDraw.rawDat[:,0].min(),reDraw.rawDat[:,0].max(),0.01)
reDraw(1.0, 10)

root.mainloop()

  del sys.path[0]
