In [1]:
from xclib.data import data_utils
import pandas as pd
import sys
import numpy as np
import math
import statistics 
import time
import pickle
from tqdm import tqdm
#Loading the data first
trainX = data_utils.read_sparse_file('train_x.txt').toarray()
trainY = pd.read_csv('train_y.txt',header=None).to_numpy()
testX = data_utils.read_sparse_file('test_x.txt').toarray()
testY = pd.read_csv('test_y.txt',header=None).to_numpy()
validX = data_utils.read_sparse_file('valid_x.txt').toarray()
validY = pd.read_csv('valid_y.txt',header=None).to_numpy()



In [2]:
print(trainX.shape)
print(testX.shape)
print(validX.shape)

(64713, 482)
(21571, 482)
(21572, 482)


# Tree with on the Go Accuracy

In [3]:
class Node:
    def __init__(self):
        self.nodeCount = 0
        self.featureIndex = None # These all have been initialized to None 
        self.isLeaf = True
        self.threshold = None #These are default values assigned which would change later.
        self.lChild = None
        self.rChild = None
        self.yLabel = None
        self.trainInd = None #Storing indexes of the data point coming to this node.
        self.testInd = None
        self.valiInd = None

In [4]:
def findEntropy(yPara):
    if yPara.size == 0:
        return 0
    
    y0, y1 = yPara[yPara==0], yPara[yPara==1]
    lP, rP = y1.size/yPara.size, y0.size/yPara.size
    
    if lP == 0 or rP == 0:
        return 0
    else:
        return -(lP * math.log2(lP)) - (rP * math.log2(rP)) 

In [5]:
def info_gain( index,thres, x, y):
    
    entropy = findEntropy(y)
    
    if entropy == 0:
        return -1
    else:
        
        lData = x[x[:,index] <= thres]
        rData = x[x[:,index] > thres]
        
        ly = y[ x[:,index] <= thres]
        ry = y[ x[:,index] > thres]
        
        
        #Returning -1 when data is either left skewed or right skewed
        
        if ly.size==0 or ry.size==0:
            return -1
        
        leftEntropy, rightEntropy = findEntropy(ly), findEntropy(ry)


        leftProba, rightProba = (ly.size/y.size), (ry.size/ y.size)
        condEntropy = leftProba * leftEntropy + rightProba * rightEntropy
        infoGain = entropy - condEntropy
        return infoGain

In [6]:
def findMedians(xPara):
    if xPara.shape[0]%2 == 0:
        dataToAppend = np.zeros(xPara.shape[1]).reshape(1,-1)
        xPara = np.append(xPara,dataToAppend,axis=0)
        med = np.median(xPara,axis =0)
        return med
    else:
        med = np.median(xPara,axis=0)
        return med

#We already have trainX, trainY, testX, testY, validX, validY
#This function will handle everything through indexes only.
nodeC = 0 #It is global to handle node number of the nodes of the tree

trainPredict, testPredict, validPredict = np.copy(trainY), np.copy(testY), np.copy(validY)
trainAccuracyDict, testAccuracyDict, validAccuracyDict =  {},{},{}

def decisionTreeWithGo(nodePara,trainIndex,testIndex,valiIndex):
    global nodeC,trainPredict,testPredict,validPredict
    global trainAccuracyDict, testAccuracyDict, validAccuracyDict
    
    trainXPara = trainX[trainIndex]
    trainYPara = trainY[trainIndex]
    
    medians = findMedians(trainXPara.copy())
    
    indexOfMedians = np.array(list(range(medians.size)))
    
    mutualInfo,index,thresholdPara, = -sys.maxsize, -1, -1
    
    for(indexP,medP) in tqdm(zip(indexOfMedians,medians)):
        
        infoGain = info_gain(indexP, medP, trainXPara, trainYPara)
        
        if infoGain != -1 and infoGain > mutualInfo:
            mutualInfo = infoGain
            index = indexP
            thresholdPara = medP
    
    #Now we'll update trainY, testY and validY
    
    nodePara.trainInd = trainIndex
    nodePara.testInd = testIndex
    nodePara.valiInd = valiIndex
    
    nodePara.yLabel = np.bincount(trainYPara.flatten()).argmax()
    nodeC += 1
    nodePara.nodeCount = nodeC
    
    if testIndex.size != 0:
        testPredict[testIndex] = nodePara.yLabel
        testAccuracy = np.sum(testPredict==testY)/testY.size * 100
        testAccuracyDict[nodePara.nodeCount] = testAccuracy
    else:
        testAccuracyDict[nodePara.nodeCount] = testAccuracyDict[nodePara.nodeCount - 1]
        
    if valiIndex.size != 0:
        validPredict[valiIndex] = nodePara.yLabel
        validAccuracy = np.sum(validPredict==validY)/validY.size * 100
        validAccuracyDict[nodePara.nodeCount] = validAccuracy
    else:
        validAccuracyDict[nodePara.nodeCount] = validAccuracyDict[nodePara.nodeCount - 1]
        
    # Now for the train one, if it is leaf it won't enter the else part and the this function call execution
    # will be over.
    
#     print("printing trainIndex here ")
#     print(trainIndex)
#     print("nodePara.yLabel is ",nodePara.yLabel)
    trainPredict[trainIndex]=nodePara.yLabel
    trainAccuracy = np.sum(trainPredict==trainY)/trainY.size * 100
    trainAccuracyDict[nodePara.nodeCount] = trainAccuracy
#     print("printing accuracy for debugging")
#     print(trainAccuracyDict[nodePara.nodeCount])
    
    if index == -1:
        z = 2 # Dummy basically doing nothing  
    else:
        nodePara.threshold = thresholdPara
        nodePara.featureIndex = index
        nodePara.isLeaf = False
        
        #Now we'll calculate indexes for left and right subTree of the given Node
        
        left, right = Node(), Node()       
                
        trainLeftIndex = np.intersect1d((np.array(trainIndex),) , np.where(trainX[:,index] <= thresholdPara))
        trainRightIndex = np.intersect1d((np.array(trainIndex),) , np.where(trainX[:,index] > thresholdPara))
        
        testLeftIndex = np.intersect1d((np.array(testIndex),) , np.where(testX[:,index] <= thresholdPara))
        testRightIndex = np.intersect1d((np.array(testIndex),) , np.where(testX[:,index] > thresholdPara))
        
        validLeftIndex = np.intersect1d((np.array(valiIndex),) , np.where(validX[:,index] <= thresholdPara))
        validRightIndex = np.intersect1d((np.array(valiIndex),) , np.where(validX[:,index] > thresholdPara))
        
        yLeft = trainY[trainLeftIndex]
        yRight = trainY[trainRightIndex]
        
        lEntropy = findEntropy(yLeft)
        rEntropy = findEntropy(yRight)
        
        left.trainInd,left.testInd, left.valiInd = trainLeftIndex, testLeftIndex, validLeftIndex
        right.trainInd, right.testInd, right.valiInd = trainRightIndex, testRightIndex, validRightIndex
        
        nodePara.lChild, nodePara.rChild  = left, right
        
        if lEntropy==0 and rEntropy==0:
            nodePara.lChild.yLabel = np.bincount(yLeft.flatten()).argmax()
            nodeC+=1
            nodePara.lChild.nodeCount = nodeC
            
            #For Train Accuracy
            #It has come here, it means data will surely be partitioned, it won't be skewed.
            
            trainPredict[trainLeftIndex] = nodePara.lChild.yLabel
            trainAccuracy = np.sum(trainPredict==trainY)/trainY.size * 100
            trainAccuracyDict[nodePara.lChild.nodeCount] = trainAccuracy
            
            #For Test Accuracy
            if testLeftIndex.size == 0:
                testAccuracyDict[nodePara.lChild.nodeCount] = testAccuracyDict[nodePara.lChild.nodeCount - 1]
            else:
                testPredict[testLeftIndex]=nodePara.lChild.yLabel
                testAccuracy = np.sum(testPredict==testY)/testY.size * 100
                testAccuracyDict[nodePara.lChild.nodeCount] = testAccuracy
            
            #For Validate Accuracy
            if validLeftIndex.size == 0:
                validAccuracyDict[nodePara.lChild.nodeCount] = validAccuracyDict[nodePara.lChild.nodeCount -1]
            else:
                validPredict[validLeftIndex]=nodePara.lChild.yLabel
                validAccuracy = np.sum(validPredict==validY)/validY.size * 100
                validAccuracyDict[nodePara.lChild.nodeCount] = validAccuracy
            
            #******************Now for the  right subTree part*************************************
            
            nodePara.rChild.yLabel = np.bincount(yRight.flatten()).argmax()
            nodeC+=1
            nodePara.rChild.nodeCount = nodeC
            
            #For Train Accuracy
            #It has come here, it means data will surely be partitioned, it won't be skewed.
            
            trainPredict[trainRightIndex] = nodePara.rChild.yLabel
            trainAccuracy = np.sum(trainPredict==trainY)/trainY.size * 100
            trainAccuracyDict[nodePara.rChild.nodeCount] = trainAccuracy
            
            #For Test Accuracy
            if testRightIndex.size == 0:
                testAccuracyDict[nodePara.rChild.nodeCount] = testAccuracyDict[nodePara.rChild.nodeCount - 1]
            else:
                testPredict[testRightIndex]=nodePara.rChild.yLabel
                testAccuracy = np.sum(testPredict==testY)/testY.size * 100
                testAccuracyDict[nodePara.rChild.nodeCount] = testAccuracy
            
            #For Validate Accuracy
            if validRightIndex.size == 0:
                validAccuracyDict[nodePara.rChild.nodeCount] = validAccuracyDict[nodePara.rChild.nodeCount -1]
            else:
                validPredict[validRightIndex]=nodePara.rChild.yLabel
                validAccuracy = np.sum(validPredict==validY)/validY.size * 100
                validAccuracyDict[nodePara.rChild.nodeCount] = validAccuracy
            
        elif lEntropy==0:
            nodePara.lChild.yLabel = np.bincount(yLeft.flatten()).argmax()
            nodeC+=1
            nodePara.lChild.nodeCount = nodeC
            
            #For Train Accuracy
            #It has come here, it means data will surely be partitioned, it won't be skewed.
            
            trainPredict[trainLeftIndex] = nodePara.lChild.yLabel
            trainAccuracy = np.sum(trainPredict==trainY)/trainY.size * 100
            trainAccuracyDict[nodePara.lChild.nodeCount] = trainAccuracy
            
            #For Test Accuracy
            if testLeftIndex.size == 0:
                testAccuracyDict[nodePara.lChild.nodeCount] = testAccuracyDict[nodePara.lChild.nodeCount - 1]
            else:
                testPredict[testLeftIndex]=nodePara.lChild.yLabel
                testAccuracy = np.sum(testPredict==testY)/testY.size * 100
                testAccuracyDict[nodePara.lChild.nodeCount] = testAccuracy
            
            #For Validate Accuracy
            if validLeftIndex.size == 0:
                validAccuracyDict[nodePara.lChild.nodeCount] = validAccuracyDict[nodePara.lChild.nodeCount -1]
            else:
                validPredict[validLeftIndex]=nodePara.lChild.yLabel
                validAccuracy = np.sum(validPredict==validY)/validY.size * 100
                validAccuracyDict[nodePara.lChild.nodeCount] = validAccuracy
            
            #***************Now calling the function recursively for right subtree part*************
            
            decisionTreeWithGo(right,trainRightIndex,testRightIndex,validRightIndex)
            
        elif rEntropy==0:
            nodePara.rChild.yLabel = np.bincount(yRight.flatten()).argmax()
            nodeC+=1
            nodePara.rChild.nodeCount = nodeC
            
            #For Train Accuracy
            #It has come here, it means data will surely be partitioned, it won't be skewed.
            
            trainPredict[trainRightIndex] = nodePara.rChild.yLabel
            trainAccuracy = np.sum(trainPredict==trainY)/trainY.size * 100
            trainAccuracyDict[nodePara.rChild.nodeCount] = trainAccuracy
            
            #For Test Accuracy
            if testRightIndex.size == 0:
                testAccuracyDict[nodePara.rChild.nodeCount] = testAccuracyDict[nodePara.rChild.nodeCount - 1]
            else:
                testPredict[testRightIndex]=nodePara.rChild.yLabel
                testAccuracy = np.sum(testPredict==testY)/testY.size * 100
                testAccuracyDict[nodePara.rChild.nodeCount] = testAccuracy
            
            #For Validate Accuracy
            if validRightIndex.size == 0:
                validAccuracyDict[nodePara.rChild.nodeCount] = validAccuracyDict[nodePara.rChild.nodeCount -1]
            else:
                validPredict[validRightIndex]=nodePara.rChild.yLabel
                validAccuracy = np.sum(validPredict==validY)/validY.size * 100
                validAccuracyDict[nodePara.rChild.nodeCount] = validAccuracy
            
            #********Now calling the function recursively for left subTree
            decisionTreeWithGo(left,trainLeftIndex,testLeftIndex,validLeftIndex)
        else:
            #Now here we would call the function recursively for both left and right subtree
            decisionTreeWithGo(left,trainLeftIndex,testLeftIndex,validLeftIndex)
            decisionTreeWithGo(right,trainRightIndex,testRightIndex,validRightIndex)


In [7]:
trainIndex = np.array(list(range(trainX.shape[0])))
testIndex = np.array(list(range(testX.shape[0])))
validIndex = np.array(list(range(validX.shape[0])))

root = Node()
start = time.time()
decisionTreeWithGo(root,trainIndex,testIndex,validIndex)
print("Time it took to build the decision tree is ",time.time()-start)

482it [00:28, 17.12it/s]
482it [00:21, 22.04it/s]
482it [00:18, 25.73it/s]
482it [00:16, 29.09it/s]
482it [00:14, 32.54it/s]
482it [00:06, 79.36it/s]
482it [00:04, 97.65it/s] 
482it [00:04, 109.70it/s]
482it [00:04, 110.67it/s]
482it [00:04, 114.55it/s]
482it [00:03, 130.47it/s]
482it [00:03, 134.82it/s]
482it [00:02, 187.78it/s]
482it [00:01, 299.91it/s]
482it [00:01, 299.43it/s]
482it [00:01, 288.08it/s]
482it [00:01, 329.66it/s]
482it [00:01, 335.83it/s]
482it [00:01, 334.30it/s]
482it [00:01, 333.24it/s]
482it [00:01, 322.84it/s]
482it [00:01, 329.26it/s]
482it [00:01, 333.19it/s]
482it [00:01, 323.09it/s]
482it [00:01, 331.18it/s]
482it [00:01, 334.28it/s]
482it [00:01, 324.22it/s]
482it [00:00, 22776.64it/s]
482it [00:00, 21890.28it/s]
482it [00:00, 23925.19it/s]
482it [00:00, 25057.38it/s]
482it [00:00, 21406.99it/s]
482it [00:00, 25849.72it/s]
482it [00:00, 26083.17it/s]
482it [00:00, 20667.51it/s]
482it [00:00, 19486.39it/s]
482it [00:00, 22235.78it/s]
482it [00:00, 7254.86it/

482it [00:00, 14856.48it/s]
482it [00:00, 12126.70it/s]
482it [00:00, 20024.11it/s]
482it [00:00, 24111.23it/s]
482it [00:00, 20366.85it/s]
482it [00:00, 20495.07it/s]
482it [00:00, 28571.39it/s]
482it [00:00, 27967.83it/s]
482it [00:00, 20442.64it/s]
482it [00:00, 20474.94it/s]
482it [00:00, 23747.85it/s]
482it [00:00, 26645.59it/s]
482it [00:00, 19270.74it/s]
482it [00:00, 19571.28it/s]
482it [00:00, 17761.54it/s]
482it [00:00, 22428.19it/s]
482it [00:00, 30956.64it/s]
482it [00:00, 20809.83it/s]
482it [00:00, 20891.55it/s]
482it [00:00, 10145.86it/s]
482it [00:00, 11948.31it/s]
482it [00:00, 12504.05it/s]
482it [00:00, 11997.52it/s]
482it [00:00, 13830.56it/s]
482it [00:00, 13613.10it/s]
482it [00:00, 14690.23it/s]
482it [00:00, 12822.88it/s]
482it [00:00, 13390.30it/s]
482it [00:00, 13676.37it/s]
482it [00:00, 15236.73it/s]
482it [00:00, 13266.15it/s]
482it [00:00, 13420.35it/s]
482it [00:00, 13338.80it/s]
482it [00:00, 14268.96it/s]
482it [00:00, 14795.16it/s]
482it [00:00, 17066.

482it [00:00, 20616.30it/s]
482it [00:00, 26359.67it/s]
482it [00:00, 26680.76it/s]
482it [00:00, 20561.15it/s]
482it [00:00, 24347.31it/s]
482it [00:00, 20452.98it/s]
482it [00:00, 26024.08it/s]
482it [00:00, 24667.86it/s]
482it [00:00, 22297.33it/s]
482it [00:00, 20653.15it/s]
482it [00:00, 25474.16it/s]
482it [00:00, 22427.69it/s]
482it [00:00, 23722.21it/s]
482it [00:00, 20662.65it/s]
482it [00:00, 24245.11it/s]
482it [00:00, 1848.45it/s]
482it [00:00, 1886.95it/s]
482it [00:00, 5641.80it/s]
482it [00:00, 6045.36it/s]
482it [00:00, 10053.13it/s]
482it [00:00, 9333.20it/s]
482it [00:00, 10015.73it/s]
482it [00:00, 13999.21it/s]
482it [00:00, 14221.78it/s]
482it [00:00, 18900.50it/s]
482it [00:00, 19413.23it/s]
482it [00:00, 18220.82it/s]
482it [00:00, 19843.10it/s]
482it [00:00, 20111.96it/s]
482it [00:00, 19039.34it/s]
482it [00:00, 17437.83it/s]
482it [00:00, 21354.98it/s]
482it [00:00, 19550.08it/s]
482it [00:00, 25178.15it/s]
482it [00:00, 19906.01it/s]
482it [00:00, 19809.46it/

482it [00:00, 21052.54it/s]
482it [00:00, 21704.38it/s]
482it [00:00, 23185.71it/s]
482it [00:00, 18961.84it/s]
482it [00:00, 4086.48it/s]
482it [00:00, 3984.31it/s]
482it [00:00, 7760.34it/s]
482it [00:00, 7807.42it/s]
482it [00:00, 7980.23it/s]
482it [00:00, 12140.90it/s]
482it [00:00, 12964.14it/s]
482it [00:00, 17103.82it/s]
482it [00:00, 19788.71it/s]
482it [00:00, 20902.14it/s]
482it [00:00, 21396.57it/s]
482it [00:00, 26294.87it/s]
482it [00:00, 21025.39it/s]
482it [00:00, 22641.70it/s]
482it [00:00, 21569.61it/s]
482it [00:00, 25989.62it/s]
482it [00:00, 18610.46it/s]
482it [00:00, 22133.05it/s]
482it [00:00, 26936.00it/s]
482it [00:00, 23130.00it/s]
482it [00:00, 22455.59it/s]
482it [00:00, 21303.67it/s]
482it [00:00, 20405.50it/s]
482it [00:00, 24876.08it/s]
482it [00:00, 23436.48it/s]
482it [00:00, 23478.67it/s]
482it [00:00, 23367.14it/s]
482it [00:00, 25391.29it/s]
482it [00:00, 21304.57it/s]
482it [00:00, 15752.82it/s]
482it [00:00, 22650.32it/s]
482it [00:00, 18776.92it/

482it [00:00, 18757.75it/s]
482it [00:00, 24720.65it/s]
482it [00:00, 22225.75it/s]
482it [00:00, 23076.93it/s]
482it [00:00, 21875.83it/s]
482it [00:00, 18066.62it/s]
482it [00:00, 22123.84it/s]
482it [00:00, 16771.09it/s]
482it [00:00, 14447.30it/s]
482it [00:00, 22532.68it/s]
482it [00:00, 23389.85it/s]
482it [00:00, 23305.72it/s]
482it [00:00, 13055.15it/s]
482it [00:00, 16533.94it/s]
482it [00:00, 19872.75it/s]
482it [00:00, 26984.18it/s]
482it [00:00, 25119.03it/s]
482it [00:00, 21336.50it/s]
482it [00:00, 22568.40it/s]
482it [00:00, 19225.84it/s]
482it [00:00, 23083.78it/s]
482it [00:00, 23668.61it/s]
482it [00:00, 6833.91it/s]
482it [00:00, 6896.93it/s]
482it [00:00, 8094.68it/s]
482it [00:00, 8053.44it/s]
482it [00:00, 8029.51it/s]
482it [00:00, 8616.31it/s]
482it [00:00, 13663.61it/s]
482it [00:00, 16140.70it/s]
482it [00:00, 12909.59it/s]
482it [00:00, 16068.21it/s]
482it [00:00, 18690.12it/s]
482it [00:00, 19488.83it/s]
482it [00:00, 19234.07it/s]
482it [00:00, 28711.38it/s

482it [00:00, 24635.40it/s]
482it [00:00, 23066.14it/s]
482it [00:00, 25422.90it/s]
482it [00:00, 22587.06it/s]
482it [00:00, 20409.62it/s]
482it [00:00, 24509.95it/s]
482it [00:00, 25037.52it/s]
482it [00:00, 22250.95it/s]
482it [00:00, 20458.36it/s]
482it [00:00, 23652.55it/s]
482it [00:00, 24811.67it/s]
482it [00:00, 19396.65it/s]
482it [00:00, 21264.23it/s]
482it [00:00, 23156.23it/s]
482it [00:00, 25670.83it/s]
482it [00:00, 21053.42it/s]
482it [00:00, 21623.59it/s]
482it [00:00, 20710.70it/s]
482it [00:01, 394.60it/s]
482it [00:00, 589.52it/s]
482it [00:00, 638.51it/s]
482it [00:00, 743.97it/s]
482it [00:00, 745.07it/s]
482it [00:00, 1309.87it/s]
482it [00:00, 1484.81it/s]
482it [00:00, 1761.56it/s]
482it [00:00, 3711.94it/s]
482it [00:00, 4055.00it/s]
482it [00:00, 5310.44it/s]
482it [00:00, 6053.42it/s]
482it [00:00, 6936.87it/s]
482it [00:00, 7689.36it/s]
482it [00:00, 7208.04it/s]
482it [00:00, 8110.98it/s]
482it [00:00, 8298.39it/s]
482it [00:00, 9199.50it/s]
482it [00:00, 9

482it [00:00, 24560.28it/s]
482it [00:00, 23209.66it/s]
482it [00:00, 19677.38it/s]
482it [00:00, 19280.85it/s]
482it [00:00, 22815.71it/s]
482it [00:00, 22632.32it/s]
482it [00:00, 23479.76it/s]
482it [00:00, 27337.39it/s]
482it [00:00, 18532.33it/s]
482it [00:00, 15568.43it/s]
482it [00:00, 17363.69it/s]
482it [00:00, 19802.28it/s]
482it [00:00, 25651.28it/s]
482it [00:00, 26295.21it/s]
482it [00:00, 24184.50it/s]
482it [00:00, 21303.67it/s]
482it [00:00, 14730.15it/s]
482it [00:00, 15578.75it/s]
482it [00:00, 19578.67it/s]
482it [00:00, 19130.87it/s]
482it [00:00, 25303.26it/s]
482it [00:00, 32317.00it/s]
482it [00:00, 18800.49it/s]
482it [00:00, 23513.35it/s]
482it [00:00, 22421.47it/s]
482it [00:00, 22065.41it/s]
482it [00:00, 22073.60it/s]
482it [00:00, 19445.90it/s]
482it [00:00, 26151.67it/s]
482it [00:00, 24948.84it/s]
482it [00:00, 22745.38it/s]
482it [00:00, 20126.18it/s]
482it [00:00, 20149.25it/s]
482it [00:00, 23540.46it/s]
482it [00:00, 21784.60it/s]
482it [00:00, 7587.1

482it [00:00, 21184.91it/s]
482it [00:00, 27613.70it/s]
482it [00:00, 27157.81it/s]
482it [00:00, 21890.51it/s]
482it [00:00, 23278.61it/s]
482it [00:00, 19422.18it/s]
482it [00:00, 20029.87it/s]
482it [00:00, 24199.84it/s]
482it [00:00, 24105.77it/s]
482it [00:00, 21573.52it/s]
482it [00:00, 19253.30it/s]
482it [00:00, 24801.01it/s]
482it [00:00, 23863.62it/s]
482it [00:00, 27349.59it/s]
482it [00:00, 22473.07it/s]
482it [00:00, 32359.93it/s]
482it [00:00, 21359.27it/s]
482it [00:00, 12844.14it/s]
482it [00:00, 15368.77it/s]
482it [00:00, 17638.19it/s]
482it [00:00, 20597.81it/s]
482it [00:00, 23562.68it/s]
482it [00:00, 19360.98it/s]
482it [00:00, 23105.42it/s]
482it [00:00, 23082.20it/s]
482it [00:00, 24073.62it/s]
482it [00:00, 20082.20it/s]
482it [00:00, 24877.62it/s]
482it [00:00, 27568.52it/s]
482it [00:00, 23758.18it/s]
482it [00:00, 20709.22it/s]
482it [00:00, 19218.71it/s]
482it [00:00, 20662.23it/s]
482it [00:00, 21410.39it/s]
482it [00:00, 22661.75it/s]
482it [00:00, 27802.

482it [00:00, 26347.98it/s]
482it [00:00, 24009.01it/s]
482it [00:00, 26641.03it/s]
482it [00:00, 18871.74it/s]
482it [00:00, 27254.47it/s]
482it [00:00, 27243.08it/s]
482it [00:00, 25782.79it/s]
482it [00:00, 21749.68it/s]
482it [00:00, 21634.70it/s]
482it [00:00, 27369.22it/s]
482it [00:00, 18839.04it/s]
482it [00:00, 24708.56it/s]
482it [00:00, 20666.03it/s]
482it [00:00, 28403.20it/s]
482it [00:00, 20941.76it/s]
482it [00:00, 19371.19it/s]
482it [00:00, 24860.18it/s]
482it [00:00, 20813.48it/s]
482it [00:00, 20637.55it/s]
482it [00:00, 11652.59it/s]
482it [00:00, 18666.14it/s]
482it [00:00, 21005.95it/s]
482it [00:00, 22066.37it/s]
482it [00:00, 28716.28it/s]
482it [00:00, 19862.59it/s]
482it [00:00, 23586.32it/s]
482it [00:00, 24261.12it/s]
482it [00:00, 22459.33it/s]
482it [00:00, 29608.73it/s]
482it [00:00, 23117.04it/s]
482it [00:00, 8260.72it/s]
482it [00:00, 9581.71it/s]
482it [00:00, 10388.77it/s]
482it [00:00, 11345.69it/s]
482it [00:00, 16746.78it/s]
482it [00:00, 17040.10

482it [00:00, 26788.95it/s]
482it [00:00, 24582.97it/s]
482it [00:00, 23166.84it/s]
482it [00:00, 22705.01it/s]
482it [00:00, 24132.24it/s]
482it [00:00, 26824.85it/s]
482it [00:00, 20326.31it/s]
482it [00:00, 25360.08it/s]
482it [00:00, 21644.43it/s]
482it [00:00, 24314.81it/s]
482it [00:00, 21330.42it/s]
482it [00:00, 21829.53it/s]
482it [00:00, 30280.15it/s]
482it [00:00, 19533.65it/s]
482it [00:00, 23901.43it/s]
482it [00:00, 22339.46it/s]
482it [00:00, 23102.51it/s]
482it [00:00, 14252.96it/s]
482it [00:00, 19032.89it/s]
482it [00:00, 26212.02it/s]
482it [00:00, 23311.09it/s]
482it [00:00, 24104.62it/s]
482it [00:00, 27493.16it/s]
482it [00:00, 29844.32it/s]
482it [00:00, 21513.83it/s]
482it [00:00, 22338.97it/s]
482it [00:00, 30209.57it/s]
482it [00:00, 22799.24it/s]
482it [00:00, 23427.79it/s]
482it [00:00, 20040.59it/s]
482it [00:00, 14894.13it/s]
482it [00:00, 10104.74it/s]
482it [00:00, 12387.44it/s]
482it [00:00, 15512.17it/s]
482it [00:00, 13132.74it/s]
482it [00:00, 16696.

482it [00:00, 28949.02it/s]
482it [00:00, 22892.44it/s]
482it [00:00, 16847.96it/s]
482it [00:00, 21498.73it/s]
482it [00:00, 23278.88it/s]
482it [00:00, 20143.42it/s]
482it [00:00, 22373.58it/s]
482it [00:00, 25560.15it/s]
482it [00:00, 20058.09it/s]
482it [00:00, 21524.37it/s]
482it [00:00, 22705.27it/s]
482it [00:00, 20159.29it/s]
482it [00:00, 19413.23it/s]
482it [00:00, 21763.26it/s]
482it [00:00, 23730.85it/s]
482it [00:00, 25014.59it/s]
482it [00:00, 23485.49it/s]
482it [00:00, 13866.70it/s]
482it [00:00, 14110.60it/s]
482it [00:00, 16724.61it/s]
482it [00:00, 14548.67it/s]
482it [00:00, 15185.68it/s]
482it [00:00, 17542.99it/s]
482it [00:00, 20319.57it/s]
482it [00:00, 21779.91it/s]
482it [00:00, 19919.74it/s]
482it [00:00, 23434.31it/s]
482it [00:00, 19427.22it/s]
482it [00:00, 22421.47it/s]
482it [00:00, 25112.47it/s]
482it [00:00, 27624.27it/s]
482it [00:00, 21190.46it/s]
482it [00:00, 29581.87it/s]
482it [00:00, 22662.76it/s]
482it [00:00, 21582.04it/s]
482it [00:00, 21264.

482it [00:00, 18076.31it/s]
482it [00:00, 22361.95it/s]
482it [00:00, 22505.34it/s]
482it [00:00, 26482.94it/s]
482it [00:00, 26184.86it/s]
482it [00:00, 22988.50it/s]
482it [00:00, 24582.38it/s]
482it [00:00, 22488.82it/s]
482it [00:00, 22406.31it/s]
482it [00:00, 20515.25it/s]
482it [00:00, 20634.39it/s]
482it [00:00, 20468.93it/s]
482it [00:00, 28497.69it/s]
482it [00:00, 28121.50it/s]
482it [00:00, 23719.71it/s]
482it [00:00, 25836.83it/s]
482it [00:00, 20375.68it/s]
482it [00:00, 25157.79it/s]
482it [00:00, 24652.22it/s]
482it [00:00, 22756.13it/s]
482it [00:00, 19162.97it/s]
482it [00:00, 20928.53it/s]
482it [00:00, 24908.27it/s]
482it [00:00, 18647.89it/s]
482it [00:00, 21259.09it/s]
482it [00:00, 23053.51it/s]
482it [00:00, 21307.94it/s]
482it [00:00, 22372.10it/s]
482it [00:00, 21928.03it/s]
482it [00:00, 31908.94it/s]
482it [00:00, 22560.34it/s]
482it [00:00, 18304.63it/s]
482it [00:00, 21211.14it/s]
482it [00:00, 21684.59it/s]
482it [00:00, 20508.17it/s]
482it [00:00, 24753.

482it [00:00, 22116.58it/s]
482it [00:00, 20416.01it/s]
482it [00:00, 21494.39it/s]
482it [00:00, 18828.16it/s]
482it [00:00, 23090.11it/s]
482it [00:00, 22669.63it/s]
482it [00:00, 3820.69it/s]
482it [00:00, 6582.13it/s]
482it [00:00, 10085.63it/s]
482it [00:00, 10923.55it/s]
482it [00:00, 16074.47it/s]
482it [00:00, 21548.00it/s]
482it [00:00, 27181.18it/s]
482it [00:00, 30266.55it/s]
482it [00:00, 20219.38it/s]
482it [00:00, 21430.13it/s]
482it [00:00, 20877.10it/s]
482it [00:00, 26248.10it/s]
482it [00:00, 22858.02it/s]
482it [00:00, 22843.81it/s]
482it [00:00, 23570.65it/s]
482it [00:00, 16972.43it/s]
482it [00:00, 19028.23it/s]
482it [00:00, 24940.84it/s]
482it [00:00, 24225.94it/s]
482it [00:00, 23637.62it/s]
482it [00:00, 24018.71it/s]
482it [00:00, 25009.64it/s]
482it [00:00, 20927.45it/s]
482it [00:00, 23970.29it/s]
482it [00:00, 19552.54it/s]
482it [00:00, 22135.95it/s]
482it [00:00, 20937.85it/s]
482it [00:00, 22710.37it/s]
482it [00:00, 19115.49it/s]
482it [00:00, 14351.92

482it [00:00, 21099.78it/s]
482it [00:00, 21460.39it/s]
482it [00:00, 23571.75it/s]
482it [00:00, 27729.60it/s]
482it [00:00, 26260.03it/s]
482it [00:00, 20975.22it/s]
482it [00:00, 22494.82it/s]
482it [00:00, 22888.81it/s]
482it [00:00, 25663.98it/s]
482it [00:00, 21964.48it/s]
482it [00:00, 18230.68it/s]
482it [00:00, 20592.57it/s]
482it [00:00, 27447.99it/s]
482it [00:00, 26396.84it/s]
482it [00:00, 28211.36it/s]
482it [00:00, 21525.97it/s]
482it [00:00, 22076.73it/s]
482it [00:00, 21188.90it/s]
482it [00:00, 26356.92it/s]
482it [00:00, 22921.77it/s]
482it [00:00, 20353.11it/s]
482it [00:00, 24344.38it/s]
482it [00:00, 27359.22it/s]
482it [00:00, 28370.51it/s]
482it [00:00, 25526.59it/s]
482it [00:00, 19696.56it/s]
482it [00:00, 27628.80it/s]
482it [00:00, 24146.08it/s]
482it [00:00, 22932.17it/s]
482it [00:00, 25279.85it/s]
482it [00:00, 23972.85it/s]
482it [00:00, 24839.71it/s]
482it [00:00, 25939.93it/s]
482it [00:00, 20280.84it/s]
482it [00:00, 21843.21it/s]
482it [00:00, 17762.

482it [00:00, 24232.33it/s]
482it [00:00, 21022.77it/s]
482it [00:00, 21998.18it/s]
482it [00:00, 21899.05it/s]
482it [00:00, 23299.81it/s]
482it [00:00, 22102.56it/s]
482it [00:00, 16147.79it/s]
482it [00:00, 19864.35it/s]
482it [00:00, 20355.78it/s]
482it [00:00, 23255.32it/s]
482it [00:00, 21688.78it/s]
482it [00:00, 23049.04it/s]
482it [00:00, 24203.32it/s]
482it [00:00, 24540.00it/s]
482it [00:00, 18804.34it/s]
482it [00:00, 24408.16it/s]
482it [00:00, 23289.61it/s]
482it [00:00, 21752.49it/s]
482it [00:00, 19954.15it/s]
482it [00:00, 20686.97it/s]
482it [00:00, 20709.85it/s]
482it [00:00, 21768.88it/s]
482it [00:00, 26569.95it/s]
482it [00:00, 25417.47it/s]
482it [00:00, 26325.00it/s]
482it [00:00, 21871.09it/s]
482it [00:00, 24710.68it/s]
482it [00:00, 20910.57it/s]
482it [00:00, 29766.55it/s]
482it [00:00, 23279.42it/s]
482it [00:00, 20011.82it/s]
482it [00:00, 19586.83it/s]
482it [00:00, 22902.30it/s]
482it [00:00, 24433.23it/s]
482it [00:00, 23620.77it/s]
482it [00:00, 22981.

482it [00:00, 19049.39it/s]
482it [00:00, 20075.81it/s]
482it [00:00, 25708.37it/s]
482it [00:00, 21075.37it/s]
482it [00:00, 28704.45it/s]
482it [00:00, 23827.62it/s]
482it [00:00, 22074.56it/s]
482it [00:00, 25964.25it/s]
482it [00:00, 19173.14it/s]
482it [00:00, 21621.74it/s]
482it [00:00, 25941.93it/s]
482it [00:00, 23509.52it/s]
482it [00:00, 24065.31it/s]
482it [00:00, 21839.43it/s]
482it [00:00, 19969.92it/s]
482it [00:00, 18019.92it/s]
482it [00:00, 25753.89it/s]
482it [00:00, 27364.40it/s]
482it [00:00, 20383.28it/s]
482it [00:00, 20191.51it/s]
482it [00:00, 19895.43it/s]
482it [00:00, 19292.07it/s]
482it [00:00, 23106.21it/s]
482it [00:00, 24740.01it/s]
482it [00:00, 26177.74it/s]
482it [00:00, 20328.56it/s]
482it [00:00, 22014.47it/s]
482it [00:00, 24114.97it/s]
482it [00:00, 22649.05it/s]
482it [00:00, 27140.67it/s]
482it [00:00, 19013.92it/s]
482it [00:00, 26221.20it/s]
482it [00:00, 24474.05it/s]
482it [00:00, 23465.59it/s]
482it [00:00, 20000.14it/s]
482it [00:00, 17825.

482it [00:00, 21886.01it/s]
482it [00:00, 27375.15it/s]
482it [00:00, 21169.82it/s]
482it [00:00, 24625.19it/s]
482it [00:00, 19737.52it/s]
482it [00:00, 21520.70it/s]
482it [00:00, 25574.38it/s]
482it [00:00, 22926.71it/s]
482it [00:00, 30519.69it/s]
482it [00:00, 21710.91it/s]
482it [00:00, 13965.56it/s]
482it [00:00, 15590.40it/s]
482it [00:00, 17197.23it/s]
482it [00:00, 19865.91it/s]
482it [00:00, 20329.99it/s]
482it [00:00, 24796.75it/s]
482it [00:00, 24053.57it/s]
482it [00:00, 23399.86it/s]
482it [00:00, 21759.28it/s]
482it [00:00, 21274.08it/s]
482it [00:00, 23879.97it/s]
482it [00:00, 18457.04it/s]
482it [00:00, 26769.44it/s]
482it [00:00, 19505.94it/s]
482it [00:00, 19834.14it/s]
482it [00:00, 28914.24it/s]
482it [00:00, 24654.02it/s]
482it [00:00, 30697.64it/s]
482it [00:00, 27931.89it/s]
482it [00:00, 29933.14it/s]
482it [00:00, 24354.64it/s]
482it [00:00, 18929.35it/s]
482it [00:00, 21265.58it/s]
482it [00:00, 19711.34it/s]
482it [00:00, 25698.24it/s]
482it [00:00, 30726.

482it [00:00, 27019.52it/s]
482it [00:00, 25970.26it/s]
482it [00:00, 24553.71it/s]
482it [00:00, 10058.63it/s]
482it [00:00, 12047.00it/s]
482it [00:00, 13562.87it/s]
482it [00:00, 19789.29it/s]
482it [00:00, 19793.56it/s]
482it [00:00, 17342.54it/s]
482it [00:00, 23747.57it/s]
482it [00:00, 30352.44it/s]
482it [00:00, 18934.14it/s]
482it [00:00, 20577.26it/s]
482it [00:00, 23065.87it/s]
482it [00:00, 30455.32it/s]
482it [00:00, 26497.52it/s]
482it [00:00, 21761.62it/s]
482it [00:00, 25031.94it/s]
482it [00:00, 22454.35it/s]
482it [00:00, 23689.14it/s]
482it [00:00, 21444.23it/s]
482it [00:00, 23040.64it/s]
482it [00:00, 22618.14it/s]
482it [00:00, 21501.93it/s]
482it [00:00, 27943.09it/s]
482it [00:00, 21017.74it/s]
482it [00:00, 21667.16it/s]
482it [00:00, 24476.42it/s]
482it [00:00, 24344.38it/s]
482it [00:00, 24379.61it/s]
482it [00:00, 24850.70it/s]
482it [00:00, 24261.99it/s]
482it [00:00, 25567.91it/s]
482it [00:00, 25638.60it/s]
482it [00:00, 20692.90it/s]
482it [00:00, 19344.

482it [00:00, 25541.10it/s]
482it [00:00, 23958.93it/s]
482it [00:00, 18854.32it/s]
482it [00:00, 21883.40it/s]
482it [00:00, 18917.84it/s]
482it [00:00, 20624.29it/s]
482it [00:00, 25517.56it/s]
482it [00:00, 20587.32it/s]
482it [00:00, 22654.64it/s]
482it [00:00, 24038.13it/s]
482it [00:00, 8215.04it/s]
482it [00:00, 12000.23it/s]
482it [00:00, 14840.12it/s]
482it [00:00, 16276.89it/s]
482it [00:00, 20409.62it/s]
482it [00:00, 21324.12it/s]
482it [00:00, 22594.63it/s]
482it [00:00, 26543.09it/s]
482it [00:00, 22701.45it/s]
482it [00:00, 19966.17it/s]
482it [00:00, 27271.38it/s]
482it [00:00, 18049.68it/s]
482it [00:00, 19209.94it/s]
482it [00:00, 22650.83it/s]
482it [00:00, 18427.09it/s]
482it [00:00, 22597.41it/s]
482it [00:00, 25545.29it/s]
482it [00:00, 24160.22it/s]
482it [00:00, 24105.48it/s]
482it [00:00, 20717.92it/s]
482it [00:00, 21392.72it/s]
482it [00:00, 22120.45it/s]
482it [00:00, 24063.59it/s]
482it [00:00, 19466.69it/s]
482it [00:00, 28013.17it/s]
482it [00:00, 34631.0

482it [00:00, 29944.67it/s]
482it [00:00, 25643.15it/s]
482it [00:00, 19426.10it/s]
482it [00:00, 20644.08it/s]
482it [00:00, 19135.76it/s]
482it [00:00, 20022.13it/s]
482it [00:00, 24206.50it/s]
482it [00:00, 21313.10it/s]
482it [00:00, 22577.47it/s]
482it [00:00, 22238.47it/s]
482it [00:00, 21570.30it/s]
482it [00:00, 22772.79it/s]
482it [00:00, 19888.58it/s]
482it [00:00, 25346.72it/s]
482it [00:00, 24189.41it/s]
482it [00:00, 19252.75it/s]
482it [00:00, 26348.67it/s]
482it [00:00, 24083.08it/s]
482it [00:00, 27911.45it/s]
482it [00:00, 22390.68it/s]
482it [00:00, 19894.85it/s]
482it [00:00, 21918.04it/s]
482it [00:00, 26923.44it/s]
482it [00:00, 28354.20it/s]
482it [00:00, 18256.52it/s]
482it [00:00, 24014.43it/s]
482it [00:00, 24839.71it/s]
482it [00:00, 22845.88it/s]
482it [00:00, 26448.29it/s]
482it [00:00, 22441.13it/s]
482it [00:00, 19179.33it/s]
482it [00:00, 23344.47it/s]
482it [00:00, 22830.91it/s]
482it [00:00, 32092.81it/s]
482it [00:00, 30780.84it/s]
482it [00:00, 19094.

482it [00:00, 22492.32it/s]
482it [00:00, 22032.46it/s]
482it [00:00, 23987.36it/s]
482it [00:00, 19402.61it/s]
482it [00:00, 20749.82it/s]
482it [00:00, 26035.81it/s]
482it [00:00, 25355.94it/s]
482it [00:00, 24666.96it/s]
482it [00:00, 23155.43it/s]
482it [00:00, 21735.42it/s]
482it [00:00, 23071.66it/s]
482it [00:00, 25505.01it/s]
482it [00:00, 28046.59it/s]
482it [00:00, 21493.02it/s]
482it [00:00, 25024.50it/s]
482it [00:00, 22647.78it/s]
482it [00:00, 27210.81it/s]
482it [00:00, 21456.52it/s]
482it [00:00, 21602.34it/s]
482it [00:00, 18897.50it/s]
482it [00:00, 20901.05it/s]
482it [00:00, 16368.74it/s]
482it [00:00, 19003.91it/s]
482it [00:00, 22357.75it/s]
482it [00:00, 22539.21it/s]
482it [00:00, 24097.44it/s]
482it [00:00, 26971.58it/s]
482it [00:00, 32325.27it/s]
482it [00:00, 21762.32it/s]
482it [00:00, 22465.82it/s]
482it [00:00, 21309.29it/s]
482it [00:00, 25911.67it/s]
482it [00:00, 24447.71it/s]
482it [00:00, 23217.66it/s]
482it [00:00, 20906.03it/s]
482it [00:00, 28555.

482it [00:00, 18243.51it/s]
482it [00:00, 19070.95it/s]
482it [00:00, 25356.58it/s]
482it [00:00, 18890.26it/s]
482it [00:00, 20693.74it/s]
482it [00:00, 18802.42it/s]
482it [00:00, 818.26it/s]
482it [00:00, 928.40it/s]
482it [00:00, 2037.53it/s]
482it [00:00, 4711.99it/s]
482it [00:00, 7590.19it/s]
482it [00:00, 8938.22it/s]
482it [00:00, 8489.53it/s]
482it [00:00, 8644.28it/s]
482it [00:00, 11381.78it/s]
482it [00:00, 16240.54it/s]
482it [00:00, 18638.26it/s]
482it [00:00, 22226.00it/s]
482it [00:00, 20613.35it/s]
482it [00:00, 22715.22it/s]
482it [00:00, 26567.86it/s]
482it [00:00, 24054.14it/s]
482it [00:00, 20949.57it/s]
482it [00:00, 15413.53it/s]
482it [00:00, 20868.69it/s]
482it [00:00, 20769.00it/s]
482it [00:00, 19176.78it/s]
482it [00:00, 11377.55it/s]
482it [00:00, 15145.98it/s]
482it [00:00, 18594.37it/s]
482it [00:00, 20931.13it/s]
482it [00:00, 23068.24it/s]
482it [00:00, 28745.27it/s]
482it [00:00, 26568.56it/s]
482it [00:00, 20838.15it/s]
482it [00:00, 23055.61it/s]
48

482it [00:00, 18850.63it/s]
482it [00:00, 22175.53it/s]
482it [00:00, 19903.86it/s]
482it [00:00, 22159.98it/s]
482it [00:00, 24381.08it/s]
482it [00:00, 20464.37it/s]
482it [00:00, 20653.36it/s]
482it [00:00, 22011.11it/s]
482it [00:00, 20578.94it/s]
482it [00:00, 21641.42it/s]
482it [00:00, 23536.35it/s]
482it [00:00, 18397.74it/s]
482it [00:00, 32277.27it/s]
482it [00:00, 19701.35it/s]
482it [00:00, 22873.79it/s]
482it [00:00, 23293.90it/s]
482it [00:00, 22950.14it/s]
482it [00:00, 21240.11it/s]
482it [00:00, 20302.63it/s]
482it [00:00, 22974.65it/s]
482it [00:00, 28027.15it/s]
482it [00:00, 23149.33it/s]
482it [00:00, 22014.23it/s]
482it [00:00, 20816.26it/s]
482it [00:00, 21235.87it/s]
482it [00:00, 25687.13it/s]
482it [00:00, 28264.22it/s]
482it [00:00, 24298.73it/s]
482it [00:00, 15078.76it/s]
482it [00:00, 15975.02it/s]
482it [00:00, 18212.12it/s]
482it [00:00, 19853.62it/s]
482it [00:00, 20553.42it/s]
482it [00:00, 28170.48it/s]
482it [00:00, 22859.57it/s]
482it [00:00, 25376.

482it [00:00, 20657.37it/s]
482it [00:00, 25887.45it/s]
482it [00:00, 25920.31it/s]
482it [00:00, 22054.33it/s]
482it [00:00, 22613.84it/s]
482it [00:00, 22044.95it/s]
482it [00:00, 21725.14it/s]
482it [00:00, 23833.80it/s]
482it [00:00, 23141.38it/s]
482it [00:00, 25547.23it/s]
482it [00:00, 18206.54it/s]
482it [00:00, 21607.88it/s]
482it [00:00, 17095.58it/s]
482it [00:00, 20315.69it/s]
482it [00:00, 29620.88it/s]
482it [00:00, 14439.05it/s]
482it [00:00, 20128.38it/s]
482it [00:00, 16835.20it/s]
482it [00:00, 19617.81it/s]
482it [00:00, 21899.76it/s]
482it [00:00, 26829.12it/s]
482it [00:00, 24168.88it/s]
482it [00:00, 22195.50it/s]
482it [00:00, 21798.93it/s]
482it [00:00, 21640.03it/s]
482it [00:00, 20783.52it/s]
482it [00:00, 23990.49it/s]
482it [00:00, 21369.88it/s]
482it [00:00, 20613.98it/s]
482it [00:00, 6104.08it/s]
482it [00:00, 6705.70it/s]
482it [00:00, 7289.55it/s]
482it [00:00, 7342.07it/s]
482it [00:00, 9758.01it/s]
482it [00:00, 14241.82it/s]
482it [00:00, 18444.74it/

482it [00:00, 25187.56it/s]
482it [00:00, 25910.34it/s]
482it [00:00, 19649.46it/s]
482it [00:00, 16073.96it/s]
482it [00:00, 14573.84it/s]
482it [00:00, 17142.40it/s]
482it [00:00, 20176.80it/s]
482it [00:00, 21830.94it/s]
482it [00:00, 25499.22it/s]
482it [00:00, 24298.44it/s]
482it [00:00, 21631.92it/s]
482it [00:00, 18398.08it/s]
482it [00:00, 30194.68it/s]
482it [00:00, 23576.42it/s]
482it [00:00, 21943.97it/s]
482it [00:00, 19122.91it/s]
482it [00:00, 22112.23it/s]
482it [00:00, 27761.97it/s]
482it [00:00, 23499.96it/s]
482it [00:00, 19866.69it/s]
482it [00:00, 26852.64it/s]
482it [00:00, 30900.81it/s]
482it [00:00, 20350.45it/s]
482it [00:00, 19248.35it/s]
482it [00:00, 22893.22it/s]
482it [00:00, 26257.30it/s]
482it [00:00, 25117.78it/s]
482it [00:00, 25184.74it/s]
482it [00:00, 24963.94it/s]
482it [00:00, 23277.81it/s]
482it [00:00, 25641.52it/s]
482it [00:00, 10742.97it/s]
482it [00:00, 14284.99it/s]
482it [00:00, 17856.93it/s]
482it [00:00, 18266.92it/s]
482it [00:00, 22308.

482it [00:00, 21952.55it/s]
482it [00:00, 23547.59it/s]
482it [00:00, 26828.41it/s]
482it [00:00, 25961.92it/s]
482it [00:00, 20181.43it/s]
482it [00:00, 22653.11it/s]
482it [00:00, 18717.12it/s]
482it [00:00, 18399.08it/s]
482it [00:00, 23869.82it/s]
482it [00:00, 19182.78it/s]
482it [00:00, 20247.93it/s]
482it [00:00, 22942.32it/s]
482it [00:00, 21624.75it/s]
482it [00:00, 22620.42it/s]
482it [00:00, 23912.74it/s]
482it [00:00, 25384.59it/s]
482it [00:00, 24853.45it/s]
482it [00:00, 18941.05it/s]
482it [00:00, 21301.43it/s]
482it [00:00, 24826.29it/s]
482it [00:00, 21446.73it/s]
482it [00:00, 22874.83it/s]
482it [00:00, 19641.44it/s]
482it [00:00, 19672.79it/s]
482it [00:00, 25604.18it/s]
482it [00:00, 21990.04it/s]
482it [00:00, 22217.69it/s]
482it [00:00, 23311.63it/s]
482it [00:00, 25325.77it/s]
482it [00:00, 25504.04it/s]
482it [00:00, 24126.20it/s]
482it [00:00, 24374.02it/s]
482it [00:00, 28245.66it/s]
482it [00:00, 20313.24it/s]
482it [00:00, 21958.75it/s]
482it [00:00, 18864.

482it [00:00, 23990.20it/s]
482it [00:00, 23748.97it/s]
482it [00:00, 19887.21it/s]
482it [00:00, 21664.38it/s]
482it [00:00, 22513.61it/s]
482it [00:00, 21288.19it/s]
482it [00:00, 18829.21it/s]
482it [00:00, 22557.32it/s]
482it [00:00, 19772.65it/s]
482it [00:00, 18352.32it/s]
482it [00:00, 24280.35it/s]
482it [00:00, 14680.20it/s]
482it [00:00, 12304.58it/s]
482it [00:00, 12419.55it/s]
482it [00:00, 17296.84it/s]
482it [00:00, 18095.08it/s]
482it [00:00, 16110.08it/s]
482it [00:00, 19345.98it/s]
482it [00:00, 12899.21it/s]
482it [00:00, 22820.35it/s]
482it [00:00, 21023.21it/s]
482it [00:00, 21783.66it/s]
482it [00:00, 20567.22it/s]
482it [00:00, 23031.97it/s]
482it [00:00, 25057.07it/s]
482it [00:00, 21385.70it/s]
482it [00:00, 28366.14it/s]
482it [00:00, 23945.88it/s]
482it [00:00, 26967.62it/s]
482it [00:00, 21688.31it/s]
482it [00:00, 20954.78it/s]
482it [00:00, 22976.22it/s]
482it [00:00, 20778.18it/s]
482it [00:00, 28418.37it/s]
482it [00:00, 23616.91it/s]
482it [00:00, 24612.

482it [00:00, 6668.89it/s]
482it [00:00, 7383.50it/s]
482it [00:00, 10880.75it/s]
482it [00:00, 12176.37it/s]
482it [00:00, 12878.09it/s]
482it [00:00, 14797.86it/s]
482it [00:00, 14174.81it/s]
482it [00:00, 19183.15it/s]
482it [00:00, 12085.74it/s]
482it [00:00, 14726.61it/s]
482it [00:00, 17192.40it/s]
482it [00:00, 16520.97it/s]
482it [00:00, 15678.43it/s]
482it [00:00, 29114.53it/s]
482it [00:00, 22376.55it/s]
482it [00:00, 16468.08it/s]
482it [00:00, 21076.68it/s]
482it [00:00, 23874.61it/s]
482it [00:00, 16527.05it/s]
482it [00:00, 17753.43it/s]
482it [00:00, 13627.32it/s]
482it [00:00, 18948.69it/s]
482it [00:00, 17064.12it/s]
482it [00:00, 14721.36it/s]
482it [00:00, 19406.14it/s]
482it [00:00, 25598.02it/s]
482it [00:00, 20913.38it/s]
482it [00:00, 14523.38it/s]
482it [00:00, 13928.23it/s]
482it [00:00, 15944.15it/s]
482it [00:00, 15014.48it/s]
482it [00:00, 16338.32it/s]
482it [00:00, 10794.42it/s]
482it [00:00, 13990.11it/s]
482it [00:00, 15159.04it/s]
482it [00:00, 16414.33

482it [00:00, 21212.03it/s]
482it [00:00, 23002.36it/s]
482it [00:00, 25145.58it/s]
482it [00:00, 19357.65it/s]
482it [00:00, 18964.86it/s]
482it [00:00, 20656.11it/s]
482it [00:00, 24290.56it/s]
482it [00:00, 22106.42it/s]
482it [00:00, 15634.05it/s]
482it [00:00, 13306.84it/s]
482it [00:00, 15157.10it/s]
482it [00:00, 22180.15it/s]
482it [00:00, 16772.76it/s]
482it [00:00, 16900.36it/s]
482it [00:00, 20816.26it/s]
482it [00:00, 20375.88it/s]
482it [00:00, 23425.89it/s]
482it [00:00, 18798.92it/s]
482it [00:00, 21919.23it/s]
482it [00:00, 26232.43it/s]
482it [00:00, 20705.61it/s]
482it [00:00, 19050.29it/s]
482it [00:00, 17877.14it/s]
482it [00:00, 22477.81it/s]
482it [00:00, 28382.46it/s]
482it [00:00, 24833.92it/s]
482it [00:00, 22764.07it/s]
482it [00:00, 22239.69it/s]
482it [00:00, 27603.15it/s]
482it [00:00, 18713.13it/s]
482it [00:00, 21802.93it/s]
482it [00:00, 17315.51it/s]
482it [00:00, 22602.21it/s]
482it [00:00, 24326.51it/s]
482it [00:00, 20548.19it/s]
482it [00:00, 27208.

482it [00:00, 24346.14it/s]
482it [00:00, 19245.97it/s]
482it [00:00, 18741.24it/s]
482it [00:00, 20979.79it/s]
482it [00:00, 17725.57it/s]
482it [00:00, 14289.53it/s]
482it [00:00, 16049.72it/s]
482it [00:00, 17647.89it/s]
482it [00:00, 17105.70it/s]
482it [00:00, 16035.59it/s]
482it [00:00, 20129.99it/s]
482it [00:00, 20873.00it/s]
482it [00:00, 23045.36it/s]
482it [00:00, 20051.32it/s]
482it [00:00, 21409.94it/s]
482it [00:00, 18587.36it/s]
482it [00:00, 18842.72it/s]
482it [00:00, 20763.88it/s]
482it [00:00, 21426.73it/s]
482it [00:00, 22531.42it/s]
482it [00:00, 24591.05it/s]
482it [00:00, 18071.30it/s]
482it [00:00, 19096.17it/s]
482it [00:00, 20494.24it/s]
482it [00:00, 23753.71it/s]
482it [00:00, 16623.67it/s]
482it [00:00, 15436.72it/s]
482it [00:00, 19027.69it/s]
482it [00:00, 21016.65it/s]
482it [00:00, 15956.86it/s]
482it [00:00, 15025.08it/s]
482it [00:00, 14972.22it/s]
482it [00:00, 18372.16it/s]
482it [00:00, 16805.11it/s]
482it [00:00, 12442.48it/s]
482it [00:00, 13190.

482it [00:00, 20438.92it/s]
482it [00:00, 14193.32it/s]
482it [00:00, 19471.56it/s]
482it [00:00, 21588.03it/s]
482it [00:00, 25806.82it/s]
482it [00:00, 28882.02it/s]
482it [00:00, 23630.16it/s]
482it [00:00, 24750.31it/s]
482it [00:00, 21891.94it/s]
482it [00:00, 23612.49it/s]
482it [00:00, 19686.58it/s]
482it [00:00, 16328.03it/s]
482it [00:00, 26476.35it/s]
482it [00:00, 20769.43it/s]
482it [00:00, 11524.39it/s]
482it [00:00, 9698.42it/s]
482it [00:00, 12537.31it/s]
482it [00:00, 13398.64it/s]
482it [00:00, 19378.80it/s]
482it [00:00, 20009.84it/s]
482it [00:00, 26260.03it/s]
482it [00:00, 25094.08it/s]
482it [00:00, 15225.71it/s]
482it [00:00, 16916.62it/s]
482it [00:00, 21873.46it/s]
482it [00:00, 20474.11it/s]
482it [00:00, 19228.03it/s]
482it [00:00, 20741.51it/s]
482it [00:00, 18853.27it/s]
482it [00:00, 24416.41it/s]
482it [00:00, 25424.50it/s]
482it [00:00, 19344.87it/s]
482it [00:00, 16929.65it/s]
482it [00:00, 21932.31it/s]
482it [00:00, 20585.23it/s]
482it [00:00, 22960.5

482it [00:00, 15455.37it/s]
482it [00:00, 17031.63it/s]
482it [00:00, 18137.45it/s]
482it [00:00, 8044.40it/s]
482it [00:00, 6022.37it/s]
482it [00:00, 9289.80it/s]
482it [00:00, 13678.96it/s]
482it [00:00, 13927.37it/s]
482it [00:00, 9264.00it/s]
482it [00:00, 12146.01it/s]
482it [00:00, 15001.11it/s]
482it [00:00, 13536.08it/s]
482it [00:00, 13271.28it/s]
482it [00:00, 14557.37it/s]
482it [00:00, 15302.93it/s]
482it [00:00, 13505.88it/s]
482it [00:00, 19135.40it/s]
482it [00:00, 19536.86it/s]
482it [00:00, 20635.02it/s]
482it [00:00, 21488.45it/s]
482it [00:00, 19032.71it/s]
482it [00:00, 19378.80it/s]
482it [00:00, 20287.14it/s]
482it [00:00, 22126.75it/s]
482it [00:00, 21548.68it/s]
482it [00:00, 24862.32it/s]
482it [00:00, 22075.53it/s]
482it [00:00, 19478.32it/s]
482it [00:00, 21189.35it/s]
482it [00:00, 25663.98it/s]
482it [00:00, 16368.74it/s]
482it [00:00, 17846.53it/s]
482it [00:00, 26392.70it/s]
482it [00:00, 23482.76it/s]
482it [00:00, 21208.91it/s]
482it [00:00, 25614.88it

482it [00:00, 17963.40it/s]
482it [00:00, 20863.09it/s]
482it [00:00, 25225.91it/s]
482it [00:00, 20782.88it/s]
482it [00:00, 20357.83it/s]
482it [00:00, 18849.75it/s]
482it [00:00, 19842.51it/s]
482it [00:00, 20938.50it/s]
482it [00:00, 17818.37it/s]
482it [00:00, 20802.77it/s]
482it [00:00, 23922.36it/s]
482it [00:00, 21925.17it/s]
482it [00:00, 23105.42it/s]
482it [00:00, 19507.26it/s]
482it [00:00, 17464.79it/s]
482it [00:00, 17644.51it/s]
482it [00:00, 21278.33it/s]
482it [00:00, 20958.91it/s]
482it [00:00, 24161.08it/s]
482it [00:00, 20583.34it/s]
482it [00:00, 26036.14it/s]
482it [00:00, 18856.61it/s]
482it [00:00, 22843.81it/s]
482it [00:00, 18089.74it/s]
482it [00:00, 20847.60it/s]
482it [00:00, 24672.68it/s]
482it [00:00, 19736.55it/s]
482it [00:00, 20479.09it/s]
482it [00:00, 22210.13it/s]
482it [00:00, 15492.08it/s]
482it [00:00, 22525.65it/s]
482it [00:00, 15671.74it/s]
482it [00:00, 16225.55it/s]
482it [00:00, 14816.08it/s]
482it [00:00, 17373.99it/s]
482it [00:00, 22549.

482it [00:00, 6845.69it/s]
482it [00:00, 6750.21it/s]
482it [00:00, 11638.64it/s]
482it [00:00, 11903.43it/s]
482it [00:00, 17877.77it/s]
482it [00:00, 20541.93it/s]
482it [00:00, 21717.90it/s]
482it [00:00, 25000.67it/s]
482it [00:00, 22866.03it/s]
482it [00:00, 25507.91it/s]
482it [00:00, 26695.56it/s]
482it [00:00, 20285.92it/s]
482it [00:00, 22193.55it/s]
482it [00:00, 26072.41it/s]
482it [00:00, 20350.04it/s]
482it [00:00, 19134.49it/s]
482it [00:00, 18020.40it/s]
482it [00:00, 26839.09it/s]
482it [00:00, 23647.02it/s]
482it [00:00, 23272.45it/s]
482it [00:00, 25374.72it/s]
482it [00:00, 24761.22it/s]
482it [00:00, 25041.24it/s]
482it [00:00, 25833.86it/s]
482it [00:00, 25769.32it/s]
482it [00:00, 26539.95it/s]
482it [00:00, 24749.40it/s]
482it [00:00, 23049.04it/s]
482it [00:00, 23082.99it/s]
482it [00:00, 21109.70it/s]
482it [00:00, 20054.11it/s]
482it [00:00, 19726.92it/s]
482it [00:00, 19081.75it/s]
482it [00:00, 23157.02it/s]
482it [00:00, 28063.72it/s]
482it [00:00, 19330.07

482it [00:00, 23150.93it/s]
482it [00:00, 22424.46it/s]
482it [00:00, 19435.62it/s]
482it [00:00, 28630.47it/s]
482it [00:00, 26246.39it/s]
482it [00:00, 23421.01it/s]
482it [00:00, 23798.45it/s]
482it [00:00, 25451.39it/s]
482it [00:00, 21249.04it/s]
482it [00:00, 19877.83it/s]
482it [00:00, 24611.10it/s]
482it [00:00, 18411.99it/s]
482it [00:00, 20053.31it/s]
482it [00:00, 20244.89it/s]
482it [00:00, 10621.17it/s]
482it [00:00, 13240.34it/s]
482it [00:00, 16789.34it/s]
482it [00:00, 16419.00it/s]
482it [00:00, 18593.18it/s]
482it [00:00, 27692.38it/s]
482it [00:00, 26593.72it/s]
482it [00:00, 30816.03it/s]
482it [00:00, 20702.43it/s]
482it [00:00, 34287.41it/s]
482it [00:00, 22949.09it/s]
482it [00:00, 21821.05it/s]
482it [00:00, 19985.32it/s]
482it [00:00, 27169.12it/s]
482it [00:00, 24901.82it/s]
482it [00:00, 19462.57it/s]
482it [00:00, 20464.99it/s]
482it [00:00, 21818.22it/s]
482it [00:00, 24163.11it/s]
482it [00:00, 21613.19it/s]
482it [00:00, 26566.46it/s]
482it [00:00, 20677.

482it [00:00, 20597.60it/s]
482it [00:00, 22665.56it/s]
482it [00:00, 19842.12it/s]
482it [00:00, 22202.08it/s]
482it [00:00, 25621.05it/s]
482it [00:00, 23703.58it/s]
482it [00:00, 20920.74it/s]
482it [00:00, 28081.66it/s]
482it [00:00, 23505.70it/s]
482it [00:00, 23161.00it/s]
482it [00:00, 24165.13it/s]
482it [00:00, 27908.37it/s]
482it [00:00, 24382.84it/s]
482it [00:00, 23112.28it/s]
482it [00:00, 20696.92it/s]
482it [00:00, 23975.41it/s]
482it [00:00, 21741.26it/s]
482it [00:00, 24012.43it/s]
482it [00:00, 25256.47it/s]
482it [00:00, 20455.05it/s]
482it [00:00, 19015.17it/s]
482it [00:00, 22137.89it/s]
482it [00:00, 24176.69it/s]
482it [00:00, 23753.99it/s]
482it [00:00, 22366.16it/s]
482it [00:00, 23658.92it/s]
482it [00:00, 24230.58it/s]
482it [00:00, 25500.50it/s]
482it [00:00, 17746.89it/s]
482it [00:00, 18703.26it/s]
482it [00:00, 21769.82it/s]
482it [00:00, 23152.52it/s]
482it [00:00, 25973.26it/s]
482it [00:00, 20925.07it/s]
482it [00:00, 22379.53it/s]
482it [00:00, 20432.

482it [00:00, 22319.98it/s]
482it [00:00, 20540.89it/s]
482it [00:00, 20294.27it/s]
482it [00:00, 19522.90it/s]
482it [00:00, 18858.01it/s]
482it [00:00, 27433.47it/s]
482it [00:00, 23890.98it/s]
482it [00:00, 17973.78it/s]
482it [00:00, 21317.60it/s]
482it [00:00, 16922.57it/s]
482it [00:00, 22532.68it/s]
482it [00:00, 18541.68it/s]
482it [00:00, 21483.65it/s]
482it [00:00, 23752.04it/s]
482it [00:00, 20829.56it/s]
482it [00:00, 21916.14it/s]
482it [00:00, 13416.25it/s]
482it [00:00, 16826.09it/s]
482it [00:00, 19437.12it/s]
482it [00:00, 15933.09it/s]
482it [00:00, 16549.23it/s]
482it [00:00, 16103.80it/s]
482it [00:00, 19769.94it/s]
482it [00:00, 20122.37it/s]
482it [00:00, 26419.60it/s]
482it [00:00, 26175.71it/s]
482it [00:00, 18318.23it/s]
482it [00:00, 21686.22it/s]
482it [00:00, 21288.19it/s]
482it [00:00, 22339.71it/s]
482it [00:00, 21225.83it/s]
482it [00:00, 16238.97it/s]
482it [00:00, 19653.09it/s]
482it [00:00, 12720.41it/s]
482it [00:00, 18212.28it/s]
482it [00:00, 13769.

482it [00:00, 22209.15it/s]
482it [00:00, 19784.26it/s]
482it [00:00, 25630.47it/s]
482it [00:00, 22361.70it/s]
482it [00:00, 21922.80it/s]
482it [00:00, 26836.24it/s]
482it [00:00, 20785.87it/s]
482it [00:00, 24080.22it/s]
482it [00:00, 24571.02it/s]
482it [00:00, 21633.77it/s]
482it [00:00, 19192.62it/s]
482it [00:00, 23157.29it/s]
482it [00:00, 24653.12it/s]
482it [00:00, 27285.73it/s]
482it [00:00, 19770.33it/s]
482it [00:00, 24436.48it/s]
482it [00:00, 19765.50it/s]
482it [00:00, 22308.16it/s]
482it [00:00, 31069.87it/s]
482it [00:00, 21036.99it/s]
482it [00:00, 19365.25it/s]
482it [00:00, 23530.32it/s]
482it [00:00, 21358.14it/s]
482it [00:00, 24608.71it/s]
482it [00:00, 20050.92it/s]
482it [00:00, 22266.39it/s]
482it [00:00, 21089.88it/s]
482it [00:00, 21640.95it/s]
482it [00:00, 19078.51it/s]
482it [00:00, 16373.91it/s]
482it [00:00, 20894.14it/s]
482it [00:00, 23824.53it/s]
482it [00:00, 22200.86it/s]
482it [00:00, 24579.09it/s]
482it [00:00, 24153.87it/s]
482it [00:00, 19399.

482it [00:00, 18587.70it/s]
482it [00:00, 23213.66it/s]
482it [00:00, 21893.12it/s]
482it [00:00, 19865.13it/s]
482it [00:00, 20521.29it/s]
482it [00:00, 21985.26it/s]
482it [00:00, 21946.60it/s]
482it [00:00, 21178.25it/s]
482it [00:00, 20946.31it/s]
482it [00:00, 23582.20it/s]
482it [00:00, 21217.15it/s]
482it [00:00, 23303.03it/s]
482it [00:00, 18460.24it/s]
482it [00:00, 22564.62it/s]
482it [00:00, 22013.75it/s]
482it [00:00, 25923.30it/s]
482it [00:00, 24789.76it/s]
482it [00:00, 17994.09it/s]
482it [00:00, 19495.22it/s]
482it [00:00, 2957.65it/s]
482it [00:00, 3321.89it/s]
482it [00:00, 3296.77it/s]
482it [00:00, 5739.85it/s]
482it [00:00, 7537.30it/s]
482it [00:00, 7431.38it/s]
482it [00:00, 9737.85it/s]
482it [00:00, 12456.28it/s]
482it [00:00, 17394.62it/s]
482it [00:00, 19249.64it/s]
482it [00:00, 19789.10it/s]
482it [00:00, 20094.37it/s]
482it [00:00, 24803.45it/s]
482it [00:00, 19283.05it/s]
482it [00:00, 25077.58it/s]
482it [00:00, 22962.39it/s]
482it [00:00, 23526.21it/s]

Time it took to build the decision tree is  743.1897842884064


In [8]:
%matplotlib qt
import matplotlib.pyplot as plt
#Now plotting the graph
xValue = list(trainAccuracyDict.keys())
trainYValue = list(trainAccuracyDict.values())
testYValue = list(testAccuracyDict.values())
validYValue = list(validAccuracyDict.values())

def plottingOnTheGo(xValue,trainYValue,testYValue,validYValue):
    plt.plot(xValue,trainYValue,label='Train Accuracy')
    plt.plot(xValue,testYValue,label = 'Test Accuracy')
    plt.plot(xValue,validYValue,label = 'Validation Accuracy')
    plt.xlabel('Number of Nodes')
    plt.ylabel('Accuracies in (%)')
    plt.legend()
    plt.show()

plottingOnTheGo(xValue,trainYValue,testYValue,validYValue)

# Post Pruning of Decision Tree

In [None]:
#sabse pehle root pickle krunga
# import pickle
# pickle.dump(root,open('savedRoot','wb'))

In [None]:
#just storing these predicted values to reduce further efforts
# trainSave = trainPredict.copy()
# testSave = testPredict.copy()
# valiSave = validPredict.copy()

In [None]:
# pickle.dump(trainSave,open('trainPredict','wb'))
# pickle.dump(testSave,open('testPredict','wb'))
# pickle.dump(valiSave,open('validPredict','wb'))

In [None]:
# trainPredict = pickle.load(open('trainPredict','rb'))
# testPredict = pickle.load(open('testPredict','rb'))
# validPredict = pickle.load(open('validPredict','rb'))

In [None]:
# root = pickle.load(open('savedRoot','rb'))

In [9]:
# print(trainPredict.size)
# print(testPredict.size)
# print(validP)

#just checking
print("Train Accuracy is ",np.sum(trainPredict==trainY)/trainY.size * 100)
print("Test Accuracy is ",np.sum(testPredict==testY)/testY.size * 100)
print("Validation Accuracy is ",np.sum(validPredict==validY)/validY.size * 100)

Train Accuracy is  90.808647412421
Test Accuracy is  77.9936025219044
Validation Accuracy is  77.61450027813834


In [10]:
def findNodeCount(node):
    if node.isLeaf==True:
        return 1
    else:
        return 1 + findNodeCount(node.lChild) + findNodeCount(node.rChild)

# Post Pruning using Iterative approach

#Will Mark down it as we will use bottom up approach 
#Iterative way of post pruning
#We have got root as our rootNode of fully grown tree.

pruneTrainAcc, pruneTestAcc, pruneValAcc = {}, {}, {}

#Finding the final node count after the tree have fully grown

finalTotalNode = findNodeCount(root)

pruneTrainAcc[finalTotalNode] = trainAccuracyDict[finalTotalNode]
pruneTestAcc[finalTotalNode] = testAccuracyDict[finalTotalNode]
pruneValAcc[finalTotalNode] = validAccuracyDict[finalTotalNode]

#Maintaining global validation accuracy
valAccuracy = pruneValAcc[finalTotalNode]
    
def treePruning():
    
    nodeToPrune = None
    
    def postPruning(node):
        
        global valAccuracy
        nonlocal nodeToPrune
           
        valPre = np.copy(validPredict)

        if node.isLeaf == False: #and node.valiInd.size != 0:
            valPre[node.valiInd] = node.yLabel
            validAcc = np.sum(valPre==validY)/validY.size * 100
            if validAcc >= valAccuracy:

                valAccuracy = validAcc
                nodeToPrune = node

            postPruning(node.lChild)
            postPruning(node.rChild)
    
    postPruning(root)
        
    if nodeToPrune != None:  
                                                        
        trainPredict[nodeToPrune.trainInd] = nodeToPrune.yLabel
        trainAcc = np.sum(trainPredict==trainY)/trainY.size * 100
        testPredict[nodeToPrune.testInd] = nodeToPrune.yLabel
        testAcc = np.sum(testPredict==testY)/testY.size * 100
        validPredict[nodeToPrune.valiInd] = nodeToPrune.yLabel
        validAcc = np.sum(validPredict==validY)/validY.size * 100

        nodeToPrune.isLeaf = True #Only this would suffice as we are making it a leaf
        nNode = findNodeCount(root)
        pruneTrainAcc[nNode] = trainAcc
        pruneTestAcc[nNode] = testAcc
        pruneValAcc[nNode] = validAcc
                
        #calling function recursively now
        treePruning()
  
treePruning()

In [None]:
# pickle.dump(pruneTrainAcc,open('iterPrunTrain','wb'))
# pickle.dump(pruneTestAcc,open('iterPrunTest','wb'))
# pickle.dump(pruneValAcc,open('iterPrunVal','wb'))

In [None]:
# pruneTrain = pickle.load(open('iterPrunTrain','rb'))
# pruneTest = pickle.load(open('iterPrunTest','rb'))
# pruneVal = pickle.load(open('iterPrunVal','rb'))

# Post Pruning using Bottom Up (Post Order Traversal) Approach

In [11]:
#Recursive approach for pruning

#We have trainPredict, testPredict and validPredict obtained after we have grown full tree.


#They will contain accuracy as we prune the tree
recurTrainAcc, recurTestAcc, recurValAcc = {},{},{}

finalTotalNode = findNodeCount(root)

recurTrainAcc[finalTotalNode] = trainAccuracyDict[finalTotalNode]
recurTestAcc[finalTotalNode] = testAccuracyDict[finalTotalNode]
recurValAcc[finalTotalNode] = validAccuracyDict[finalTotalNode]

currentValAcc = recurValAcc[finalTotalNode]

def recurPruning(node):
    
    global currentValAcc
    if node.isLeaf == False:
        
        recurPruning(node.lChild)
        recurPruning(node.rChild)
        
        val = validPredict.copy()
        val[node.valiInd] = node.yLabel
        prediction = np.sum(val==validY)/validY.size * 100
        
        if prediction >= currentValAcc:
            currentValAcc = prediction
            node.isLeaf = True
            validPredict[node.valiInd] = node.yLabel
            trainPredict[node.trainInd] = node.yLabel
            testPredict[node.testInd] = node.yLabel
            valP = np.sum(validPredict==validY)/validY.size * 100
            trnP = np.sum(trainPredict==trainY)/trainY.size * 100
            tstP = np.sum(testPredict==testY)/testY.size * 100
            nC = findNodeCount(root)
            recurTrainAcc[nC] = trnP
            recurTestAcc[nC] = tstP
            recurValAcc[nC] = valP    

In [12]:
recurPruning(root)

In [None]:
# print(findNodeCount(root))

In [None]:
# pickle.dump(recurTrainAcc,open('recurPruneTrain','wb'))
# pickle.dump(recurTestAcc,open('recurPruneTest','wb'))
# pickle.dump(recurValAcc,open('recurPruneVal','wb'))

In [None]:
# pruneTrain = pickle.load(open('recurPruneTrain','rb'))
# pruneTest = pickle.load(open('recurPruneTest','rb'))
# pruneVal = pickle.load(open('recurPruneVal','rb'))

In [13]:
def prunePlotting(pruneX,pruneTrnAcc,pruneTstAcc,pruneValAcc):
    plt.plot(pruneX,pruneTrnAcc,label='Train Accuracy')
    plt.plot(pruneX,pruneTstAcc,label = 'Test Accuracy')
    plt.plot(pruneX,pruneValAcc,label = 'Validation Accuracy')
    ax=plt.gca()
    ax.set_xlim(ax.get_xlim()[::-1])
    plt.xlabel('Number of Nodes')
    plt.ylabel('Accuracies in (%) after pruning')
    plt.legend()
    plt.show()
    
xAxisVal = list(recurTrainAcc.keys())
trainYVal = list(recurTrainAcc.values())
testYVal = list(recurTestAcc.values())
valYVal = list(recurValAcc.values())

prunePlotting(xAxisVal,trainYVal,testYVal,valYVal)

# Random Forest

In [None]:
#Let's start the sklearn part here
import pickle
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score

#We have to vary the paramter values in this range as given in the question.

n_est = list(range(50,451,100))
max_feat = [0.1,0.3,0.5,0.7,0.9]
min_samp_spl = list(range(2,11,2))

def scorer(modelPara,xPara,yPara):
  return modelPara.oob_score_

# score = scorer()

clf = RandomForestClassifier(n_jobs = -1, oob_score = True)
# st = time.time()
parameters = {'n_estimators':n_est ,'max_features':max_feat,'min_samples_split': min_samp_spl}
model = GridSearchCV(clf, parameters, cv=5, scoring = scorer ,verbose = 2)
model.fit(trainX,trainY.flatten())
# print("Time it took for model for fitting in is ",time.time()-st)

# with open('model','wb') as f:
#     pickle.dump(model,f)

best_params = model.best_params_
best_max_feat = best_params['max_features']
best_min_samp_split = best_params['min_samples_split']
best_n_estimators = best_params['n_estimators']

finalModel = RandomForestClassifier(n_estimators = best_n_estimators, min_samples_split = best_min_samp_split, max_features = best_max_feat,oob_score = True)
finalModel.fit(trainX,trainY.flatten())

print("Oob Score for training data for best learned parameters is ", finalModel.oob_score_ * 100," %")
print("Accuracy score for training data for best learned parameters is ",accuracy_score(trainY,finalModel.predict(trainX)) * 100," %")
print("Accuracy score for test data for best learned parameters is ",accuracy_score(testY,finalModel.predict(testX)) * 100," %")
print("Accuracy score for validation data for best learned parameters is ",accuracy_score(validY,finalModel.predict(validX)) * 100," %")

__{'max_features': 0.1, 'min_samples_split': 10, 'n_estimators': 450}__

__Oob Score for training data for best learned parameters are 81.07644522738863  %__

__Accuracy score for training data for best learned parameters are  87.3642081189251  %__

__Accuracy score for test data for best learned parameters are  80.82610912799592  %__

__Accuracy score for validate data for best learned parameters are  80.73428518449842  %__


# Random Forest - Parameter Sensitivity Analysis

In [None]:
#Now Part D of sklearn of the assignment
#{'max_features': 0.1, 'min_samples_split': 10, 'n_estimators': 450}

#From the above part we have got best paramters in the variables best_max_feat, best_min_samp_split, best_n_estimators

import pickle
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV
from joblib import Parallel, delayed
from itertools import product

#Varying the parameters in this range

n_est = list(range(50,1001,50))
max_feat = np.linspace(0.01,1.0,20)
min_samp_spl = list(range(2,60,3))

varyfeatModel = list(product(max_feat,[best_min_samp_split],[best_n_estimators]))
varyMinSampModel = list(product([best_max_feat],min_samp_spl,[best_n_estimators]))
varyNEstim = list(product([best_max_feat],[best_min_samp_split],n_est))

finalList = varyfeatModel+varyMinSampModel+varyNEstim

#Here training all 60 models 

def varyParaAccuracy(max_feat,min_samp_split,n_estim):
  model = RandomForestClassifier(max_features = max_feat, min_samples_split=min_samp_split, n_estimators = n_estim)
  model.fit(trainX,trainY.flatten())
  return model

finalModels = Parallel(n_jobs = -1)(delayed(varyParaAccuracy)(x,y,z) for (x,y,z) in tqdm(finalList))  

In [None]:
from sklearn.metrics import accuracy_score

#Slicing different models as we have done trained 60 models.

models_var_feat = finalModels[:20]
models_min_sampsplit = finalModels[20:40]
models_nEstimators = finalModels[40:]

validAccVaryFeat = np.array([accuracy_score(validY,valiModel.predict(validX)) for valiModel in models_var_feat]) * 100
validAccVaryMinSample = np.array([accuracy_score(validY,valiModel.predict(validX)) for valiModel in models_min_sampsplit]) * 100
validAccVaryNestimators = np.array([accuracy_score(validY,valiModel.predict(validX)) for valiModel in models_nEstimators]) * 100

testAccVaryFeat = np.array([accuracy_score(testY,testModel.predict(testX)) for testModel in models_var_feat]) * 100
testAccVaryMinSample = np.array([accuracy_score(testY,testModel.predict(testX)) for testModel in models_min_sampsplit]) * 100
testAccVaryNestimators = np.array([accuracy_score(testY,testModel.predict(testX)) for testModel in models_nEstimators]) * 100


In [None]:
%matplotlib qt
import matplotlib.pyplot as plt
def plottingAccuracy(x,ty,vy,xlab,ylab):
    plt.plot(x,ty,label='Test Accuracy')
    plt.plot(x,vy,label='Validation Accuracy')
    plt.xlabel(xlab)
    plt.ylabel(ylab)
#     plt.ylim(75,95)
    plt.legend()
    plt.show()

In [None]:
plottingAccuracy(max_feat,testAccVaryFeat,validAccVaryFeat,'max_features','Accuracy in (%) varying max_features')

In [None]:
plottingAccuracy(min_samp_spl,testAccVaryMinSample,validAccVaryMinSample,'min_samples_split','Accuracy in (%) varying min_samples_split')

In [None]:
plottingAccuracy(n_est,testAccVaryNestimators,validAccVaryNestimators,'n_estimators','Accuracy in (%) varying n_estimators')