In [143]:
import pandas as pd
import numpy as np

data = pd.read_csv('/home/roshan/Documents/FreelanceWork/2017-10-17 Bayesian Networks/iris.data',header=None)
data.columns = ['x1','x2','x3','x4','Class']


In [144]:
label = data['Class']
data = data.drop('Class',axis = 1)

In [145]:
for col in data.columns:
    data[col] = pd.cut(data[col],bins=3,labels=['0','1','2']).astype('int')

label[label=='Iris-setosa']=0
label[label=='Iris-versicolor']=1
label[label=='Iris-virginica']=2

data['Class'] = label.astype(int)
data.head()

Unnamed: 0,x1,x2,x3,x4,Class
0,0,1,0,0,0
1,0,1,0,0,0
2,0,1,0,0,0
3,0,1,0,0,0
4,0,1,0,0,0


In [146]:
class Node(object):
    '''
    Defines a Node Class for storing characteristics and CPT of each node
    '''
    
    def __init__(self,name):
        self.parents = []
        self.children = []
        self.name = name
        self.cpt=[]
        self.limit = 3
        
    def addParent(self,x):
        self.parents.append(x)
    
    def addChild(self,x):
        self.children.append(x)
    
    def createCPT(self,data):
        cpt = computeProb(data,self.limit,self.parents,self.name)
        self.cpt = cpt

In [147]:
def computeProb(data,limit,cols,target):
    
    numCol = len(cols)
    
    if numCol==0:
        return(cpt_0(data,limit,cols,target))
    elif numCol ==1:
        return(cpt_1(data,limit,cols,target))
    elif numCol ==2:
        return(cpt_2(data,limit,cols,target))
    elif numCol ==3:
        return(cpt_3(data,limit,cols,target))
    else:
        return(cpt_4(data,limit,cols,target))
            

In [148]:
#Functions for computing the Conditional Probability Tables (CPTs)

def cpt_2(data,limit,cols,target):
    
    cpt = []
    alpha = 0.001
    
    for var1 in range(limit):
        for var2 in range(limit):
            
            totalN = len( data[ (data[cols[0]]==var1) & (data[cols[1]]==var2) ] )
            
            for targetVar in range(limit):
                
                count = len( data[ (data[cols[0]]==var1) & (data[cols[1]]==var2) & (data[target]==targetVar) ] )
                if totalN ==0:
                    cpt.append([var1,var2,targetVar, float(totalN + 3*alpha)])
                else:
                    cpt.append([var1,var2,targetVar, float(count)/float(totalN + 3*alpha)])
                    
    cpt = pd.DataFrame(cpt, columns=[cols[0],cols[1],target,'prob'])
                
    return(cpt)

def cpt_1(data,limit,cols,target):
    
    cpt = []
    alpha = 0.001
    
    for var1 in range(limit):
            
        
        totalN = len( data[ (data[cols[0]]==var1)] )
        
            
        for targetVar in range(limit):
            
            count = len( data[ (data[cols[0]]==var1) & (data[target]==targetVar) ] )
            
            if totalN ==0:
                cpt.append([var1,targetVar, float(totalN + 3*alpha)])
            else:
                cpt.append([var1,targetVar, float(count)/float(totalN + 3*alpha)])
                    
    cpt = pd.DataFrame(cpt, columns=[cols[0],target,'prob'])
                
    return(cpt)

def cpt_0(data,limit,cols,target):
    
    alpha = 0.001
    cpt = []
    
    
    totalN = len( data )
    
            
    for targetVar in range(limit):
            
        count = len( data[ (data[target]==targetVar) ] )
        if totalN ==0:
            cpt.append([targetVar, alpha/float(totalN + 3*alpha)])
        else:
            cpt.append([targetVar, float(count)/(totalN + 3*alpha)])
                    
    cpt = pd.DataFrame(cpt, columns=[target,'prob'])
                
    return(cpt)


def cpt_3(data,limit,cols,target):
    
    cpt = []
    alpha = 0.001
    
    for var1 in range(limit):
        for var2 in range(limit):
            for var3 in range(limit):
            
                totalN = len( data[ (data[cols[0]]==var1) & (data[cols[1]]==var2) & (data[cols[2]]==var3) ] )

                for targetVar in range(limit):

                    count = len( data[ (data[cols[0]]==var1) & (data[cols[1]]==var2) & (data[cols[2]]==var3) & (data[target]==targetVar) ] )
                    if totalN ==0:
                        cpt.append([var1,var2,var3,targetVar, alpha/float(totalN + 3*alpha)])
                    else:
                        cpt.append([var1,var2,var3,targetVar, float(count)/float(totalN + 3*alpha)])
                    
    cpt = pd.DataFrame(cpt, columns=[cols[0],cols[1],cols[2],target,'prob'])
                
    return(cpt)

In [126]:
x=computeProb(d,limit=2,cols=['a'],target='c')
x

Unnamed: 0,a,c,prob
0,0,0,0.666001
1,0,1,0.333
2,1,0,0.19988
3,1,1,0.79952


In [188]:
structMap = {0:[1,2],1:[1,3],2:[1,4],3:[2,3],4:[2,4],5:[3,4]}   # Mapping of the structure position and the nodes that it connects


class BayesNet(object):
    
    def __init__(self,numNodes):
        self.structure = [-1,0,0,0,0,1]  # Array that defines the structure of the Bayes Net
        self.numNodes = numNodes
        self.varNodes={}
        self.classNode=0
        
        
    def initGraph(self):
        '''
        Initializes components of the Bayes Net Graph
        '''
        
        self.classNode = Node('Class')
        
        for i in range(self.numNodes):
            self.varNodes['x'+str(i+1)]=Node('x'+str(i+1))
            self.varNodes['x'+str(i+1)].parents.append('Class')
        
        for i in range(len(self.structure)):
            
            edgeNodes = structMap[i]
            firstNode = 'x'+str(edgeNodes[0])
            secondNode = 'x'+str(edgeNodes[1])
            
            if self.structure[i]==1:
                self.varNodes[firstNode].children.append(secondNode)
                self.varNodes[secondNode].parents.append(firstNode)
            elif self.structure[i]==-1:
                self.varNodes[secondNode].children.append(firstNode)
                self.varNodes[firstNode].parents.append(secondNode)
    
    def compCPT(self,data):
        '''
        Computes Conditional Probability Table for all the nodes
        '''
        
        self.classNode.createCPT(data)
        
        for i in range(len(self.varNodes)):
            self.varNodes['x'+str(i+1)].createCPT(data)
            
    
    def predict(self,data):
        '''
        Predicts most likely class given a single data sample
        
        '''
        maxProb = 0
        maxProbClass = 0

        for classVal in range(3):

            dt = data.copy()
            dt["Class"] = classVal
            prob = 1.0

            for i in range(self.numNodes):
                #print('Node is x'+str(i+1))

                pt=self.varNodes['x'+str(i+1)].cpt

                mergeList = self.varNodes['x'+str(i+1)].parents + ['x'+str(i+1)]

                cpt_prob = pd.merge(left=pt,right=dt,on=mergeList,how='inner')['prob'][0]
                #print("cpt_prob is ",str(cpt_prob))

                prob = cpt_prob*prob

            #print("Class :%d Prob : %f"%(classVal,prob))

            if prob>maxProb:
                maxProb = prob
                maxProbClass = classVal
                
        return(maxProbClass)

            

In [128]:
net = BayesNet(4)
net.initGraph()


In [129]:
net.compCPT(data)

In [130]:
errors = 0

#Testing on the overall dataset

for i in range(150):
    
    y = data.iloc[i:(i+1)]
    out = net.predict(y)
    
    if out != label[i]:
        errors+=1
        
print(errors)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


Node is x1
('cpt_prob is ', '0.999916673611')
Node is x2
('cpt_prob is ', '0.719956802592')
Node is x3
('cpt_prob is ', '0.9999400036')
Node is x4
('cpt_prob is ', '0.9999400036')
Class :0 Prob : 0.719810
Node is x1
('cpt_prob is ', '0.0434725905317')
Node is x2
('cpt_prob is ', '0.459972401656')
Node is x3
('cpt_prob is ', '0.0')
Node is x4
('cpt_prob is ', '0.003')
Class :1 Prob : 0.000000
Node is x1
('cpt_prob is ', '0.0')
Node is x2
('cpt_prob is ', '0.579965202088')
Node is x3
('cpt_prob is ', '0.0')
Node is x4
('cpt_prob is ', '0.003')
Class :2 Prob : 0.000000
Node is x1
('cpt_prob is ', '0.999916673611')
Node is x2
('cpt_prob is ', '0.719956802592')
Node is x3
('cpt_prob is ', '0.9999400036')
Node is x4
('cpt_prob is ', '0.9999400036')
Class :0 Prob : 0.719810
Node is x1
('cpt_prob is ', '0.0434725905317')
Node is x2
('cpt_prob is ', '0.459972401656')
Node is x3
('cpt_prob is ', '0.0')
Node is x4
('cpt_prob is ', '0.003')
Class :1 Prob : 0.000000
Node is x1
('cpt_prob is ', '0.0

Node is x2
('cpt_prob is ', '0.039997600144')
Node is x3
('cpt_prob is ', '0.0')
Node is x4
('cpt_prob is ', '0.003')
Class :2 Prob : 0.000000
Node is x1
('cpt_prob is ', '0.230715988618')
Node is x2
('cpt_prob is ', '0.259984400936')
Node is x3
('cpt_prob is ', '0.9999400036')
Node is x4
('cpt_prob is ', '0.9999400036')
Class :0 Prob : 0.059975
Node is x1
('cpt_prob is ', '0.003')
Node is x2
('cpt_prob is ', '0.0')
Node is x3
('cpt_prob is ', '0.0')
Node is x4
('cpt_prob is ', '0.003')
Class :1 Prob : 0.000000
Node is x1
('cpt_prob is ', '0.0')
Node is x2
('cpt_prob is ', '0.039997600144')
Node is x3
('cpt_prob is ', '0.0')
Node is x4
('cpt_prob is ', '0.003')
Class :2 Prob : 0.000000
Node is x1
('cpt_prob is ', '0.769053295393')
Node is x2
('cpt_prob is ', '0.259984400936')
Node is x3
('cpt_prob is ', '0.9999400036')
Node is x4
('cpt_prob is ', '0.9999400036')
Class :0 Prob : 0.199918
Node is x1
('cpt_prob is ', '0.003')
Node is x2
('cpt_prob is ', '0.0')
Node is x3
('cpt_prob is ', 

Class :0 Prob : 0.719810
Node is x1
('cpt_prob is ', '0.0434725905317')
Node is x2
('cpt_prob is ', '0.459972401656')
Node is x3
('cpt_prob is ', '0.0')
Node is x4
('cpt_prob is ', '0.003')
Class :1 Prob : 0.000000
Node is x1
('cpt_prob is ', '0.0')
Node is x2
('cpt_prob is ', '0.579965202088')
Node is x3
('cpt_prob is ', '0.0')
Node is x4
('cpt_prob is ', '0.003')
Class :2 Prob : 0.000000
Node is x1
('cpt_prob is ', '0.999916673611')
Node is x2
('cpt_prob is ', '0.719956802592')
Node is x3
('cpt_prob is ', '0.9999400036')
Node is x4
('cpt_prob is ', '0.9999400036')
Class :0 Prob : 0.719810
Node is x1
('cpt_prob is ', '0.0434725905317')
Node is x2
('cpt_prob is ', '0.459972401656')
Node is x3
('cpt_prob is ', '0.0')
Node is x4
('cpt_prob is ', '0.003')
Class :1 Prob : 0.000000
Node is x1
('cpt_prob is ', '0.0')
Node is x2
('cpt_prob is ', '0.579965202088')
Node is x3
('cpt_prob is ', '0.0')
Node is x4
('cpt_prob is ', '0.003')
Class :2 Prob : 0.000000
Node is x1
('cpt_prob is ', '0.769

Node is x3
('cpt_prob is ', '0.9999400036')
Node is x4
('cpt_prob is ', '0.9999400036')
Class :0 Prob : 0.719810
Node is x1
('cpt_prob is ', '0.0434725905317')
Node is x2
('cpt_prob is ', '0.459972401656')
Node is x3
('cpt_prob is ', '0.0')
Node is x4
('cpt_prob is ', '0.003')
Class :1 Prob : 0.000000
Node is x1
('cpt_prob is ', '0.0')
Node is x2
('cpt_prob is ', '0.579965202088')
Node is x3
('cpt_prob is ', '0.0')
Node is x4
('cpt_prob is ', '0.003')
Class :2 Prob : 0.000000
Node is x1
('cpt_prob is ', '0.769053295393')
Node is x2
('cpt_prob is ', '0.259984400936')
Node is x3
('cpt_prob is ', '0.9999400036')
Node is x4
('cpt_prob is ', '0.9999400036')
Class :0 Prob : 0.199918
Node is x1
('cpt_prob is ', '0.003')
Node is x2
('cpt_prob is ', '0.0')
Node is x3
('cpt_prob is ', '0.0')
Node is x4
('cpt_prob is ', '0.003')
Class :1 Prob : 0.000000
Node is x1
('cpt_prob is ', '0.0')
Node is x2
('cpt_prob is ', '0.039997600144')
Node is x3
('cpt_prob is ', '0.0')
Node is x4
('cpt_prob is ', '

('cpt_prob is ', '0.979105472575')
Class :1 Prob : 0.187945
Node is x1
('cpt_prob is ', '0.05262327001')
Node is x2
('cpt_prob is ', '0.379977201368')
Node is x3
('cpt_prob is ', '0.119992800432')
Node is x4
('cpt_prob is ', '0.166583374979')
Class :2 Prob : 0.000400
Node is x1
('cpt_prob is ', '0.0')
Node is x2
('cpt_prob is ', '0.719956802592')
Node is x3
('cpt_prob is ', '0.0')
Node is x4
('cpt_prob is ', '0.003')
Class :0 Prob : 0.000000
Node is x1
('cpt_prob is ', '0.869451810633')
Node is x2
('cpt_prob is ', '0.459972401656')
Node is x3
('cpt_prob is ', '0.959942403456')
Node is x4
('cpt_prob is ', '0.979105472575')
Class :1 Prob : 0.375882
Node is x1
('cpt_prob is ', '0.586146260732')
Node is x2
('cpt_prob is ', '0.579965202088')
Node is x3
('cpt_prob is ', '0.119992800432')
Node is x4
('cpt_prob is ', '0.166583374979')
Class :2 Prob : 0.006795
Node is x1
('cpt_prob is ', '0.0')
Node is x2
('cpt_prob is ', '0.019998800072')
Node is x3
('cpt_prob is ', '0.0')
Node is x4
('cpt_pro

('cpt_prob is ', '0.869451810633')
Node is x2
('cpt_prob is ', '0.459972401656')
Node is x3
('cpt_prob is ', '0.959942403456')
Node is x4
('cpt_prob is ', '0.979105472575')
Class :1 Prob : 0.375882
Node is x1
('cpt_prob is ', '0.586146260732')
Node is x2
('cpt_prob is ', '0.579965202088')
Node is x3
('cpt_prob is ', '0.119992800432')
Node is x4
('cpt_prob is ', '0.166583374979')
Class :2 Prob : 0.006795
Node is x1
('cpt_prob is ', '0.0')
Node is x2
('cpt_prob is ', '0.719956802592')
Node is x3
('cpt_prob is ', '0.0')
Node is x4
('cpt_prob is ', '0.003')
Class :0 Prob : 0.000000
Node is x1
('cpt_prob is ', '0.869451810633')
Node is x2
('cpt_prob is ', '0.459972401656')
Node is x3
('cpt_prob is ', '0.959942403456')
Node is x4
('cpt_prob is ', '0.979105472575')
Class :1 Prob : 0.375882
Node is x1
('cpt_prob is ', '0.586146260732')
Node is x2
('cpt_prob is ', '0.579965202088')
Node is x3
('cpt_prob is ', '0.119992800432')
Node is x4
('cpt_prob is ', '0.166583374979')
Class :2 Prob : 0.0067

Node is x3
('cpt_prob is ', '0.0')
Node is x4
('cpt_prob is ', '0.003')
Class :0 Prob : 0.000000
Node is x1
('cpt_prob is ', '0.370329222679')
Node is x2
('cpt_prob is ', '0.539967601944')
Node is x3
('cpt_prob is ', '0.959942403456')
Node is x4
('cpt_prob is ', '0.979105472575')
Class :1 Prob : 0.187945
Node is x1
('cpt_prob is ', '0.05262327001')
Node is x2
('cpt_prob is ', '0.379977201368')
Node is x3
('cpt_prob is ', '0.119992800432')
Node is x4
('cpt_prob is ', '0.166583374979')
Class :2 Prob : 0.000400
Node is x1
('cpt_prob is ', '0.997008973081')
Node is x2
('cpt_prob is ', '0.019998800072')
Node is x3
('cpt_prob is ', '0.0')
Node is x4
('cpt_prob is ', '0.003')
Class :0 Prob : 0.000000
Node is x1
('cpt_prob is ', '0.370329222679')
Node is x2
('cpt_prob is ', '0.539967601944')
Node is x3
('cpt_prob is ', '0.959942403456')
Node is x4
('cpt_prob is ', '0.979105472575')
Class :1 Prob : 0.187945
Node is x1
('cpt_prob is ', '0.05262327001')
Node is x2
('cpt_prob is ', '0.379977201368

Node is x3
('cpt_prob is ', '0.0')
Node is x4
('cpt_prob is ', '0.003')
Class :0 Prob : 0.000000
Node is x1
('cpt_prob is ', '0.869451810633')
Node is x2
('cpt_prob is ', '0.459972401656')
Node is x3
('cpt_prob is ', '0.039997600144')
Node is x4
('cpt_prob is ', '0.0')
Class :1 Prob : 0.000000
Node is x1
('cpt_prob is ', '0.586146260732')
Node is x2
('cpt_prob is ', '0.579965202088')
Node is x3
('cpt_prob is ', '0.879947203168')
Node is x4
('cpt_prob is ', '0.909028929846')
Class :2 Prob : 0.271921
Node is x1
('cpt_prob is ', '0.0')
Node is x2
('cpt_prob is ', '0.719956802592')
Node is x3
('cpt_prob is ', '0.0')
Node is x4
('cpt_prob is ', '0.003')
Class :0 Prob : 0.000000
Node is x1
('cpt_prob is ', '0.869451810633')
Node is x2
('cpt_prob is ', '0.459972401656')
Node is x3
('cpt_prob is ', '0.039997600144')
Node is x4
('cpt_prob is ', '0.0')
Class :1 Prob : 0.000000
Node is x1
('cpt_prob is ', '0.586146260732')
Node is x2
('cpt_prob is ', '0.579965202088')
Node is x3
('cpt_prob is ', 

Node is x4
('cpt_prob is ', '0.0')
Class :1 Prob : 0.000000
Node is x1
('cpt_prob is ', '0.15786981003')
Node is x2
('cpt_prob is ', '0.379977201368')
Node is x3
('cpt_prob is ', '0.879947203168')
Node is x4
('cpt_prob is ', '0.909028929846')
Class :2 Prob : 0.047983
Node is x1
('cpt_prob is ', '0.0')
Node is x2
('cpt_prob is ', '0.019998800072')
Node is x3
('cpt_prob is ', '0.0')
Node is x4
('cpt_prob is ', '0.003')
Class :0 Prob : 0.000000
Node is x1
('cpt_prob is ', '0.592526756286')
Node is x2
('cpt_prob is ', '0.539967601944')
Node is x3
('cpt_prob is ', '0.039997600144')
Node is x4
('cpt_prob is ', '0.99850224663')
Class :1 Prob : 0.012778
Node is x1
('cpt_prob is ', '0.78934905015')
Node is x2
('cpt_prob is ', '0.379977201368')
Node is x3
('cpt_prob is ', '0.879947203168')
Node is x4
('cpt_prob is ', '0.0909028929846')
Class :2 Prob : 0.023992
Node is x1
('cpt_prob is ', '0.0')
Node is x2
('cpt_prob is ', '0.719956802592')
Node is x3
('cpt_prob is ', '0.0')
Node is x4
('cpt_prob

Node is x1
('cpt_prob is ', '0.592526756286')
Node is x2
('cpt_prob is ', '0.539967601944')
Node is x3
('cpt_prob is ', '0.039997600144')
Node is x4
('cpt_prob is ', '0.99850224663')
Class :1 Prob : 0.012778
Node is x1
('cpt_prob is ', '0.78934905015')
Node is x2
('cpt_prob is ', '0.379977201368')
Node is x3
('cpt_prob is ', '0.879947203168')
Node is x4
('cpt_prob is ', '0.0909028929846')
Class :2 Prob : 0.023992
Node is x1
('cpt_prob is ', '0.0')
Node is x2
('cpt_prob is ', '0.019998800072')
Node is x3
('cpt_prob is ', '0.0')
Node is x4
('cpt_prob is ', '0.003')
Class :0 Prob : 0.000000
Node is x1
('cpt_prob is ', '0.592526756286')
Node is x2
('cpt_prob is ', '0.539967601944')
Node is x3
('cpt_prob is ', '0.039997600144')
Node is x4
('cpt_prob is ', '0.99850224663')
Class :1 Prob : 0.012778
Node is x1
('cpt_prob is ', '0.78934905015')
Node is x2
('cpt_prob is ', '0.379977201368')
Node is x3
('cpt_prob is ', '0.879947203168')
Node is x4
('cpt_prob is ', '0.0909028929846')
Class :2 Prob

Node is x1
('cpt_prob is ', '0.869451810633')
Node is x2
('cpt_prob is ', '0.459972401656')
Node is x3
('cpt_prob is ', '0.039997600144')
Node is x4
('cpt_prob is ', '0.0')
Class :1 Prob : 0.000000
Node is x1
('cpt_prob is ', '0.586146260732')
Node is x2
('cpt_prob is ', '0.579965202088')
Node is x3
('cpt_prob is ', '0.879947203168')
Node is x4
('cpt_prob is ', '0.909028929846')
Class :2 Prob : 0.271921
Node is x1
('cpt_prob is ', '0.0')
Node is x2
('cpt_prob is ', '0.719956802592')
Node is x3
('cpt_prob is ', '0.0')
Node is x4
('cpt_prob is ', '0.003')
Class :0 Prob : 0.000000
Node is x1
('cpt_prob is ', '0.869451810633')
Node is x2
('cpt_prob is ', '0.459972401656')
Node is x3
('cpt_prob is ', '0.039997600144')
Node is x4
('cpt_prob is ', '0.0')
Class :1 Prob : 0.000000
Node is x1
('cpt_prob is ', '0.586146260732')
Node is x2
('cpt_prob is ', '0.579965202088')
Node is x3
('cpt_prob is ', '0.879947203168')
Node is x4
('cpt_prob is ', '0.909028929846')
Class :2 Prob : 0.271921
4


In [131]:
errors

4

In [150]:
x=np.repeat(list(range(10)),repeats=15)

In [151]:
len(x)

150

In [152]:
data['fold'] = pd.Series(x)

In [153]:
data.head()

Unnamed: 0,x1,x2,x3,x4,Class,fold
0,0,1,0,0,0,0
1,0,1,0,0,0,0
2,0,1,0,0,0,0
3,0,1,0,0,0,0
4,0,1,0,0,0,0


In [189]:
for fold in range(10):
    print("Fold is %d"%fold)
    
    train = data[data['fold']!=fold]
    test = data[data['fold']==fold]
   
    
    train.drop('fold',axis = 1, inplace=True)
    test.drop('fold',axis = 1, inplace=True)
    
    net = BayesNet(4)
    net.initGraph()
    net.compCPT(train)
    errors = 0

    #Testing

    for i in range(len(test)):

        y = test.iloc[i:(i+1)]
   
        #yLabel = test.iloc[i]['Class']
        out = net.predict(y)
 
    

        if out != test.iloc[i]['Class']:
            errors+=1

    print(errors)
    print("---------------------------------")
    

Fold is 0


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if __name__ == '__main__':


0
---------------------------------
Fold is 1
0
---------------------------------
Fold is 2
0
---------------------------------
Fold is 3
2
---------------------------------
Fold is 4
1
---------------------------------
Fold is 5
4
---------------------------------
Fold is 6
0
---------------------------------
Fold is 7
1
---------------------------------
Fold is 8
2
---------------------------------
Fold is 9
0
---------------------------------


In [190]:
def kFold(data,k=10):
    
    foldSize = len(data)/k
    accuracyList = []
    
    for fold in range(k):
        

        train = data[data['fold']!=fold]
        test = data[data['fold']==fold]

        train.drop('fold',axis = 1, inplace=True)
        test.drop('fold',axis = 1, inplace=True)

        net = BayesNet(4)
        net.initGraph()
        net.compCPT(train)
        errors = 0

        #Testing
        for i in range(len(test)):

            y = test.iloc[i:(i+1)]
            out = net.predict(y)

            if out != test.iloc[i]['Class']:
                errors+=1
        acc = float(foldSize - errors)/foldSize
        accuracyList.append(acc)
        
        print("Fold :%d Accuracy : %f"%(fold,acc))
    
    print("Overall CV accuracy : %f"%(np.mean(accuracyList)))


In [191]:
kFold(data,10)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if sys.path[0] == '':
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  del sys.path[0]


Fold :0 Accuracy : 1.000000
Fold :1 Accuracy : 1.000000
Fold :2 Accuracy : 1.000000
Fold :3 Accuracy : 0.866667
Fold :4 Accuracy : 0.933333
Fold :5 Accuracy : 0.733333
Fold :6 Accuracy : 1.000000
Fold :7 Accuracy : 0.933333
Fold :8 Accuracy : 0.866667
Fold :9 Accuracy : 1.000000
Overall CV accuracy : 0.933333


In [155]:
train = data[data['fold']!=0]
test = data[data['fold']==0]

In [156]:
net = BayesNet(4)
net.initGraph()
net.compCPT(train)

In [157]:
i = 0
dt = test.iloc[i:(i+1)]
print(dt)

   x1  x2  x3  x4  Class  fold
0   0   1   0   0      0     0


In [174]:
d=[]

for i in range(2):
    d.append([1,1])
for i in range(18):
    d.append([1,0])
for i in range(6):
    d.append([0,0])
for i in range(24):
    d.append([0,1])
    
d = pd.DataFrame(d,columns=['x1','x2'])

cpt_1(d,limit=2,cols=['x1'],target='x2')

Unnamed: 0,x1,x2,prob
0,0,0,0.2
1,0,1,0.8
2,1,0,0.9
3,1,1,0.1


In [176]:
cpt_0(d,limit=2,cols=['x1'],target='x1')

Unnamed: 0,x1,prob
0,0,0.6
1,1,0.4


In [16]:
data.head()

Unnamed: 0,x1,x2,x3,x4,Class
0,0,1,0,0,0
1,0,1,0,0,0
2,0,1,0,0,0
3,0,1,0,0,0
4,0,1,0,0,0


In [62]:
def predict(net,data):
    
    maxProb = 0
    maxProbClass = 0
    
    for classVal in range(3):
        
        dt = data
        dt["Class"] = classVal
        prob = 1.0
        
        for i in range(net.numNodes):
            #print('Node is x'+str(i+1))

            pt=net.varNodes['x'+str(i+1)].cpt

            mergeList = net.varNodes['x'+str(i+1)].parents + ['x'+str(i+1)]

            cpt_prob = pd.merge(left=pt,right=dt,on=mergeList,how='inner')['prob'][0]
            print("cpt_prob is ",str(cpt_prob))

            prob = cpt_prob*prob
            
        print("Class :%d Prob : %f"%(classVal,prob))
            
        if prob>maxProb:
            maxProb = prob
            maxProbClass = classVal
                
        
    