In [1]:
%matplotlib inline

In [2]:
import random

In [3]:
class BayesModel:

    """Bayesian belief network model with boolean variables."""

    def __init__(self, node_specs=[]):
        
        self.nodes = []
        self.variables = []
        for node_spec in node_specs:
            self.add(node_spec)

    def add(self, node_spec):
        """Parent nodes should be added first."""
        
        node = Node(*node_spec)

        assert node.variable not in self.variables
        assert all((parent in self.variables) for parent in node.parents)
        self.nodes.append(node)
        self.variables.append(node.variable)
        for parent in node.parents:
            self.variable_node(parent).children.append(node)

    def variable_node(self, var):
        for n in self.nodes:
            if n.variable == var:
                return n

    def variable_values(self, var):
        return [True, False]

    def __repr__(self):
        return 'BayesModel(%r)' % self.nodes

In [4]:
class Node:

    """Takes conditional probability distribution of variables in the network."""

    def __init__(self, X, parents, cpt):
        """
        inputs:
            X       : variable name
            parents : string of variable names separated by space e.g. 'P Q', parent variables P and Q
            cpt     : dictinary of the probabilities for booleans
            example : Node('Z', 'X Y',{(T, T): 0.7, (T, F): 0.1, (F, T): 0.2, (F, F): 0.3})
            
        """
        if isinstance(parents, str):
            parents = parents.split()      #takes second string (the parents), split the variables 
            
        if isinstance(cpt, (float, int)):  # no parents
            cpt = {(): cpt}

        elif isinstance(cpt, dict):        # one parent
            if cpt and isinstance(list(cpt.keys())[0], bool):
                cpt = {(v,): p for v, p in cpt.items()}

        assert isinstance(cpt, dict)
        for vs, p in cpt.items():
            assert isinstance(vs, tuple) and len(vs) == len(parents)
            assert all(isinstance(v, bool) for v in vs)
            assert 0 <= p <= 1

        self.variable = X
        self.parents = parents
        self.cpt = cpt
        self.children = []
        

    def p(self, value, event):
        """Returns conditional probability.
           e.g. P(X| parents=parent_values)
                p(True, {'X': True, 'Y': True}
        
        """
        assert isinstance(value, bool)
        ptrue = self.cpt[event_values(event, self.parents)]
        
        return ptrue if value else 1 - ptrue

    
    def __repr__(self):
        return repr((self.variable, ' '.join(self.parents)))


In [5]:
class Distribution:

    """
    Probability Distribution
    Example: p = Distribution('X', {'a': 2, 'b': 4})
             p['a'],p['b'] returns (2,4) 
    """

    def __init__(self, varname='?', freqs=None):
        """If freqs is given, it is a dictionary of value: frequency pairs,
        and the Distribution then is normalized."""
        self.prob = {}
        self.varname = varname
        self.values = []
        if freqs:
            for (v, p) in freqs.items():
                self[v] = p
            self.normalize()

    def __getitem__(self, val):
        """Input a value, output probability of the value."""
        try:
            return self.prob[val]
        except KeyError:
            return 0

    def __setitem__(self, val, p):
        if val not in self.values:
            self.values.append(val)
        self.prob[val] = p

    def normalize(self):
        """Returns the normalized distribution. Probabilities sum up to 1."""
        total = sum(self.prob.values())
        
        if not isclose(total, 1.0):
            for val in self.prob:
                self.prob[val] /= total
        return self
    

    def __repr__(self):
        return "P(%s)" % self.varname

In [6]:
#OTHER FUNCTIONS

def event_values(event, variables):
    if isinstance(event, tuple) and len(event) == len(variables):
        return event
    else:
        return tuple([event[var] for var in variables])
    
def copy(s, var, val):
    s2 = s.copy()
    s2[var] = val
    return s2

def product(numbers):
    result = 1
    for x in numbers:
        result *= x
    return result

def probability(p):
    return p > random.uniform(0.0, 1.0)

def isclose(a, b, rel_tol=1e-09, abs_tol=0.0): #makes sure I/O division by zero error will not occur
    "Return true if numbers a and b are close to each other."
    return abs(a - b) <= max(rel_tol * max(abs(a), abs(b)), abs_tol)

In [7]:
def gibbs(X, e, bn, N):
    
    """
    Input X: variable interested in
          e: evidence variables
          bn:Bayesian network model
          N: number of iterations
    """
    
    assert X not in e,                                     " makes sure X is distinct from evidence"
    counts = {x: 0 for x in bn.variable_values(X)}          # counter of the many times X is False/True
    Z = [var for var in bn.variables if var not in e]       # non-evidence variables
    state = dict(e)                                         # current state of the network

    for Zi in Z:
        state[Zi] = random.choice(bn.variable_values(Zi))   # initialize state (either True/False) randomly for non-evidence variables
    for j in range(N):
        for Zi in Z:
            state[Zi] = markov_blanket(Zi, state, bn)
            counts[state[X]] += 1
    
    total = counts[False] + counts[True]
    nTrue = counts[True]/total
    nFalse = counts[False]/total
    
    return {True: nTrue, False: nFalse}

In [8]:
def markov_blanket(X, e, bn):
    """
    Returns a sample from P(X | mb) where mb = variables in the Markov blanket of X.
    Markov blanket of X is X's parents, children, and children's parents.
    """
    
    Xnode = bn.variable_node(X)
    D = Distribution(X)

    for xi in bn.variable_values(X):
        ei = copy(e, X, xi)
        D[xi] = Xnode.p(xi, e) * product(Yj.p(ei[Yj.variable], ei)     #Markov blanket formula
                                         for Yj in Xnode.children)
        
    return probability(D.normalize()[True])

In [9]:
T,F = True,False

In [10]:
sprinkler = BayesModel([
    ('Cloudy', '', 0.5),
    ('Sprinkler', 'Cloudy', {T: 0.10, F: 0.50}),
    ('Rain', 'Cloudy', {T: 0.80, F: 0.20}),
    ('WetGrass', 'Sprinkler Rain',
     {(T, T): 0.99, (T, F): 0.90, (F, T): 0.90, (F, F): 0.01})])

In [11]:
sprinkler

BayesModel([('Cloudy', ''), ('Sprinkler', 'Cloudy'), ('Rain', 'Cloudy'), ('WetGrass', 'Sprinkler Rain')])

In [12]:
#P(Rain| Sprinkler=true, WetGrass = true)

In [13]:
r = gibbs('Rain',dict(Sprinkler=T, WetGrass=T),sprinkler,100)

In [14]:
r

{False: 0.615, True: 0.385}

In [15]:
r[True]

0.385

In [16]:
T,F = True, False

In [17]:
cd "D:\GRADUATE\NTHU\1st Year\1st Sem\Introduction to Intelligent Computing\Final Project\Data"

D:\GRADUATE\NTHU\1st Year\1st Sem\Introduction to Intelligent Computing\Final Project\Data


In [18]:
import pandas as pd

In [19]:
mat = pd.read_csv('student-mat.csv')
por = pd.read_csv('student-por.csv')

In [20]:
data = pd.merge(mat, por, on=["school","sex","age","address","famsize","Pstatus","Medu","Fedu","Mjob","Fjob","reason","nursery","internet"], how='inner')

In [21]:
len(data)

382

In [22]:
data.columns

Index(['school', 'sex', 'age', 'address', 'famsize', 'Pstatus', 'Medu', 'Fedu',
       'Mjob', 'Fjob', 'reason', 'guardian_x', 'traveltime_x', 'studytime_x',
       'failures_x', 'schoolsup_x', 'famsup_x', 'paid_x', 'activities_x',
       'nursery', 'higher_x', 'internet', 'romantic_x', 'famrel_x',
       'freetime_x', 'goout_x', 'Dalc_x', 'Walc_x', 'health_x', 'absences_x',
       'G1_x', 'G2_x', 'G3_x', 'guardian_y', 'traveltime_y', 'studytime_y',
       'failures_y', 'schoolsup_y', 'famsup_y', 'paid_y', 'activities_y',
       'higher_y', 'romantic_y', 'famrel_y', 'freetime_y', 'goout_y', 'Dalc_y',
       'Walc_y', 'health_y', 'absences_y', 'G1_y', 'G2_y', 'G3_y'],
      dtype='object')

In [23]:
data = data.drop(['guardian_y', 'traveltime_y', 'studytime_y',
       'failures_y', 'schoolsup_y', 'famsup_y', 'paid_y', 'activities_y',
       'higher_y', 'romantic_y', 'famrel_y', 'freetime_y', 'goout_y', 'Dalc_y',
       'Walc_y', 'health_y', 'absences_y', 'G1_y', 'G2_y','G1_x', 'G2_x'], axis = 1)

In [24]:
data.rename(columns={'guardian_x'   : 'guardian' ,  'traveltime_x' : 'traveltime',
                     'studytime_x'  : 'studytime',  'failures_x' : 'failures', 'schoolsup_x': 'schoolsup', 
                     'famsup_x':'famsup',  'paid_x': 'paid', 'activities_x' : 'activities',
                     'higher_x' : 'higher', 'romantic_x': 'romantic', 'famrel_x':'famrel', 'freetime_x':'freetime', 
                     'goout_x':'goout', 'Dalc_x':'Dalc','Walc_x':'Walc', 'health_x':'health', 'absences_x':'absences', 
                     'G3_y':'G3_Por','G3_x':'G3_Mat'
                    }, inplace=True)

In [25]:
data.columns

Index(['school', 'sex', 'age', 'address', 'famsize', 'Pstatus', 'Medu', 'Fedu',
       'Mjob', 'Fjob', 'reason', 'guardian', 'traveltime', 'studytime',
       'failures', 'schoolsup', 'famsup', 'paid', 'activities', 'nursery',
       'higher', 'internet', 'romantic', 'famrel', 'freetime', 'goout', 'Dalc',
       'Walc', 'health', 'absences', 'G3_Mat', 'G3_Por'],
      dtype='object')

In [26]:
set(data.famsize)

{'GT3', 'LE3'}

In [27]:
data['AvgAlc'] = ((data.Walc*2) + (data.Dalc*5.0))/7.0
data['alcohol'] = data.AvgAlc.map(lambda x: 'True' if x >= 3 else 'False')        #True: Non-alcoholic

In [28]:
data['higher'].head(2)

0    yes
1    yes
Name: higher, dtype: object

In [29]:
data['Average'] = (data.G3_Mat + data.G3_Por)/2.0
data['performance'] = data.Average.map(lambda x: 'True' if x >= 12 else 'False')  #True: Good performance
data['famsize']   = data.famsize.map(lambda x: 'True' if x == 'GT3' else 'False') #True: Greater than 3, large family
data['famsup']   = data.famsup.map(lambda x: 'True' if x == 'yes' else 'False') 
data['studytime']   = data.studytime.map(lambda x: 'True' if x >= 3 else 'False')  #True: 3 : 5 to 10 hours, or 4 : >10 hours
data['higher']   = data.higher.map(lambda x: 'True' if x == 'yes' else 'False')  
data['sex']   = data.sex.map(lambda x: 'True' if x == 'M' else 'False')           #True: Male
data['freetime']   = data.freetime.map(lambda x: 'True' if x >= 3 else 'False')    #True: 4-5 => range from 1-5 (Very low to Very high)
data['goout']   = data.goout.map(lambda x: 'True' if x >= 3 else 'False')          #True: 4-5 => range from 1-5 (Very low to Very high)

In [30]:
data.head(2)

Unnamed: 0,school,sex,age,address,famsize,Pstatus,Medu,Fedu,Mjob,Fjob,...,Dalc,Walc,health,absences,G3_Mat,G3_Por,AvgAlc,alcohol,Average,performance
0,GP,False,18,U,True,A,4,4,at_home,teacher,...,1,1,3,6,6,11,1.0,False,8.5,False
1,GP,False,17,U,True,T,1,1,at_home,other,...,1,1,3,4,6,11,1.0,False,8.5,False


In [31]:
data.to_csv("D:\\GRADUATE\\NTHU\\1st Year\\1st Sem\\Introduction to Intelligent Computing\\Final Project\\data.csv")

In [87]:
#Random sampling

num_obs = len(data)
num_train_sample = int((num_obs) * 0.80)

from random import sample
import numpy as np

#create random index
rindex = np.array(sample(range(len(data)),num_train_sample))

#get "num_train_sample" random rows from train

train = data.ix[rindex]
test =  data.drop(rindex)

In [88]:
train.head()

Unnamed: 0,school,sex,age,address,famsize,Pstatus,Medu,Fedu,Mjob,Fjob,...,Dalc,Walc,health,absences,G3_Mat,G3_Por,AvgAlc,alcohol,Average,performance
3,GP,False,15,U,True,T,4,2,health,services,...,1,1,5,2,15,14,1.0,False,14.5,True
91,GP,True,16,U,True,T,2,2,services,other,...,1,1,3,12,10,11,1.0,False,10.5,False
317,GP,False,17,R,False,T,2,2,services,services,...,2,2,3,3,11,10,2.0,False,10.5,False
314,GP,False,18,U,True,T,4,4,teacher,other,...,3,3,5,2,11,13,3.0,True,12.0,True
349,MS,False,17,R,True,T,4,4,teacher,services,...,1,2,5,4,13,12,1.285714,False,12.5,True


In [89]:
train[['alcohol','sex']].groupby(['alcohol']).count()

Unnamed: 0_level_0,sex
alcohol,Unnamed: 1_level_1
False,276
True,29


In [90]:
len(train[train.alcohol == 'True'])/len(train)

0.09508196721311475

In [91]:
len(test[test.alcohol == 'True'])/len(test)

0.14285714285714285

In [92]:
test[['alcohol','sex']].groupby(['alcohol']).count()

Unnamed: 0_level_0,sex
alcohol,Unnamed: 1_level_1
False,66
True,11


In [93]:
train.to_csv("D:\\GRADUATE\\NTHU\\1st Year\\1st Sem\\Introduction to Intelligent Computing\\Final Project\\train.csv")
test.to_csv("D:\\GRADUATE\\NTHU\\1st Year\\1st Sem\\Introduction to Intelligent Computing\\Final Project\\test.csv")

In [94]:
print(len(train))

305


In [95]:
print(len(test))

77


# Training data

In [96]:
def Count():
    global count,count_,count1,count2,count3,count4,count5,count6,count7,count8,count9,count10,count11,count12,count13,count14,count15,count16,count17,count18,count19,count20,count21,count22,count23,count24,count25,count26,count27,count28,count29,count30
    count=count_=count1=count2=count3=count4=count5=count6=count7=count8=count9=count10=count11=count12=count13=count14=count15=count16=count17=count18=count19=count20=count21=count22=count23=count24=count25=count26=count27=count28=count29=count30=0


In [97]:
#famsize
Count()
for i, j in train.iterrows():
    if train.famsize[i] == "True":
         count = count + 1 
    else:
        count_ = count_ + 1
GT3=count
P_GT3=count/len(train)
LE3=count_
P_LE3=count_/len(train)

In [98]:
print(GT3)

222


In [99]:
print(LE3)

83


In [100]:
#studytime
Count()
for i, j in train.iterrows():
    if (train.studytime[i] == 'True'):
        count = count + 1 
    elif (train.studytime[i] == 'False'):        
        count_ = count_ + 1
FIVE=count
P_FIVE=count/len(train)
LFIVE=count_
P_LFIVE=count_/len(train)
print ("\nStudy time is less than 5 hours:\t", FIVE, P_FIVE)
print ("Study time is more than 5 hours:\t", LFIVE, P_LFIVE)


Study time is less than 5 hours:	 75 0.2459016393442623
Study time is more than 5 hours:	 230 0.7540983606557377


In [101]:
#print("FAMILY SUPPORT given FAMILY SIZE")
Count()
for i, j in train.iterrows():
    if (train.famsup[i] == "True") and  (train.famsize[i] == "True"):
        count = count + 1 
    elif (train.famsup[i] == "True") and  (train.famsize[i] == "False"):
        count_ = count_ + 1
    elif (train.famsup[i] == "False") and  (train.famsize[i] == "True"):        
        count1 = count1 + 1
    elif (train.famsup[i] == "False") and  (train.famsize[i] == "False"):      
        count2 = count2 + 1
SUPSIZE=count
P_SUPSIZE=count/GT3
SUPNSIZE=count_
P_SUPNSIZE=count_/LE3
NSUPSIZE=count1
P_NSUPSIZE=count1/GT3
NSUPNSIZE=count2
P_NSUPNSIZE=count2/LE3

print ("FAMILY SUPPORT and FAMILY SIZE GT3:\t", SUPSIZE, P_SUPSIZE)
print ("FAMILY SUPPORT and FAMILY SIZE LE3:\t", SUPNSIZE, P_SUPNSIZE)
print ("-FAMILY SUPPORT and FAMILY SIZE GT3:\t", NSUPSIZE, P_NSUPSIZE)
print ("-FAMILY SUPPORT and FAMILY SIZE LE3:\t", NSUPNSIZE, P_NSUPNSIZE)

FAMILY SUPPORT and FAMILY SIZE GT3:	 145 0.6531531531531531
FAMILY SUPPORT and FAMILY SIZE LE3:	 46 0.5542168674698795
-FAMILY SUPPORT and FAMILY SIZE GT3:	 77 0.34684684684684686
-FAMILY SUPPORT and FAMILY SIZE LE3:	 37 0.4457831325301205


In [102]:
#print ("FAMILY SUPPORT, STUDYTIME, FAMILY SIZE")      
Count()
for i, j in train.iterrows():
    if (train.famsup[i] == "True") and (train.studytime[i] == "True") and  (train.famsize[i] == "True"):
        count = count + 1 
    elif (train.famsup[i] == "True") and (train.studytime[i] == "True") and (train.famsize[i] == "False"):
        count_ = count_ + 1
    elif (train.famsup[i] == "True") and (train.studytime[i] == "False") and (train.famsize[i] == "True"):        
        count1 = count1 + 1
    elif (train.famsup[i] == "True") and (train.studytime[i] == "False")  and (train.famsize[i] == "False"):      
        count2 = count2 + 1
    elif (train.famsup[i] == "False") and (train.studytime[i] == "True") and  (train.famsize[i] == "True"):
        count3 = count3 + 1 
    elif (train.famsup[i] == "False") and (train.studytime[i] == "True") and (train.famsize[i] == "False"):
        count4 = count4 + 1
    elif (train.famsup[i] == "False") and (train.studytime[i] == "False") and (train.famsize[i] == "True"):        
        count5 = count5 + 1
    elif (train.famsup[i] == "False") and (train.studytime[i] == "False")  and (train.famsize[i] == "False"):      
        count6 = count6 + 1

SUPSTUDSIZE=count
SUPSTUDNSIZE=count_
SUPNSTUDSIZE=count1
SUPNSTUDNSIZE=count2
NSUPSTUDSIZE=count3
NSUPSTUDNSIZE=count4
NSUPNSTUDSIZE=count5
NSUPNSTUDNSIZE=count6

In [103]:
print("ACADEMIC PERFORMACE given FAMILY SUPPORT, STUDYTIME, FAMILY SIZE")
Count()
for i, j in train.iterrows():
    if (train.Average[i] >= 12 ) and (train.famsup[i] == "True") and (train.studytime[i] == "True") and  (train.famsize[i] == "True"):
        count = count + 1 
    elif (train.Average[i] >= 12 ) and (train.famsup[i] == "True") and (train.studytime[i] == "True") and (train.famsize[i] == "False"):
        count_ = count_ + 1
    elif (train.Average[i] >= 12 ) and (train.famsup[i] == "True") and (train.studytime[i] == "False") and (train.famsize[i] == "True"):        
        count1 = count1 + 1
    elif (train.Average[i] >= 12 ) and (train.famsup[i] == "True") and (train.studytime[i] == "False")  and (train.famsize[i] == "False"):      
        count2 = count2 + 1
    elif (train.Average[i] >= 12 ) and (train.famsup[i] == "False") and (train.studytime[i] == "True") and  (train.famsize[i] == "True"):
        count3 = count3 + 1 
    elif (train.Average[i] >= 12 ) and (train.famsup[i] == "False") and (train.studytime[i] == "True") and (train.famsize[i] == "False"):
        count4 = count4 + 1
    elif (train.Average[i] >= 12 ) and (train.famsup[i] == "False") and (train.studytime[i] == "False") and (train.famsize[i] == "True"):        
        count5 = count5 + 1
    elif (train.Average[i] >= 12 ) and (train.famsup[i] == "False") and (train.studytime[i] == "False")  and (train.famsize[i] == "False"):      
        count6 = count6 + 1
        
        
    elif (train.Average[i] < 12 ) and (train.famsup[i] == "True") and (train.studytime[i] == "True") and  (train.famsize[i] == "True"):
        count7 = count7 + 1 
    elif (train.Average[i] < 12 ) and (train.famsup[i] == "True") and (train.studytime[i] == "True") and (train.famsize[i] == "False"):
        count8 = count8 + 1
    elif (train.Average[i] < 12 ) and (train.famsup[i] == "True") and (train.studytime[i] == "False") and (train.famsize[i] == "True"):        
        count9 = count9 + 1
    elif (train.Average[i] < 12 ) and (train.famsup[i] == "True") and (train.studytime[i] == "False")  and (train.famsize[i] == "False"):      
        count10 = count10 + 1
    elif (train.Average[i] < 12 ) and (train.famsup[i] == "False") and (train.studytime[i] == "True") and  (train.famsize[i] == "True"):
        count11 = count11 + 1 
    elif (train.Average[i] < 12 ) and (train.famsup[i] == "False") and (train.studytime[i] == "True") and (train.famsize[i] == "False"):
        count12 = count12 + 1
    elif (train.Average[i] < 12 ) and (train.famsup[i] == "False") and (train.studytime[i] == "False") and (train.famsize[i] == "True"):        
        count13 = count13 + 1
    elif (train.Average[i] < 12 ) and (train.famsup[i] == "False") and (train.studytime[i] == "False")  and (train.famsize[i] == "False"):      
        count14 = count14 + 1
        

AVESUPSTUDSIZE=count
AVESUPSTUDNSIZE=count_
AVESUPNSTUDSIZE=count1
AVESUPNSTUDNSIZE=count2
AVENSUPSTUDSIZE=count3
AVENSUPSTUDNSIZE=count4
AVENSUPNSTUDSIZE=count5
AVENSUPNSTUDNSIZE=count6
NAVESUPSTUDSIZE=count7
NAVESUPSTUDNSIZE=count8
NAVESUPNSTUDSIZE=count9
NAVESUPNSTUDNSIZE=count10
NAVENSUPSTUDSIZE=count11
NAVENSUPSTUDNSIZE=count12
NAVENSUPNSTUDSIZE=count13
NAVENSUPNSTUDNSIZE=count14

P_AVESUPSTUDSIZE=count/SUPSTUDSIZE
P_AVESUPSTUDNSIZE=count_/SUPSTUDNSIZE
P_AVESUPNSTUDSIZE=count1/SUPNSTUDSIZE
P_AVESUPNSTUDNSIZE=count2/SUPNSTUDNSIZE
P_AVENSUPSTUDSIZE=count3/NSUPSTUDSIZE
P_AVENSUPSTUDNSIZE=count4/NSUPSTUDNSIZE
P_AVENSUPNSTUDSIZE=count5/NSUPNSTUDSIZE
P_AVENSUPNSTUDNSIZE=count6/NSUPNSTUDNSIZE
P_NAVESUPSTUDSIZE=count7/SUPSTUDSIZE
P_NAVESUPSTUDNSIZE=count8/SUPSTUDNSIZE
P_NAVESUPNSTUDSIZE=count9/SUPNSTUDSIZE
P_NAVESUPNSTUDNSIZE=count10/SUPNSTUDNSIZE
P_NAVENSUPSTUDSIZE=count11/NSUPSTUDSIZE
P_NAVENSUPSTUDNSIZE=count12/NSUPSTUDNSIZE
P_NAVENSUPNSTUDSIZE=count13/NSUPNSTUDSIZE
P_NAVENSUPNSTUDNSIZE=count14/NSUPNSTUDNSIZE

print(P_AVESUPSTUDSIZE)
print(P_AVESUPSTUDNSIZE)
print(P_AVESUPNSTUDSIZE)
print(P_AVESUPNSTUDNSIZE)
print(P_AVENSUPSTUDSIZE)
print(P_AVENSUPSTUDNSIZE)
print(P_AVENSUPNSTUDSIZE)
print(P_AVENSUPNSTUDNSIZE)
print ()
print(P_NAVESUPSTUDSIZE)
print(P_NAVESUPSTUDNSIZE)
print(P_NAVESUPNSTUDSIZE)
print(P_NAVESUPNSTUDNSIZE)
print(P_NAVENSUPSTUDSIZE)
print(P_NAVENSUPSTUDNSIZE)
print(P_NAVENSUPNSTUDSIZE)
print(P_NAVENSUPNSTUDNSIZE)


ACADEMIC PERFORMACE given FAMILY SUPPORT, STUDYTIME, FAMILY SIZE
0.5365853658536586
0.8181818181818182
0.41346153846153844
0.4
0.5
0.6666666666666666
0.5263157894736842
0.4411764705882353

0.4634146341463415
0.18181818181818182
0.5865384615384616
0.6
0.5
0.3333333333333333
0.47368421052631576
0.5588235294117647


In [104]:
#print ("ACADEMIC PERFORMANCE, STUDYTIME, FAMILY SUPPORT")      
Count()
for i, j in train.iterrows():
    if (train.Average[i] >= 12) and (train.studytime[i] == "True") and  (train.famsup[i] == "True"):
        count = count + 1 
    elif (train.Average[i] >= 12) and (train.studytime[i] == "True") and (train.famsup[i] == "False"):
        count_ = count_ + 1
    elif (train.Average[i] >= 12) and (train.studytime[i] == "False") and (train.famsup[i] == "True"):        
        count1 = count1 + 1
    elif (train.Average[i] >= 12) and (train.studytime[i] == "False")  and (train.famsup[i] == "False"):      
        count2 = count2 + 1
    elif (train.Average[i] < 12) and (train.studytime[i] == "True") and  (train.famsup[i] == "True"):
        count3 = count3 + 1 
    elif (train.Average[i] < 12) and (train.studytime[i] == "True") and (train.famsup[i] == "False"):
        count4 = count4 + 1
    elif (train.Average[i] < 12) and (train.studytime[i] == "False") and (train.famsup[i] == "True"):        
        count5 = count5 + 1
    elif (train.Average[i] < 12) and (train.studytime[i] == "False")  and (train.famsup[i] == "False"):      
        count6 = count6 + 1

AVESTUDSUP=count
AVESTUDNSUP=count_
AVENSTUDSUP=count1
AVENSTUDNSUP=count2
NAVESTUDSUP=count3
NAVESTUDNSUP=count4
NAVENSTUDSUP=count5
NAVENSTUDNSUP=count6

print ("HIGHER EDUCATION given ACADEMIC PERFORMANCE, STUDYTIME, FAMILY SIZE") 

Count()
for i, j in train.iterrows():
    if (train.higher[i] == "True") and (train.Average[i] >= 12) and (train.studytime[i] == "True") and  (train.famsup[i] == "True"):
        count = count + 1 
    elif (train.higher[i] == "True") and (train.Average[i] >= 12) and (train.studytime[i] == "True") and (train.famsup[i] == "False"):
        count_ = count_ + 1
    elif (train.higher[i] == "True") and (train.Average[i] >= 12) and (train.studytime[i] == "False") and (train.famsup[i] == "True"):        
        count1 = count1 + 1
    elif (train.higher[i] == "True") and (train.Average[i] >= 12) and (train.studytime[i] == "False")  and (train.famsup[i] == "False"):      
        count2 = count2 + 1
    elif (train.higher[i] == "True") and (train.Average[i] < 12) and (train.studytime[i] == "True") and  (train.famsup[i] == "True"):
        count3 = count3 + 1 
    elif (train.higher[i] == "True") and (train.Average[i] < 12) and (train.studytime[i] == "True") and (train.famsup[i] == "False"):
        count4 = count4 + 1
    elif (train.higher[i] == "True") and (train.Average[i] < 12) and (train.studytime[i] == "False") and (train.famsup[i] == "True"):        
        count5 = count5 + 1
    elif (train.higher[i] == "True") and (train.Average[i] < 12) and (train.studytime[i] == "False")  and (train.famsup[i] == "False"):      
        count6 = count6 + 1
    
    elif (train.higher[i] == "False") and (train.Average[i] >= 12) and (train.studytime[i] == "True") and  (train.famsup[i] == "True"):
        count7 = count7 + 1 
    elif (train.higher[i] == "False") and (train.Average[i] >= 12) and (train.studytime[i] == "True") and (train.famsup[i] == "False"):
        count8 = count8 + 1
    elif (train.higher[i] == "False") and (train.Average[i] >= 12) and (train.studytime[i] == "False") and (train.famsup[i] == "True"):        
        count9 = count9 + 1
    elif (train.higher[i] == "False") and (train.Average[i] >= 12) and (train.studytime[i] == "False")  and (train.famsup[i] == "False"):      
        count10 = count10 + 1
    elif (train.higher[i] == "False") and (train.Average[i] < 12) and (train.studytime[i] == "True") and  (train.famsup[i] == "True"):
        count11 = count11 + 1 
    elif (train.higher[i] == "False") and (train.Average[i] < 12) and (train.studytime[i] == "True") and (train.famsup[i] == "False"):
        count12 = count12 + 1
    elif (train.higher[i] == "False") and (train.Average[i] < 12) and (train.studytime[i] == "False") and (train.famsup[i] == "True"):        
        count13 = count13 + 1
    elif (train.higher[i] == "False") and (train.Average[i] < 12) and (train.studytime[i] == "False")  and (train.famsup[i] == "False"):      
        count14 = count14 + 1

HIGHAVESTUDSUP=count
HIGHAVESTUDNSUP=count_
HIGHAVENSTUDSUP=count1
HIGHAVENSTUDNSUP=count2
HIGHNAVESTUDSUP=count3
HIGHNAVESTUDNSUP=count4
HIGHNAVENSTUDSUP=count5
HIGHNAVENSTUDNSUP=count6

NHIGHAVESTUDSUP=count7
NHIGHAVESTUDNSUP=count8
NHIGHAVENSTUDSUP=count9
NHIGHAVENSTUDNSUP=count10
NHIGHNAVESTUDSUP=count11
NHIGHNAVESTUDNSUP=count12
NHIGHNAVENSTUDSUP=count13
NHIGHNAVENSTUDNSUP=count14

P_HIGHAVESTUDSUP=count/AVESTUDSUP
P_HIGHAVESTUDNSUP=count_/AVESTUDNSUP
P_HIGHAVENSTUDSUP=count1/AVENSTUDSUP
P_HIGHAVENSTUDNSUP=count2/AVENSTUDNSUP
P_HIGHNAVESTUDSUP=count3/NAVESTUDSUP
P_HIGHNAVESTUDNSUP=count4/NAVESTUDNSUP
P_HIGHNAVENSTUDSUP=count5/NAVENSTUDSUP
P_HIGHNAVENSTUDNSUP=count6/NAVENSTUDNSUP
P_NHIGHAVESTUDSUP=count7/AVESTUDSUP
P_NHIGHAVESTUDNSUP=count8/AVESTUDNSUP
P_NHIGHAVENSTUDSUP=count9/AVENSTUDSUP
P_NHIGHAVENSTUDNSUP=count10/AVENSTUDNSUP
P_NHIGHNAVESTUDSUP=count11/NAVESTUDSUP
P_NHIGHNAVESTUDNSUP=count12/NAVESTUDNSUP
P_NHIGHNAVENSTUDSUP=count13/NAVENSTUDSUP
P_NHIGHNAVENSTUDNSUP=count14/NAVENSTUDNSUP

print(P_HIGHAVESTUDSUP)
print(P_HIGHAVESTUDNSUP)
print(P_HIGHAVENSTUDSUP)
print(P_HIGHAVENSTUDNSUP)
print(P_HIGHNAVESTUDSUP)
print(P_HIGHNAVESTUDNSUP)
print(P_HIGHNAVENSTUDSUP)
print(P_HIGHNAVENSTUDNSUP)
print ()
print(P_NHIGHAVESTUDSUP)
print(P_NHIGHAVESTUDNSUP)
print(P_NHIGHAVENSTUDSUP)
print(P_NHIGHAVENSTUDNSUP)
print(P_NHIGHNAVESTUDSUP)
print(P_NHIGHNAVESTUDNSUP)
print(P_NHIGHNAVENSTUDSUP)
print(P_NHIGHNAVENSTUDNSUP)

HIGHER EDUCATION given ACADEMIC PERFORMANCE, STUDYTIME, FAMILY SIZE
1.0
1.0
1.0
1.0
1.0
1.0
0.9146341463414634
0.8478260869565217

0.0
0.0
0.0
0.0
0.0
0.0
0.08536585365853659
0.15217391304347827


In [105]:
#with np.errstate(divide='ignore'):
#print ("STUDY TIME, HIGHER")    
Count()
for i, j in train.iterrows():
        if (train.studytime[i] == "True") and  (train.higher[i] == "True"):
            count = count + 1 
        elif (train.studytime[i] == "True") and  (train.higher[i] == "False"):
            count_ = count_ + 1
        elif (train.studytime[i] == "False") and  (train.higher[i] == "True"):        
            count1 = count1 + 1
        elif (train.studytime[i]  == "False") and  (train.higher[i] == "False"):      
            count2 = count2 + 1
STUDHIGH = count
STUDNHIGH = count_ # value is 0
NSTUDHIGH = count1
NSTUDNHIGH = count2

print ("SEX given STUDY TIME, HIGHER") 
Count()
for i, j in train.iterrows():
    if (train.sex[i] == "True") and (train.studytime[i] == "True") and  (train.higher[i] == "True"):
        count = count + 1 
    elif (train.sex[i] == "True") and (train.studytime[i] == "True") and  (train.higher[i] == "False"):
        count_ = count_ + 1
    elif (train.sex[i] == "True") and (train.studytime[i] == "False") and  (train.higher[i] == "True"):        
        count1 = count1 + 1
    elif (train.sex[i] == "True") and (train.studytime[i]  == "False") and  (train.higher[i] == "False"):      
        count2 = count2 + 1
    
    elif (train.sex[i] == "False") and (train.studytime[i] == "True") and  (train.higher[i] == "True"):
        count3 = count3 + 1 
    elif (train.sex[i] == "False") and (train.studytime[i] == "True") and  (train.higher[i] == "False"):
        count4 = count4 + 1
    elif (train.sex[i] ==  "F") and (train.studytime[i] == "False") and  (train.higher[i] == "True"):        
        count5 = count5 + 1
    elif (train.sex[i] == "False") and (train.studytime[i]  == "False") and  (train.higher[i] == "False"):      
        count6 = count6 + 1
MSTUDHIGH = count
MSTUDNHIGH = count_ # value is 0
MNSTUDHIGH = count1
MNSTUDNHIGH = count2
FSTUDHIGH = count3
FSTUDNHIGH = count4 # value is 0
FNSTUDHIGH = count5
FNSTUDNHIGH = count6


P_MSTUDHIGH = count/STUDHIGH
P_MSTUDNHIGH = 0 #count_/STUDNHIGH # value is 0
P_MNSTUDHIGH = count1/NSTUDHIGH
P_MNSTUDNHIGH = count2/NSTUDNHIGH
P_FSTUDHIGH = count3/STUDHIGH
P_FSTUDNHIGH = 0 #count4/STUDNHIGH # value is 0
P_FNSTUDHIGH = count5/NSTUDHIGH
P_FNSTUDNHIGH = count6/NSTUDNHIGH

print(P_MSTUDHIGH)
print(MSTUDNHIGH)
print(P_MNSTUDHIGH)
print(P_MNSTUDNHIGH)
print()
print(P_FSTUDHIGH)
print(FSTUDNHIGH)
print(P_FNSTUDHIGH)
print(P_FNSTUDNHIGH)

SEX given STUDY TIME, HIGHER
0.26666666666666666
0
0.5370370370370371
0.9285714285714286

0.7333333333333333
0
0.0
0.07142857142857142


In [106]:
#print ("SEX, HIGHER")    
Count()
for i, j in train.iterrows():
    if (train.sex[i] == "True") and  (train.higher[i] == "True"):
        count = count + 1 
    elif (train.sex[i] == "True") and  (train.higher[i] == "False"):
        count_ = count_ + 1
    elif (train.sex[i] == "False") and  (train.higher[i] == "True"):        
        count1 = count1 + 1
    elif (train.sex[i]  == "False") and  (train.higher[i] == "False"):      
        count2 = count2 + 1
MHIGH = count
MNHIGH = count_ 
FHIGH = count1
FNHIGH = count2

print ("FREETIME given SEX, HIGHER")    
Count()
for i, j in train.iterrows():
    if (train.freetime[i] == "True") and (train.sex[i] == "True") and  (train.higher[i] == "True"):
        count = count + 1 
    elif (train.freetime[i] == "True") and (train.sex[i] == "True") and  (train.higher[i] == "False"):
        count_ = count_ + 1
    elif (train.freetime[i] == "True") and (train.sex[i] == "False") and  (train.higher[i] == "True"):        
        count1 = count1 + 1
    elif (train.freetime[i] == "True") and (train.sex[i]  == "False") and  (train.higher[i] == "False"):      
        count2 = count2 + 1
        
    elif (train.freetime[i] == "False") and (train.sex[i] == "True") and  (train.higher[i] == "True"):
        count3 = count3 + 1 
    elif (train.freetime[i] == "False") and (train.sex[i] == "True") and  (train.higher[i] == "False"):
        count4 = count4 + 1
    elif (train.freetime[i] == "False") and (train.sex[i] == "False") and  (train.higher[i] == "True"):        
        count5 = count5 + 1
    elif (train.freetime[i] == "False") and (train.sex[i]  == "False") and  (train.higher[i] == "False"):      
        count6 = count6 + 1
        
FREEMHIGH = count
FREEMNHIGH = count_ 
FREEFHIGH = count1
FREEFNHIGH = count2        
NFREEMHIGH = count3
NFREEMNHIGH = count4 
NFREEFHIGH = count5
NFREEFNHIGH = count6

P_FREEMHIGH = count/MHIGH
P_FREEMNHIGH = count_/MNHIGH
P_FREEFHIGH = count1/FHIGH
P_FREEFNHIGH = count2/FNHIGH     
P_NFREEMHIGH = count3/MHIGH
P_NFREEMNHIGH = count4/MNHIGH
P_NFREEFHIGH = count5/FHIGH
P_NFREEFNHIGH = count6/FNHIGH

print(P_FREEMHIGH)
print(P_FREEMNHIGH)
print(P_FREEFHIGH)
print(P_FREEFNHIGH)
print()
print(P_NFREEMHIGH) 
print(P_NFREEMNHIGH)
print(P_NFREEFHIGH) 
print(P_NFREEFNHIGH)

FREETIME given SEX, HIGHER
0.8014705882352942
0.9230769230769231
0.7548387096774194
1.0

0.19852941176470587
0.07692307692307693
0.24516129032258063
0.0


In [107]:
#print ("SEX, OUT")    
Count()
for i, j in train.iterrows():
    if (train.sex[i] == "True") and  (train.freetime[i] == "True"):
        count = count + 1 
    elif (train.sex[i] == "True") and  (train.freetime[i] == "False"):
        count_ = count_ + 1
    elif (train.sex[i] == "False") and  (train.freetime[i] == "True"):        
        count1 = count1 + 1
    elif (train.sex[i]  == "False") and  (train.freetime[i] == "False"):      
        count2 = count2 + 1
MFREE = count
MNFREE = count_ 
FFREE = count1
FNFREE = count2


print ("GO OUT given SEX, OUT")    
Count()
for i, j in train.iterrows():
    if (train.goout[i] == "True") and (train.sex[i] == "True") and  (train.freetime[i] == "True"):
        count = count + 1 
    elif (train.goout[i] == "True") and (train.sex[i] == "True") and  (train.freetime[i] == "False"):
        count_ = count_ + 1
    elif (train.goout[i] == "True") and (train.sex[i] == "False") and  (train.freetime[i] == "True"):        
        count1 = count1 + 1
    elif (train.goout[i] == "True") and (train.sex[i]  == "False") and  (train.freetime[i] == "False"):      
        count2 = count2 + 1
    elif (train.goout[i] == "False") and (train.sex[i] == "True") and  (train.freetime[i] == "True"):
        count3 = count3 + 1 
    elif (train.goout[i] == "False") and (train.sex[i] == "True") and  (train.freetime[i] == "False"):
        count4 = count4 + 1
    elif (train.goout[i] == "False") and (train.sex[i] == "False") and  (train.freetime[i] == "True"):        
        count5 = count5 + 1
    elif (train.goout[i] == "False") and (train.sex[i]  == "False") and  (train.freetime[i] == "False"):      
        count6 = count6 + 1

OUTMFREE = count
OUTMNFREE = count_
OUTFFREE = count1
OUTFNFREE = count2
NOUTMFREE = count3
NOUTMNFREE = count4
NOUTFFREE = count5
NOUTFNFREE = count6


P_OUTMFREE = count/MFREE
P_OUTMNFREE = count_/MNFREE
P_OUTFFREE = count1/FFREE
P_OUTFNFREE = count2/FNFREE
P_NOUTMFREE = count3/MFREE
P_NOUTMNFREE = count4/MNFREE
P_NOUTFFREE = count5/FFREE
P_NOUTFNFREE = count6/FNFREE

print(P_OUTMFREE)
print(P_OUTMNFREE)
print(P_OUTFFREE)
print(P_OUTFNFREE)
print()
print(P_NOUTMFREE)
print(P_NOUTMNFREE)
print(P_NOUTFFREE)
print(P_NOUTFNFREE)

GO OUT given SEX, OUT
0.71900826446281
0.6071428571428571
0.7711864406779662
0.39473684210526316

0.2809917355371901
0.39285714285714285
0.2288135593220339
0.6052631578947368


In [108]:
#print ("SEX, GO OUT, FREEETIME, STUDY TIME") 

Count()
for i, j in train.iterrows():
    if (train.sex[i] == "True") and (train.goout[i] == "True") and (train.freetime[i] == "True") and (train.studytime[i] == "True"):
        count = count + 1 
    elif (train.sex[i] == "True") and (train.goout[i] == "True") and (train.freetime[i] == "True") and (train.studytime[i] == "False"):
        count_ = count_ + 1
    elif (train.sex[i] == "True") and (train.goout[i] == "True") and (train.freetime[i] == "False") and (train.studytime[i] == "True"):      
        count1 = count1 + 1
    elif (train.sex[i] == "True") and (train.goout[i] == "True") and (train.freetime[i] == "False") and (train.studytime[i] == "False"):      
        count2 = count2 + 1
    elif (train.sex[i] == "True") and (train.goout[i] == "False") and (train.freetime[i] == "True") and (train.studytime[i] == "True"):
        count3 = count3 + 1 
    elif (train.sex[i] == "True") and (train.goout[i] == "False") and (train.freetime[i] == "True") and (train.studytime[i] == "False"):
        count4 = count4 + 1
    elif (train.sex[i] == "True") and (train.goout[i] == "False") and (train.freetime[i] == "False") and (train.studytime[i] == "True"):       
        count5 = count5 + 1
    elif (train.sex[i] == "True") and (train.goout[i] == "False") and (train.freetime[i] == "False") and (train.studytime[i] == "False"):   
        count6 = count6 + 1 
    elif (train.sex[i] == "False") and (train.goout[i] == "True") and (train.freetime[i] == "True") and (train.studytime[i] == "True"):
        count7 = count7 + 1 
    elif (train.sex[i] == "False") and (train.goout[i] == "True") and (train.freetime[i] == "True") and (train.studytime[i] == "False"):
        count8 = count8 + 1
    elif (train.sex[i] == "False") and (train.goout[i] == "True") and (train.freetime[i] == "False") and (train.studytime[i] == "True"):      
        count9 = count9 + 1
    elif (train.sex[i] == "False") and (train.goout[i] == "True") and (train.freetime[i] == "False") and (train.studytime[i] == "False"):      
        count10 = count10 + 1
    elif (train.sex[i] == "False") and (train.goout[i] == "False") and (train.freetime[i] == "True") and (train.studytime[i] == "True"):
        count11 = count11 + 1 
    elif (train.sex[i] == "False") and (train.goout[i] == "False") and (train.freetime[i] == "True") and (train.studytime[i] == "False"):
        count12 = count12 + 1
    elif (train.sex[i] == "False") and (train.goout[i] == "False") and (train.freetime[i] == "False") and (train.studytime[i] == "True"):       
        count13 = count13 + 1
    elif (train.sex[i] == "False") and (train.goout[i] == "False") and (train.freetime[i] == "False") and (train.studytime[i] == "False"):   
        count14 = count14 + 1

MOUTFREESTUD=count
MOUTFREENSTUD=count_
MOUTNFREESTUD=count1
MOUTNFREENSTUD=count2
MNOUTFREESTUD=count3
MNOUTFREENSTUD=count4
MNOUTNFREESTUD=count5
MNOUTNFREENSTUD=count6
FOUTFREESTUD=count7
FOUTFREENSTUD=count8
FOUTNFREESTUD=count9
FOUTNFREENSTUD=count10
FNOUTFREESTUD=count11
FNOUTFREENSTUD=count12
FNOUTNFREESTUD=count13
FNOUTNFREENSTUD=count14



print ("alcohol CONSUMPTION given SEX, GO OUT, FREEETIME, STUDY TIME") 
Count()
for i, j in train.iterrows():        
    if (train.alcohol[i] == "True") and (train.sex[i] == "True") and (train.goout[i] == "True") and (train.freetime[i] == "True") and (train.studytime[i] == "True"):
        count = count + 1 
    elif (train.alcohol[i] == "True") and (train.sex[i] == "True") and (train.goout[i] == "True") and (train.freetime[i] == "True") and (train.studytime[i] == "False"):
        count_ = count_ + 1
    elif (train.alcohol[i] == "True") and (train.sex[i] == "True") and (train.goout[i] == "True") and (train.freetime[i] == "False") and (train.studytime[i] == "True"):      
        count1 = count1 + 1
    elif (train.alcohol[i] == "True") and (train.sex[i] == "True") and (train.goout[i] == "True") and (train.freetime[i] == "False") and (train.studytime[i] == "False"):      
        count2 = count2 + 1
    elif (train.alcohol[i] == "True") and (train.sex[i] == "True") and (train.goout[i] == "False") and (train.freetime[i] == "True") and (train.studytime[i] == "True"):
        count3 = count3 + 1 
    elif (train.alcohol[i] == "True") and (train.sex[i] == "True") and (train.goout[i] == "False") and (train.freetime[i] == "True") and (train.studytime[i] == "False"):
        count4 = count4 + 1
    elif (train.alcohol[i] == "True") and (train.sex[i] == "True") and (train.goout[i] == "False") and (train.freetime[i] == "False") and (train.studytime[i] == "True"):       
        count5 = count5 + 1
    elif (train.alcohol[i] == "True") and (train.sex[i] == "True") and (train.goout[i] == "False") and (train.freetime[i] == "False") and (train.studytime[i] == "False"):   
        count6 = count6 + 1 
    elif (train.alcohol[i] == "True") and (train.sex[i] == "False") and (train.goout[i] == "True") and (train.freetime[i] == "True") and (train.studytime[i] == "True"):
        count7 = count7 + 1 
    elif (train.alcohol[i] == "True") and (train.sex[i] == "False") and (train.goout[i] == "True") and (train.freetime[i] == "True") and (train.studytime[i] == "False"):
        count8 = count8 + 1
    elif (train.alcohol[i] == "True") and (train.sex[i] == "False") and (train.goout[i] == "True") and (train.freetime[i] == "False") and (train.studytime[i] == "True"):      
        count9 = count9 + 1
    elif (train.alcohol[i] == "True") and (train.sex[i] == "False") and (train.goout[i] == "True") and (train.freetime[i] == "False") and (train.studytime[i] == "False"):      
        count10 = count10 + 1
    elif (train.alcohol[i] == "True") and (train.sex[i] == "False") and (train.goout[i] == "False") and (train.freetime[i] == "True") and (train.studytime[i] == "True"):
        count11 = count11 + 1 
    elif (train.alcohol[i] == "True") and (train.sex[i] == "False") and (train.goout[i] == "False") and (train.freetime[i] == "True") and (train.studytime[i] == "False"):
        count12 = count12 + 1
    elif (train.alcohol[i] == "True") and (train.sex[i] == "False") and (train.goout[i] == "False") and (train.freetime[i] == "False") and (train.studytime[i] == "True"):       
        count13 = count13 + 1
    elif (train.alcohol[i] == "True") and (train.sex[i] == "False") and (train.goout[i] == "False") and (train.freetime[i] == "False") and (train.studytime[i] == "False"):   
        count14 = count14 + 1  
    
    elif (train.alcohol[i] == "False") and (train.sex[i] == "True") and (train.goout[i] == "True") and (train.freetime[i] == "True") and (train.studytime[i] == "True"):
        count15 = count15 + 1 
    elif (train.alcohol[i] == "False") and (train.sex[i] == "True") and (train.goout[i] == "True") and (train.freetime[i] == "True") and (train.studytime[i] == "False"):
        count16 = count16 + 1
    elif (train.alcohol[i] == "False") and (train.sex[i] == "True") and (train.goout[i] == "True") and (train.freetime[i] == "False") and (train.studytime[i] == "True"):      
        count17 = count17 + 1
    elif (train.alcohol[i] == "False") and (train.sex[i] == "True") and (train.goout[i] == "True") and (train.freetime[i] == "False") and (train.studytime[i] == "False"):      
        count18 = count18 + 1
    elif (train.alcohol[i] == "False") and (train.sex[i] == "True") and (train.goout[i] == "False") and (train.freetime[i] == "True") and (train.studytime[i] == "True"):
        count19 = count19 + 1 
    elif (train.alcohol[i] == "False") and (train.sex[i] == "True") and (train.goout[i] == "False") and (train.freetime[i] == "True") and (train.studytime[i] == "False"):
        count20 = count20 + 1
    elif (train.alcohol[i] == "False") and (train.sex[i] == "True") and (train.goout[i] == "False") and (train.freetime[i] == "False") and (train.studytime[i] == "True"):       
        count21 = count21 + 1
    elif (train.alcohol[i] == "False") and (train.sex[i] == "True") and (train.goout[i] == "False") and (train.freetime[i] == "False") and (train.studytime[i] == "False"):   
        count22 = count22 + 1 
    elif (train.alcohol[i] == "False") and (train.sex[i] == "False") and (train.goout[i] == "True") and (train.freetime[i] == "True") and (train.studytime[i] == "True"):
        count23 = count23 + 1 
    elif (train.alcohol[i] == "False") and (train.sex[i] == "False") and (train.goout[i] == "True") and (train.freetime[i] == "True") and (train.studytime[i] == "False"):
        count24 = count24 + 1
    elif (train.alcohol[i] == "False") and (train.sex[i] == "False") and (train.goout[i] == "True") and (train.freetime[i] == "False") and (train.studytime[i] == "True"):      
        count25 = count25 + 1
    elif (train.alcohol[i] == "False") and (train.sex[i] == "False") and (train.goout[i] == "True") and (train.freetime[i] == "False") and (train.studytime[i] == "False"):      
        count26 = count26 + 1
    elif (train.alcohol[i] == "False") and (train.sex[i] == "False") and (train.goout[i] == "False") and (train.freetime[i] == "True") and (train.studytime[i] == "True"):
        count27 = count27 + 1 
    elif (train.alcohol[i] == "False") and (train.sex[i] == "False") and (train.goout[i] == "False") and (train.freetime[i] == "True") and (train.studytime[i] == "False"):
        count28 = count28 + 1
    elif (train.alcohol[i] == "False") and (train.sex[i] == "False") and (train.goout[i] == "False") and (train.freetime[i] == "False") and (train.studytime[i] == "True"):       
        count29 = count29 + 1
    elif (train.alcohol[i] == "False") and (train.sex[i] == "False") and (train.goout[i] == "False") and (train.freetime[i] == "False") and (train.studytime[i] == "False"):   
        count30 = count30 + 1 

ALCMOUTFREESTUD=count
ALCMOUTFREENSTUD=count_
ALCMOUTNFREESTUD=count1
ALCMOUTNFREENSTUD=count2
ALCMNOUTFREESTUD=count3
ALCMNOUTFREENSTUD=count4
ALCMNOUTNFREESTUD=count5
ALCMNOUTNFREENSTUD=count6
ALCFOUTFREESTUD=count7
ALCFOUTFREENSTUD=count8
ALCFOUTNFREESTUD=count9
ALCFOUTNFREENSTUD=count10
ALCFNOUTFREESTUD=count11
ALCFNOUTFREENSTUD=count12
ALCFNOUTNFREESTUD=count13
ALCFNOUTNFREENSTUD=count14
    
NALCMOUTFREESTUD=count15
NALCMOUTFREENSTUD=count16
NALCMOUTNFREESTUD=count17
NALCMOUTNFREENSTUD=count18
NALCMNOUTFREESTUD=count19
NALCMNOUTFREENSTUD=count20
NALCMNOUTNFREESTUD=count21
NALCMNOUTNFREENSTUD=count22
NALCFOUTFREESTUD=count23
NALCFOUTFREENSTUD=count24
NALCFOUTNFREESTUD=count25
NALCFOUTNFREENSTUD=count26
NALCFNOUTFREESTUD=count27
NALCFNOUTFREENSTUD=count28
NALCFNOUTNFREESTUD=count29
NALCFNOUTNFREENSTUD=count30

P_ALCMOUTFREESTUD=count/MOUTFREESTUD
P_ALCMOUTFREENSTUD=count_/MOUTFREENSTUD
P_ALCMOUTNFREESTUD=count1/MOUTNFREESTUD
P_ALCMOUTNFREENSTUD=count2/MOUTNFREENSTUD
P_ALCMNOUTFREESTUD=count3/MNOUTFREESTUD
P_ALCMNOUTFREENSTUD=count4/MNOUTFREENSTUD
P_ALCMNOUTNFREESTUD=count5/MNOUTNFREESTUD
P_ALCMNOUTNFREENSTUD=count6/MNOUTNFREENSTUD
P_ALCFOUTFREESTUD=count7/FOUTFREESTUD
P_ALCFOUTFREENSTUD=count8/FOUTFREENSTUD
P_ALCFOUTNFREESTUD=count9/FOUTNFREESTUD
P_ALCFOUTNFREENSTUD=count10/FOUTNFREENSTUD
P_ALCFNOUTFREESTUD=count11/FNOUTFREESTUD
P_ALCFNOUTFREENSTUD=count12/FNOUTFREENSTUD
P_ALCFNOUTNFREESTUD=count13/FNOUTNFREESTUD
P_ALCFNOUTNFREENSTUD=count14/FNOUTNFREENSTUD
    
P_NALCMOUTFREESTUD=count15/MOUTFREESTUD
P_NALCMOUTFREENSTUD=count16/MOUTFREENSTUD
P_NALCMOUTNFREESTUD=count17/MOUTNFREESTUD
P_NALCMOUTNFREENSTUD=count18/MOUTNFREENSTUD
P_NALCMNOUTFREESTUD=count19/MNOUTFREESTUD
P_NALCMNOUTFREENSTUD=count20/MNOUTFREENSTUD
P_NALCMNOUTNFREESTUD=count21/MNOUTNFREESTUD
P_NALCMNOUTNFREENSTUD=count22/MNOUTNFREENSTUD
P_NALCFOUTFREESTUD=count23/FOUTFREESTUD
P_NALCFOUTFREENSTUD=count24/FOUTFREENSTUD
P_NALCFOUTNFREESTUD=count25/FOUTNFREESTUD
P_NALCFOUTNFREENSTUD=count26/FOUTNFREENSTUD
P_NALCFNOUTFREESTUD=count27/FNOUTFREESTUD
P_NALCFNOUTFREENSTUD=count28/FNOUTFREENSTUD
P_NALCFNOUTNFREESTUD=count29/FNOUTNFREESTUD
P_NALCFNOUTNFREENSTUD=count30/FNOUTNFREENSTUD

print(P_ALCMOUTFREESTUD)
print(P_ALCMOUTFREENSTUD)
print(P_ALCMOUTNFREESTUD)
print(P_ALCMOUTNFREENSTUD)
print(P_ALCMNOUTFREESTUD)
print(P_ALCMNOUTFREENSTUD)
print(P_ALCMNOUTNFREESTUD)
print(P_ALCMNOUTNFREENSTUD)
print(P_ALCFOUTFREESTUD)
print(P_ALCFOUTFREENSTUD)
print(P_ALCFOUTNFREESTUD)
print(P_ALCFOUTNFREENSTUD)
print(P_ALCFNOUTFREESTUD)
print(P_ALCFNOUTFREENSTUD)
print(P_ALCFNOUTNFREESTUD)
print(P_ALCFNOUTNFREENSTUD)
print()  
print(P_NALCMOUTFREESTUD)
print(P_NALCMOUTFREENSTUD)
print(P_NALCMOUTNFREESTUD)
print(P_NALCMOUTNFREENSTUD)
print(P_NALCMNOUTFREESTUD)
print(P_NALCMNOUTFREENSTUD)
print(P_NALCMNOUTNFREESTUD)
print(P_NALCMNOUTNFREENSTUD)
print(P_NALCFOUTFREESTUD)
print(P_NALCFOUTFREENSTUD)
print(P_NALCFOUTNFREESTUD)
print(P_NALCFOUTNFREENSTUD)
print(P_NALCFNOUTFREESTUD)
print(P_NALCFNOUTFREENSTUD)
print(P_NALCFNOUTNFREESTUD)
print(P_NALCFNOUTNFREENSTUD)      

alcohol CONSUMPTION given SEX, GO OUT, FREEETIME, STUDY TIME
0.0
0.21333333333333335
0.0
0.125
0.0
0.13333333333333333
0.0
0.125
0.03225806451612903
0.08333333333333333
0.0
0.0
0.0
0.0
0.0
0.0

1.0
0.7866666666666666
1.0
0.875
1.0
0.8666666666666667
1.0
0.875
0.967741935483871
0.9166666666666666
1.0
1.0
1.0
1.0
1.0
1.0


# Model

In [109]:
train.columns

Index(['school', 'sex', 'age', 'address', 'famsize', 'Pstatus', 'Medu', 'Fedu',
       'Mjob', 'Fjob', 'reason', 'guardian', 'traveltime', 'studytime',
       'failures', 'schoolsup', 'famsup', 'paid', 'activities', 'nursery',
       'higher', 'internet', 'romantic', 'famrel', 'freetime', 'goout', 'Dalc',
       'Walc', 'health', 'absences', 'G3_Mat', 'G3_Por', 'AvgAlc', 'alcohol',
       'Average', 'performance'],
      dtype='object')

In [110]:
T,F = True,False

In [111]:
data.columns

Index(['school', 'sex', 'age', 'address', 'famsize', 'Pstatus', 'Medu', 'Fedu',
       'Mjob', 'Fjob', 'reason', 'guardian', 'traveltime', 'studytime',
       'failures', 'schoolsup', 'famsup', 'paid', 'activities', 'nursery',
       'higher', 'internet', 'romantic', 'famrel', 'freetime', 'goout', 'Dalc',
       'Walc', 'health', 'absences', 'G3_Mat', 'G3_Por', 'AvgAlc', 'alcohol',
       'Average', 'performance'],
      dtype='object')

In [119]:
acad = BayesModel([
        
    ('famsize','', P_GT3),
    ('studytime','',P_FIVE),
        
    ('famsup', 'famsize',{ T:P_SUPSIZE, F:P_SUPNSIZE}),
    
    ('performance','famsup studytime famsize', {
                (T,T,T): P_AVESUPSTUDSIZE,
                (T,T,F): P_AVESUPSTUDNSIZE,
                (T,F,T): P_AVESUPNSTUDSIZE,
                (T,F,F): P_AVESUPNSTUDNSIZE,
                (F,T,T): P_AVENSUPSTUDSIZE,
                (F,T,F): P_AVENSUPSTUDNSIZE,
                (F,F,T): P_AVENSUPNSTUDSIZE,
                (F,F,F): P_AVENSUPNSTUDNSIZE
            }),
        
        
    ('higher','performance studytime famsize', {
                (T,T,T): P_HIGHAVESTUDSUP,
                (T,T,F): P_HIGHAVESTUDNSUP,
                (T,F,T): P_HIGHAVENSTUDSUP,
                (T,F,F): P_HIGHAVENSTUDNSUP,
                (F,T,T): P_HIGHNAVESTUDSUP,
                (F,T,F): P_HIGHNAVESTUDNSUP,
                (F,F,T): P_HIGHNAVENSTUDSUP,
                (F,F,F): P_HIGHNAVENSTUDNSUP
            }),
        
    ('sex','studytime higher', {
                (T,T): P_MSTUDHIGH,
                (T,F): P_MSTUDNHIGH,
                (F,T): P_MNSTUDHIGH,
                (F,F): P_MNSTUDNHIGH
            }),
    ('freetime', 'sex higher',{
                (T,T): P_FREEMHIGH,
                (T,F): P_FREEMNHIGH,
                (F,T): P_FREEFHIGH,
                (F,F): P_FREEFNHIGH
            }),
        
    ('goout','sex freetime', {
                (T,T): P_OUTMFREE,
                (T,F): P_OUTMNFREE,
                (F,T): P_OUTFFREE,
                (F,F): P_OUTFNFREE
            }),
        

    ('alcohol','sex goout freetime studytime',{
                (T,T,T,T): P_ALCMOUTFREESTUD , 
                (T,T,T,F): P_ALCMOUTFREENSTUD ,                       
                (T,T,F,T): P_ALCMOUTNFREESTUD,
                (T,T,F,F): P_ALCMOUTNFREENSTUD,
                (T,F,T,T): P_ALCMNOUTFREESTUD,
                (T,F,T,F): P_ALCMNOUTFREENSTUD,
                (T,F,F,T): P_ALCMNOUTNFREESTUD  ,                          
                (T,F,F,F): P_ALCMNOUTNFREENSTUD,
                (F,T,T,T): P_ALCFOUTFREESTUD,
                (F,T,T,F): P_ALCFOUTFREENSTUD,
                (F,T,F,T): P_ALCFOUTNFREESTUD,
                (F,T,F,F): P_ALCFOUTNFREENSTUD   ,                   
                (F,F,T,T): P_ALCFNOUTFREESTUD,
                (F,F,T,F): P_ALCFNOUTFREENSTUD,
                (F,F,F,T): P_ALCFNOUTNFREESTUD,
                (F,F,F,F): P_ALCFNOUTNFREENSTUD
            })
    ])

In [122]:
gibbs('alcohol',dict(sex=F),acad,2)

{False: 1.0, True: 0.0}

# Evaluate

In [123]:
test.columns

Index(['school', 'sex', 'age', 'address', 'famsize', 'Pstatus', 'Medu', 'Fedu',
       'Mjob', 'Fjob', 'reason', 'guardian', 'traveltime', 'studytime',
       'failures', 'schoolsup', 'famsup', 'paid', 'activities', 'nursery',
       'higher', 'internet', 'romantic', 'famrel', 'freetime', 'goout', 'Dalc',
       'Walc', 'health', 'absences', 'G3_Mat', 'G3_Por', 'AvgAlc', 'alcohol',
       'Average', 'performance'],
      dtype='object')

In [None]:
data['Average'] = (data.G3_Mat + data.G3_Por)/2.0
data['performance'] = data.Average.map(lambda x: 'True' if x >= 12 else 'False')  #True: Good performance
data['famsize']   = data.famsize.map(lambda x: 'True' if x == 'GT3' else 'False') #True: Greater than 3, large family
data['famsup']   = data.famsup.map(lambda x: 'True' if x == 'yes' else 'False') 
data['studytime']   = data.studytime.map(lambda x: 'True' if x >= 3 else 'False')  #True: 3 : 5 to 10 hours, or 4 : >10 hours
data['higher']   = data.higher.map(lambda x: 'True' if x == 'yes' else 'False')  
data['sex']   = data.sex.map(lambda x: 'True' if x == 'M' else 'False')           #True: Male
data['freetime']   = data.freetime.map(lambda x: 'True' if x >= 3 else 'False')    #True: 4-5 => range from 1-5 (Very low to Very high)
data['goout']   = data.goout.map(lambda x: 'True' if x >= 3 else 'False')          #True: 4-5 => range from 1-5 (Very low to Very high)

In [132]:
test = test[['famsize','famsup','performance','studytime',
        'higher','sex','freetime','goout','alcohol']]

In [133]:
test.head(2)

Unnamed: 0,famsize,famsup,performance,studytime,higher,sex,freetime,goout,alcohol
0,F,F,F,False,F,F,F,F,F
2,F,F,F,False,F,F,F,F,F


In [134]:
test['famsize'] = test.famsize.map(lambda x: 'T' if x == 'True' else 'F' )
test['famsup'] = test.famsup.map(lambda x: 'T' if x == 'True' else 'F' )
test['performance'] = test.performance.map(lambda x: 'T' if x == 'True' else 'F' )
test['studytime']   = test.studytime.map(lambda x: 'T' if x == 'True' else 'F' )
test['higher']   = test.higher.map(lambda x: 'T' if x == 'True' else 'F' )
test['sex']   = test.sex.map(lambda x: 'T' if x == 'True' else 'F' )
test['freetime']   = test.freetime.map(lambda x: 'T' if x == 'True' else 'F' )
test['goout']   = test.goout.map(lambda x: 'T' if x == 'True' else 'F' )
test['alcohol']   = test.goout.map(lambda x: 'T' if x == 'True' else 'F' )

In [135]:
test.head(2)

Unnamed: 0,famsize,famsup,performance,studytime,higher,sex,freetime,goout,alcohol
0,F,F,F,F,F,F,F,F,F
2,F,F,F,F,F,F,F,F,F


In [137]:
for i in test['studytime'][:5]:
    print(i)

F
F
F
F
F


In [138]:
gibbs('alcohol', dict(famsize=T,famsup=T,performance=T,studytime=T,
                      higher=T,sex=T,freetime=T,goout=T), acad, 100)

{False: 1.0, True: 0.0}

In [None]:
test.head(2)

In [139]:
result_test = []
for index, row in test.iterrows():
    result_test.append(gibbs('alcohol', 
                                 
        dict(famsize=bool(row['famsize']),famsup=bool(row['famsup']),performance=bool(row['performance']),
             studytime=bool(row['studytime']),higher=bool(row['higher']),sex=bool(row['sex']),
             freetime=bool(row['freetime']),goout=bool(row['goout'])     
            ), acad,1000)[True])

In [140]:
test = test.reset_index()

In [141]:
test

Unnamed: 0,index,famsize,famsup,performance,studytime,higher,sex,freetime,goout,alcohol
0,0,F,F,F,F,F,F,F,F,F
1,2,F,F,F,F,F,F,F,F,F
2,4,F,F,F,F,F,F,F,F,F
3,5,F,F,F,F,F,F,F,F,F
4,7,F,F,F,F,F,F,F,F,F
5,13,F,F,F,F,F,F,F,F,F
6,14,F,F,F,T,F,F,F,F,F
7,19,F,F,F,F,F,F,F,F,F
8,22,F,F,F,F,F,F,F,F,F
9,28,F,F,F,F,F,F,F,F,F


In [170]:
print ('Actual Received True     = ' , len(Table.loc[Table['Actual'] == 1]))
print ('Actual Received False    = ' , len(Table.loc[Table['Actual'] == 0]))
print ('Predicted Received True  = ' , len(Table.loc[Table['Predicted'] == 1 ]))
print ('Predicted Received False = ' , len(Table.loc[Table['Predicted'] < 0.5]))
print ('')
print ('True Positive  = ', len(Table[(Table['Predicted'] >= 0.5) & (Table['Actual'] == 1)]))
print ('False Positive = ', len(Table[(Table['Predicted'] >= 0.5) & (Table['Actual'] == 0)]))
print ('True Negative  = ', len(Table[(Table['Predicted'] < 0.5) & (Table['Actual'] == 0)]))
print ('False Negative = ', len(Table[(Table['Predicted'] < 0.5) & (Table['Actual'] == 1)]))


Actual Received True     =  0
Actual Received False    =  77
Predicted Received True  =  0
Predicted Received False =  77

True Positive  =  0
False Positive =  0
True Negative  =  77
False Negative =  0


# Inference evaluation

In [158]:
gibbs('alcohol', dict(sex=T), acad, 100)

{False: 0.83125, True: 0.16875}

In [162]:
gibbs('alcohol', dict(goout = T), acad, 100)

{False: 0.86125, True: 0.13875}

In [164]:
gibbs('alcohol', dict(freetime = T), acad, 100)

{False: 0.92, True: 0.08}

In [165]:
gibbs('alcohol', dict(studytime = T), acad, 100)

{False: 1.0, True: 0.0}

In [168]:
gibbs('alcohol', dict(sex = T,goout=F), acad, 100)

{False: 0.8985714285714286, True: 0.10142857142857142}

In [169]:
gibbs('alcohol', dict(sex = T,goout=F, freetime = T), acad, 100)

{False: 0.8416666666666667, True: 0.15833333333333333}

In [175]:
gibbs('alcohol', dict(sex = T,goout=F, freetime = F), acad, 100)

{False: 0.8683333333333333, True: 0.13166666666666665}