In [77]:
""" 
    Naive Bayes Classifier program by Robert Simari <rsimari> 
    Date: 2/9/18
    Tables and Probabilities are displayed below
"""

data_file = "tic-tac-toe"
"""
    tic-tac-toe board locations in data structure
    0 1 2
    3 4 5
    6 7 8
    result = 9
"""
data = []
with open(data_file + ".data") as f:
    line = f.readline().rstrip()
    while line:
        data.append(line.split())
        line = f.readline().rstrip()

        
def compute_probability(data, index, feature = True):
    """
        @param [list[list], int, bool]: data is a 2 array of data, index is the number of feature you want,
        feature is whether or not it should compute it as a feature or not
        @return [dict]: returns dictionary of values i.e. {"x": {"positive": 0.3, "negative": 0.2}, ... }
    """
    if index < 0 or index >= len(data[0]):
        raise Exception("Error: Index out of bounds in compute_probability")
    
    res = {}
    
    """ this means that they are doing probability for a feature """
    if feature:    
        for instance in data:
            result = instance[9]
            feature_val = instance[index]
            if feature_val in res:
                if result in res[feature_val]:
                    res[feature_val][result] += 1
                else:
                    res[feature_val][result] = 1
            else:
                res[feature_val] = {}
                res[feature_val][result] = 1
    else:
        """ doing probability for a result """
        for instance in data:
            result = instance[index]
            if result in res:
                res[result] += 1
            else:
                res[result] = 1
    
    if feature:
        """ turns all values into probabilities """
        outcomes = list(res[list(res)[0]])
        for i in outcomes:
            total_outcome = 0
            for key in res:
                total_outcome += res[key][i]
            for key in res:
                res[key][i] = res[key][i] / total_outcome
            """ sanity check for total probability """
            total_prob = 0
            for key in res:
                total_prob += res[key][i]
            if total_prob != 1:
                raise Exception("Warning: Probabilities did not sum to 1")
    else:
        """ turns all outcome values into probabilities """
        outcomes = list(res)
        total_outcome = 0
        for i in outcomes:
            total_outcome += res[i]
        for i in outcomes:
            res[i] = res[i] / total_outcome
        """ sanity check for outcome """
        total_prob = 0
        for i in outcomes:
            total_prob += res[i]
        if total_prob != 1:
            raise Exception("Warning: Probabilities did not sum to 1")

    return res

In [97]:
p_outcome = compute_probability(data, 9, False)

In [105]:
top_left      = compute_probability(data, 0)
top_middle    = compute_probability(data, 1)
top_right     = compute_probability(data, 2)
middle_left   = compute_probability(data, 3)
middle_middle = compute_probability(data, 4)
middle_right  = compute_probability(data, 5)
bottom_left   = compute_probability(data, 6)
bottom_middle = compute_probability(data, 7)
bottom_right  = compute_probability(data, 8)

feature_data = [top_left, top_middle, top_right, middle_left, middle_middle, middle_right, bottom_left, bottom_middle, bottom_right]

In [102]:
from IPython.display import display, Markdown
def display_feature_table(probs, name):
    """
        @param [dict[dict], string]: probabilities from compute_probabilities, name of table
        @returns [None]
    """
    feature_vals = list(probs)
    outcomes = list(probs[list(probs)[0]])
    display(Markdown("""| """ + name + """ | """ + outcomes[0] + """ | """ + outcomes[1] + """ |
| --- | --- | --- |
| """ + feature_vals[0] + """ | """ + str(probs[feature_vals[0]][outcomes[0]]) + """ | """ + str(probs[feature_vals[0]][outcomes[1]]) + """ |
| """ + feature_vals[1] + """ | """ + str(probs[feature_vals[1]][outcomes[0]]) + """ | """ + str(probs[feature_vals[1]][outcomes[1]]) + """ |
| """ + feature_vals[2] + """ | """ + str(probs[feature_vals[2]][outcomes[0]]) + """ | """ + str(probs[feature_vals[2]][outcomes[1]]) + """ |
    """))
    
def display_outcome_table(probs, name):
    """
        @param [dict, string]: probabilities of outcomes, name of table
        @returns [None]
    """
    outcomes = list(probs)
    display(Markdown("""| """ + name + """ | """ + outcomes[0] + """ | """ + outcomes[1] + """ |
| --- | --- | --- |
|  | """ + str(probs[outcomes[0]]) + """ | """ + str(probs[outcomes[1]]) + """ | """))

In [96]:
display_feature_table(top_left, "Top-Left-Square")
display_feature_table(top_middle, "Top-Middle-Square")
display_feature_table(top_right, "Top-Right-Square")
display_feature_table(middle_left, "Middle-Left-Square")
display_feature_table(middle_middle, "Middle-Middle-Square")
display_feature_table(middle_right, "Middle-Right-Square")
display_feature_table(bottom_left, "Bottom-Left-Square")
display_feature_table(bottom_middle, "Bottom-Middle-Square")
display_feature_table(bottom_right, "Bottom-Right-Square")

| Top-Left-Square | positive | negative |
| --- | --- | --- |
| x | 0.46178861788617886 | 0.37383177570093457 |
| o | 0.3073170731707317 | 0.4392523364485981 |
| b | 0.23089430894308943 | 0.18691588785046728 |
    

| Top-Middle-Square | positive | negative |
| --- | --- | --- |
| x | 0.34796747967479674 | 0.45482866043613707 |
| o | 0.3723577235772358 | 0.3115264797507788 |
| b | 0.27967479674796747 | 0.2336448598130841 |
    

| Top-Right-Square | positive | negative |
| --- | --- | --- |
| x | 0.46178861788617886 | 0.37694704049844235 |
| o | 0.3073170731707317 | 0.42679127725856697 |
| b | 0.23089430894308943 | 0.19626168224299065 |
    

| Middle-Left-Square | positive | negative |
| --- | --- | --- |
| x | 0.35609756097560974 | 0.46105919003115264 |
| o | 0.3723577235772358 | 0.30218068535825543 |
| b | 0.27154471544715447 | 0.2367601246105919 |
    

| Middle-Middle-Square | positive | negative |
| --- | --- | --- |
| o | 0.22439024390243903 | 0.5825545171339563 |
| b | 0.18048780487804877 | 0.14641744548286603 |
| x | 0.5951219512195122 | 0.27102803738317754 |
    

| Middle-Right-Square | positive | negative |
| --- | --- | --- |
| o | 0.3642276422764228 | 0.29906542056074764 |
| b | 0.27479674796747966 | 0.24299065420560748 |
| x | 0.36097560975609755 | 0.45794392523364486 |
    

| Bottom-Left-Square | positive | negative |
| --- | --- | --- |
| x | 0.4764227642276423 | 0.3582554517133956 |
| o | 0.2991869918699187 | 0.4454828660436137 |
| b | 0.22439024390243903 | 0.19626168224299065 |
    

| Bottom-Middle-Square | positive | negative |
| --- | --- | --- |
| o | 0.36097560975609755 | 0.29906542056074764 |
| x | 0.36585365853658536 | 0.45794392523364486 |
| b | 0.2731707317073171 | 0.24299065420560748 |
    

| Bottom-Right-Square | positive | negative |
| --- | --- | --- |
| o | 0.2975609756097561 | 0.4423676012461059 |
| x | 0.4796747967479675 | 0.3613707165109034 |
| b | 0.22276422764227644 | 0.19626168224299065 |
    

In [103]:
display_outcome_table(p_outcome, "Game Outcome")

| Game Outcome | positive | negative |
| --- | --- | --- |
|  | 0.657051282051282 | 0.34294871794871795 | 

In [115]:
def compute_naive_bayes(outcome, given, outcome_probs, feature_probs):
    """ 
        @param [string, list, dict, dict[dict]]: desired outcome, list of feature values, outcome probabilities, feature probabilities
        @return [dict]: computes naive bayes probability for a desired outcome given a set of feature values
        e.g. P(Positive | 'xob') * P('x' | Positive) * P('o' | Positive) * P('b' | Positive)
    """
    product = 1
    
    if not outcome in outcome_probs:
        raise Exception("Unknown/Unobserved Outcome")

    # multiply by P(outcome)
    product *= outcome_probs[outcome]
    
    # multiply by P(feature_value | outcome) for every given feature value
    for i in range(len(given)):
        try:
            product *= feature_probs[i][given[i]][outcome]
        except KeyError:
            raise Exception("Invalid Feature Input")
    
    return product

In [136]:
with open(data_file + '.test') as f:
    line = f.readline().rstrip().split()
    while line:
        desired_outcome = line[-1]
        if desired_outcome == 'positive':
            opposite = 'negative'
        else:
            opposite = 'positive'
            
        feature_vals = line[0:len(line)-1]

        pos = compute_naive_bayes('positive', feature_vals, p_outcome, feature_data)
        neg  = compute_naive_bayes('negative', feature_vals, p_outcome, feature_data)
        
        if pos > neg:
            prediction = 'positive'
        else:
            prediction = 'negative'
            
        print('Test Case: ' + ' '.join(feature_vals), outcome)
        
        if desired_outcome == prediction:
            print("Success")
        else:
            print("Fail")
        
        print('Probability of positive ' + str(pos/(pos + neg)))
        print('Probability of negative ' + str(neg/(pos + neg)))
        print()
        
        line = f.readline().rstrip().split()


Test Case: x x x x o o b b o negative
Fail
Probability of positive 0.4099727605238359
Probability of negative 0.5900272394761641

Test Case: x x x x o b o o b negative
Fail
Probability of positive 0.407103439238352
Probability of negative 0.5928965607616481

Test Case: x x x x o b o b o negative
Fail
Probability of positive 0.27483789172536444
Probability of negative 0.7251621082746356

Test Case: x x x x o b b o o negative
Fail
Probability of positive 0.4092390676164827
Probability of negative 0.5907609323835173

Test Case: x x x x b o o o b negative
Success
Probability of positive 0.7029515799375481
Probability of negative 0.29704842006245197

Test Case: x x x b o x o o b negative
Fail
Probability of positive 0.41544595092262887
Probability of negative 0.5845540490773713

Test Case: x x x b o x o b o negative
Fail
Probability of positive 0.2817580299115309
Probability of negative 0.718241970088469

Test Case: x x x b o x b o o negative
Fail
Probability of positive 0.41759451904756556