In [1]:
import pandas as pd
import random
import numpy as np


## This code performs 2 algorithms, Winnow-2 and Naive Bayes, on the Breast Cancer dataset from the UCI Machine Learning Repository https://archive.ics.uci.edu/ml/index.php

[Datasets](#Datasets):

[Breast Cancer](#Breast_Cancer)- 2 classes  

[Test Splits](#Test_Splits)  
[Winnow-2](#Winnow)  
[Naive Bayes](#Naive_Bayes)  
[Models](#Models)  

<a id="Datasets"></a>
# Dataset EDA

<a id="Breast_Cancer"></a>
## Breast Cancer

In [2]:
breast_cancer = pd.read_csv( "breast-cancer-wisconsin.data", header=None, names = ['ID','clump_thick','unif_cell_size','unif_cell_shape','marg_adh','single_epit_cell_size','bare_nuclei','bland_chromatin','normal_nucleoli','mitosis','cancer_class'])

In [3]:
breast_cancer.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 699 entries, 0 to 698
Data columns (total 11 columns):
ID                       699 non-null int64
clump_thick              699 non-null int64
unif_cell_size           699 non-null int64
unif_cell_shape          699 non-null int64
marg_adh                 699 non-null int64
single_epit_cell_size    699 non-null int64
bare_nuclei              699 non-null object
bland_chromatin          699 non-null int64
normal_nucleoli          699 non-null int64
mitosis                  699 non-null int64
cancer_class             699 non-null int64
dtypes: int64(10), object(1)
memory usage: 60.2+ KB


In [4]:
breast_cancer.head()

Unnamed: 0,ID,clump_thick,unif_cell_size,unif_cell_shape,marg_adh,single_epit_cell_size,bare_nuclei,bland_chromatin,normal_nucleoli,mitosis,cancer_class
0,1000025,5,1,1,1,2,1,3,1,1,2
1,1002945,5,4,4,5,7,10,3,2,1,2
2,1015425,3,1,1,1,2,2,3,1,1,2
3,1016277,6,8,8,1,3,4,3,7,1,2
4,1017023,4,1,1,3,2,1,3,1,1,2


Feature bare_nuclei is of object type, this will need to be researched further.

In [5]:
breast_cancer.isna().sum()

ID                       0
clump_thick              0
unif_cell_size           0
unif_cell_shape          0
marg_adh                 0
single_epit_cell_size    0
bare_nuclei              0
bland_chromatin          0
normal_nucleoli          0
mitosis                  0
cancer_class             0
dtype: int64

In [6]:
breast_cancer.bare_nuclei.value_counts()

1     402
10    132
5      30
2      30
3      28
8      21
4      19
?      16
9       9
7       8
6       4
Name: bare_nuclei, dtype: int64

With 16 records with ? we can drop those invalid values and update the column to be int.

Other options could have been:
Go back to the source of the data to see if the missing values could be corrected.
Keep the records and input a value based on similar records.
Keep the records and only ignore the missing values.

In [7]:
breast_cancer=breast_cancer[breast_cancer.bare_nuclei != '?']

In [8]:
breast_cancer.bare_nuclei = breast_cancer.bare_nuclei.astype(int)

In [9]:
breast_cancer.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 683 entries, 0 to 698
Data columns (total 11 columns):
ID                       683 non-null int64
clump_thick              683 non-null int64
unif_cell_size           683 non-null int64
unif_cell_shape          683 non-null int64
marg_adh                 683 non-null int64
single_epit_cell_size    683 non-null int64
bare_nuclei              683 non-null int32
bland_chromatin          683 non-null int64
normal_nucleoli          683 non-null int64
mitosis                  683 non-null int64
cancer_class             683 non-null int64
dtypes: int32(1), int64(10)
memory usage: 61.4 KB


Next we need to create boolean indicator features to replace the current features.

In [10]:
breast_cancer.cancer_class.value_counts(normalize = True)

2    0.650073
4    0.349927
Name: cancer_class, dtype: float64

In [11]:
breast_cancer = pd.concat([breast_cancer, pd.get_dummies(breast_cancer["cancer_class"],prefix = 'cancer_class_')], axis=1)

In [12]:
breast_cancer = pd.concat([breast_cancer, pd.get_dummies(breast_cancer["clump_thick"],prefix = 'clump_thick_')], axis=1)

In [13]:
breast_cancer = pd.concat([breast_cancer, pd.get_dummies(breast_cancer["unif_cell_size"],prefix = 'unif_cell_size_')], axis=1)

In [14]:
breast_cancer = pd.concat([breast_cancer, pd.get_dummies(breast_cancer["unif_cell_shape"],prefix = 'unif_cell_shape_')], axis=1)

In [15]:
breast_cancer = pd.concat([breast_cancer, pd.get_dummies(breast_cancer["marg_adh"],prefix = 'marg_adh_')], axis=1)

In [16]:
breast_cancer = pd.concat([breast_cancer, pd.get_dummies(breast_cancer["single_epit_cell_size"],prefix = 'single_epit_cell_size_')], axis=1)

In [17]:
breast_cancer = pd.concat([breast_cancer, pd.get_dummies(breast_cancer["bare_nuclei"],prefix = 'bare_nuclei_')], axis=1)

In [18]:
breast_cancer = pd.concat([breast_cancer, pd.get_dummies(breast_cancer["bland_chromatin"],prefix = 'bland_chromatin_')], axis=1)

In [19]:
breast_cancer = pd.concat([breast_cancer, pd.get_dummies(breast_cancer["normal_nucleoli"],prefix = 'normal_nucleoli_')], axis=1)

In [20]:
breast_cancer = pd.concat([breast_cancer, pd.get_dummies(breast_cancer["mitosis"],prefix = 'mitosis_')], axis=1)

In [21]:
breast_cancer.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 683 entries, 0 to 698
Columns: 102 entries, ID to mitosis__10
dtypes: int32(1), int64(10), uint8(91)
memory usage: 122.1 KB


Now we can drop all of the un-needed features from the dataset.

In [22]:
breast_cancer = breast_cancer.drop(['ID', 'clump_thick', 'unif_cell_size', 'unif_cell_shape','marg_adh','single_epit_cell_size','bare_nuclei','bland_chromatin','normal_nucleoli','mitosis','cancer_class'], axis=1)

In [23]:
breast_cancer.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 683 entries, 0 to 698
Data columns (total 91 columns):
cancer_class__2              683 non-null uint8
cancer_class__4              683 non-null uint8
clump_thick__1               683 non-null uint8
clump_thick__2               683 non-null uint8
clump_thick__3               683 non-null uint8
clump_thick__4               683 non-null uint8
clump_thick__5               683 non-null uint8
clump_thick__6               683 non-null uint8
clump_thick__7               683 non-null uint8
clump_thick__8               683 non-null uint8
clump_thick__9               683 non-null uint8
clump_thick__10              683 non-null uint8
unif_cell_size__1            683 non-null uint8
unif_cell_size__2            683 non-null uint8
unif_cell_size__3            683 non-null uint8
unif_cell_size__4            683 non-null uint8
unif_cell_size__5            683 non-null uint8
unif_cell_size__6            683 non-null uint8
unif_cell_size__7            68

In [24]:
breast_cancer.head()

Unnamed: 0,cancer_class__2,cancer_class__4,clump_thick__1,clump_thick__2,clump_thick__3,clump_thick__4,clump_thick__5,clump_thick__6,clump_thick__7,clump_thick__8,...,normal_nucleoli__10,mitosis__1,mitosis__2,mitosis__3,mitosis__4,mitosis__5,mitosis__6,mitosis__7,mitosis__8,mitosis__10
0,1,0,0,0,0,0,1,0,0,0,...,0,1,0,0,0,0,0,0,0,0
1,1,0,0,0,0,0,1,0,0,0,...,0,1,0,0,0,0,0,0,0,0
2,1,0,0,0,1,0,0,0,0,0,...,0,1,0,0,0,0,0,0,0,0
3,1,0,0,0,0,0,0,1,0,0,...,0,1,0,0,0,0,0,0,0,0
4,1,0,0,0,0,1,0,0,0,0,...,0,1,0,0,0,0,0,0,0,0


<a id="Test_Splits"></a>
## Split into Test and Training DataSets

The following two functions split the datasets into n folds, then use one fold for testing and the remaining folds for training. They then train the corresponding model based on the training dataset, and test that model based on the testing dataset. The resulting output provides the performance by returning the accuracy, precision, specificity, and recall.

In [25]:
# This function runs the Winnow-2 algorithm

# data is the full dataset
# n is the number of folds to split the data set into. One will be the test data, the rest will be the train data
# target_class is the index for the feature that contains that class 
# attributes is a list of the indices for the attribute features
# threshold is used for model prediction
# alpha is a parameter used to adjust the weights
# model is the initial model
# returns accuracy, precision, specificity, and recall for the resulting model based on the test data
def winnow_2(data, n, target_class, attributes, threshold, alpha, model):
    accuracy= []
    precision= []
    specificity= []
    recall= []
    list_of_indices = list(range(len(data)))
    k = len(data) #this is the number of records in the data
    l = int(k/n) #this is the number of records in each test set

    random.shuffle(list_of_indices)
    splits = [list_of_indices[0:l-1]]

    test = data.iloc[splits[0]]
    training_indices = [index not in splits[0] for index in list_of_indices]
    training = data.iloc[training_indices]
    new_model = winnow_2_train(training, target_class, attributes, threshold, alpha, model)
    accuracy1, precision1, specificity1, recall1 = winnow_2_test(test, target_class, attributes, threshold, new_model)
        
    print(new_model)
    accuracy.append(accuracy1)
    precision.append(precision1)
    specificity.append(specificity1)
    recall.append(recall1)
        
    return accuracy, precision, specificity, recall

In [26]:
# This function runs the Naive Bayes algorithm

# data is the full dataset
# n is the number of folds to split the data set into. One will be the test data, the rest will be the train data
# target_class is the index for the feature that contains that class 
# attributes is a list of the indices for the attribute features
# model is the initial model
# returns accuracy, precision, specificity, and recall for the resulting model based on the test data
def naive_bayes(data, n, target_class, attributes, model):
    accuracy= []
    precision= []
    specificity= []
    recall= []
    list_of_indices = list(range(len(data)))
    k = len(data) #this is the number of records in the data
    l = int(k/n) #this is the number of records in each split
    
    random.shuffle(list_of_indices)
    splits = [list_of_indices[0:l - 1]]
    
    test = data.iloc[splits[0]]
    training_indices = [index not in splits[0] for index in list_of_indices]
    training = data.iloc[training_indices]
    new_model = naive_bayes_train(training, target_class, attributes, model)
    accuracy1, precision1, specificity1, recall1 = naive_bayes_test(test, target_class, attributes, new_model)
        
    accuracy.append(accuracy1)
    precision.append(precision1)
    specificity.append(specificity1)
    recall.append(recall1)
    print(new_model)
    return accuracy, precision, specificity, recall

<a id="Winnow"></a>
## Winnow-2

In [27]:
# Winnow-2 only works on 2 classes at a time. If your dataset has more classes, you will need to create boolean features 
# for each class in your dataset and create a new model for each class.

# The model is a list of weights, one for each feature in the dataset
# Predictions are made by taking the weighted sum of the features x weights, and comparing the result to a threshold theta
# If the weighted sum is greater than theta, the prediction is 1, otherwise the prediction is 0

# Weights are initialized to be all 1s
# Alpha is a number above 1

# Process:
# 1. Receive data instance
# 2. Make prediction for that instance (0 means not in class, 1 means in class)
# 3. Check if the prediction was correct
# 3.a. If correct, do nothing
# 3.b. If not correct, update weights via promotion or demotion
# Promotion- happens when the prediction is 0 but the correct class is 1
#            For only features with a value of 1, multiply the weight for that feature times alpha
# Demotion- happens when the prediction is 1 but the correct class is 0
#            For only features with a value of 1, divide the weight for that feature by alpha

# data is the training dataset
# target_class is the column # for the class you are checking
# attributes should be a list with the column #s for the features in the data to check
# threshold is used for model prediction
# alpha is a parameter used to adjust the weights
# model is the initial model
# returns the trained model
def winnow_2_train(data, target_class, attributes, threshold, alpha, model):
    num_records = len(data)
    num_attributes = len(attributes)
    new_model = model
    
    for record in range(num_records): #Loop through all records in data
        #use model to predict class (pandas indexing starts at 0)
        curr_record = data.iloc[[record]] #pull current record data
        sum_weights = 0
        for index in range(num_attributes): #Loop through each attribute and add the weights
            sum_weights = sum_weights + new_model[index] * curr_record.iloc[0,attributes[index]]
        # end for
            
        prediction = 0
        if(sum_weights > threshold):
            prediction = 1
        # end if
        #check if class is correct
        #if class is correct -no action
        #if class is not correct, determine if promotion (predict 0 when correct is 1) or demotion (predict 1 when correct is 0)
        if(prediction == 0):
            if(curr_record.iloc[0,target_class]==1):
                #promoting only adjusts weights for attributes that have a value of 1 - wi * alpha
                for index in range(num_attributes):
                    if(curr_record.iloc[0,attributes[index]] == 1):
                        model[index] = model[index]*alpha
                    # end if
                # end for
            # end if
        # end if
                
        else:
            if(curr_record.iloc[0,target_class]==0):
                #demoting only adjusts weights for attributes that have a value of 1 - wi / alpha
                for index in range(num_attributes):
                    if(curr_record.iloc[0,attributes[index]] == 1):
                        model[index] = model[index]/alpha
                    # end if
                # end for
            # end if
        # end else
       
    return new_model
                

In [28]:
# data is the test dataset
# target_class is the column # for the class you are checking
# attributes should be a list with the column #s for the features in the data to check
# threshold is used for model prediction
# model is the model to be tested
# returns the calculated accuracy, precision, specificity, and recall
def winnow_2_test(data, target_class, attributes,threshold, model):
    num_records = len(data)
    num_attributes = len(attributes)
    
    TP = 1
    FP = 1
    FN = 1
    TN = 1
    
    
    for record in range(num_records): #Loop through all records in data
        #use model to predict class (pandas indexing starts at 0)
        curr_record = data.iloc[[record]] #pull current record data
        sum_weights = 0
        for index in range(num_attributes): #Loop through each attribute and add the weights
            sum_weights = sum_weights + model[index] * curr_record.iloc[0,attributes[index]]
        # end for
            
        prediction = 0
        if(sum_weights > threshold):
            prediction = 1
        # end if
        #check if class is correct

        if(prediction == 0):
            if(curr_record.iloc[0,target_class]==0):
                TN = TN +1
            # end if
            else:
                FN = FN +1
            # end else
        # end if
                
        else:
            if(curr_record.iloc[0,target_class]==1):
                TP = TP+1
            # end if
            else:
                FP = FP+1
            # end else
        # end else
                
    accuracy = (TP+TN)/(TP+TN+FP+FN)
    precision = TP/(TP+FP)
    specificity = TN/(TN+FP)
    recall = TP/(TP+FN)

    return accuracy, precision, specificity, recall

<a id="Naive_Bayes"></a>
## Naive Bayes

In [29]:


# Naive Bayes can work for multiple classes at a time, however for the purposes of comparing to Winnow-2 we will keep the
# boolean attributes for each class.
# The purpose of the algorithm is to find the class that maximizes the posterior probability of that class given the data.
# It does this using Bayes Rule- P(c|f1,f2,...,fd)= P(f1,f2,...,fd|c)*P(c)/P(f1,f2,...,fd)
# with the assumption that the features are conditionally independent of eachother given the classes, this becomes
# P(c|f1,f2,...,fd)= P(f1|c)*P(f2|c)*...*P(fd|c)*P(c)/P(f1,f2,...,fd)
# The Naive Bayes Classifier is the argmax of this, which is (the denominator does not matter because it does not depend on c):
# class=argmax_c(P(c)*Product_i=1-d(P(fi|d)))

# Some negatives for using Naive Bayes is the assumption that all of the features are conditionally independent of eachother
# given the classes


# data is the training dataset
# target_class is the column # for the class you are checking
# attributes should be a list with the column #s for the features in the data to check
# model is the initial model
# returns the trained model
def naive_bayes_train(data, target_class, attributes, model):
    num_records = len(data)
    num_attributes = len(attributes)
    new_model = model
    count_1s = 1
    count_0s = 1
    
    #Model of form
    # P(0) / P(xi = 0)
    # ---- / P(xi = 1)
    # P(1) / P(xi = 0)
    # ---- / P(xi = 1)
    for record in range(num_records): #Loop through all records in data
        #add 1 to model for every datapoint for each scenario
        curr_record = data.iloc[[record]] #pull current record data
        #first add for when target_class = 1
        if(curr_record.iloc[0,target_class]==1):
            for index in range(num_attributes): #Loop through each attribute
                if(curr_record.iloc[0,attributes[index]]==1):
                    new_model[3][index+1] = new_model[3][index+1] +1
                # end if
                else:
                    new_model[2][index+1] = new_model[2][index+1] +1
                # end else
            new_model[2][0] = new_model[2][0] +1
            count_1s = count_1s +1
        # end if
                
        #then add for when target_class = 0
        else:
            for index in range(num_attributes): #Loop through each attribute  
                if(curr_record.iloc[0,attributes[index]]==0):
                    new_model[0][index+1] = new_model[0][index+1] +1
                # end if
                else:
                    new_model[1][index+1]= new_model[1][index+1] +1
                # end else
            new_model[0][0] = new_model[0][0] +1
            count_0s = count_0s+1
        # end else
    
    
    
    new_model[0][0] = new_model[0][0] / num_records
    new_model[2][0] = new_model[2][0] / num_records
    
    for index in range(num_attributes):
        new_model[0][index+1] = (new_model[0][index+1] + 0.01) / count_0s
        new_model[1][index+1] = (new_model[1][index+1] + 0.01) / count_0s
        new_model[2][index+1] = (new_model[2][index+1] + 0.01) / count_1s
        new_model[3][index+1] = (new_model[3][index+1] + 0.01) / count_1s
    # end for
    

    return new_model

In [30]:
# data is the test dataset
# target_class is the column # for the class you are checking
# attributes should be a list with the column #s for the features in the data to check
# model is the model to be tested
# returns the calculated accuracy, precision, specificity, and recall
def naive_bayes_test(data, target_class, attributes, model):
    num_records = len(data)
    num_attributes = len(attributes)
    
    TP = 1
    FP = 1
    FN = 1
    TN = 1

    #Model of form
    # P(0) / P(xi = 0)
    # ---- / P(xi = 1)
    # P(1) / P(xi = 0)
    # ---- / P(xi = 1)
    for record in range(num_records): #Loop through all records in data
        curr_record = data.iloc[[record]] #pull current record data
        p_0 = model[0][0]
        p_1 = model[2][0]
        for index in range(num_attributes): #Loop through each attribute
            if(curr_record.iloc[0,attributes[index]]==1):
                p_0 = p_0 * model[1][index+1]
                p_1 = p_1 * model[3][index+1]
            # end if
            else:
                p_0 = p_0 * model[0][index+1]
                p_1 = p_1 * model[2][index+1]
            # end else
                
        #determine class and add to count_correct if correct
        if(p_0>p_1):
            if(curr_record.iloc[0,target_class]==0):
                TN = TN+1
            # end if
            else:
                FN = FN+1
            # end else
        else:
            if(curr_record.iloc[0,target_class]==1):
                TP = TP+1
            # end if
            else:
                FP = FP+1
            # end else
    # end for


    
    accuracy = (TP+TN)/(TP+TN+FP+FN)
    precision = TP/(TP+FP)
    specificity = TN/(TN+FP)
    recall = TP/(TP+FN)
    
    return accuracy, precision, specificity, recall

<a id="Models"></a>
## Models

Winnow-2

In [31]:
# Run Breast Cancer Through
attributes_breast_cancer = [2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90]
winnow_2_breast_cancer = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
print("winnow-2 Breast Cancer model:")
winnow_2_accuracy, winnow_2_precision, winnow_2_specificity, winnow_2_recall = winnow_2(breast_cancer, 3, 0, attributes_breast_cancer, 4, 2, winnow_2_breast_cancer)

winnow-2 Breast Cancer model:
[1, 1.0, 1.0, 0.5, 1.0, 0.5, 0.25, 0.0625, 0.25, 0.0009765625, 1.0, 0.5, 1.0, 0.25, 0.5, 0.25, 0.25, 0.0625, 1.0, 0.00390625, 2.0, 0.25, 1.0, 0.125, 0.25, 0.25, 0.25, 0.0625, 1, 0.015625, 0.125, 0.25, 0.5, 0.5, 0.25, 0.25, 0.5, 0.0625, 1, 0.0625, 1, 0.5, 0.03125, 0.125, 0.25, 0.03125, 0.25, 1.0, 1, 0.25, 1.0, 1.0, 1.0, 0.5, 1.0, 0.5, 0.25, 0.5, 0.25, 0.0001220703125, 1.0, 1.0, 0.5, 0.0625, 0.0625, 1, 0.015625, 0.25, 0.5, 0.25, 1.0, 2, 0.03125, 0.25, 0.5, 0.5, 0.5, 0.0625, 0.25, 0.03125, 0.03125, 0.125, 0.0625, 0.25, 0.5, 1, 0.5, 0.25, 0.25]


Naive Bayes

In [32]:
# Run Breast Cancer Through
naive_bayes_breast_cancer=[[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]]
print("Naive Bayes Breast Cancer model:")
naive_bayes_accuracy, naive_bayes_precision, naive_bayes_specificity, naive_bayes_recall = naive_bayes(breast_cancer, 3, 0, attributes_breast_cancer, naive_bayes_breast_cancer)

Naive Bayes Breast Cancer model:
[[0.29978118161925604, 0.9855797101449275, 0.9783333333333333, 0.9421014492753622, 0.9203623188405797, 0.8189130434782609, 0.9131159420289855, 0.9058695652173914, 0.8116666666666668, 0.9493478260869564, 0.7102173913043479, 0.971086956521739, 0.971086956521739, 0.9276086956521739, 0.8334057971014494, 0.9131159420289855, 0.8841304347826088, 0.9203623188405797, 0.855144927536232, 0.971086956521739, 0.6884782608695652, 0.9855797101449275, 0.9638405797101448, 0.8986231884057971, 0.855144927536232, 0.8986231884057971, 0.8768840579710145, 0.8913768115942029, 0.8768840579710145, 0.9638405797101448, 0.7247101449275363, 0.8841304347826088, 0.8913768115942029, 0.9131159420289855, 0.9203623188405797, 0.8841304347826088, 0.9203623188405797, 0.9348550724637681, 0.8841304347826088, 0.9783333333333333, 0.7247101449275363, 0.9928260869565216, 0.9058695652173914, 0.789927536231884, 0.7826811594202899, 0.855144927536232, 0.8334057971014494, 0.9565942028985507, 0.942101449

Results

In [33]:
print("Winnow Results: accuracy, precision, specificity, recall")
print(winnow_2_accuracy, winnow_2_precision, winnow_2_specificity, winnow_2_recall)
print("Naive-Bayes Results: accuracy, precision, specificity, recall")
print(naive_bayes_accuracy, naive_bayes_precision, naive_bayes_specificity, naive_bayes_recall)

Winnow Results: accuracy, precision, specificity, recall
[0.9652173913043478] [0.9668874172185431] [0.9382716049382716] [0.9798657718120806]
Naive-Bayes Results: accuracy, precision, specificity, recall
[0.9608695652173913] [0.9932885906040269] [0.9864864864864865] [0.9487179487179487]
