# Importing the Libraries

In [1]:
import numpy as np
from sklearn import datasets
from sklearn.metrics import confusion_matrix
from sklearn.naive_bayes import GaussianNB
import naive_bayes
from sklearn.model_selection import train_test_split

# Fetching the Datasets

In [2]:
# Load scikit-learn's bundled Iris dataset and print its description text.
iris = datasets.load_iris()
print(iris.DESCR)

.. _iris_dataset:

Iris plants dataset
--------------------

**Data Set Characteristics:**

    :Number of Instances: 150 (50 in each of three classes)
    :Number of Attributes: 4 numeric, predictive attributes and the class
    :Attribute Information:
        - sepal length in cm
        - sepal width in cm
        - petal length in cm
        - petal width in cm
        - class:
                - Iris-Setosa
                - Iris-Versicolour
                - Iris-Virginica
                
    :Summary Statistics:

                    Min  Max   Mean    SD   Class Correlation
    sepal length:   4.3  7.9   5.84   0.83    0.7826
    sepal width:    2.0  4.4   3.05   0.43   -0.4194
    petal length:   1.0  6.9   3.76   1.76    0.9490  (high!)
    petal width:    0.1  2.5   1.20   0.76    0.9565  (high!)

    :Missing Attribute Values: None
    :Class Distribution: 33.3% for each of 3 classes.
    :Creator: R.A. Fisher
    :Donor: Michael Marshall (MARSHALL%PLU@io.arc.nasa.gov)
    :

In [3]:
# Load scikit-learn's bundled breast-cancer dataset and print its description text.
breast_cancer = datasets.load_breast_cancer()
print(breast_cancer.DESCR)

.. _breast_cancer_dataset:

Breast cancer wisconsin (diagnostic) dataset
--------------------------------------------

**Data Set Characteristics:**

    :Number of Instances: 569

    :Number of Attributes: 30 numeric, predictive attributes and the class

    :Attribute Information:
        - radius (mean of distances from center to points on the perimeter)
        - texture (standard deviation of gray-scale values)
        - perimeter
        - area
        - smoothness (local variation in radius lengths)
        - compactness (perimeter^2 / area - 1.0)
        - concavity (severity of concave portions of the contour)
        - concave points (number of concave portions of the contour)
        - symmetry
        - fractal dimension ("coastline approximation" - 1)

        The mean, standard error, and "worst" or largest (mean of the three
        worst/largest values) of these features were computed for each image,
        resulting in 30 features.  For instance, field 0 is Mean Radi

# Splitting the Dataset into Training and Testing Set

In [4]:
# Separate the Iris feature matrix (inputs) from the class labels (outputs).
iris_input, iris_output = iris.data, iris.target

In [5]:
# Split Iris into training and testing subsets. No test_size is passed, so
# train_test_split's default ratio applies; the fixed random_state keeps the
# split reproducible across runs.
iris_split = train_test_split(iris_input, iris_output, random_state=1)
iris_input_train, iris_input_test, iris_output_train, iris_output_test = iris_split

In [6]:
# Separate the breast-cancer feature matrix (inputs) from the class labels (outputs).
breast_cancer_input, breast_cancer_output = breast_cancer.data, breast_cancer.target

In [7]:
# Split the breast-cancer data into training and testing subsets. No test_size
# is passed, so train_test_split's default ratio applies; the fixed
# random_state keeps the split reproducible across runs.
breast_cancer_split = train_test_split(
    breast_cancer_input,
    breast_cancer_output,
    random_state=1,
)
(
    breast_cancer_input_train,
    breast_cancer_input_test,
    breast_cancer_output_train,
    breast_cancer_output_test,
) = breast_cancer_split

# Training the Created and Built-in Algorithms

In [8]:
# Instantiate the two classifiers being compared. Both classes are named
# GaussianNB: `naive_bayes.GaussianNB` comes from the `naive_bayes` module
# imported in the first cell (presumably this project's own implementation --
# confirm), while the bare `GaussianNB` is scikit-learn's.
# NOTE: these same two objects are fitted on Iris first and re-fitted on the
# breast-cancer data further down, so the Iris evaluation cells are only valid
# when run before the breast-cancer fit cells (i.e. in top-to-bottom order).
created_Algo = naive_bayes.GaussianNB()
inbuild_Algo = GaussianNB()

## Training and Testing on the Iris Dataset

In [9]:
# Fit the custom GaussianNB on the Iris training split.
created_Algo.fit(iris_input_train, iris_output_train)

In [10]:
# Fit scikit-learn's GaussianNB on the same Iris training split for comparison.
inbuild_Algo.fit(iris_input_train, iris_output_train)

In [11]:
# Score the custom classifier on the Iris training split, then on the held-out split.
created_iris_train_score = created_Algo.score(iris_input_train, iris_output_train)
print('Created Algorithm Training Score:', created_iris_train_score)
created_iris_test_score = created_Algo.score(iris_input_test, iris_output_test)
print('Created Algorithm Testing Score:', created_iris_test_score)

Created Algorithm Training Score: 0.9553571428571429
Created Algorithm Testing Score: 0.9473684210526315


In [12]:
# Score scikit-learn's classifier on the Iris training split, then on the held-out split.
inbuild_iris_train_score = inbuild_Algo.score(iris_input_train, iris_output_train)
print('Inbuild Algorithm Training Score:', inbuild_iris_train_score)
inbuild_iris_test_score = inbuild_Algo.score(iris_input_test, iris_output_test)
print('Inbuild Algorithm Testing Score:', inbuild_iris_test_score)

Inbuild Algorithm Training Score: 0.9464285714285714
Inbuild Algorithm Testing Score: 0.9736842105263158


In [13]:
# Confusion matrix of the custom classifier on the Iris training split
# (rows: true class, columns: predicted class).
print('Confusion Matrix for Created Algorithm on Training Data')
created_iris_train_pred = created_Algo.predict(iris_input_train)
print(confusion_matrix(y_true=iris_output_train, y_pred=created_iris_train_pred))

Confusion Matrix for Created Algorithm on Training Data
[[37  0  0]
 [ 0 31  3]
 [ 0  2 39]]


In [14]:
# Confusion matrix of scikit-learn's classifier on the Iris training split
# (rows: true class, columns: predicted class).
print('Confusion Matrix for Inbuild Algorithm on Training Data')
inbuild_iris_train_pred = inbuild_Algo.predict(iris_input_train)
print(confusion_matrix(y_true=iris_output_train, y_pred=inbuild_iris_train_pred))

Confusion Matrix for Inbuild Algorithm on Training Data
[[37  0  0]
 [ 0 31  3]
 [ 0  3 38]]


In [15]:
# Confusion matrix of the custom classifier on the held-out Iris split
# (rows: true class, columns: predicted class).
print('Confusion Matrix for Created Algorithm on Testing Data')
created_iris_test_pred = created_Algo.predict(iris_input_test)
print(confusion_matrix(y_true=iris_output_test, y_pred=created_iris_test_pred))

Confusion Matrix for Created Algorithm on Testing Data
[[13  0  0]
 [ 0 14  2]
 [ 0  0  9]]


In [16]:
# Confusion matrix of scikit-learn's classifier on the held-out Iris split
# (rows: true class, columns: predicted class).
print('Confusion Matrix for Inbuild Algorithm on Testing Data')
inbuild_iris_test_pred = inbuild_Algo.predict(iris_input_test)
print(confusion_matrix(y_true=iris_output_test, y_pred=inbuild_iris_test_pred))

Confusion Matrix for Inbuild Algorithm on Testing Data
[[13  0  0]
 [ 0 15  1]
 [ 0  0  9]]


## Training and Testing on the Breast Cancer Dataset

In [17]:
# Re-fit the same custom estimator object, now on the breast-cancer training
# split. After this cell the Iris evaluation cells above are stale.
# NOTE(review): assumes naive_bayes.GaussianNB.fit() discards whatever was
# learned from the earlier Iris fit -- confirm in the naive_bayes module.
created_Algo.fit(breast_cancer_input_train, breast_cancer_output_train)

In [18]:
# Re-fit the same scikit-learn estimator object, now on the breast-cancer
# training split. After this cell the Iris evaluation cells above are stale.
inbuild_Algo.fit(breast_cancer_input_train, breast_cancer_output_train)

In [19]:
# Score the custom classifier on the breast-cancer training split, then on the held-out split.
created_cancer_train_score = created_Algo.score(breast_cancer_input_train, breast_cancer_output_train)
print('Created Algorithm Training Score:', created_cancer_train_score)
created_cancer_test_score = created_Algo.score(breast_cancer_input_test, breast_cancer_output_test)
print('Created Algorithm Testing Score:', created_cancer_test_score)

Created Algorithm Training Score: 0.9389671361502347
Created Algorithm Testing Score: 0.9440559440559441


In [20]:
# Score scikit-learn's classifier on the breast-cancer training split, then on the held-out split.
inbuild_cancer_train_score = inbuild_Algo.score(breast_cancer_input_train, breast_cancer_output_train)
print('Inbuild Algorithm Training Score:', inbuild_cancer_train_score)
inbuild_cancer_test_score = inbuild_Algo.score(breast_cancer_input_test, breast_cancer_output_test)
print('Inbuild Algorithm Testing Score:', inbuild_cancer_test_score)

Inbuild Algorithm Training Score: 0.9413145539906104
Inbuild Algorithm Testing Score: 0.9440559440559441


In [21]:
# Confusion matrix of the custom classifier on the breast-cancer training split
# (rows: true class, columns: predicted class).
print('Confusion Matrix for Created Algorithm on Training Data')
created_cancer_train_pred = created_Algo.predict(breast_cancer_input_train)
print(confusion_matrix(y_true=breast_cancer_output_train, y_pred=created_cancer_train_pred))

Confusion Matrix for Created Algorithm on Training Data
[[140  17]
 [  9 260]]


In [22]:
# Confusion matrix of scikit-learn's classifier on the breast-cancer training split
# (rows: true class, columns: predicted class).
print('Confusion Matrix for Inbuild Algorithm on Training Data')
inbuild_cancer_train_pred = inbuild_Algo.predict(breast_cancer_input_train)
print(confusion_matrix(y_true=breast_cancer_output_train, y_pred=inbuild_cancer_train_pred))

Confusion Matrix for Inbuild Algorithm on Training Data
[[139  18]
 [  7 262]]


In [23]:
# Confusion matrix of the custom classifier on the held-out breast-cancer split
# (rows: true class, columns: predicted class).
print('Confusion Matrix for Created Algorithm on Testing Data')
created_cancer_test_pred = created_Algo.predict(breast_cancer_input_test)
print(confusion_matrix(y_true=breast_cancer_output_test, y_pred=created_cancer_test_pred))

Confusion Matrix for Created Algorithm on Testing Data
[[50  5]
 [ 3 85]]


In [24]:
# Confusion matrix of scikit-learn's classifier on the held-out breast-cancer split
# (rows: true class, columns: predicted class).
print('Confusion Matrix for Inbuild Algorithm on Testing Data')
inbuild_cancer_test_pred = inbuild_Algo.predict(breast_cancer_input_test)
print(confusion_matrix(y_true=breast_cancer_output_test, y_pred=inbuild_cancer_test_pred))

Confusion Matrix for Inbuild Algorithm on Testing Data
[[50  5]
 [ 3 85]]
