In [1]:
# Import necessary libraries

import numpy as np
import pandas as pd

In [2]:
# Load the Iris dataset from its CSV file (path is relative to the notebook)
iris_csv_path = '../data/Iris.csv'
data = pd.read_csv(iris_csv_path)

In [3]:
# Preview the first five rows of the raw table
data.head(n=5)

Unnamed: 0,Id,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,1,5.1,3.5,1.4,0.2,Iris-setosa
1,2,4.9,3.0,1.4,0.2,Iris-setosa
2,3,4.7,3.2,1.3,0.2,Iris-setosa
3,4,4.6,3.1,1.5,0.2,Iris-setosa
4,5,5.0,3.6,1.4,0.2,Iris-setosa


In [4]:
# List the distinct class labels found in the 'Species' column
species_column = data['Species']
species_column.unique()

array(['Iris-setosa', 'Iris-versicolor', 'Iris-virginica'], dtype=object)

In [5]:
# Cleaning dataset

# Remove the unused 'Id' attribute.
# Rebinding `data` (instead of inplace=True) together with errors='ignore'
# keeps this cell safe to re-run: a second execution is a no-op rather than
# a KeyError, because 'Id' is already gone by then.
data = data.drop(columns=['Id'], errors='ignore')

In [6]:
# Split the table into one sub-frame per species (all columns are kept)
species_labels = data['Species']
setosa = data.loc[species_labels == 'Iris-setosa']
versicolor = data.loc[species_labels == 'Iris-versicolor']
virginica = data.loc[species_labels == 'Iris-virginica']

In [7]:
# Arithmetic mean
def mean(array):
    """Return the sum of `array` divided by its length.

    `array` may be any sized sequence that `np.sum` accepts (list,
    ndarray, pandas Series, ...).
    """
    summed = np.sum(array)
    count = len(array)
    return summed / count

In [8]:
# Per-species mean of every measurement column.
# Tuple unpacking keeps the original variable names while each column name
# is spelled out only once.
_measurement_columns = ('SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm')

# Mean values of setosa features
(setosa_sepallen_mean,
 setosa_sepalwid_mean,
 setosa_petallen_mean,
 setosa_petalwid_mean) = (mean(setosa[column]) for column in _measurement_columns)

# Mean values of versicolor features
(versicolor_sepallen_mean,
 versicolor_sepalwid_mean,
 versicolor_petallen_mean,
 versicolor_petalwid_mean) = (mean(versicolor[column]) for column in _measurement_columns)

# Mean values of virginica features
(virginica_sepallen_mean,
 virginica_sepalwid_mean,
 virginica_petallen_mean,
 virginica_petalwid_mean) = (mean(virginica[column]) for column in _measurement_columns)

In [9]:
# Spread of a sample (see the docstring: the result is a standard deviation)
def variance(array):
    """Return the square root of the sample variance of `array`.

    The squared deviations from `mean(array)` are summed and divided by
    ``len(array) - 1``; the square root of that quotient is returned. So,
    despite the function's name, the value handed back is a standard
    deviation, not a variance.
    """
    centre = mean(array)
    squared_deviations = []
    for value in array:
        squared_deviations.append((value - centre) ** 2)
    sample_variance = np.sum(squared_deviations) / (len(array) - 1)
    return np.sqrt(sample_variance)

In [10]:
# Per-species spread of every measurement column.
# NOTE: variance() returns the square root of the sample variance, so each
# `*_var` name below actually holds a standard deviation.
_measurement_columns = ('SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm')

# Spread values of setosa features
(setosa_sepallen_var,
 setosa_sepalwid_var,
 setosa_petallen_var,
 setosa_petalwid_var) = (variance(setosa[column]) for column in _measurement_columns)

# Spread values of versicolor features
(versicolor_sepallen_var,
 versicolor_sepalwid_var,
 versicolor_petallen_var,
 versicolor_petalwid_var) = (variance(versicolor[column]) for column in _measurement_columns)

# Spread values of virginica features
(virginica_sepallen_var,
 virginica_sepalwid_var,
 virginica_petallen_var,
 virginica_petalwid_var) = (variance(virginica[column]) for column in _measurement_columns)

In [11]:
# Calculate the Gaussian class-conditional likelihood P( x | y )

def probability(x, mean_y, variance_y):
    """Evaluate the normal density of `x` for one class/feature pair.

    Parameters
    ----------
    x : float or array-like
        Observed feature value(s).
    mean_y : float
        Mean of the feature within the class.
    variance_y : float
        Spread of the feature within the class. Despite the name this is
        the standard deviation (sigma): it is squared below, and the
        notebook's `variance()` helper returns a square root.

    Returns
    -------
    float or ndarray
        exp(-(x - mean_y)^2 / (2 sigma^2)) / sqrt(2 pi sigma^2)
    """
    sigma_squared = variance_y ** 2
    exponent = np.exp( -((x - mean_y) ** 2) / (2 * sigma_squared) )

    # The normalising constant must use sigma^2, the same quantity as the
    # exponent; using sigma here mis-scales every density by sqrt(sigma).
    return exponent / np.sqrt( 2 * np.pi * sigma_squared )

In [12]:
# Class priors P(y): the fraction of all rows that belong to each species

total = len(data)

P_setosa, P_versicolor, P_virginica = (
    len(species_rows) / total
    for species_rows in (setosa, versicolor, virginica)
)

In [13]:
# Predict the class of a given row
def predict(features):
    """Print an unnormalised score for each of the three species.

    For every species the score is the class prior (``P_<species>``)
    multiplied by ``probability()`` evaluated on each of the four
    measurements, using that species' ``*_mean`` / ``*_var`` globals.
    The scores are not divided by a common normalising term, so they are
    not guaranteed to sum to 1 even though the printed labels say
    "probability".

    Parameters
    ----------
    features : mapping-like
        Must support lookup by the keys 'SepalLengthCm', 'SepalWidthCm',
        'PetalLengthCm' and 'PetalWidthCm' (presumably one DataFrame row;
        TODO confirm against callers).

    NOTE(review): the code shown here only prints the three scores; it
    does not pick or return a class label.
    """
    # Probability of the given flower being setosa
    prob_setosa = P_setosa * \
    probability(features['SepalLengthCm'], setosa_sepallen_mean, setosa_sepallen_var) * \
    probability(features['SepalWidthCm'], setosa_sepalwid_mean, setosa_sepalwid_var)  * \
    probability(features['PetalLengthCm'], setosa_petallen_mean, setosa_petallen_var) * \
    probability(features['PetalWidthCm'], setosa_petalwid_mean, setosa_petalwid_var)
    # Probability of the given flower being versicolor
    prob_versicolor = P_versicolor * \
    probability(features['SepalLengthCm'], versicolor_sepallen_mean, versicolor_sepallen_var) * \
    probability(features['SepalWidthCm'], versicolor_sepalwid_mean, versicolor_sepalwid_var)  * \
    probability(features['PetalLengthCm'], versicolor_petallen_mean, versicolor_petallen_var) * \
    probability(features['PetalWidthCm'], versicolor_petalwid_mean, versicolor_petalwid_var)
    # Probability of the given flower being virginica
    prob_virginica = P_virginica * \
    probability(features['SepalLengthCm'], virginica_sepallen_mean, virginica_sepallen_var) * \
    probability(features['SepalWidthCm'], virginica_sepalwid_mean, virginica_sepalwid_var)  * \
    probability(features['PetalLengthCm'], virginica_petallen_mean, virginica_petallen_var) * \
    probability(features['PetalWidthCm'], virginica_petalwid_mean, virginica_petalwid_var)
    
    print('setosa probability: ', prob_setosa)
    print('versicolor probability: ', prob_versicolor)
    print('virginica probability: ', prob_virginica)