# Experiment 7: Image Classification
###### Siddhartha Dutta - A70405217037 - BCSE1732 

In [None]:
# Importing Required Libraries
import pandas as pd
import numpy as np

from sklearn.naive_bayes import GaussianNB

## 7.1 Object Identification using Minimum Distance Classifier
* The minimum distance classifier is defined as the classifier (decision rule dMD(x)) that will classify an unknown instance x as belonging to class r if and only if the nearest training atom to x is labeled class r.
* The sense of "nearest" can be chosen to be Euclidean distance, city block distance or any other norm.
<br><br>
* In this example, the Wine dataset is used: the Euclidean distance from each data point to the mean of every class is calculated, and the data point is assigned to the class whose mean is nearest.

In [2]:
# Load the Wine training data; header=None keeps the first CSV row as data
# and labels the columns with integers
data = pd.read_csv(filepath_or_buffer='Datasets/wine.csv', header=None)
data.head(n=5)  # Preview the first five rows

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13
0,13.75,1.73,2.41,16.0,89,2.6,2.76,0.29,1.81,5.6,1.15,2.9,1320,1
1,13.2,1.78,2.14,11.2,100,2.65,2.76,0.26,1.28,4.38,1.05,3.4,1050,1
2,13.07,1.5,2.1,15.5,98,2.4,2.64,0.28,1.37,3.7,1.18,2.69,1020,1
3,13.86,1.35,2.27,16.0,98,2.98,3.15,0.22,1.85,7.22,1.01,3.55,1045,1
4,13.51,1.8,2.65,19.0,110,2.35,2.53,0.29,1.54,4.2,1.1,2.87,1095,1


In [3]:
# Calculating the mean of every feature grouped by class
labelColumn = data.columns[-1]  # Class label is the last column (same convention as the X/Y split)
featureFrame = data.drop(columns=[labelColumn])  # Feature columns only, so no label mean is computed
classes = data[labelColumn].unique()  # Class labels in order of first appearance

# Row i holds the per-feature means of classes[i]: (Number of Classes, Number of Features)
featureMeans = np.zeros((len(classes), featureFrame.shape[1]))
for i, classLabel in enumerate(classes):
    classRows = featureFrame[data[labelColumn] == classLabel]  # Samples belonging to this class
    featureMeans[i] = classRows.mean().to_numpy(dtype='float64')

print(featureMeans.shape)

(3, 13)


In [4]:
# Separate the feature matrix (X) from the class labels (Y);
# the class label is taken from the last column of the frame
labelColumn = data.columns[-1]
Y = np.array(data[labelColumn])
X = data.drop(columns=[labelColumn]).to_numpy()
for caption, array in (('X Shape:', X), ('Y Shape:', Y)):
    print(caption, array.shape)

X Shape: (89, 13)
Y Shape: (89,)


In [5]:
# Predicting the Class based on Minimum Distance to Class Mean
# Broadcasting (samples, 1, features) against (1, classes, features) gives every
# sample-to-class-mean difference in one step instead of nested Python loops.
differences = X[:, np.newaxis, :] - featureMeans[np.newaxis, :, :]
distanceToMeans = np.sqrt(np.sum(np.square(differences), axis=2))  # Euclidean Distance, (samples, classes)

# argmin returns the first minimum, so ties resolve to the earliest class as before
Y_pred = np.asarray(classes)[np.argmin(distanceToMeans, axis=1)]  # Corresponding Class to Least Distance
print('Y_pred Shape:', Y_pred.shape)

Y_pred Shape: (89,)


In [6]:
# Error Rate (On Training Data): fraction of samples whose prediction differs from the true class
mismatches = (Y != Y_pred)  # True where the prediction does not match the label
numErrors = np.sum(mismatches)
errorRate = numErrors / Y_pred.shape[0]
print('Error Rate of MDTCM Classifier = %.2f' % (errorRate,))

Error Rate of MDTCM Classifier = 0.25


## 7.2 Object Classification using Bayes Classifier
* Naive Bayes classifiers are a collection of classification algorithms based on Bayes’ Theorem.
* It is not a single algorithm but a family of algorithms that share a common principle: every pair of features is assumed to be conditionally independent of each other, given the class label.
* Bayes’ Theorem finds the probability of an event occurring given the probability of another event that has already occurred.

In [8]:
# Reload the Wine training data for this section; header=None treats every
# CSV row as data and numbers the columns
wineCsvPath = 'Datasets/wine.csv'
data = pd.read_csv(wineCsvPath, header=None)
data.head(5)  # Show the first five rows

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13
0,13.75,1.73,2.41,16.0,89,2.6,2.76,0.29,1.81,5.6,1.15,2.9,1320,1
1,13.2,1.78,2.14,11.2,100,2.65,2.76,0.26,1.28,4.38,1.05,3.4,1050,1
2,13.07,1.5,2.1,15.5,98,2.4,2.64,0.28,1.37,3.7,1.18,2.69,1020,1
3,13.86,1.35,2.27,16.0,98,2.98,3.15,0.22,1.85,7.22,1.01,3.55,1045,1
4,13.51,1.8,2.65,19.0,110,2.35,2.53,0.29,1.54,4.2,1.1,2.87,1095,1


In [9]:
# Feature matrix X = every column except the last; label vector Y = the last column
labelColumn = data.columns[-1]
X, Y = data.drop(columns=[labelColumn]).to_numpy(), np.array(data[labelColumn])
print('X Shape:', X.shape)
print('Y Shape:', Y.shape)

X Shape: (89, 13)
Y Shape: (89,)


In [10]:
# Fit a Gaussian Naive Bayes model on the full data set, then predict the same
# samples it was trained on (fit returns the fitted estimator, so it can be chained)
nbModel = GaussianNB().fit(X, Y)
Y_pred = nbModel.predict(X)
print('Y_pred Shape:', Y_pred.shape)

Y_pred Shape: (89,)


In [11]:
# Error Rate (On Training Data): share of samples the classifier labels incorrectly
isWrong = (Y != Y_pred)  # True where the prediction does not match the true class
numErrors = np.sum(isWrong)
errorRate = numErrors / Y_pred.shape[0]
print('Error Rate of Gaussian Naive Bayes Classifier = %.2f' % (errorRate,))

Error Rate of Gaussian Naive Bayes Classifier = 0.02
