-
Notifications
You must be signed in to change notification settings - Fork 0
/
Naive Bayes Classification Code.R
77 lines (61 loc) · 2.47 KB
/
Naive Bayes Classification Code.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
# library GGally for correlation plot
library(GGally)
# Bring the built-in iris data set into the workspace
data("iris")
# Pairwise correlation plot of the predictors only: the fifth column is
# dropped before plotting (it is the column used as the class label below)
ggpairs(
  data = iris[, -5],
  title = "The correlation between the predictors"
)
# library the naive Bayes package
library(naivebayes)
# Number of rows used for training; the remaining rows form the test set
Train_N <- 75 # 50% split of the 150 iris rows
# Guard the split size: with Train_N == 0 the index vector is empty and
# iris[-Ind, ] would return zero rows instead of the whole data set, and
# Train_N >= nrow(iris) would leave nothing to test on
stopifnot(Train_N >= 1, Train_N < nrow(iris))
# Split the data set into training and test
set.seed(123) # fixed seed so the same rows are drawn on every run
# seq_len() is the safe spelling of 1:nrow(): it never counts backwards
Ind <- sample(seq_len(nrow(iris)), Train_N, replace = FALSE)
Train <- iris[Ind, ]
Test <- iris[-Ind, ]
# Fit the naive Bayes model on the training data, using every other column
# as a predictor of Species.
# NOTE(review): the previous call also passed `ntree = Num_Trees`. Num_Trees
# is not defined anywhere in this script and ntree is not a documented
# naive_bayes() argument (it looks like a leftover from a random-forest
# script), so it has been removed.
model <- naive_bayes(Species ~ ., data = Train)
# A data frame pairing the predicted species with the actual species for
# every test row (column 5 holds the label, so it is withheld from predict())
Results <- data.frame(
  Predicted = predict(model, Test[, -5]),
  Actual = Test[, 5]
)
# Score the predictions without a loop. The number of test rows is taken
# from Results itself rather than the hard-coded 150 - Train_N, so the
# metrics stay correct if the data set or the split size changes.
N_Test <- nrow(Results)
# Compare as character so the test does not depend on both factors carrying
# identical level sets
Actual_Species <- as.character(Results$Actual)
Is_Correct <- as.character(Results$Predicted) == Actual_Species
# 1 where the prediction matches the actual species, 0 otherwise
Correct <- as.numeric(Is_Correct)
# C1..C3: number of test rows per actual species
C1 <- sum(Actual_Species == "setosa")
C2 <- sum(Actual_Species == "versicolor")
C3 <- sum(Actual_Species == "virginica")
# Group_1..Group_3: number of correctly predicted rows per actual species
Group_1 <- sum(Is_Correct & Actual_Species == "setosa")
Group_2 <- sum(Is_Correct & Actual_Species == "versicolor")
Group_3 <- sum(Is_Correct & Actual_Species == "virginica")
# Calculating the percent correct overall and by species
# (a species absent from the test set yields NaN, as 0 / 0)
Correct_Total <- round(sum(Correct) * 100 / N_Test, 2)
Correct_Seto <- round(Group_1 * 100 / C1, 2)
Correct_Vers <- round(Group_2 * 100 / C2, 2)
Correct_Virg <- round(Group_3 * 100 / C3, 2)
# Print the overall and per-species accuracy percentages in the console
# (paste0() is the idiomatic form of paste(..., sep = ""))
print(paste0(
  "Total accuracy: ", Correct_Total, "%",
  " Setosa accuracy: ", Correct_Seto, "%",
  " Versicolor accuracy: ", Correct_Vers, "%",
  " Virginica accuracy: ", Correct_Virg, "%"
))
# Visualize the same four percentages as a bar plot, one bar per metric
barplot(
  c(Correct_Total, Correct_Seto, Correct_Vers, Correct_Virg),
  names.arg = c("Total Acc", "Seto Acc", "Vers Acc", "Virg Acc"),
  main = "Accuracy for Naive Bayes",
  xlab = "Different Metrics", # label previously misspelled as "Metics"
  ylab = "Accuracy %",
  col = c("green", "blue", "orange", "yellow")
)