In [1]:
# Installing Packages
suppressMessages(install.packages("RWeka"))
suppressMessages(install.packages("e1071"))
suppressMessages(install.packages("caTools"))
suppressMessages(install.packages("caret"))


In [2]:
suppressMessages(library(RWeka))
suppressMessages(library(e1071))
suppressMessages(library(caTools))
suppressMessages(library(caret))
suppressMessages(library(foreign))

“running command 'timedatectl' had status 1”


In [3]:
# Download messidor_features.arff file to /content

download.file('https://archive.ics.uci.edu/ml/machine-learning-databases/00329/messidor_features.arff','/content/messidor_features.arff')

In [4]:
# Load file into variable df

df = read.arff("./messidor_features.arff")

numericFeats = c(3:16)
eyeFeats = c(17,18)
df[, c(numericFeats, eyeFeats)] = scale(df[, c(numericFeats, eyeFeats)])

df$Class = as.factor(df$Class)

head(df)

Unnamed: 0_level_0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,Class
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<fct>
1,1,1,-0.6412076,-0.6185134,-0.5762121,-0.6297551,-0.5508766,-0.4735387,-0.2428118,-0.24589609,-0.29683715,-0.2713911,-0.2182294,-0.19432495,-0.2050354,-0.1860881,-1.29420026,-0.468452,1,0
2,1,1,-0.5631463,-0.5355452,-0.5762121,-0.6771154,-0.6533922,-0.539757,-0.1092025,0.03295799,-0.46502232,-0.4084152,-0.2241587,-0.19712594,-0.2050855,-0.1861999,-0.08213216,2.0051825,0,0
3,1,1,0.9200171,0.957883,1.0462107,1.0278526,0.9355995,0.7846095,-0.1413216,0.22709761,0.34431328,0.7687028,0.3353919,0.15226391,-0.1099956,-0.1647369,0.27416347,1.1210291,0,1
4,1,1,0.6468028,0.6674942,0.7831151,0.8384117,0.7305683,0.6521728,-0.4040238,-0.2148833,0.03581478,0.3168157,0.1125243,0.05689399,-0.1956804,-0.1994543,-1.42319524,0.3543467,0,0
5,1,1,0.217466,0.2941372,0.3884717,0.4121697,0.5255372,0.3872995,-0.7877266,-0.67201352,-0.71702314,-0.4681072,-0.2257299,-0.20081802,-0.2148744,-0.2080094,-1.68514168,0.8437351,0,1
6,1,1,0.217466,0.252653,0.2569239,0.4121697,0.4230216,0.5197362,-0.6110985,-0.7477019,-0.55316964,-0.3855924,-0.2257299,-0.20081802,-0.2148744,-0.2080094,-0.72646549,1.0203317,0,1


---
#**Naive Bayes - Weka Classifier**
---

In [5]:
NB <- make_Weka_classifier("weka/classifiers/bayes/NaiveBayes")

In [6]:
resultNB <- NB(Class ~ ., data = df)
resultNB

Naive Bayes Classifier

                 Class
Attribute            0       1
                (0.47)  (0.53)
0
  mean           0.9926       1
  std. dev.      0.1667  0.1667
  weight sum        540     611
  precision           1       1

1
  mean           0.9407  0.8985
  std. dev.      0.2361   0.302
  weight sum        540     611
  precision           1       1

2
  mean          -0.3094   0.277
  std. dev.      0.8076  1.0682
  weight sum        540     611
  precision      0.0537  0.0537

3
  mean          -0.2816  0.2493
  std. dev.      0.8494  1.0563
  weight sum        540     611
  precision      0.0528  0.0528

4
  mean          -0.2491  0.2204
  std. dev.      0.8857  1.0429
  weight sum        540     611
  precision      0.0532  0.0532

5
  mean          -0.2098  0.1857
  std. dev.      0.9132   1.034
  weight sum        540     611
  precision      0.0547  0.0547

6
  mean          -0.1703   0.153
  std. dev.       0.938  1.0277
  weight sum        540     611
  preci

In [7]:
evalJ48 <- evaluate_Weka_classifier(resultNB,numFolds = 10)
evalJ48

=== 10 Fold Cross Validation ===

=== Summary ===

Correctly Classified Instances         650               56.4726 %
Incorrectly Classified Instances       501               43.5274 %
Kappa statistic                          0.1666
Mean absolute error                      0.4327
Root mean squared error                  0.6509
Relative absolute error                 86.8772 %
Root relative squared error            130.4309 %
Total Number of Instances             1151     

=== Confusion Matrix ===

   a   b   <-- classified as
 515  25 |   a = 0
 476 135 |   b = 1

---
#**Naive Bayes - Weka Split**
---

In [8]:
# Splitting data into train
# and test data
split <- sample.split(df, SplitRatio = 0.7)
train_cl <- subset(df, split == "TRUE")
test_cl <- subset(df, split == "FALSE")

In [9]:
# Feature Scaling
train_scale <- scale(train_cl[, 0:18])
test_scale <- scale(test_cl[, 0:18])

In [10]:
# Fitting Naive Bayes Model
# to training dataset
set.seed(120)  # Setting Seed
classifier_cl <- naiveBayes(Class ~ ., data = train_cl)
classifier_cl


Naive Bayes Classifier for Discrete Predictors

Call:
naiveBayes.default(x = X, y = Y, laplace = laplace)

A-priori probabilities:
Y
       0        1 
0.483871 0.516129 

Conditional probabilities:
   0
Y        [,1]       [,2]
  0 0.9974359 0.05063697
  1 1.0000000 0.00000000

   1
Y        [,1]      [,2]
  0 0.9461538 0.2260038
  1 0.8798077 0.3255778

   2
Y         [,1]      [,2]
  0 -0.3543826 0.7902616
  1  0.2550892 1.0636620

   3
Y         [,1]      [,2]
  0 -0.3284437 0.8282493
  1  0.2330079 1.0533862

   4
Y         [,1]      [,2]
  0 -0.2963638 0.8610884
  1  0.2020069 1.0450984

   5
Y         [,1]      [,2]
  0 -0.2559737 0.8916513
  1  0.1675132 1.0387910

   6
Y         [,1]      [,2]
  0 -0.2225638 0.9092343
  1  0.1379001 1.0338036

   7
Y         [,1]      [,2]
  0 -0.1915165 0.9046866
  1  0.1106470 1.0445932

   8
Y          [,1]      [,2]
  0 -0.02433369 0.8925485
  1  0.09159437 1.1171489

   9
Y         [,1]      [,2]
  0 0.03092558 0.9505299
  1 0.07171482 1

In [11]:
# Predicting on test data'
y_pred <- predict(classifier_cl, newdata = test_cl)

In [12]:
# Confusion Matrix
cm <- table(test_cl$Class, y_pred)
cm

   y_pred
      0   1
  0 126  24
  1 140  55

In [13]:
# Model Evaluation
confusionMatrix(cm)

Confusion Matrix and Statistics

   y_pred
      0   1
  0 126  24
  1 140  55
                                          
               Accuracy : 0.5246          
                 95% CI : (0.4705, 0.5784)
    No Information Rate : 0.771           
    P-Value [Acc > NIR] : 1               
                                          
                  Kappa : 0.1121          
                                          
 Mcnemar's Test P-Value : <2e-16          
                                          
            Sensitivity : 0.4737          
            Specificity : 0.6962          
         Pos Pred Value : 0.8400          
         Neg Pred Value : 0.2821          
             Prevalence : 0.7710          
         Detection Rate : 0.3652          
   Detection Prevalence : 0.4348          
      Balanced Accuracy : 0.5849          
                                          
       'Positive' Class : 0               
                                          