In [None]:
# install necessary packages
#!pip install javabridge
!pip install -q python-javabridge
!pip install -q python-weka-wrapper3
!apt install -q libgraphviz-dev
!pip install -q pygraphviz
!pip install -q pydotplus
!pip install -q sklearn

In [2]:
# Import necessary libraries
import os
import graphviz
import pydotplus
import pygraphviz
import weka.core.converters as converters
import warnings

from graphviz import Source
from IPython.display import Image, SVG
from sklearn import tree
from sklearn.preprocessing import scale
from sklearn.tree import export_graphviz
from six import StringIO
from weka.core import classes
from weka.core.converters import Loader
from weka.classifiers import Classifier, Evaluation
from weka.experiments import SimpleCrossValidationExperiment, SimpleRandomSplitExperiment, Tester, ResultMatrix
import weka.core.jvm as jvm

# Ignore all Python warnings from this point on so that library notices do not
# clutter the cell outputs. NOTE(review): this also hides deprecation warnings.
warnings.filterwarnings('ignore')

In [3]:
# start JVM
# The Weka wrapper objects used in the following cells (Loader, Classifier,
# Evaluation) need a running JVM, so this cell must be executed before them.
# Called without arguments, i.e. with the wrapper's default JVM settings.
jvm.start()

DEBUG:weka.core.jvm:Adding bundled jars
DEBUG:weka.core.jvm:Classpath=['/usr/local/lib/python3.8/dist-packages/javabridge/jars/rhino-1.7R4.jar', '/usr/local/lib/python3.8/dist-packages/javabridge/jars/runnablequeue.jar', '/usr/local/lib/python3.8/dist-packages/javabridge/jars/cpython.jar', '/usr/local/lib/python3.8/dist-packages/weka/lib/weka.jar', '/usr/local/lib/python3.8/dist-packages/weka/lib/python-weka-wrapper.jar']
DEBUG:weka.core.jvm:MaxHeapSize=default
DEBUG:weka.core.jvm:Package support disabled


In [None]:
# Check if file already exists
matches = [match for match in os.listdir() if "messidor_features.arff" in match]

# If file is missing, then download it
if not matches:
  !wget -q https://archive.ics.uci.edu/ml/machine-learning-databases/00329/messidor_features.arff

In [5]:
# Read the Messidor feature file (ARFF format) through Weka's ArffLoader.
# `loader` and `data` are notebook-level names; `data` is used by the
# classifier and evaluation cells further down.
loader = Loader(classname="weka.core.converters.ArffLoader")
data = loader.load_file("messidor_features.arff")
# Declare the last attribute of the dataset as the class attribute.
data.class_is_last()

---
# **J48 - Weka Classifier**
---

In [6]:
# Configure Weka's J48 decision tree and fit it on the complete dataset.
# The option list is handed to J48 as-is ("-C 0.25").
j48 = Classifier(
    classname="weka.classifiers.trees.J48",
    options=["-C", "0.25"],
)
j48.build_classifier(data)

# Print the textual description of the fitted tree.
print(j48)

J48 pruned tree
------------------

14 <= 0.02029
|   2 <= 55
|   |   1 <= 0
|   |   |   7 <= 1: 0 (4.0)
|   |   |   7 > 1
|   |   |   |   8 <= 101.64826
|   |   |   |   |   14 <= 0.001954
|   |   |   |   |   |   8 <= 61.995063
|   |   |   |   |   |   |   11 <= 0.186842
|   |   |   |   |   |   |   |   11 <= 0.095217
|   |   |   |   |   |   |   |   |   18 <= 0
|   |   |   |   |   |   |   |   |   |   3 <= 29: 1 (10.0/1.0)
|   |   |   |   |   |   |   |   |   |   3 > 29
|   |   |   |   |   |   |   |   |   |   |   2 <= 41: 0 (4.0)
|   |   |   |   |   |   |   |   |   |   |   2 > 41: 1 (4.0/1.0)
|   |   |   |   |   |   |   |   |   18 > 0
|   |   |   |   |   |   |   |   |   |   8 <= 8.030835: 0 (2.0)
|   |   |   |   |   |   |   |   |   |   8 > 8.030835: 1 (2.0)
|   |   |   |   |   |   |   |   11 > 0.095217: 0 (6.0/1.0)
|   |   |   |   |   |   |   11 > 0.186842: 1 (14.0)
|   |   |   |   |   |   8 > 61.995063: 0 (8.0/1.0)
|   |   |   |   |   14 > 0.001954: 0 (3.0)
|   |   |   |   8 > 101.64826: 

In [7]:
# Notebook-level alias for the wrapper's Random class; calling rnd(seed)
# creates a seeded generator. Later cells reuse this alias.
rnd = classes.Random

# 10-fold cross-validation of the J48 setup on `data`, using seed 1.
evl_j48 = Evaluation(data)
evl_j48.crossvalidate_model(j48, data, 10, rnd(1))

# Emit the three reports in order: summary, per-class details, confusion matrix.
print("=== Summary ===")
print(
    evl_j48.summary(),
    evl_j48.class_details(),
    evl_j48.matrix(),
    sep="\n",
)

=== Summary ===

Correctly Classified Instances         741               64.3788 %
Incorrectly Classified Instances       410               35.6212 %
Kappa statistic                          0.2897
Mean absolute error                      0.3796
Root mean squared error                  0.5125
Relative absolute error                 76.2052 %
Root relative squared error            102.6913 %
Total Number of Instances             1151     

=== Detailed Accuracy By Class ===

                 TP Rate  FP Rate  Precision  Recall   F-Measure  MCC      ROC Area  PRC Area  Class
                 0.680    0.388    0.608      0.680    0.642      0.292    0.685     0.614     0
                 0.612    0.320    0.684      0.612    0.646      0.292    0.685     0.694     1
Weighted Avg.    0.644    0.352    0.648      0.644    0.644      0.292    0.685     0.656     

=== Confusion Matrix ===

   a   b   <-- classified as
 367 173 |   a = 0
 237 374 |   b = 1



In [8]:
# This cell builds a single weka.classifiers.trees.RandomTree, not a forest.
# The original note here recorded weka.classifiers.trees.RandomForest with
#   -P 100 -I 100 -num-slots 1 -K 0 -M 1.0 -V 0.001 -S 1
# and the RandomTree below is given the -K/-M/-V/-S part of that option string.
rt = Classifier(
    classname="weka.classifiers.trees.RandomTree",
    options=["-K", "0", "-M", "1.0", "-V", "0.001", "-S", "1"],
)
rt.build_classifier(data)

# Print the textual description of the fitted tree.
print(rt)



RandomTree

2 < 55.5
|   14 < 0.06
|   |   2 < 17.5
|   |   |   11 < 0.02
|   |   |   |   7 < 10
|   |   |   |   |   3 < 3.5
|   |   |   |   |   |   9 < 21.07
|   |   |   |   |   |   |   14 < 0 : 0 (8/0)
|   |   |   |   |   |   |   14 >= 0 : 1 (1/0)
|   |   |   |   |   |   9 >= 21.07 : 1 (1/0)
|   |   |   |   |   3 >= 3.5
|   |   |   |   |   |   17 < 0.14
|   |   |   |   |   |   |   16 < 0.57
|   |   |   |   |   |   |   |   16 < 0.51
|   |   |   |   |   |   |   |   |   7 < 8.5
|   |   |   |   |   |   |   |   |   |   9 < 10.75 : 1 (4/0)
|   |   |   |   |   |   |   |   |   |   9 >= 10.75 : 0 (2/0)
|   |   |   |   |   |   |   |   |   7 >= 8.5 : 0 (2/0)
|   |   |   |   |   |   |   |   16 >= 0.51
|   |   |   |   |   |   |   |   |   10 < 0.42 : 1 (11/0)
|   |   |   |   |   |   |   |   |   10 >= 0.42
|   |   |   |   |   |   |   |   |   |   11 < 0.01
|   |   |   |   |   |   |   |   |   |   |   9 < 7.1 : 0 (5/0)
|   |   |   |   |   |   |   |   |   |   |   9 >= 7.1 : 1 (3/0)
|   |   |   |   |  

In [9]:
# 10-fold cross-validation of the RandomTree setup on `data`, using seed 1.
# The seeded generator is built directly from the imported `classes` module
# instead of the `rnd` alias created in an earlier cell, so this cell does not
# depend on that cell having been executed.
evl_rt = Evaluation(data)
evl_rt.crossvalidate_model(rt, data, 10, classes.Random(1))

# Emit the three reports in order: summary, per-class details, confusion matrix.
print("=== Summary ===")
print(
    evl_rt.summary(),
    evl_rt.class_details(),
    evl_rt.matrix(),
    sep="\n",
)

=== Summary ===

Correctly Classified Instances         710               61.6855 %
Incorrectly Classified Instances       441               38.3145 %
Kappa statistic                          0.231 
Mean absolute error                      0.3831
Root mean squared error                  0.619 
Relative absolute error                 76.9211 %
Root relative squared error            124.0335 %
Total Number of Instances             1151     

=== Detailed Accuracy By Class ===

                 TP Rate  FP Rate  Precision  Recall   F-Measure  MCC      ROC Area  PRC Area  Class
                 0.594    0.363    0.591      0.594    0.593      0.231    0.616     0.542     0
                 0.637    0.406    0.640      0.637    0.638      0.231    0.616     0.600     1
Weighted Avg.    0.617    0.386    0.617      0.617    0.617      0.231    0.616     0.573     

=== Confusion Matrix ===

   a   b   <-- classified as
 321 219 |   a = 0
 222 389 |   b = 1

