In [1]:
%lsmagic

Available line magics:
%alias  %alias_magic  %autocall  %automagic  %autosave  %bookmark  %cd  %clear  %cls  %colors  %config  %connect_info  %copy  %ddir  %debug  %dhist  %dirs  %doctest_mode  %echo  %ed  %edit  %env  %gui  %hist  %history  %killbgscripts  %ldir  %less  %load  %load_ext  %loadpy  %logoff  %logon  %logstart  %logstate  %logstop  %ls  %lsmagic  %macro  %magic  %matplotlib  %mkdir  %more  %notebook  %page  %pastebin  %pdb  %pdef  %pdoc  %pfile  %pinfo  %pinfo2  %popd  %pprint  %precision  %profile  %prun  %psearch  %psource  %pushd  %pwd  %pycat  %pylab  %qtconsole  %quickref  %recall  %rehashx  %reload_ext  %ren  %rep  %rerun  %reset  %reset_selective  %rmdir  %run  %save  %sc  %set_env  %store  %sx  %system  %tb  %time  %timeit  %unalias  %unload_ext  %who  %who_ls  %whos  %xdel  %xmode

Available cell magics:
%%!  %%HTML  %%SVG  %%bash  %%capture  %%cmd  %%debug  %%file  %%html  %%javascript  %%js  %%latex  %%perl  %%prun  %%pypy  %%python  %%python2  %%python3  %%rub

In [2]:
%matplotlib inline

In [3]:
import sys
import os
import tempfile

import pandas as pd

import weka.core.jvm as jvm
import pygraphviz
import PIL
from PIL import Image
from weka.core.converters import Loader
# import weka.core.ContingencyTables.entropy
from weka.classifiers import Classifier, PredictionOutput, Evaluation
import weka.plot.graph as graph  # NB: pygraphviz and PIL are required
from weka.core.converters import Loader, Saver
import traceback
import weka.core.serialization as serialization
import weka.plot.graph as plot_graph
import weka.plot.classifiers as plot_cls

# Functions

In [4]:
def load_data(data_train, data_testing):
    """Load the training and testing ARFF files as Weka datasets.

    Args:
        data_train: path of the training file, without the ".arff" suffix.
        data_testing: path of the testing file, without the ".arff" suffix.

    Returns:
        A ``(train, testing)`` tuple of the loaded datasets, each with
        ``class_is_last()`` applied.
    """
    arff_loader = Loader(classname="weka.core.converters.ArffLoader")
    train, testing = [arff_loader.load_file(stem + ".arff")
                      for stem in (data_train, data_testing)]

    for dataset in (train, testing):
        dataset.class_is_last()
    # Explicitly (re)assigns the class index of the training set to the
    # position of its final attribute.
    train.class_index = train.num_attributes - 1

    return train, testing

def build_tree(train, testing, directory):
    """Train an unpruned J48 tree and persist the model, its text dump and its plot.

    Args:
        train: Weka dataset the classifier is built from.
        testing: not used here; kept so existing callers keep working.
        directory: output path prefix; callers in this notebook pass a folder
            path ending in "/".

    Returns:
        The trained classifier.

    Side effects:
        Writes "Unpruned_j48.model", "Unpruned_tree.txt" and
        "Unpruned_Result_tree.png" under ``directory``.
    """
    cls = Classifier(classname="weka.classifiers.trees.J48")
    cls.options = ["-U"]  # J48 "-U": use the unpruned tree
    cls.build_classifier(train)

    serialization.write(directory + "Unpruned_j48.model", cls)

    # `with` closes the handle even when formatting or writing raises.
    with open(directory + "Unpruned_tree.txt", "w+") as tree_file:
        tree_file.write('{}'.format(cls))

    graph.plot_dot_graph(cls.graph, filename=directory + "Unpruned_Result_tree.png")
    return cls


def testing_evaluation(cls, train, testing, directory):
    """Evaluate a trained classifier on the test set and export its predictions.

    Prints the per-class details and the evaluation summary, writes one row per
    test instance to "Unpruned_TestingResult.csv" under ``directory`` and
    records the evaluation through ``summary()`` with a confidence value of 0.

    Args:
        cls: trained classifier.
        train: dataset used to initialise the ``Evaluation`` object.
        testing: dataset the classifier is tested on.
        directory: output path prefix (folder path ending in "/").

    Returns:
        A ``(df, evaluation, summ)`` tuple: the per-instance prediction frame,
        the ``Evaluation`` object and the list returned by ``summary()``.
    """
    evaluation = Evaluation(train, cost_matrix=None)
    evaluation.test_model(cls, testing)
    print(evaluation.class_details())
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(evaluation.summary(title="Summary", complexity=False))

    cols = ['Actual', 'Predicted', 'Error', 'Distribution']
    records = []
    for inst in testing:
        pred = cls.classify_instance(inst)
        dist = cls.distribution_for_instance(inst)
        records.append({
            'Actual': inst.get_string_value(inst.class_index),
            'Predicted': inst.class_attribute.value(int(pred)),
            # "yes" marks a misclassified instance.
            'Error': "yes" if pred != inst.get_value(inst.class_index) else "no",
            'Distribution': ['%.2f' % prob for prob in dist.tolist()],
        })

    # Build the frame once from the collected records; DataFrame.append was
    # removed from pandas and an explicit column list keeps the column order.
    df = pd.DataFrame(records, columns=cols)
    df.to_csv(directory + 'Unpruned_TestingResult.csv', index=False)

    summ = summary(directory, evaluation, [], 0)
    return df, evaluation, summ

    
    
def summary(directory, evaluation, list_summ, confval):
    """Flatten one evaluation into a token row and dump its confusion matrix.

    The row consists of the whitespace-split tokens of ``evaluation.summary()``,
    followed by ``0``, ``confval`` and, for each class row of
    ``evaluation.class_details()``, "<precision>-<class>" and "<recall>-<class>".

    Args:
        directory: output path prefix (folder path ending in "/").
        evaluation: object providing ``summary()``, ``class_details()`` and
            ``matrix()``.
        list_summ: list the new row is appended to (mutated in place).
        confval: confidence value of the evaluated tree; also used as the
            prefix of the confusion-matrix file name.

    Returns:
        ``list_summ`` (the same list object that was passed in).

    Raises:
        IndexError: if ``class_details()`` has fewer tokens than three class
            rows require.

    Side effects:
        Writes "<confval>Confusion Matrix.txt" under ``directory``.
    """
    sum_item = evaluation.summary().split()
    class_detail = evaluation.class_details()
    class_detail_item = class_detail.split()

    sum_item.append(0)
    sum_item.append(confval)

    # Token layout of class_details().split(): 19 title/header tokens, then
    # 9 tokens per class row (TP, FP, Precision, Recall, F-Measure, MCC, ROC,
    # PRC, class name) and a final 10-token "Weighted Avg." row.
    header_tokens = 19
    row_tokens = 9
    precision_offset, recall_offset, label_offset = 2, 3, 8

    # Three class rows are always read; a fourth only when the token count
    # matches exactly four classes (19 + 4 * 9 + 10 == 65).
    # NOTE(review): other class counts are not handled by this layout.
    class_rows = 4 if len(class_detail_item) == 65 else 3
    for row in range(class_rows):
        base = header_tokens + row * row_tokens
        label = class_detail_item[base + label_offset]
        sum_item.append(class_detail_item[base + precision_offset] + "-" + label)
        sum_item.append(class_detail_item[base + recall_offset] + "-" + label)

    list_summ.append(sum_item)

    conf_matrix = evaluation.matrix()
    # `with` closes the handle even when the write raises.
    with open(directory + str(confval) + "Confusion Matrix.txt", "w+") as report:
        report.write('{} \n {}'.format(conf_matrix, class_detail))

    return list_summ
    
def pruning_tree(directory, training, testing, list_summ, summary_):
    """Train and evaluate pruned J48 trees over a sweep of confidence values.

    The sweep runs from 0.5 down to 0.02 in steps of 0.01. For every value the
    model and its text dump are written under ``directory`` and the evaluation
    on ``testing`` is recorded through ``summary()``.

    Args:
        directory: output path prefix (folder path ending in "/").
        training: dataset the trees are built from.
        testing: dataset the trees are evaluated on.
        list_summ: list of already collected summary rows; the new rows are
            appended to it by ``summary()``.
        summary_: DataFrame the collected rows are concatenated onto.

    Returns:
        ``summary_`` extended by every row in ``list_summ``, with a fresh
        0..n-1 index. The same frame is written to "summ_temp.csv".
    """
    for value in range(50, 1, -1):
        confval = str(float(value) / 100)

        cls = Classifier(classname="weka.classifiers.trees.J48")
        cls.options = ["-C", confval]  # J48 "-C": pruning confidence threshold

        # The double space reproduces the output of the former two-item
        # Python 2 print statement.
        print("__________Pruning tree with Confidence Value :  {}".format(confval))
        print(cls.options)

        cls.build_classifier(training)

        serialization.write(directory + confval + "pruning_j48.model", cls)
        # `with` closes the handle even when the write raises.
        with open(directory + confval + "_pruning_tree.txt", "w+") as tree_file:
            tree_file.write('{}'.format(cls))

        evaluation = Evaluation(training, cost_matrix=None)
        evaluation.test_model(cls, testing)

        # summary() returns the accumulator it was given; rebinding it here
        # keeps the name defined regardless of how the loop ran.
        list_summ = summary(directory, evaluation, list_summ, confval)

    # pd.concat replaces DataFrame.append, which was removed from pandas.
    summary_ = pd.concat([summary_, pd.DataFrame(list_summ)])
    summary_ = summary_.reset_index(drop=True)
    summary_.to_csv(directory + 'summ_temp.csv', index=False)

    return summary_


def re_summary(summary, directory):
    """Pick the metric columns out of the raw token table and save them.

    Args:
        summary: DataFrame with one row per evaluated tree and one column per
            token as produced by ``summary()`` / ``pruning_tree()`` (48
            columns, or 50 when a fourth class is present).
        directory: output path prefix (folder path ending in "/").

    Returns:
        A DataFrame with named metric columns, ordered by ascending confidence
        value and indexed 0..n-1. The same frame is written to "Summary.csv".

    Raises:
        IndexError: if ``summary`` has fewer columns than the positions below.
    """
    # (output column, position in the raw token table). The labels are kept
    # as-is because they end up as CSV headers; note that 'MEE' holds the
    # "Mean absolute error" token and 'Root Relative abs. error' holds the
    # "Root relative squared error" token.
    column_sources = [
        ('Confidence Value', 41),
        ('Correctly Classified', 4),
        ('Incorrectly Classified', 10),
        ('Kappa', 14),
        ('MEE', 18),
        ('RMSE', 23),
        ('Relative abs. error', 27),
        ('Root Relative abs. error', 33),
        ('precision1', 42),
        ('recall1', 43),
        ('precision2', 44),
        ('recall2', 45),
        ('precision3', 46),
        ('recall3', 47),
    ]
    if len(summary.columns) == 50:
        column_sources += [('precision4', 48), ('recall4', 49)]

    new_summary = summary.iloc[:, [position for _, position in column_sources]].copy()
    new_summary.columns = [name for name, _ in column_sources]

    # The confidence column mixes the int 0 (unpruned tree) with strings such
    # as "0.25". Order the rows by numeric value instead of relying on
    # mixed-type / lexicographic comparison; the stored values stay untouched.
    order = new_summary['Confidence Value'].astype(float).values.argsort(kind='mergesort')
    new_summary = new_summary.iloc[order].reset_index(drop=True)

    new_summary.to_csv(directory + 'Summary.csv')
    return new_summary

    
def data_demografis(data, cls):
    """Select the demographic columns plus the target column for one task.

    Args:
        data: DataFrame containing the demographic columns and the target.
        cls: "1" selects the IPK target ('IPK_TERAKHIR') and also writes the
            selection to disk; "2" selects the graduation-status target
            ('STATUS_LULUS').

    Returns:
        The selected columns. For any other ``cls`` an empty frame with the
        columns of ``data`` is returned.
    """
    demographic_cols = ['JUR', 'JK', 'GAJI_ORTU', 'PDK_IBU', 'PEK_IBU', 'PEK_AYAH']

    datax = pd.DataFrame(columns=data.columns)
    if cls == "1":  # IPK
        # Several columns must be selected with a list; a bare tuple is looked
        # up as a single column key and raises KeyError.
        datax = data[demographic_cols + ['IPK_TERAKHIR']]
        datax.to_csv("DataResult/final/uji/data_uji_mipa_ipk_demografis")
    if cls == "2":  # STATUS LULUS
        datax = data[demographic_cols + ['STATUS_LULUS']]

    return datax
    


# Main Function

In [None]:
def _run_experiment(data_train, data_testing, directory):
    """Run load -> unpruned tree -> evaluation -> pruning sweep -> summary once."""
    # Every step writes below `directory`; create it up front so a missing
    # folder does not abort the run.
    if not os.path.isdir(directory):
        os.makedirs(directory)

    train, testing = load_data(data_train, data_testing)
    cls = build_tree(train, testing, directory)
    _, _, unpruned_rows = testing_evaluation(cls, train, testing, directory)
    raw_summary = pruning_tree(directory, train, testing, unpruned_rows, pd.DataFrame())
    return re_summary(raw_summary, directory)


def main(args):
    """Run the J48 pipeline for every dataset variant used in the study.

    Sixteen experiments are executed in order: original, oversampled, random
    test split and demographic-only data, each for the mipa/sosial faculties
    and the ipk/lulus targets.

    Args:
        args: command-line arguments; currently unused.
    """
    # Each group is (banner or None, experiments). Each experiment is
    # (label, training file stem, testing file stem, output directory,
    #  echo_train_path). File stems are given without the ".arff" suffix.
    # echo_train_path reproduces the extra training-path line that some of the
    # original sections printed.
    experiment_groups = [
        (None, [
            ("Data mipa kelas ipk",
             "DataPreprocessing/data_mipa_ipk",
             "DataPreprocessing/data_uji_mipa_ipk",
             "DataResult/original/mipa-ipk/", False),
            ("Data mipa kelas lulus",
             "DataPreprocessing/data_mipa_lulus",
             "DataPreprocessing/data_uji_mipa_lulus",
             "DataResult/original/mipa-lulus/", False),
            ("Data sosial kelas ipk",
             "DataPreprocessing/data_sosial_ipk",
             "DataPreprocessing/data_uji_sosial_ipk",
             "DataResult/original/sosial-ipk/", False),
            ("Data sosial kelas lulus",
             "DataPreprocessing/data_sosial_lulus",
             "DataPreprocessing/data_uji_sosial_lulus",
             "DataResult/original/sosial-lulus/", False),
        ]),
        ("-------------Oversampling------------", [
            ("Data mipa kelas ipk - oversampling",
             "DataPreprocessing/data_mipa_ipk_over",
             "DataPreprocessing/data_uji_mipa_ipk",
             "DataResult/oversampling/mipa-ipk/", False),
            ("Data mipa kelas lulus - oversampling",
             "DataPreprocessing/data_mipa_lulus_over",
             "DataPreprocessing/data_uji_mipa_lulus",
             "DataResult/oversampling/mipa-lulus/", False),
            ("Data sosial kelas ipk - oversampling",
             "DataPreprocessing/data_sosial_ipk_over",
             "DataPreprocessing/data_uji_sosial_ipk",
             "DataResult/oversampling/sosial-ipk/", True),
            ("Data Sosial kelas lulus - oversampling",
             "DataPreprocessing/data_sosial_lulus_over",
             "DataPreprocessing/data_uji_sosial_lulus",
             "DataResult/oversampling/sosial-lulus/", False),
        ]),
        ("---------------------------------------Random data Uji---------------------------------------", [
            ("Data mipa kelas ipk - Data Uji Random",
             "DataPreprocessing/data_mipa_ipk_random",
             "DataPreprocessing/data_uji_mipa_ipk_random",
             "DataResult/random/mipa-ipk/", True),
            ("Data mipa kelas lulus - Data Uji Random",
             "DataPreprocessing/data_mipa_lulus_random",
             "DataPreprocessing/data_uji_mipa_lulus_random",
             "DataResult/random/mipa-lulus/", True),
            ("Data sosial kelas ipk - data uji random",
             "DataPreprocessing/data_sosial_ipk_random",
             "DataPreprocessing/data_uji_sosial_ipk_random",
             "DataResult/random/sosial-ipk/", True),
            ("Data sosial kelas lulus - Data Uji Random",
             "DataPreprocessing/data_sosial_lulus_random",
             "DataPreprocessing/data_uji_sosial_lulus_random",
             "DataResult/random/sosial-lulus/", True),
        ]),
        ("-------------Demografis------------", [
            ("Data mipa kelas ipk - Data demografis",
             "DataPreprocessing/data_mipa_ipk_demografis",
             "DataPreprocessing/data_uji_mipa_ipk_dem",
             "DataResult/demografis/mipa-ipk/", True),
            ("Data mipa kelas lulus - Data demografis",
             "DataPreprocessing/data_mipa_lulus_demografis",
             "DataPreprocessing/data_uji_mipa_lulus_dem",
             "DataResult/demografis/mipa-lulus/", True),
            ("Data sosial kelas ipk - Data demografis",
             "DataPreprocessing/data_sosial_ipk_demografis",
             "DataPreprocessing/data_uji_sosial_ipk_dem",
             "DataResult/demografis/sosial-ipk/", True),
            # The label used to say "ipk" although this run uses the lulus files.
            ("Data sosial kelas lulus - Data Demografis",
             "DataPreprocessing/data_sosial_lulus_demografis",
             "DataPreprocessing/data_uji_sosial_lulus_dem",
             "DataResult/demografis/sosial-lulus/", True),
        ]),
    ]

    for banner, experiments in experiment_groups:
        if banner is not None:
            print(banner)
        for label, data_train, data_testing, directory, echo_train_path in experiments:
            print(label)
            if echo_train_path:
                print(data_train)
            _run_experiment(data_train, data_testing, directory)
   



    
    
if __name__ == "__main__":
    # Start the JVM, run every experiment and always shut the JVM down again.
    # Failures are reported as a printed traceback instead of propagating, so
    # that jvm.stop() is still reached and the notebook cell finishes.
    try:
        jvm.start()
        main(sys.argv)
    except Exception:  # "except Exception, e" is Python-2-only syntax; e was unused
        print(traceback.format_exc())
    finally:
        jvm.stop()


DEBUG:weka.core.jvm:Adding bundled jars
DEBUG:weka.core.jvm:Classpath=['c:\\python27\\lib\\site-packages\\javabridge\\jars\\rhino-1.7R4.jar', 'c:\\python27\\lib\\site-packages\\javabridge\\jars\\runnablequeue.jar', 'c:\\python27\\lib\\site-packages\\javabridge\\jars\\cpython.jar', 'c:\\python27\\lib\\site-packages\\weka\\lib\\python-weka-wrapper.jar', 'c:\\python27\\lib\\site-packages\\weka\\lib\\weka.jar']
DEBUG:weka.core.jvm:MaxHeapSize=default
DEBUG:weka.core.jvm:Package support disabled
DEBUG:javabridge.jutil:Creating JVM object
DEBUG:javabridge.jutil:Signalling caller


Data mipa kelas ipk



DEBUG:PIL.PngImagePlugin:STREAM 'IHDR' 16 13
DEBUG:PIL.PngImagePlugin:STREAM 'bKGD' 41 6
DEBUG:PIL.PngImagePlugin:'bKGD' 41 6 (unknown)
DEBUG:PIL.PngImagePlugin:STREAM 'IDAT' 59 8192


=== Detailed Accuracy By Class ===

                 TP Rate  FP Rate  Precision  Recall   F-Measure  MCC      ROC Area  PRC Area  Class
                 0,592    0,055    0,617      0,592    0,604      0,546    0,852     0,536     IPK_CUM
                 0,304    0,062    0,241      0,304    0,269      0,217    0,685     0,178     IPK_KM
                 0,458    0,118    0,478      0,458    0,468      0,346    0,691     0,368     IPK_M
                 0,780    0,347    0,784      0,780    0,782      0,432    0,726     0,765     IPK_SM
Weighted Avg.    0,665    0,248    0,670      0,665    0,667      0,417    0,733     0,623     

Summary
Correctly Classified Instances         250               66.4894 %
Incorrectly Classified Instances       126               33.5106 %
Kappa statistic                          0.4067
Mean absolute error                      0.184 
Root mean squared error                  0.372 
Relative absolute error                 65.1847 %
Root relative squared 


DEBUG:PIL.PngImagePlugin:STREAM 'IHDR' 16 13
DEBUG:PIL.PngImagePlugin:STREAM 'bKGD' 41 6
DEBUG:PIL.PngImagePlugin:'bKGD' 41 6 (unknown)
DEBUG:PIL.PngImagePlugin:STREAM 'IDAT' 59 8192


=== Detailed Accuracy By Class ===

                 TP Rate  FP Rate  Precision  Recall   F-Measure  MCC      ROC Area  PRC Area  Class
                 0,000    0,019    0,000      0,000    0,000      -0,022   0,473     0,024     LC
                 0,792    0,563    0,769      0,792    0,781      0,235    0,627     0,758     LLW
                 0,408    0,201    0,433      0,408    0,420      0,211    0,628     0,377     LTW
Weighted Avg.    0,668    0,451    0,659      0,668    0,663      0,222    0,623     0,636     

Summary
Correctly Classified Instances         252               66.8435 %
Incorrectly Classified Instances       125               33.1565 %
Kappa statistic                          0.211 
Mean absolute error                      0.2428
Root mean squared error                  0.4322
Relative absolute error                 84.0922 %
Root relative squared error            114.0633 %
Total Number of Instances              377     

__________Pruning tree with Confide


DEBUG:PIL.PngImagePlugin:STREAM 'IHDR' 16 13
DEBUG:PIL.PngImagePlugin:STREAM 'bKGD' 41 6
DEBUG:PIL.PngImagePlugin:'bKGD' 41 6 (unknown)
DEBUG:PIL.PngImagePlugin:STREAM 'IDAT' 59 8192


=== Detailed Accuracy By Class ===

                 TP Rate  FP Rate  Precision  Recall   F-Measure  MCC      ROC Area  PRC Area  Class
                 0,639    0,130    0,634      0,639    0,637      0,508    0,818     0,609     IPK_CUM
                 0,255    0,069    0,255      0,255    0,255      0,186    0,640     0,172     IPK_KM
                 0,222    0,029    0,348      0,222    0,271      0,239    0,687     0,186     IPK_M
                 0,715    0,463    0,689      0,715    0,702      0,254    0,651     0,680     IPK_SM
Weighted Avg.    0,624    0,314    0,616      0,624    0,619      0,314    0,696     0,586     

Summary
Correctly Classified Instances         345               62.387  %
Incorrectly Classified Instances       208               37.613  %
Kappa statistic                          0.3301
Mean absolute error                      0.1962
Root mean squared error                  0.3848
Relative absolute error                 68.2748 %
Root relative squared 


DEBUG:PIL.PngImagePlugin:STREAM 'IHDR' 16 13
DEBUG:PIL.PngImagePlugin:STREAM 'bKGD' 41 6
DEBUG:PIL.PngImagePlugin:'bKGD' 41 6 (unknown)
DEBUG:PIL.PngImagePlugin:STREAM 'IDAT' 59 8192


=== Detailed Accuracy By Class ===

                 TP Rate  FP Rate  Precision  Recall   F-Measure  MCC      ROC Area  PRC Area  Class
                 0,000    0,030    0,000      0,000    0,000      -0,029   0,550     0,037     LC
                 0,744    0,420    0,672      0,744    0,706      0,329    0,683     0,676     LLW
                 0,562    0,234    0,651      0,562    0,603      0,336    0,694     0,594     LTW
Weighted Avg.    0,644    0,328    0,644      0,644    0,642      0,322    0,684     0,623     

Summary
Correctly Classified Instances         357               64.4404 %
Incorrectly Classified Instances       197               35.5596 %
Kappa statistic                          0.3109
Mean absolute error                      0.2562
Root mean squared error                  0.4372
Relative absolute error                 73.7537 %
Root relative squared error            104.9115 %
Total Number of Instances              554     

__________Pruning tree with Confide


DEBUG:PIL.PngImagePlugin:STREAM 'IHDR' 16 13
DEBUG:PIL.PngImagePlugin:STREAM 'bKGD' 41 6
DEBUG:PIL.PngImagePlugin:'bKGD' 41 6 (unknown)
DEBUG:PIL.PngImagePlugin:STREAM 'IDAT' 59 8192


=== Detailed Accuracy By Class ===

                 TP Rate  FP Rate  Precision  Recall   F-Measure  MCC      ROC Area  PRC Area  Class
                 0,898    0,055    0,710      0,898    0,793      0,765    0,937     0,803     IPK_CUM
                 0,826    0,031    0,633      0,826    0,717      0,703    0,905     0,720     IPK_KM
                 0,847    0,066    0,753      0,847    0,797      0,748    0,938     0,801     IPK_M
                 0,828    0,076    0,946      0,828    0,883      0,733    0,926     0,932     IPK_SM
Weighted Avg.    0,840    0,069    0,859      0,840    0,845      0,738    0,929     0,878     

Summary
Correctly Classified Instances         316               84.0426 %
Incorrectly Classified Instances        60               15.9574 %
Kappa statistic                          0.7337
Mean absolute error                      0.0797
Root mean squared error                  0.254 
Relative absolute error                 21.2549 %
Root relative squared 


DEBUG:PIL.PngImagePlugin:STREAM 'IHDR' 16 13
DEBUG:PIL.PngImagePlugin:STREAM 'bKGD' 41 6
DEBUG:PIL.PngImagePlugin:'bKGD' 41 6 (unknown)
DEBUG:PIL.PngImagePlugin:STREAM 'IDAT' 59 8192


=== Detailed Accuracy By Class ===

                 TP Rate  FP Rate  Precision  Recall   F-Measure  MCC      ROC Area  PRC Area  Class
                 0,556    0,024    0,357      0,556    0,435      0,429    0,768     0,356     LC
                 0,842    0,250    0,888      0,842    0,864      0,573    0,825     0,884     LLW
                 0,689    0,150    0,634      0,689    0,660      0,526    0,810     0,604     LTW
Weighted Avg.    0,793    0,217    0,806      0,793    0,798      0,557    0,819     0,795     

Summary
Correctly Classified Instances         299               79.3103 %
Incorrectly Classified Instances        78               20.6897 %
Kappa statistic                          0.5402
Mean absolute error                      0.1419
Root mean squared error                  0.3444
Relative absolute error                 31.9382 %
Root relative squared error             73.0512 %
Total Number of Instances              377     

__________Pruning tree with Confide


DEBUG:PIL.PngImagePlugin:STREAM 'IHDR' 16 13
DEBUG:PIL.PngImagePlugin:STREAM 'bKGD' 41 6
DEBUG:PIL.PngImagePlugin:'bKGD' 41 6 (unknown)
DEBUG:PIL.PngImagePlugin:STREAM 'IDAT' 59 8192


=== Detailed Accuracy By Class ===

                 TP Rate  FP Rate  Precision  Recall   F-Measure  MCC      ROC Area  PRC Area  Class
                 0,896    0,088    0,782      0,896    0,835      0,775    0,947     0,881     IPK_CUM
                 0,851    0,051    0,606      0,851    0,708      0,688    0,919     0,670     IPK_KM
                 0,861    0,031    0,660      0,861    0,747      0,734    0,919     0,695     IPK_M
                 0,773    0,101    0,916      0,773    0,839      0,661    0,903     0,906     IPK_SM
Weighted Avg.    0,817    0,089    0,838      0,817    0,821      0,698    0,917     0,865     

Summary
Correctly Classified Instances         452               81.736  %
Incorrectly Classified Instances       101               18.264  %
Kappa statistic                          0.7023
Mean absolute error                      0.0918
Root mean squared error                  0.2642
Relative absolute error                 24.4795 %
Root relative squared 


DEBUG:PIL.PngImagePlugin:STREAM 'IHDR' 16 13
DEBUG:PIL.PngImagePlugin:STREAM 'bKGD' 41 6
DEBUG:PIL.PngImagePlugin:'bKGD' 41 6 (unknown)
DEBUG:PIL.PngImagePlugin:STREAM 'IDAT' 59 8192


=== Detailed Accuracy By Class ===

                 TP Rate  FP Rate  Precision  Recall   F-Measure  MCC      ROC Area  PRC Area  Class
                 0,800    0,037    0,375      0,800    0,511      0,531    0,889     0,510     LC
                 0,811    0,191    0,831      0,811    0,821      0,620    0,890     0,900     LLW
                 0,773    0,144    0,806      0,773    0,789      0,632    0,900     0,843     LTW
Weighted Avg.    0,794    0,166    0,808      0,794    0,799      0,623    0,895     0,865     

Summary
Correctly Classified Instances         440               79.4224 %
Incorrectly Classified Instances       114               20.5776 %
Kappa statistic                          0.6153
Mean absolute error                      0.1502
Root mean squared error                  0.3173
Relative absolute error                 33.8052 %
Root relative squared error             67.3025 %
Total Number of Instances              554     

__________Pruning tree with Confide


DEBUG:PIL.PngImagePlugin:STREAM 'IHDR' 16 13
DEBUG:PIL.PngImagePlugin:STREAM 'bKGD' 41 6
DEBUG:PIL.PngImagePlugin:'bKGD' 41 6 (unknown)
DEBUG:PIL.PngImagePlugin:STREAM 'IDAT' 59 8192


=== Detailed Accuracy By Class ===

                 TP Rate  FP Rate  Precision  Recall   F-Measure  MCC      ROC Area  PRC Area  Class
                 0,479    0,042    0,622      0,479    0,541      0,489    0,848     0,540     IPK_CUM
                 0,161    0,078    0,156      0,161    0,159      0,082    0,592     0,112     IPK_KM
                 0,359    0,156    0,319      0,359    0,338      0,194    0,683     0,277     IPK_M
                 0,753    0,420    0,747      0,753    0,750      0,335    0,703     0,748     IPK_SM
Weighted Avg.    0,603    0,299    0,610      0,603    0,605      0,310    0,709     0,590     

Summary
Correctly Classified Instances         228               60.3175 %
Incorrectly Classified Instances       150               39.6825 %
Kappa statistic                          0.2896
Mean absolute error                      0.2003
Root mean squared error                  0.3903
Relative absolute error                 70.9047 %
Root relative squared 

DEBUG:PIL.PngImagePlugin:STREAM 'IHDR' 16 13
DEBUG:PIL.PngImagePlugin:STREAM 'bKGD' 41 6
DEBUG:PIL.PngImagePlugin:'bKGD' 41 6 (unknown)
DEBUG:PIL.PngImagePlugin:STREAM 'IDAT' 59 8192


=== Detailed Accuracy By Class ===

                 TP Rate  FP Rate  Precision  Recall   F-Measure  MCC      ROC Area  PRC Area  Class
                 0,250    0,024    0,100      0,250    0,143      0,144    0,708     0,149     LC
                 0,863    0,574    0,790      0,863    0,825      0,315    0,687     0,805     LLW
                 0,375    0,124    0,534      0,375    0,441      0,284    0,673     0,428     LTW
Weighted Avg.    0,722    0,444    0,712      0,722    0,712      0,305    0,683     0,694     

Summary
Correctly Classified Instances         273               72.2222 %
Incorrectly Classified Instances       105               27.7778 %
Kappa statistic                          0.2862
Mean absolute error                      0.2109
Root mean squared error                  0.3869
Relative absolute error                 73.9347 %
Root relative squared error            104.0434 %
Total Number of Instances              378     

__________Pruning tree with Confide


DEBUG:PIL.PngImagePlugin:STREAM 'IHDR' 16 13
DEBUG:PIL.PngImagePlugin:STREAM 'bKGD' 41 6
DEBUG:PIL.PngImagePlugin:'bKGD' 41 6 (unknown)
DEBUG:PIL.PngImagePlugin:STREAM 'IDAT' 59 8192


=== Detailed Accuracy By Class ===

                 TP Rate  FP Rate  Precision  Recall   F-Measure  MCC      ROC Area  PRC Area  Class
                 0,703    0,142    0,622      0,703    0,660      0,540    0,811     0,590     IPK_CUM
                 0,217    0,069    0,222      0,217    0,220      0,150    0,615     0,129     IPK_KM
                 0,371    0,077    0,245      0,371    0,295      0,243    0,751     0,190     IPK_M
                 0,639    0,393    0,713      0,639    0,674      0,241    0,646     0,686     IPK_SM
Weighted Avg.    0,603    0,283    0,620      0,603    0,609      0,308    0,691     0,584     

Summary
Correctly Classified Instances         334               60.2888 %
Incorrectly Classified Instances       220               39.7112 %
Kappa statistic                          0.3265
Mean absolute error                      0.2076
Root mean squared error                  0.389 
Relative absolute error                 72.672  %
Root relative squared 

# Summaries

## Summary data Original IPA IPK

In [None]:
# Display the saved summary table for the original MIPA / IPK experiment.
# NOTE(review): re_summary() writes "Summary.csv" below "DataResult/...", while
# this cell reads "summary.csv" below "WekaResultClassifier/final/..." —
# confirm the result files were copied or renamed accordingly.

df1= pd.read_csv('WekaResultClassifier/final/original/mipa-ipk/summary.csv',index_col=0)
display(df1)



## Summary data Original IPA Lulus

In [None]:
# Display the saved summary table for the original MIPA / lulus experiment
# (first CSV column is used as the index).
df1= pd.read_csv('WekaResultClassifier/final/original/mipa-lulus/summary.csv',index_col=0)
display(df1)


## Summary data Original Sosial IPK

In [None]:
# Display the saved summary table for the original sosial / IPK experiment
# (first CSV column is used as the index).
df1= pd.read_csv('WekaResultClassifier/final/original/sosial-ipk/summary.csv',index_col=0)
display(df1)


## Summary data Original Sosial Lulus

In [None]:
# Display the saved summary table for the original sosial / lulus experiment
# (first CSV column is used as the index).
df1= pd.read_csv('WekaResultClassifier/final/original/sosial-lulus/summary.csv',index_col=0)
display(df1)


## Summary data Oversampling IPA IPK

In [None]:
# Display the saved summary table for the oversampled MIPA / IPK experiment
# (first CSV column is used as the index).
df1= pd.read_csv('WekaResultClassifier/final/oversampling/mipa-ipk/summary.csv',index_col=0)
display(df1)


## Summary data Oversampling IPA Lulus

In [None]:
# Display the saved summary table for the oversampled MIPA / lulus experiment
# (first CSV column is used as the index).
df1= pd.read_csv('WekaResultClassifier/final/oversampling/mipa-lulus/summary.csv',index_col=0)
display(df1)


## Summary data Oversampling Sosial IPK

In [None]:
# Display the saved summary table for the oversampled sosial / IPK experiment
# (first CSV column is used as the index).
df1= pd.read_csv('WekaResultClassifier/final/oversampling/sosial-ipk/summary.csv',index_col=0)
display(df1)


## Summary data Oversampling Sosial Lulus

In [None]:
# Display the saved summary table for the oversampled sosial / lulus experiment
# (first CSV column is used as the index).
df1= pd.read_csv('WekaResultClassifier/final/oversampling/sosial-lulus/summary.csv',index_col=0)
display(df1)
