In [16]:
# Imports
from luigi.contrib.spark import PySparkTask
from luigi.parameter import IntParameter, DateSecondParameter
from luigi import LocalTarget, Task, WrapperTask
import datetime
import pandas as pd
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
%run RulebasedClassifier.ipynb

class Evaluator(Task):

    # Date for Output-File prefix
    from datetime import date, timedelta
    date = DateSecondParameter(default=datetime.datetime.now())
    
    # Method to declare the Output-File
    def output(self):
        prefix = self.date.strftime("%Y-%m-%dT%H%M%S")
        return LocalTarget("data/%s_Evaluator_out.csv" % prefix, format=UTF8)
    
    # Method to define the required Task (Preprocessor)
    def requires(self):
        return RulebasedClassifier()


    # Classify the imported Data
    def run(self):
        df = pd.read_csv(self.input().path)
        
        print(classification_report(df['specified'].values, df['predicted'].values))
        confMatrix = confusion_matrix(df['specified'].values, df['predicted'].values)
        tp = confMatrix[0][0]
        fp = confMatrix[0][1]
        tn = confMatrix[1][1]
        fn = confMatrix[1][0]
        print(confMatrix)
        print('True Positive: %s\tFalse Negative: %s'%(tp,fn))
        print('False Positive: %s\tTrue Negative %s'%(fp,tn))
        
        # Write .csv-File
        with self.output().open("w") as out:
            df.to_csv(out, encoding="utf-8")
            


evaluator = Evaluator()
evaluator.run()


              precision    recall  f1-score   support

           0       0.90      0.62      0.73       566
           1       0.66      0.91      0.76       457

   micro avg       0.75      0.75      0.75      1023
   macro avg       0.78      0.76      0.75      1023
weighted avg       0.79      0.75      0.75      1023

[[350 216]
 [ 41 416]]
True Positive: 350	False Negative: 41
False Positive: 216	True Negative 416
