In [1]:
from __future__ import division

import os
import numpy as np
import pandas as pd
import datetime

import matplotlib.mlab as mlab
import matplotlib.pyplot as plt
%matplotlib inline

# Experiment parameters
MA_TYPE = 'SMA'
COMPONENT = 'Chloride'
METHOD = 'RS1'
QC_RULES = ['13s','22s']

URefL = 106
LRefL = 99
UTL = 104
LTL =99
WINDOW = 50 


In [172]:
def df_to_series(df, method, qc=False, flags=False):
    
    if qc == False:
        # get the requested test and method and create a new DF
        df_ = df.loc[(df.Test != 'QC') & (df.Method == method)]
        test = 'val'
    else:
        df_ = df.loc[(df.Test == 'QC') & (df.Method == method)]
        test = 'qc'
        
    if flags == False:
        # take just the data and the value columns
        df_ = df_[['Resulted','Value']]
    else:
        df_ = df_[['Resulted','inOutQC']]
        
    # save to csv
    df_.to_csv('tmp.csv'.format(test, method), index=False)

    # read csv back in as a series
    series = pd.Series.from_csv('tmp.csv'.format(test, method),header=0)
    
    return series


def simple_moving_average(Window, ValTimeSeries):
    """
    Create a simple moving averages TimeSeries
    In: 
    -> Moving averages window
    -> TimeSeries of values
    Out: TimeSeries of MovingAverages for each of the supplied TimeStamps
    """
    
    ma_rs1_series = ValTimeSeries.copy(deep=True)
    for i in range(len(ValTimeSeries)):
        if i == 0:
            continue
        elif i < Window:
            ma_rs1_series[i] = ValTimeSeries[:i].mean()
        else:
            start = i - Window
            ma_rs1_series[i] = ValTimeSeries[start:i].mean()

    return ma_rs1_series


def moving_average_flags(MaTimeSeries, UTL, LTL):
    """
    Create a time series of moving average flags:
    1 = "Out"
    0 = "In"
    
    In: TimeSeries of moving average values
    Out: TimeSeries of 0 or 1 digits representing in and out respectively. 
    """
    
    ma_flag_series = MaTimeSeries.copy(deep=True)
    for i in range(len(MaTimeSeries)):

        if MaTimeSeries[i] <= UTL:
            # in = 0
            ma_flag_series[i] = 0
        else: 
            # out = 1
            ma_flag_series[i] = 1
    return ma_flag_series


def moving_avg_performance_metrics(ma_perf_model, moving_avg_flag_series, qc_flag_series):
    """
    Takes flags from Moving Averages and QC and compares to get performance metrics
    
    Input: MA and QC flag timeseries'
    Output: Class object of: TP, FP, TN, FN, Precision, Recall
    """

    raw_perf_data = RawPerformanceModel()
    raw_perf_data.true_positive = 0
    raw_perf_data.false_positive = 0
    raw_perf_data.true_negative = 0
    raw_perf_data.false_negative = 0

    # TODO: Need to seperate logic for high and low MA flags
    for i in range(len(moving_avg_flag_series)):

        # instantiate ma flag timestamps
        raw_perf_data.ma_flag_ts = moving_avg_flag_series.index[i]
        
        # Check MovingAverage "In" flags relative to QC
        # i.e. True Negatives and False Negatives
        if moving_avg_flag_series[i] == 0:
            startQcLookup = moving_avg_flag_series.index[i]
            for qc in qc_flag_series[startQcLookup:]:
                
                raw_perf_data.qc_flag_ts = qc_flag_series[startQcLookup:].index[0]
                
                if qc == 0:
                    # 'True Negative'
                    raw_perf_data.true_negative+=1
                else:
                    # ma_flag = 0 & qc_flag = 1
                    # 'False Negative'
                    raw_perf_data.false_negative+=1

        # Check MovingAverage "Out" flags relative to QC
        # i.e. True Positives and False Positives
        else:
            startQcLookup = moving_avg_flag_series.index[i]
            for qc in qc_flag_series[startQcLookup:]:
                
                # add QC_Flag TimeStamp to MovingAvgPerformanceMetrics
                raw_perf_data.qc_flag_ts = qc_flag_series[startQcLookup:].index[0]
                
                if qc == 1:
                    # 'True Positive'
                    raw_perf_data.true_positive+=1
                else:
                    # ma_flag = 0 & qc_flag = 1
                    # 'False Positive'
                    raw_perf_data.false_positive+=1
                break

    # Calculate difference in time between QC flag and MA flag
    raw_perf_data.ma_qc_diff_secs = raw_perf_data.calculate_timestamp_diff()
    raw_perf_data.recall = raw_perf_data.calculate_recall()
    raw_perf_data.precision = raw_perf_data.calculate_precision()
    
    return raw_perf_data

def set_ma_experiment(ma_type, component, method, ma_window, qc_rules):
    
    ma = MovingAvgPerformanceModel()
    
    ma.component = component
    ma.chem_method = method
    ma.ma_type = ma_type
    ma.qc_rules = qc_rules
    
    return ma

In [3]:
WORK_DIR = 'C:\\Users\\thoma\\Box Sync\\00Professional\\01Research\\01_Projects\\moving_average\\'
DATA_DIR = WORK_DIR + 'data\\'
OUT_DIR = WORK_DIR + 'out\\'

CSV_PATH = DATA_DIR + 'ChlorideRS1_RS2.csv'

In [4]:
parse_dates = ['Resulted', 'Verified']
df = pd.read_csv(CSV_PATH, parse_dates=parse_dates)

In [5]:
df = df.sort_values(by='Verified')
rs1_val_series = df_to_series(df,'RS1', qc=False, flags=False)
rs1_qc_series = df_to_series(df,'RS1', qc=True, flags=False)
low_rs1_qc_series = rs1_qc_series[rs1_qc_series <= 95]
high_rs1_qc_series = rs1_qc_series[rs1_qc_series >= 95]
qc_flags = df_to_series(df,'RS1', qc=True, flags=True)

norm_rs1_val_series = rs1_val_series[(rs1_val_series <= URefL) & (rs1_val_series >= LRefL)]
ma_rs1_series = simple_moving_average(WINDOW, norm_rs1_val_series)


ma_flags = moving_average_flags(ma_rs1_series, 103.2, 99)

In [173]:
class MovingAvgPerformanceModel:
    
    def __init__(self):

        self.component = str()
        self.chem_method = str()
        self.ma_type = str()
        self.qc_rules = list()
        self.exp_data = []
            
    # dumps Result object in json
    def tojson(self):
        return json.dumps(self, default=lambda o: o.__dict__,
                          sort_keys=False, indent=4)
    
class RawPerformanceModel:

    def __init__(self):

        # Ma/QC Parameters
        self.ma_utl = int()
        self.ma_ltl = int()
        self.ma_url = int()
        self.ma_lrl = int()
        self.ma_window = int()

        # Performance metrics
        self.true_positive = int()
        self.true_negative = int()
        self.false_positive = int()
        self.false_negative = int()
        self.precision = float()
        self.recall = float()

        # datetime("%Y-%m-%d %H:%M:%S")
        # 2017-01-15 00:15:00
        self.ma_flag_ts = str()
        self.qc_flag_ts = str()
        self.ma_qc_diff_secs = str()

    def calculate_timestamp_diff(self):
        return (self.qc_flag_ts - self.ma_flag_ts).seconds     

    def calculate_recall(self):
        return (self.true_positive / (self.true_positive + self.false_negative))

    def calculate_precision(self):
        return (self.true_positive / (self.true_positive + self.false_positive))
    

In [174]:
ExperimentObject = set_ma_experiment(ma_type=MA_TYPE,
                                        component=COMPONENT,
                                        method=METHOD,
                                        ma_window=WINDOW,
                                        qc_rules=QC_RULES)

In [177]:
ExperimentObject.exp_data.append(moving_avg_performance_metrics(ma_perf_model=ExperimentObject,
                                                                     moving_avg_flag_series=ma_flags,
                                                                     qc_flag_series=qc_flags))
print ExperimentObject.__dict__

{'qc_rules': ['13s', '22s'], 'exp_data': [<__main__.RawPerformanceModel instance at 0x0000000007E170C8>, <__main__.RawPerformanceModel instance at 0x0000000007EE7F08>], 'chem_method': 'RS1', 'component': 'Chloride', 'ma_type': 'SMA'}


In [178]:
for i in enumerate(ExperimentObject.exp_data):
    print i[1].__dict__

{'false_positive': 8, 'qc_flag_ts': Timestamp('2017-01-28 21:55:00'), 'ma_url': 0, 'recall': 0.0035211267605633804, 'ma_qc_diff_secs': 77820L, 'ma_lrl': 0, 'ma_utl': 0, 'precision': 0.3333333333333333, 'true_positive': 4, 'ma_window': 0, 'ma_ltl': 0, 'true_negative': 162918, 'ma_flag_ts': Timestamp('2017-01-29 00:18:00'), 'false_negative': 1132}
{'false_positive': 8, 'qc_flag_ts': Timestamp('2017-01-28 21:55:00'), 'ma_url': 0, 'recall': 0.0035211267605633804, 'ma_qc_diff_secs': 77820L, 'ma_lrl': 0, 'ma_utl': 0, 'precision': 0.3333333333333333, 'true_positive': 4, 'ma_window': 0, 'ma_ltl': 0, 'true_negative': 162918, 'ma_flag_ts': Timestamp('2017-01-29 00:18:00'), 'false_negative': 1132}


In [191]:
import json
ExperimentObject_json = ExperimentObject.tojson()
ExperimentObject_dict = json.loads(ExperimentObject_json)

In [192]:
ExperimentObject_dict['exp_data']

[{u'false_negative': 1132,
  u'false_positive': 8,
  u'ma_flag_ts': {},
  u'ma_lrl': 0,
  u'ma_ltl': 0,
  u'ma_qc_diff_secs': 77820,
  u'ma_url': 0,
  u'ma_utl': 0,
  u'ma_window': 0,
  u'precision': 0.3333333333333333,
  u'qc_flag_ts': {},
  u'recall': 0.0035211267605633804,
  u'true_negative': 162918,
  u'true_positive': 4},
 {u'false_negative': 1132,
  u'false_positive': 8,
  u'ma_flag_ts': {},
  u'ma_lrl': 0,
  u'ma_ltl': 0,
  u'ma_qc_diff_secs': 77820,
  u'ma_url': 0,
  u'ma_utl': 0,
  u'ma_window': 0,
  u'precision': 0.3333333333333333,
  u'qc_flag_ts': {},
  u'recall': 0.0035211267605633804,
  u'true_negative': 162918,
  u'true_positive': 4}]

In [194]:
csv.DictWriter?

In [193]:
import csv
f = open('output.csv','wb')
w = csv.DictWriter(f)
w.writerows(ExperimentObject_dict['exp_data'])


TypeError: __init__() takes at least 3 arguments (2 given)