In [1]:
import os
import numpy as np
import pandas as pd
from __future__ import division

import matplotlib.mlab as mlab
import matplotlib.pyplot as plt
%matplotlib inline

URefL = 106
LRefL = 99
UTL = 104
LTL =99
WINDOW = 50 

In [2]:
class MovingAvgPerformanceMetrics:
    
    def __init__(self):
        self.true_positive = int()
        self.true_negative = int()
        self.false_positive = int()
        self.false_negative = int()
        
        self.precision = float()
        self.recall = float()


def df_to_series(df, method, qc=False, flags=False):
    
    if qc == False:
        # get the requested test and method and create a new DF
        df_ = df.loc[(df.Test != 'QC') & (df.Method == method)]
        test = 'val'
    else:
        df_ = df.loc[(df.Test == 'QC') & (df.Method == method)]
        test = 'qc'
        
    if flags == False:
        # take just the data and the value columns
        df_ = df_[['Resulted','Value']]
    else:
        df_ = df_[['Resulted','inOutQC']]
        
    # save to csv
    df_.to_csv('tmp.csv'.format(test, method), index=False)

    # read csv back in as a series
    series = pd.Series.from_csv('tmp.csv'.format(test, method),header=0)
    
    return series


def simple_moving_average(Window, ValTimeSeries):
    """
    Create a simple moving averages TimeSeries
    In: 
    -> Moving averages window
    -> TimeSeries of values
    Out: TimeSeries of MovingAverages for each of the supplied TimeStamps
    """
    
    ma_rs1_series = ValTimeSeries.copy(deep=True)
    for i in range(len(ValTimeSeries)):
        if i == 0:
            continue
        elif i < Window:
            ma_rs1_series[i] = ValTimeSeries[:i].mean()
        else:
            start = i - Window
            ma_rs1_series[i] = ValTimeSeries[start:i].mean()

    return ma_rs1_series

def moving_average_flags(MaTimeSeries, UTL, LTL):
    """
    Create a time series of moving average flags:
    1 = "Out"
    0 = "In"
    
    In: TimeSeries of moving average values
    Out: TimeSeries of 0 or 1 digits representing in and out respectively. 
    """
    
    ma_flag_series = MaTimeSeries.copy(deep=True)
    for i in range(len(MaTimeSeries)):

        if MaTimeSeries[i] <= UTL:
            # in = 0
            ma_flag_series[i] = 0
        else: 
            # out = 1
            ma_flag_series[i] = 1
    return ma_flag_series

def moving_avg_performance_metrics(moving_avg_flag_series, qc_flag_series):
    """
    Takes flags from Moving Averages and QC and compares to get performance metrics
    
    Input: MA and QC flag timeseries'
    Output: Class object of: TP, FP, TN, FN, Precision, Recall
    """
    metrics = MovingAvgPerformanceMetrics()
    metrics.true_positive = 0
    metrics.false_positive = 0
    metrics.true_negative = 0
    metrics.false_negative = 0

    for i in range(len(moving_avg_flag_series)):

        # Check MovingAverage "In" flags relative to QC
        # i.e. True Negatives and False Negatives
        if moving_avg_flag_series[i] == 0:
            startQcLookup = moving_avg_flag_series.index[i]
            for qc in qc_flag_series[startQcLookup:]:
                if qc == 0:
                    # 'True Negative'
                    metrics.true_negative+=1
                else:
                    # ma_flag = 0 & qc_flag = 1
                    # 'False Negative'
                    metrics.false_negative+=1

        # Check MovingAverage "Out" flags relative to QC
        # i.e. True Positives and False Positives
        else:
            startQcLookup = moving_avg_flag_series.index[i]
            for qc in qc_flag_series[startQcLookup:]:
                if qc == 1:
                    # 'True Positive'
                    metrics.true_positive+=1
                else:
                    # ma_flag = 0 & qc_flag = 1
                    # 'False Positive'
                    metrics.false_positive+=1
                break

                
    metrics.recall = (metrics.true_positive / (metrics.true_positive + metrics.false_negative)) * 100
    metrics.precision = (metrics.true_positive / (metrics.true_positive + metrics.false_positive)) * 100

    return metrics

In [3]:
WORK_DIR = 'C:\\Users\\thoma\\Box Sync\\00Professional\\01Research\\01_Projects\\moving_average\\'
DATA_DIR = WORK_DIR + 'data\\'
OUT_DIR = WORK_DIR + 'out\\'

CSV_PATH = DATA_DIR + 'ChlorideRS1_RS2.csv'

In [4]:
parse_dates = ['Resulted', 'Verified']
df = pd.read_csv(CSV_PATH, parse_dates=parse_dates)

In [5]:
df = df.sort_values(by='Verified')
rs1_val_series = df_to_series(df,'RS1', qc=False, flags=False)
rs1_qc_series = df_to_series(df,'RS1', qc=True, flags=False)
low_rs1_qc_series = rs1_qc_series[rs1_qc_series <= 95]
high_rs1_qc_series = rs1_qc_series[rs1_qc_series >= 95]
qc_flags = df_to_series(df,'RS1', qc=True, flags=True)

norm_rs1_val_series = rs1_val_series[(rs1_val_series <= URefL) & (rs1_val_series >= LRefL)]
ma_rs1_series = simple_moving_average(WINDOW, norm_rs1_val_series)


ma_flags = moving_average_flags(ma_rs1_series, 103.2, 99)

In [6]:
met = moving_avg_performance_metrics(ma_flags, qc_flags)
print met.__dict__

{'false_positive': 8, 'recall': 0.35211267605633806, 'precision': 33.33333333333333, 'true_negative': 162918, 'true_positive': 4, 'false_negative': 1132}
