In [29]:
import pandas as pd

class NelsonRules:
    """Nelson-rule detectors for a DataFrame that has an ``amount`` column.

    Every ``ruleN`` method writes an integer column ``RuleN`` onto ``data`` in
    place (1 on the row that completes the pattern, 0 elsewhere) and returns
    ``None``.  The column is always written, even when ``data`` has too few
    rows for the pattern to occur, so the output schema does not depend on
    the input length.
    """

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------
    @staticmethod
    def _amounts(data):
        """Return the ``amount`` column as a plain list, read once."""
        return data['amount'].tolist()

    @staticmethod
    def _sign(delta):
        """Return +1, -1 or 0 according to the sign of ``delta``."""
        return (delta > 0) - (delta < 0)

    @staticmethod
    def _step_signs(amounts):
        """Return, per row, the direction of the move from the previous row."""
        # Pairing the series with a copy shifted by one (first element
        # repeated) gives row 0 a step of 0 and keeps the result aligned
        # with the rows; an empty input yields an empty list.
        shifted = amounts[:1] + amounts
        return [NelsonRules._sign(cur - prev)
                for prev, cur in zip(shifted, amounts)]

    @staticmethod
    def _same_sign_runs(signs):
        """Return, per position, the length of the current run of equal
        non-zero signs (a zero sign breaks the run)."""
        runs = []
        direction = 0
        length = 0
        for sign in signs:
            if sign == 0:
                direction, length = 0, 0
            elif sign == direction:
                length += 1
            else:
                direction, length = sign, 1
            runs.append(length)
        return runs

    @staticmethod
    def _zone_flags(amounts, mean, band, size, needed):
        """Flag the last row of every ``size``-row window whose points all
        lie on one side of ``mean`` with at least ``needed`` of them more
        than ``band`` away from it on that side."""
        high = mean + band
        low = mean - band
        flags = [0] * len(amounts)
        # ``end`` is the index of the window's last row, so the final window
        # (ending on the last row) is included.
        for end in range(size - 1, len(amounts)):
            window = amounts[end - size + 1:end + 1]
            if all(v > mean for v in window):
                beyond = sum(v > high for v in window)
            elif all(v < mean for v in window):
                beyond = sum(v < low for v in window)
            else:
                beyond = 0
            if beyond >= needed:
                flags[end] = 1
        return flags

    @staticmethod
    def _run_flags(amounts, qualifies, length):
        """Flag every row that ends a run of at least ``length`` consecutive
        rows for which ``qualifies(value)`` is true."""
        flags = []
        run = 0
        for value in amounts:
            run = run + 1 if qualifies(value) else 0
            flags.append(1 if run >= length else 0)
        return flags

    # ------------------------------------------------------------------
    # Rules
    # ------------------------------------------------------------------
    # Rule 1
    def rule1(self, data, mean, sigma):
        """Flag rows whose amount is not strictly within ``mean +/- 3*sigma``.

        Args:
            data: DataFrame with an ``amount`` column; gains ``Rule1``.
            mean: centre line.
            sigma: standard deviation used for the limits.
        """
        upperLimit = mean + 3 * sigma
        lowerLimit = mean - 3 * sigma
        amounts = data['amount']
        inside = (amounts > lowerLimit) & (amounts < upperLimit)
        data['Rule1'] = (~inside).astype(int)

    # Rule 2
    def rule2(self, data, mean):
        """Flag rows that end a run of nine or more consecutive amounts on
        the same side of ``mean``.

        An amount exactly equal to ``mean`` is on neither side and breaks
        the run.

        Args:
            data: DataFrame with an ``amount`` column; gains ``Rule2``.
            mean: centre line.
        """
        sides = [self._sign(amount - mean) for amount in self._amounts(data)]
        data['Rule2'] = [1 if run >= 9 else 0
                         for run in self._same_sign_runs(sides)]

    # Rule 3
    def rule3(self, data):
        """Flag rows that end six or more consecutive strictly increasing
        (or strictly decreasing) steps.

        Two equal consecutive amounts break the trend.

        Args:
            data: DataFrame with an ``amount`` column; gains ``Rule3``.
        """
        steps = self._step_signs(self._amounts(data))
        data['Rule3'] = [1 if run >= 6 else 0
                         for run in self._same_sign_runs(steps)]

    # Rule 4
    def rule4(self, data):
        """Flag rows that end fourteen or more consecutive points whose
        direction of change alternates (up, down, up, ...).

        Args:
            data: DataFrame with an ``amount`` column; gains ``Rule4``.
        """
        values = []
        previousStep = 0
        pointsInRun = 0
        for step in self._step_signs(self._amounts(data)):
            if step == 0:
                # First row, or no change: only this point can start a run.
                pointsInRun = 1
            elif previousStep != 0 and step == -previousStep:
                pointsInRun += 1
            else:
                # Same direction twice (or first move after a flat step):
                # the last two points form the start of a new run.
                pointsInRun = 2
            previousStep = step
            values.append(1 if pointsInRun >= 14 else 0)

        data['Rule4'] = values

    # Rule 5
    def rule5(self, data, mean, sigma):
        """Flag the third row of any three consecutive rows that are all on
        one side of ``mean`` with at least two of them more than
        ``2*sigma`` away on that side.

        NOTE(review): the commonly published rule only requires two of the
        three points to be beyond 2 sigma on the same side; this method
        additionally requires the third point to be on that side too.

        Args:
            data: DataFrame with an ``amount`` column; gains ``Rule5``.
            mean: centre line.
            sigma: standard deviation used for the limits.
        """
        data['Rule5'] = self._zone_flags(
            self._amounts(data), mean, 2 * sigma, size=3, needed=2)

    # Rule 6
    def rule6(self, data, mean, sigma):
        """Flag the fifth row of any five consecutive rows that are all on
        one side of ``mean`` with at least four of them more than ``sigma``
        away on that side.

        Args:
            data: DataFrame with an ``amount`` column; gains ``Rule6``.
            mean: centre line.
            sigma: standard deviation used for the limits.
        """
        data['Rule6'] = self._zone_flags(
            self._amounts(data), mean, sigma, size=5, needed=4)

    # Rule 7
    def rule7(self, data, mean, sigma):
        """Flag rows that end fifteen or more consecutive amounts strictly
        within ``mean +/- sigma``.

        Args:
            data: DataFrame with an ``amount`` column; gains ``Rule7``.
            mean: centre line.
            sigma: standard deviation used for the limits.
        """
        upperLimit = mean + sigma
        lowerLimit = mean - sigma
        data['Rule7'] = self._run_flags(
            self._amounts(data),
            lambda amount: lowerLimit < amount < upperLimit,
            15)

    # Rule 8
    def rule8(self, data, mean, sigma):
        """Flag rows that end eight or more consecutive amounts none of
        which is within ``sigma`` of ``mean``.

        NOTE(review): the commonly published rule also requires the points
        to fall on both sides of the mean; this method does not check that.

        Args:
            data: DataFrame with an ``amount`` column; gains ``Rule8``.
            mean: centre line.
            sigma: standard deviation used for the limits.
        """
        data['Rule8'] = self._run_flags(
            self._amounts(data),
            lambda amount: abs(mean - amount) >= sigma,
            8)



In [30]:
# Load the raw observations and discard the per-day column.
df = pd.read_excel('Assignment1_Question1_data.xlsx')
df = df.drop(columns='day')

# Partition on the first remaining column: rows below 53 (weeks up to and
# including week 52) are the training set, rows from week 53 on the test set.
train_df = df.loc[df.iloc[:, 0] < 53]
test_df = df.loc[df.iloc[:, 0] >= 53]

# Baseline statistics are estimated from the training rows only.
mean = train_df['amount'].mean()
stdev = train_df['amount'].std()

# Average the test rows per week, then let each rule add its flag column.
weeklyData = test_df.groupby('week').mean()
anomalyDetection = NelsonRules()

anomalyDetection.rule1(weeklyData, mean, stdev)
anomalyDetection.rule2(weeklyData, mean)
anomalyDetection.rule3(weeklyData)
anomalyDetection.rule4(weeklyData)
anomalyDetection.rule5(weeklyData, mean, stdev)
anomalyDetection.rule6(weeklyData, mean, stdev)
anomalyDetection.rule7(weeklyData, mean, stdev)
anomalyDetection.rule8(weeklyData, mean, stdev)

# Keep only the rule flags and export them.
resultDf = weeklyData.drop(columns='amount')
resultDf.to_csv('result.csv', sep=',')

In [31]:
resultDf

Unnamed: 0_level_0,Rule1,Rule2,Rule3,Rule4,Rule5,Rule6,Rule7,Rule8
week,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
53,0,0,0,0,0,0,0,0
54,0,0,0,0,0,0,0,0
55,0,0,0,0,0,0,0,0
56,0,0,0,0,0,0,0,0
57,0,0,0,0,0,0,0,0
58,0,0,0,0,0,0,0,0
59,0,0,1,0,0,0,0,0
60,0,0,1,0,0,0,0,0
61,0,0,1,0,0,0,0,0
62,0,0,0,0,0,0,0,0
