In [2]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler


In [3]:
# Page-Hinkley Test Implementation
class PageHinkley:
    def __init__(self, min_instances=30, delta=0.005, threshold=50, alpha=1-0.0001):
        self.min_instances = min_instances
        self.delta = delta
        self.threshold = threshold
        self.alpha = alpha
        self.cum_sum = 0
        self.mean = 0
        self.n = 0

    def add_element(self, value):
        if self.n < self.min_instances:
            self.n += 1
            self.mean = self.mean + (value - self.mean) / self.n
            return False

        self.cum_sum = max(0, self.alpha * self.cum_sum + (value - self.mean - self.delta))

        self.mean = self.mean + (value - self.mean) / self.n
        self.n += 1

        if self.cum_sum > self.threshold:
            self.cum_sum = 0
            return True

        return False




In [4]:
# Load dataset
def load_dataset(file_path):
    return pd.read_csv(file_path)

# Preprocess dataset
def preprocess_data(data, columns_to_scale):
    data = data.fillna(method='ffill')  # Fill missing values
    scaler = StandardScaler()
    data[columns_to_scale] = scaler.fit_transform(data[columns_to_scale])
    return data


In [5]:
# Detect change using Page-Hinkley
def detect_change(data, columns_to_monitor):
    results = {}
    for column in columns_to_monitor:
        ph = PageHinkley()
        results[column] = []
        for i, value in enumerate(data[column]):
            if ph.add_element(value):
                results[column].append(i)
    return results



In [6]:
# Main function
def main():
    file_path = '../../data_loading/hai-23_05/hai-test1.csv' 
    data = load_dataset(file_path)

    # Select columns to preprocess and monitor
    columns_to_scale_and_monitor = ['P1_FCV01D', 'P1_PIT01', 'P1_FT01', 'P2_VIBTR01', 'x1001_05_SETPOINT_OUT']

    preprocessed_data = preprocess_data(data, columns_to_scale_and_monitor)
    change_points = detect_change(preprocessed_data, columns_to_scale_and_monitor)

    # Display the results
    for column, points in change_points.items():
        if points:
            print(f"Changes detected in {column} at indices: {points}")
        else:
            print(f"No significant changes detected in {column}.")

if __name__ == "__main__":
    main()

  data = data.fillna(method='ffill')  # Fill missing values


Changes detected in P1_FCV01D at indices: [553, 3414, 3486, 3543, 3595, 3643, 3689, 3733, 3776, 3818, 3859, 3899, 3938, 3977, 4015, 4053, 4091, 4128, 4165, 4202, 4239, 4276, 4313, 4349, 4385, 4422, 4460, 4502, 4554, 4628, 7085, 7152, 7208, 7259, 7306, 7351, 7394, 7436, 7476, 7515, 7554, 7592, 7629, 7665, 7701, 7737, 7772, 7807, 7841, 7875, 7909, 7942, 7975, 8008, 8041, 8074, 8107, 8140, 8173, 8207, 8242, 8280, 8325, 8387, 10896, 10968, 11026, 11078, 11126, 11172, 11216, 11258, 11299, 11339, 11378, 11416, 11453, 11489, 11525, 11560, 11595, 11630, 11664, 11698, 11732, 11765, 11798, 11831, 11864, 11896, 11929, 11962, 11997, 12033, 12072, 12117, 12176, 14670, 14744, 14802, 14854, 14902, 14948, 14992, 15034, 15075, 15115, 15154, 15192, 15229, 15265, 15301, 15336, 15371, 15406, 15440, 15474, 15510, 15548, 15588, 15635, 15698, 15822, 18332, 18410, 18469, 18521, 18569, 18614, 18657, 18698, 18738, 18777, 18815, 18852, 18888, 18923, 18958, 18992, 19026, 19060, 19093, 19127, 19162, 19200, 19242, 