In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler


In [4]:
# Page-Hinkley Test Implementation
class PageHinkley:
    def __init__(self, min_instances=30, delta=0.005, threshold=50, alpha=1-0.0001):
        self.min_instances = min_instances
        self.delta = delta
        self.threshold = threshold
        self.alpha = alpha
        self.cum_sum = 0
        self.mean = 0
        self.n = 0

    def add_element(self, value):
        if self.n < self.min_instances:
            self.n += 1
            self.mean = self.mean + (value - self.mean) / self.n
            return False

        self.cum_sum = max(0, self.alpha * self.cum_sum + (value - self.mean - self.delta))

        self.mean = self.mean + (value - self.mean) / self.n
        self.n += 1

        if self.cum_sum > self.threshold:
            self.cum_sum = 0
            return True

        return False




In [3]:
# Load dataset
def load_dataset(file_path):
    return pd.read_csv(file_path)

# Preprocess dataset
def preprocess_data(data, columns_to_scale):
    data = data.fillna(method='ffill')  # Fill missing values
    scaler = StandardScaler()
    data[columns_to_scale] = scaler.fit_transform(data[columns_to_scale])
    return data


In [5]:
# Detect change using Page-Hinkley
def detect_change(data, columns_to_monitor):
    results = {}
    for column in columns_to_monitor:
        ph = PageHinkley()
        results[column] = []
        for i, value in enumerate(data[column]):
            if ph.add_element(value):
                results[column].append(i)
    return results



In [6]:

import influxdb_client
from influxdb_client.client.write_api import SYNCHRONOUS

In [7]:
token = "En1iX5zqnyR_AT71S6Ahz8_Hs78nrJHwEkZDDksf4J6reHJNqXzbaMEXbmBjy7I-bdzp2k8fy7E1FjU1f2ZWsA=="
org = "mema_org"
bucket = "mema_bucket"
url = "http://localhost:8086"

In [None]:
with influxdb_client.InfluxDBClient(url=url, token=token, org=org) as client:
    write_api = client.write_api(write_options=SYNCHRONOUS)
    for message in consumer:
        message = message.value
        if len(message) < 10:
            break
        try:
            point = json.dumps(message)
            print(point)
            print(type(point))
            point = eval(point)
            point_values = []
            for key in point:
                print(key)
                print(point[key])
                p = influxdb_client.Point("point").field(key, point[key])
                write_api.write(bucket, org, p)
                point_values.append(point[key])
        except NotImplementedError:
            pass

In [13]:
# Main function
def main():
    file_path = '../../data_loading/hai-23_05/hai-train1.csv' 
    data = load_dataset(file_path)

    # Select columns to preprocess and monitor
    columns_to_scale_and_monitor = ['P1_FCV01D', 'P1_PIT01', 'P1_FT01', 'P2_VIBTR01', 'x1001_05_SETPOINT_OUT']

    preprocessed_data = preprocess_data(data, columns_to_scale_and_monitor)
    change_points = detect_change(preprocessed_data, columns_to_scale_and_monitor)

    # Display the results
    with influxdb_client.InfluxDBClient(url=url, token=token, org=org) as client:
        write_api = client.write_api(write_options=SYNCHRONOUS)
        for i in range(0, 10):
            p = influxdb_client.Point("Albert_Mao_Test").field(f'Col_{i}', (i+5))
            write_api.write(bucket, org, p)
        # for column, points in change_points.items():
            
        #     if points:
        #         p = influxdb_client.Point("PageHinkley").field(str(column), points)
        #         write_api.write(bucket, org, p)
        #         print(f"Changes detected in {column} at indices: {points}")
        #     else:
        #         print(f"No significant changes detected in {column}.")

if __name__ == "__main__":
    main()

  data = data.fillna(method='ffill')  # Fill missing values
