In [None]:
import os
import random
import time
from datetime import datetime, timedelta

import numpy as np
import torch
from influxdb_client import InfluxDBClient

from LSTM_model import LSTM


In [None]:
# Setting random seed for reproducibility.
# One named constant seeds every RNG used here (PyTorch, NumPy, stdlib), so
# the seed is changed in a single place and the three generators cannot drift
# apart.
SEED = 140
torch.manual_seed(SEED)
np.random.seed(SEED)
random.seed(SEED)

In [None]:
# Testing of LSTM-class
# Toy series following y = 10 * x; view(-1, 1, 1) shapes each tensor as (3, 1, 1).
x_values = torch.tensor([1, 2, 3], dtype=torch.float32).view(-1, 1, 1)
y_values = torch.tensor([10, 20, 30], dtype=torch.float32).view(-1, 1, 1)

# Create an instance of the LSTM model
model = LSTM(
    x_values,
    y_values,
    input_size=1,
    hidden_size=10,
    num_layers=1,
    output_size=1,
    batch_size=3,
    num_epochs=50,
    learning_rate=0.01,
)
model.train_model()  # Train the model

# Get a prediction for an input the model has not seen.
# NOTE(review): transform / inverse_transform_y presumably map to and from the
# model's scaled space -- confirm in LSTM_model.
x_test = torch.tensor([4], dtype=torch.float32).view(-1, 1, 1)
x_test = model.transform(x_test)
# Inference only: skip autograd bookkeeping so no graph is built for the
# forward pass and the result carries no gradient history.
with torch.no_grad():
    y_pred = model(x_test)
y_pred_orig = model.inverse_transform_y(y_pred)

# For y = 10 * x the ideal answer is 40.0; the trained model only approximates it.
print(f'Prediction: {y_pred_orig.item()}')

In [None]:
# Testing of InfluxDB
# Polls the bucket once per poll_interval seconds and prints the newest three
# "cpu_utilization" points whenever a point newer than the last one seen
# has arrived.
# Requires the influxdb-client package: "pip install influxdb-client"
# https://www.influxdata.com/blog/getting-started-with-python-and-influxdb-v2-0/
#
# Connection settings can be overridden through environment variables so real
# credentials never have to be written into the notebook; the fallbacks are the
# original placeholder values.
influxdb_url = os.environ.get("INFLUXDB_V2_URL", "http://localhost:8086")
token = os.environ.get("INFLUXDB_V2_TOKEN", "random_token")
username = os.environ.get("INFLUXDB_USERNAME", "influx-admin")
password = os.environ.get("INFLUXDB_PASSWORD", "ThisIsNotThePasswordYouAreLookingFor")
org = os.environ.get("INFLUXDB_V2_ORG", "ORG")
bucket = "system_state"

client = InfluxDBClient(url=influxdb_url, token=token, org=org, username=username, password=password)

poll_interval = 1  # seconds between polls

# Instantiate the QueryAPI
query_api = client.query_api()

batch_nr = 0

# Initialize timestamp as a timezone-aware datetime so it can be compared
# directly with the timestamps returned by the query.
last_timestamp = datetime.fromisoformat("1970-01-01T00:00:00+00:00")

# The Flux query does not change between polls, so build it once.
query = f'''
    from(bucket: "{bucket}")
     |> range(start: time(v: "2014-04-10T00:00:00Z"))
     |> filter(fn: (r) => r["_measurement"] == "cpu_utilization")
     |> tail(n: 3)
    '''

# NOTE: this loop never terminates on its own; interrupt the kernel to stop it.
# The finally clause makes sure the client is closed when that happens.
try:
    while True:
        # Query the data
        events = list(query_api.query_stream(org=org, query=query))

        if events:
            # Newest timestamp among the returned records. max() is used
            # instead of events[-1] because the last record is only guaranteed
            # to be the newest when the result is a single table.
            new_last_timestamp = max(event.get_time() for event in events)

            # Compare datetime objects directly
            if new_last_timestamp > last_timestamp:
                # New event detected: number the batch and print it
                batch_nr += 1
                print("Batch nr: ", batch_nr)

                for event in events:
                    # Print the timestamp and value of the event
                    print(f'Time: {event.get_time()}, Value: {event.get_value()}')

                # Remember the newest timestamp for the next iteration
                last_timestamp = new_last_timestamp
        else:
            print("No events found in range.")

        time.sleep(poll_interval)
finally:
    client.close()

In [None]:
"""
THE PLAN

Making the program start from any predefined time, and then continue to fetch data from that time and onwards.
And so it can process older data and then catch up to the present time.

It therefore processes a batch of 3 from the earliest timestamp in the range, and then updates the start time for the next iteration 
by incrementing the start time by a set time to both avoid duplicate events and to eventually catch up to the present time.

The program will run indefinitely, and will continue to fetch data from the InfluxDB and process it in batches of 3 until the program is stopped.
If there are not enough events for a batch, the program will wait for a set time before trying again.
When a batch of three is available it will be processed, and then it will again wait for another event to be available.

This way it is both flexible and efficient, and can be easily used to process either data in real-time or historical data


For each batch it follows the algorithm of RePAD2



"""


# Testing of InfluxDB with LSTM
# Fetches every "cpu_utilization" point from start_time onwards, prints each
# sliding window of three consecutive points, then moves start_time past the
# last point so the next poll only returns newer data.
# Requires the influxdb-client package: "pip install influxdb-client"
# https://www.influxdata.com/blog/getting-started-with-python-and-influxdb-v2-0/
#
# Connection settings can be overridden through environment variables so real
# credentials never have to be written into the notebook; the fallbacks are the
# original placeholder values.
influxdb_url = os.environ.get("INFLUXDB_V2_URL", "http://localhost:8086")
token = os.environ.get("INFLUXDB_V2_TOKEN", "random_token")
username = os.environ.get("INFLUXDB_USERNAME", "influx-admin")
password = os.environ.get("INFLUXDB_PASSWORD", "ThisIsNotThePasswordYouAreLookingFor")
org = os.environ.get("INFLUXDB_V2_ORG", "ORG")
bucket = "system_state"

client = InfluxDBClient(url=influxdb_url, token=token, org=org, username=username, password=password)

poll_interval = 1   # seconds between polls
time_increment = 1  # seconds added to the last event time to form the next range start

# Instantiate the QueryAPI
query_api = client.query_api()

batch_nr = 0

# Initialize timestamps
# last_timestamp is not read anywhere in this cell; kept so the name stays defined.
last_timestamp = datetime.fromisoformat("1970-01-01T00:00:00+00:00")
start_time = "2014-04-10T00:00:00Z"

# NOTE: this loop never terminates on its own; interrupt the kernel to stop it.
# The finally clause makes sure the client is closed when that happens.
try:
    while True:
        # Construct the Flux query (start_time moves forward between polls)
        query = f'''
    from(bucket: "{bucket}")
     |> range(start: time(v: "{start_time}"))
     |> filter(fn: (r) => r["_measurement"] == "cpu_utilization")
    '''

        # Query the data
        events = list(query_api.query_stream(org=org, query=query))

        if events:
            # Slide a window of 3 over the events; with fewer than 3 events the
            # range is empty and nothing is processed until more data arrives.
            for i in range(len(events) - 2):
                batch_events = events[i:i + 3]

                # Example of processing the events
                # Print batch nr as separator
                batch_nr += 1
                print("Batch nr: ", batch_nr)
                for event in batch_events:
                    # Print the timestamp and value of the event
                    print(f'Time: {event.get_time()}, Value: {event.get_value()}')

            if len(events) >= 3:
                # At least one window was processed: start the next query just
                # after the last event so it is not returned again.
                # NOTE(review): windows that span two polls (e.g. the last two
                # events of this poll plus the first of the next) are never
                # formed -- confirm this is intended.
                # NOTE(review): events less than time_increment seconds after
                # the last one are skipped -- confirm the data's resolution.
                last_event_time = events[-1].get_time()
                start_time = (last_event_time + timedelta(seconds=time_increment)).isoformat()
        else:
            print("No events found in range.")

        time.sleep(poll_interval)
finally:
    client.close()




In [None]:
def calculate_AARE(D, D_bar, T):
    """Return the Average Absolute Relative Error (AARE) at time point T.

    Averages |D[y] - D_bar[y]| / D[y] over the three most recent time points
    y = T-2, T-1, T.

    Args:
        D: observed values, indexed by time point (e.g. a 1-D torch tensor).
        D_bar: predicted values, indexed the same way as D.
        T: current time point; must be at least 5.

    Returns:
        The mean of the three absolute relative errors, as returned by
        torch.mean.

    Raises:
        ValueError: if T is smaller than 5.

    Note:
        The error is divided by D itself, so a zero in D yields inf/nan rather
        than an exception.
    """
    if T < 5:
        raise ValueError("T must be greater than or equal to 5")

    # Use the three points T-2..T. A slice starting at T-3 would average four
    # points and, at the minimum T = 5, reach back to time point 2, which has
    # no prediction when the first model is trained at T = 2 and first
    # predicts time point 3.
    recent = slice(T - 2, T + 1)
    absolute_relative_errors = torch.abs(D[recent] - D_bar[recent]) / D[recent]

    # Average absolute relative error over the window
    return torch.mean(absolute_relative_errors)

# Example usage:
# D = torch.tensor([...]) # Tensor containing D values
# D_bar = torch.tensor([...]) # Tensor containing D_bar values
# T = current time period (must be >= 5)
# AARE_star_T = calculate_AARE(D, D_bar, T)

def calculate_mu_AARE(AARE, T, W):
    """Return the mean of the AARE values in the current look-back window.

    Window selection:
        * T >= W + 4      -> the W entries AARE[T-W:T]
        * 7 <= T < W + 4  -> the entries AARE[4:T]

    Args:
        AARE: AARE values, sliceable by time point (e.g. a 1-D torch tensor).
        T: current time point.
        W: size of the moving window.

    Returns:
        torch.mean of the selected window.

    Raises:
        ValueError: if T is below both 7 and W + 4. For W < 3, values of T in
            [W + 4, 7) are therefore accepted.
    """
    # NOTE(review): both slices stop at T (exclusive), so AARE[T] itself is not
    # part of the mean -- confirm this matches how the caller indexes AARE.
    if T >= W + 4:
        window = AARE[T-W:T]
    elif T >= 7:
        window = AARE[4:T]
    else:
        raise ValueError("T must be greater than or equal to 7")
    return torch.mean(window)

def calculate_sigma(AARE, mu_AARE, T, W):
    """Return the standard deviation of the AARE values around mu_AARE.

    Uses the same window as calculate_mu_AARE: AARE[T-W:T] (divisor W) when
    T >= W + 4, otherwise AARE[4:T] (divisor T - 4) when T >= 7.

    Args:
        AARE: AARE values, sliceable by time point (e.g. a 1-D torch tensor).
        mu_AARE: mean to measure the deviations against.
        T: current time point.
        W: size of the moving window.

    Returns:
        sqrt(sum((window - mu_AARE) ** 2) / divisor).

    Raises:
        ValueError: if T is below both 7 and W + 4. For W < 3, values of T in
            [W + 4, 7) are therefore accepted.
    """
    # Pick the window and its nominal length first, then apply one formula.
    if T >= W + 4:
        window, divisor = AARE[T-W:T], W
    elif T >= 7:
        window, divisor = AARE[4:T], T - 4
    else:
        raise ValueError("T must be greater than or equal to 7")

    squared_deviations = (window - mu_AARE) ** 2
    return torch.sqrt(torch.sum(squared_deviations) / divisor)

def calculate_Thd(AARE, T, W):
    """Return the detection threshold: window mean plus three standard deviations.

    Args:
        AARE: AARE values, passed through to calculate_mu_AARE and
            calculate_sigma.
        T: current time point.
        W: size of the moving window.

    Returns:
        mu_AARE + 3 * sigma for the current window.

    Raises:
        ValueError: propagated from calculate_mu_AARE / calculate_sigma when T
            is too small.
    """
    window_mean = calculate_mu_AARE(AARE, T, W)
    window_std = calculate_sigma(AARE, window_mean, T, W)
    return window_mean + 3 * window_std

# Example usage:
# AARE_star = torch.tensor([...]) # A PyTorch tensor containing AARE* values
# T = current time period (must be >= 7)
# W = window size for the moving average and standard deviation
# Thd_star = calculate_Thd(AARE_star, T, W)



# Let T be the current time point and T starts from 0; Let flag* be True;

While time has advanced {
    Collect data point D_T
    
    if T >= 2 && T < 5 {
        Train LSTM model by taking D_T-2, D_T-1 and D_T as training data (X and Y values for the three datapoints)
        Let M* be the resulting model and use M* to predict D_T+1
   
    } elif T >= 5 && T < 7 {
        Calculate AARE_T* based on Equation 5
        Train LSTM model by taking D_T-2, D_T-1 and D_T as training data (X and Y values for the three datapoints)
        Let M* be the resulting model and use M* to predict D_T+1
        
    } elif T >= 7 && flag* == True {
        if T != 7 {Use M* to Predict D_T}
        Calculate AARE_T* based on Equation 5
        Calculate Thd* based on Equation 6

        if AARE_T* <= Thd* {D_T* is not considered an anomaly}

        else {
            Train LSTM model by taking D_T-3, D_T-2 and D_T-1 as training data
            Use the model to repredict D_T
            Re-calculate AARE_T* based on Equation 5
            Re-calculate Thd* based on Equation 6

            if AARE_T* <= Thd* {
                D_T is not considered an anomaly
                Replace M* with the new LSTM-model
                Let flag* be True

            } else {
                D_T is reported as an anomaly immediately
                Let flag* be False

            }
        }
    } elif T >= 7 && flag* == False {
        Train LSTM model by taking D_T-3, D_T-2 and D_T-1 as training data
        Use the LSTM model to predict D_T 
        Calculate AARE_T* based on Equation 5
        Calculate Thd* based on Equation 6

        if AARE_T* <= Thd* {
            D_T is not considered an anomaly
            Replace M* with the new LSTM-model
            Let flag* be True
        } else {
            D_T is reported as an anomaly immediately
            Let flag* be False
        }
    }


}

