In [None]:
import os
import random
import time
from collections import deque
from datetime import datetime, timedelta

import numpy as np
import torch
from influxdb_client import InfluxDBClient, Point, WritePrecision
from influxdb_client.client.write_api import SYNCHRONOUS

from LSTM_model import LSTM


In [None]:
# Seed every random number generator used in this notebook (torch, NumPy,
# and the standard library) with the same value for reproducibility.
SEED = 140
for seed_fn in (torch.manual_seed, np.random.seed, random.seed):
    seed_fn(SEED)

In [None]:
# Smoke test of the LSTM class on a toy series: x = 1, 2, 3 -> y = 10, 20, 30
x_values = torch.tensor([1, 2, 3], dtype=torch.float32).reshape(-1, 1, 1)
y_values = torch.tensor([10, 20, 30], dtype=torch.float32).reshape(-1, 1, 1)

# Hyperparameters handed to the LSTM constructor
lstm_settings = dict(
    input_size=1,
    hidden_size=10,
    num_layers=1,
    output_size=1,
    batch_size=3,
    num_epochs=50,
    learning_rate=0.01,
)

# Build the model on the toy data and fit it
model = LSTM(x_values, y_values, **lstm_settings)
model.train_model()

# Query the fitted model at x = 4.
# NOTE(review): transform / inverse_transform_y presumably apply and undo the
# model's input/output scaling -- confirm against LSTM_model.
x_test = model.transform(torch.tensor([4], dtype=torch.float32).reshape(-1, 1, 1))
y_pred = model(x_test)
y_pred_orig = model.inverse_transform_y(y_pred)

print(f'Prediction: {y_pred_orig.item()}')  # hoped-for output: Prediction: 40.0

In [None]:
# Testing of InfluxDB: poll for the three most recent cpu_utilization points
# and print them whenever a newer point has arrived.
# Requires the influxdb-client package: pip install influxdb-client
# https://www.influxdata.com/blog/getting-started-with-python-and-influxdb-v2-0/

# Connection settings are taken from the environment when set, so real
# credentials never need to be written into the notebook. The literals are
# only local-development fallbacks.
influxdb_url = os.environ.get("INFLUXDB_V2_URL", "http://localhost:8086")
token = os.environ.get("INFLUXDB_V2_TOKEN", "random_token")
username = os.environ.get("INFLUXDB_USERNAME", "influx-admin")
password = os.environ.get("INFLUXDB_PASSWORD", "ThisIsNotThePasswordYouAreLookingFor")
org = os.environ.get("INFLUXDB_V2_ORG", "ORG")
bucket = "system_state"

client = InfluxDBClient(url=influxdb_url, token=token, org=org, username=username, password=password)

# Seconds to sleep between two polls
poll_interval = 1

# Instantiate the QueryAPI
query_api = client.query_api()

# Number of polls that delivered at least one new point
batch_nr = 0

# Timestamp of the newest point printed so far. It is timezone-aware so it can
# be compared directly with the datetimes returned by get_time().
last_timestamp = datetime.fromisoformat("1970-01-01T00:00:00+00:00")

# The Flux query does not change between polls, so it is built once.
query = f'''
from(bucket: "{bucket}")
 |> range(start: time(v: "2014-04-10T00:00:00Z"))
 |> filter(fn: (r) => r["_measurement"] == "cpu_utilization")
 |> tail(n: 3)
'''

try:
    while True:
        # Query the data
        events = list(query_api.query_stream(org=org, query=query))

        if events:
            # Timestamp of the last returned record (a datetime object)
            new_last_timestamp = events[-1].get_time()

            # Only print when the newest record is newer than what was last shown
            if new_last_timestamp > last_timestamp:
                batch_nr += 1
                # Print batch nr as separator
                print("Batch nr: ", batch_nr)

                for event in events:
                    print(f'Time: {event.get_time()}, Value: {event.get_value()}')

                # Remember the newest timestamp for the next poll
                last_timestamp = new_last_timestamp
        else:
            print("No events found in range.")

        time.sleep(poll_interval)
except KeyboardInterrupt:
    # The loop has no other exit; interrupting the kernel is the normal way to stop it.
    print("Polling stopped.")
finally:
    # Release the client's HTTP resources however the loop ended
    client.close()

In [None]:
# Further testing of InfluxDB
"""
THE PLAN

Making the program start from any predefined time, and then continue to fetch data from that time and onwards.
And so it can process older data and then catch up to the present time.

It therefore processes a batch of 3 from the earliest timestamp in the range, and then updates the start time for the next iteration 
by incrementing the start time by a set time to both avoid duplicate events and to eventually catch up to the present time.

The program will run indefinitely, and will continue to fetch data from the InfluxDB and process it in batches of 3 until the program is stopped.
If there are not enough events for a batch, the program will wait for a set time before trying again.
When a batch of three is available it will be processed, and then it will again wait for another event to become available.

This way it is both flexible and efficient, and can be easily used to process either data in real-time or historical data


For each batch it follows the algorithm of RePAD2

"""

# Testing of InfluxDB: read cpu_utilization events from start_time onwards and
# collect them in a bounded sliding window.
# Requires the influxdb-client package: pip install influxdb-client
# https://www.influxdata.com/blog/getting-started-with-python-and-influxdb-v2-0/

# Connection settings are taken from the environment when set, so real
# credentials never need to be written into the notebook. The literals are
# only local-development fallbacks.
influxdb_url = os.environ.get("INFLUXDB_V2_URL", "http://localhost:8086")
token = os.environ.get("INFLUXDB_V2_TOKEN", "random_token")
username = os.environ.get("INFLUXDB_USERNAME", "influx-admin")
password = os.environ.get("INFLUXDB_PASSWORD", "ThisIsNotThePasswordYouAreLookingFor")
org = os.environ.get("INFLUXDB_V2_ORG", "ORG")
bucket = "system_state"

# Instantiate the QueryAPI
client = InfluxDBClient(url=influxdb_url, token=token, org=org, username=username, password=password)
query_api = client.query_api()

# Sliding window for Threshold (oldest entries are dropped once full)
sliding_window = deque(maxlen=8064)

# Time parameters
poll_interval = 1    # seconds to sleep between polls
time_increment = 1   # seconds added to the last seen timestamp for the next query
start_time = "2014-04-10T00:00:00Z"

# True until the first batch of three events has been stored
is_first_batch = True

try:
    while True:
        # Construct the Flux query (start_time moves forward after each consumed poll)
        query = f'''
        from(bucket: "{bucket}")
         |> range(start: time(v: "{start_time}"))
         |> filter(fn: (r) => r["_measurement"] == "cpu_utilization")
        '''

        # Query the data
        events = list(query_api.query_stream(org=org, query=query))

        if not events:
            # Nothing newer than start_time: this test cell stops here
            print("No events found in range.")
            break

        if is_first_batch and len(events) < 3:
            # The very first batch needs three events; keep start_time and retry
            time.sleep(poll_interval)
            continue

        # Store every new event. Sliding a 3-wide batch over the events and
        # keeping only each batch's last element (as was done before) silently
        # dropped the first two events of every poll after the first one.
        sliding_window.extend(events)
        is_first_batch = False

        # Continue after the last consumed event so it is not fetched again
        last_event_time = events[-1].get_time()
        start_time = (last_event_time + timedelta(seconds=time_increment)).isoformat()

        time.sleep(poll_interval)
finally:
    # Release the client's HTTP resources however the loop ended
    client.close()

# Show everything that was collected
for event in sliding_window:
    print(f'Time: {event.get_time()}, Value: {event.get_value()}')


In [None]:
# Functions for RePAD2
def calculate_aare(actual, predicted):
    """
    Calculate the Average Absolute Relative Error (AARE) between actual and predicted values.

    Pairs whose actual value is 0 are skipped because their relative error is
    undefined. If no pair remains (empty input, or every actual value is 0),
    0.0 is returned instead of dividing by zero.

    Parameters:
    - actual (list or array): The actual values.
    - predicted (list or array): The predicted values.

    Returns:
    - float: The AARE value, or 0.0 when no pair has a non-zero actual value.

    Raises:
    - ValueError: If actual and predicted differ in length.
    """
    if len(actual) != len(predicted):
        raise ValueError("The length of actual and predicted values must be the same.")

    # Absolute relative error of every pair with a non-zero actual value
    absolute_relative_errors = [abs((a - p) / a) for a, p in zip(actual, predicted) if a != 0]

    # Nothing to average: avoid ZeroDivisionError
    if not absolute_relative_errors:
        return 0.0

    return sum(absolute_relative_errors) / len(absolute_relative_errors)

# Example usage:
# The relative errors are 0.1, 0.05, 1/30 and 0.025, so the printed AARE is about 0.052.
# NOTE: actual_values and predicted_values are module-level names, so they stay
# defined (and mutable) for every cell that runs afterwards in the same kernel.
actual_values = [100, 200, 300, 400]
predicted_values = [90, 210, 310, 390]
aare_result = calculate_aare(actual_values, predicted_values)
print(f"AARE: {aare_result}")

import numpy as np

def calculate_threshold(aare_values):
    """
    Derive the detection threshold (Thd) from a collection of AARE values.

    Thd = mean(AARE) + 3 * std(AARE), with both statistics computed by NumPy
    (np.std with its default arguments).

    Parameters:
    - aare_values (array-like): The AARE values to summarise.

    Returns:
    - float: The threshold value (Thd).
    """
    center = np.mean(aare_values)
    spread = np.std(aare_values)
    return center + 3 * spread

# Example usage
# Mean is 0.07 and np.std is about 0.01414, so the printed threshold is about 0.1124.
aare_values = [0.05, 0.07, 0.06, 0.08, 0.09]
thd = calculate_threshold(aare_values)
print(f"Threshold (Thd): {thd}")

# Function for creating model
def train_model(train_events):
    """
    Train a fresh LSTM on a short sequence of values.

    The values become the targets (y); the inputs (x) are the positions
    1, 2, ..., len(train_events). For three values this is the same
    x = [1, 2, 3] that was previously hard-coded, but x and y now always have
    matching lengths.

    Parameters:
    - train_events (sequence of numbers): The values to train on, oldest first.

    Returns:
    - LSTM: The model instance after model.train_model() has been called.

    Raises:
    - ValueError: If train_events is empty.
    """
    n_events = len(train_events)
    if n_events == 0:
        raise ValueError("train_events must contain at least one value.")

    tensor_y = torch.tensor(train_events, dtype=torch.float32).view(-1, 1, 1)
    # One x position per target value, starting at 1
    tensor_x = torch.arange(1, n_events + 1, dtype=torch.float32).view(-1, 1, 1)

    # Create an instance of the LSTM model
    model = LSTM(tensor_x, tensor_y, input_size=1, hidden_size=10, num_layers=1, output_size=1, batch_size=3, num_epochs=50, learning_rate=0.01)
    model.train_model()  # Train the model

    return model



def report_anomaly(anomalous_event, write_api, bucket="system_state", org="ORG"):
    """
    Sends an anomalous event back to InfluxDB, storing it in the "anomaly" measurement
    with both the same value and time as the original event.

    The value and time are read through get_value() / get_time(), the same
    accessors every other cell in this notebook uses on queried records.

    Parameters:
    - anomalous_event: The record detected as an anomaly; must provide get_value() and get_time().
    - write_api: The InfluxDB write API used to store the point.
    - bucket (str): Target bucket. Defaults to "system_state".
    - org (str): Target organisation. Defaults to "ORG".
    """
    value = anomalous_event.get_value()
    timestamp = anomalous_event.get_time()

    point = (
        Point("anomaly")
        .tag("host", "host1")
        .field("value", value)
        .time(timestamp, WritePrecision.NS)
    )

    write_api.write(bucket=bucket, org=org, record=point)
    print(f"Anomalous event sent to InfluxDB: Value={value}, Time={timestamp}")

In [None]:
# Testing of InfluxDB with LSTM
"""
THE PLAN

Making the program start from any predefined time, and then continue to fetch data from that time and onwards.
And so it can process older data and then catch up to the present time.

It therefore processes a batch of 3 from the earliest timestamp in the range, and then updates the start time for the next iteration 
by incrementing the start time by a set time to both avoid duplicate events and to eventually catch up to the present time.

The program will run indefinitely, and will continue to fetch data from the InfluxDB and process it in batches of 3 until the program is stopped.
If there are not enough events for a batch, the program will wait for a set time before trying again.
When a batch of three is available it will be processed, and then it will again wait for another event to become available.

This way it is both flexible and efficient, and can be easily used to process either data in real-time or historical data


For each batch it follows the algorithm of RePAD2


To-Do:

Store actual values (Store whole event?) with predicted values in sliding window 

"""

# Testing of InfluxDB with LSTM: stream cpu_utilization events and train a
# model on every window of three consecutive events.
# Requires the influxdb-client package: pip install influxdb-client
# https://www.influxdata.com/blog/getting-started-with-python-and-influxdb-v2-0/

# Connection settings are taken from the environment when set, so real
# credentials never need to be written into the notebook. The literals are
# only local-development fallbacks.
influxdb_url = os.environ.get("INFLUXDB_V2_URL", "http://localhost:8086")
token = os.environ.get("INFLUXDB_V2_TOKEN", "random_token")
username = os.environ.get("INFLUXDB_USERNAME", "influx-admin")
password = os.environ.get("INFLUXDB_PASSWORD", "ThisIsNotThePasswordYouAreLookingFor")
org = os.environ.get("INFLUXDB_V2_ORG", "ORG")
bucket = "system_state"

# Instantiate the client and its query / write APIs
client = InfluxDBClient(url=influxdb_url, token=token, org=org, username=username, password=password)
write_api = client.write_api(write_options=SYNCHRONOUS)
query_api = client.query_api()

# Sliding window for Threshold
sliding_window = deque(maxlen=8064)
sliding_window_AARE = deque(maxlen=8064)

# Time parameters
poll_interval = 1    # seconds to sleep between polls
time_increment = 1   # seconds added to the last seen timestamp for the next query
start_time = "2014-04-10T00:00:00Z"

batch_nr = 0
is_first_batch = True

# RePAD2 specific
flag = True

# The three most recent events. Kept across polls so that windows stay
# contiguous even when events arrive one at a time.
recent_events = deque(maxlen=3)

try:
    while True:
        # Construct the Flux query (start_time moves forward after each poll)
        query = f'''
        from(bucket: "{bucket}")
         |> range(start: time(v: "{start_time}"))
         |> filter(fn: (r) => r["_measurement"] == "cpu_utilization")
        '''

        # Query the data
        events = list(query_api.query_stream(org=org, query=query))

        if not events:
            # Nothing newer than start_time: this test cell stops here
            print("No events found in range.")
            break

        for event in events:
            # Every event is stored exactly once
            sliding_window.append(event)
            recent_events.append(event)

            if len(recent_events) < 3:
                # Not enough history for a batch of three yet
                continue

            is_first_batch = False
            batch_events = list(recent_events)

            ################# LSTM Testing ############################
            model = train_model([e.get_value() for e in batch_events])
            pred = model.predict_next()
            print(f'Prediction: {pred}')
            ################# LSTM Testing ############################

        # Continue after the last consumed event so it is not fetched again
        last_event_time = events[-1].get_time()
        start_time = (last_event_time + timedelta(seconds=time_increment)).isoformat()

        time.sleep(poll_interval)
finally:
    # Release the client's HTTP resources however the loop ended
    client.close()

# Print a padding line
#print("Padding line")
#for event in sliding_window:
#    print(f'Time: {event.get_time()}, Value: {event.get_value()}')


In [None]:
### REPAD2 Algorithm ###

"""
THE PLAN

Making the program start from any predefined time, and then continue to fetch data from that time and onwards.
And so it can process older data and then catch up to the present time.

It therefore processes a batch of 3 from the earliest timestamp in the range, and then updates the start time for the next iteration 
by incrementing the start time by a set time to both avoid duplicate events and to eventually catch up to the present time.

The program will run indefinitely, and will continue to fetch data from the InfluxDB and process it in batches of 3 until the program is stopped.
If there are not enough events for a batch, the program will wait for a set time before trying again.
When a batch of three is available it will be processed, and then it will again wait for another event to become available.

This way it is both flexible and efficient, and can be easily used to process either data in real-time or historical data


For each batch it follows the algorithm of RePAD2


To-Do:

Store actual values (Store whole event?) with predicted values in sliding window 

"""

# RePAD2 on the InfluxDB stream: for every event that has three predecessors,
# predict it, track the prediction error (AARE) and report it as an anomaly
# when the error exceeds the adaptive threshold.
# Requires the influxdb-client package: pip install influxdb-client
# https://www.influxdata.com/blog/getting-started-with-python-and-influxdb-v2-0/

# Connection settings are taken from the environment when set, so real
# credentials never need to be written into the notebook. The literals are
# only local-development fallbacks.
influxdb_url = os.environ.get("INFLUXDB_V2_URL", "http://localhost:8086")
token = os.environ.get("INFLUXDB_V2_TOKEN", "random_token")
username = os.environ.get("INFLUXDB_USERNAME", "influx-admin")
password = os.environ.get("INFLUXDB_PASSWORD", "ThisIsNotThePasswordYouAreLookingFor")
org = os.environ.get("INFLUXDB_V2_ORG", "ORG")
bucket = "system_state"

# Instantiate the client and its query / write APIs
client = InfluxDBClient(url=influxdb_url, token=token, org=org, username=username, password=password)
write_api = client.write_api(write_options=SYNCHRONOUS)
query_api = client.query_api()

# Sliding windows: (event, prediction) pairs, and the AARE history used for Thd
sliding_window = deque(maxlen=8064)
sliding_window_AARE = deque(maxlen=8064)

# Time parameters
poll_interval = 1    # seconds to sleep between polls
time_increment = 1   # seconds added to the last seen timestamp for the next query
start_time = "2014-04-10T00:00:00Z"

T = 0  # number of events processed so far that had three predecessors
is_first_batch = True

# RePAD2 specific
flag = True
M = None  # Model currently used for prediction

# The three most recent events. Kept across polls so that every event is
# processed exactly once and always together with its true predecessors.
recent_events = deque(maxlen=3)


def _window_aare(window):
    """Return the AARE over all (event, prediction) pairs currently in window."""
    # Fresh lists on every call: reusing module-level lists would mix in values
    # from earlier iterations (and from the example cell that defines them).
    # NOTE(review): the RePAD2 paper defines AARE_T over the three most recent
    # points -- confirm whether averaging over the whole window is intended.
    actual = [event.get_value() for event, _ in window]
    predicted = [pred for _, pred in window]
    return calculate_aare(actual, predicted)


try:
    while True:
        # Construct the Flux query (start_time moves forward after each poll)
        query = f'''
        from(bucket: "{bucket}")
         |> range(start: time(v: "{start_time}"))
         |> filter(fn: (r) => r["_measurement"] == "cpu_utilization")
        '''

        # Query the data
        events = list(query_api.query_stream(org=org, query=query))

        if not events:
            # Nothing newer than start_time: this cell stops here
            print("No events found in range.")
            break

        for next_event in events:
            if len(recent_events) < 3:
                # Still collecting the first three events; nothing to predict yet
                recent_events.append(next_event)
                continue

            # The three events preceding next_event
            batch_events = list(recent_events)
            recent_events.append(next_event)
            is_first_batch = False
            train_values = [event.get_value() for event in batch_events]

            T += 1
            print(f'T: {T}')

            if T >= 2 and T < 5:
                M = train_model(train_values)
                pred_D_T_plus_1 = M.predict_next()
                print(f'Prediction: {pred_D_T_plus_1}')
                # Append the event and its prediction to the sliding window
                sliding_window.append((next_event, pred_D_T_plus_1))

            elif T >= 5 and T < 7:
                # Calculate AARE and append to sliding window
                AARE_T = _window_aare(sliding_window)
                sliding_window_AARE.append(AARE_T)
                M = train_model(train_values)
                pred_D_T_plus_1 = M.predict_next()
                # Append the event and its prediction to the sliding window
                sliding_window.append((next_event, pred_D_T_plus_1))

            elif T >= 7 and flag:
                pair_appended = False
                if T != 7:  # Use M to predict D_T
                    # NOTE(review): M was trained on an earlier window; confirm that
                    # predict_next() yields a forecast for the current event.
                    pred_D_T = M.predict_next()
                    # Append the event and its prediction to the sliding window
                    sliding_window.append((next_event, pred_D_T))
                    pair_appended = True

                # Calculate AARE and append to sliding window
                AARE_T = _window_aare(sliding_window)
                sliding_window_AARE.append(AARE_T)
                # Calculate Thd
                Thd = calculate_threshold(sliding_window_AARE)

                if AARE_T > Thd:
                    # Discard this event's pair before re-predicting -- but only if one
                    # was added above, otherwise an older event's pair would be removed.
                    if pair_appended:
                        sliding_window.pop()
                    # Train an LSTM model with D_T-3, D_T-2, D_T-1
                    model = train_model(train_values)
                    # Use the model to predict D_T
                    pred_D_T = model.predict_next()
                    # Append the event and its prediction to the sliding window
                    sliding_window.append((next_event, pred_D_T))
                    # Re-calculate AARE_T; it replaces the value stored above
                    AARE_T = _window_aare(sliding_window)
                    sliding_window_AARE[-1] = AARE_T
                    # Re-calculate Thd from the AARE history (not from the single value,
                    # which would always make Thd equal to AARE_T)
                    Thd = calculate_threshold(sliding_window_AARE)

                    if AARE_T <= Thd:
                        # D_T is not reported as anomaly
                        # Replace M with the new model
                        M = model
                        # Update flag to True
                        flag = True
                    else:
                        # D_T reported as anomaly immediately
                        report_anomaly(next_event, write_api)
                        # Update flag to False
                        flag = False

            elif T >= 7:  # flag is False
                # Train an LSTM model with D_T-3, D_T-2, D_T-1
                model = train_model(train_values)
                # Use the model to predict D_T
                pred_D_T = model.predict_next()
                # Append the event and its prediction to the sliding window
                sliding_window.append((next_event, pred_D_T))
                # Calculate AARE_T
                AARE_T = _window_aare(sliding_window)
                sliding_window_AARE.append(AARE_T)
                # Calculate Thd from the AARE history
                Thd = calculate_threshold(sliding_window_AARE)

                if AARE_T <= Thd:
                    # D_T is not reported as anomaly
                    # Replace M with the new model
                    M = model
                    # Update flag to True
                    flag = True
                else:
                    # D_T reported as anomaly immediately
                    report_anomaly(next_event, write_api)
                    # Update flag to False
                    flag = False

        # Continue after the last consumed event so it is not fetched again.
        # Previously start_time was never advanced, so every poll reprocessed
        # the full history.
        last_event_time = events[-1].get_time()
        start_time = (last_event_time + timedelta(seconds=time_increment)).isoformat()

        time.sleep(poll_interval)
finally:
    # Release the client's HTTP resources however the loop ended
    client.close()

# Print a padding line
#print("Padding line")
#for event in sliding_window:
#    print(f'Time: {event.get_time()}, Value: {event.get_value()}')


In [None]:
"""
THE PLAN

Making the program start from any predefined time, and then continue to fetch data from that time and onwards.
And so it can process older data and then catch up to the present time.

It therefore processes a batch of 3 from the earliest timestamp in the range, and then updates the start time for the next iteration 
by incrementing the start time by a set time to both avoid duplicate events and to eventually catch up to the present time.

The program will run indefinitely, and will continue to fetch data from the InfluxDB and process it in batches of 3 until the program is stopped.
If there are not enough events for a batch, the program will wait for a set time before trying again.
When a batch of three is available it will be processed, and then it will again wait for another event to become available.

This way it is both flexible and efficient, and can be easily used to process either data in real-time or historical data


For each batch it follows the algorithm of RePAD2

"""

# Testing of InfluxDB with LSTM: skeleton of the RePAD2 control flow. The
# individual steps are still TODO comments; only the branching is executed.
# Requires the influxdb-client package: pip install influxdb-client
# https://www.influxdata.com/blog/getting-started-with-python-and-influxdb-v2-0/

# Connection settings are taken from the environment when set, so real
# credentials never need to be written into the notebook. The literals are
# only local-development fallbacks.
influxdb_url = os.environ.get("INFLUXDB_V2_URL", "http://localhost:8086")
token = os.environ.get("INFLUXDB_V2_TOKEN", "random_token")
username = os.environ.get("INFLUXDB_USERNAME", "influx-admin")
password = os.environ.get("INFLUXDB_PASSWORD", "ThisIsNotThePasswordYouAreLookingFor")
org = os.environ.get("INFLUXDB_V2_ORG", "ORG")
bucket = "system_state"

# Instantiate the QueryAPI
client = InfluxDBClient(url=influxdb_url, token=token, org=org, username=username, password=password)
query_api = client.query_api()

# Sliding window for Threshold
sliding_window = deque(maxlen=8064)

# Time parameters
poll_interval = 1    # seconds to sleep between polls
time_increment = 1   # seconds added to the last seen timestamp for the next query
start_time = "2014-04-10T00:00:00Z"

batch_nr = 0
is_first_batch = True

# RePAD2 specific
flag = True

# Placeholders until the TODO steps below compute real values. Without them
# the comparisons raise NameError on a fresh kernel (or silently reuse stale
# values from another cell). With Thd = inf nothing is treated as an anomaly.
AARE_T = 0.0
Thd = float("inf")

# The three most recent events. Kept across polls so that every event is
# stored exactly once and batches stay contiguous.
recent_events = deque(maxlen=3)

try:
    while True:
        # Construct the Flux query (start_time moves forward after each poll)
        query = f'''
        from(bucket: "{bucket}")
         |> range(start: time(v: "{start_time}"))
         |> filter(fn: (r) => r["_measurement"] == "cpu_utilization")
        '''

        # Query the data
        events = list(query_api.query_stream(org=org, query=query))

        if not events:
            # Nothing newer than start_time: this test cell stops here
            print("No events found in range.")
            break

        for event in events:
            sliding_window.append(event)
            recent_events.append(event)

            if len(recent_events) < 3:
                # Not enough history for a batch of three yet
                continue

            is_first_batch = False
            # The batch of 3 events ending with the current event
            batch_events = list(recent_events)

            ##############################################################################
            # All RePAD2 processing goes here (I think)

            T = len(sliding_window) - 1  # T is the index of the last event in the sliding window

            # `and`, not `&`: the bitwise operator binds tighter than the
            # comparisons and turned these conditions into something else.
            if T >= 2 and T < 5:
                # Train LSTM model using last 3 events in sliding window
                # Always set Y = 1, 2, 3 ?
                # Let M be the resulting model and use it to predict D_T+1
                pass

            elif T >= 5 and T < 7:
                # Calculate AARE_T
                # Train LSTM model using last 3 events in sliding window
                # Let M be the resulting model and use it to predict D_T+1
                pass

            elif T >= 7 and flag:
                if T != 7:  # Use M to predict D_T
                    pass
                # Calculate AARE_T
                # Calculate Thd

                if AARE_T <= Thd:
                    # D_T is not reported as anomaly
                    pass

                else:
                    # Train an LSTM model with the last 3 events in the sliding window
                    # Use the model to predict D_T
                    # Re-calculate AARE_T
                    # Re-calculate Thd
                    if AARE_T <= Thd:
                        # D_T is not reported as anomaly
                        # Replace M with the new model
                        # Update flag to True
                        pass

                    else:
                        # D_T reported as anomaly immediately
                        # Update flag to False
                        pass

            elif T >= 7 and not flag:
                # Train an LSTM model with D_T-3, D_T-2, D_T-1
                # Use the model to predict D_T
                # Calculate AARE_T
                # Calculate Thd

                if AARE_T <= Thd:
                    # D_T is not reported as anomaly
                    # Replace M with the new model
                    # Update flag to True
                    pass

                else:
                    # D_T reported as anomaly immediately
                    # Update flag to False
                    pass

            ##############################################################################

        # Continue after the last consumed event so it is not fetched again
        last_event_time = events[-1].get_time()
        start_time = (last_event_time + timedelta(seconds=time_increment)).isoformat()

        time.sleep(poll_interval)
finally:
    # Release the client's HTTP resources however the loop ended
    client.close()

# Show everything that was collected
for event in sliding_window:
    print(f'Time: {event.get_time()}, Value: {event.get_value()}')
