In [30]:
import networkx as nx
from networkx import number_connected_components, get_node_attributes
import pandas as pd
import numpy as np
import dateutil.parser
import plotly.graph_objects as go
import datetime
import plotly.express as px

In [34]:
def convert_to_iso_format(seconds, nanoseconds):
    """Combine split epoch seconds / nanoseconds into UTC timestamps.

    Parameters
    ----------
    seconds : scalar or pandas.Series
        Whole seconds since the Unix epoch.
    nanoseconds : scalar or pandas.Series
        Sub-second remainder, expressed in nanoseconds.

    Returns
    -------
    pandas.Timestamp or pandas.Series
        Timezone-aware (UTC) timestamps, element-wise for Series input.
        Note that, despite the function name, the values are timestamps and
        not ISO-8601 formatted strings.
    """
    # The two parts are converted separately and then added. Computing
    # ``seconds * 1e9 + nanoseconds`` would go through float64, which cannot
    # represent present-day epoch nanoseconds (> 2**53) exactly and therefore
    # corrupts the low-order nanosecond digits.
    whole_seconds = pd.to_datetime(seconds, unit='s', utc=True)
    sub_second = pd.to_timedelta(nanoseconds, unit='ns')
    return whole_seconds + sub_second

def plot_timestamp(datelist, title):
    """Show the given dates as markers on a single horizontal time axis.

    Parameters
    ----------
    datelist : sequence
        Values used as x coordinates; one marker is drawn per element.
    title : object
        Figure title (formatted into a string).

    The figure is displayed with ``fig.show()``; nothing is returned.
    """
    # Every marker gets y = 0 so that all points sit on the zero line.
    baseline = [0] * len(datelist)
    markers = go.Scatter(x=datelist, y=baseline, mode="markers", marker_size=20)

    fig = go.Figure()
    fig.add_trace(markers)

    # Only the (thick, black) zero line of the y axis is kept visible.
    fig.update_xaxes(showgrid=False)
    y_axis_style = dict(
        showgrid=False,
        zeroline=True,
        zerolinecolor="black",
        zerolinewidth=3,
        showticklabels=False,
    )
    fig.update_yaxes(**y_axis_style)

    fig.update_layout(height=200, plot_bgcolor="white", title=f"{title}")
    fig.show()
    
def plot_nb_event(data_edges, time_to_study, start_time, end_time):
    """Plot the cumulative number of events inside a time window.

    Parameters
    ----------
    data_edges : pandas.DataFrame
        Event table; must contain the column named by ``time_to_study``.
    time_to_study : str
        Name of the timestamp column used for filtering, ordering and as
        the x axis.
    start_time, end_time
        Exclusive lower / upper bounds of the window; they are compared
        directly against the values of the ``time_to_study`` column.

    The figure is displayed with ``fig.show()``; nothing is returned.
    """
    event_times = data_edges[time_to_study]
    # Filter on the function's own arguments. Relying on notebook-level
    # variables here would ignore the caller's window and raise NameError
    # whenever those variables have not been defined yet.
    in_window = (event_times > start_time) & (event_times < end_time)

    # ``assign`` builds a new frame, so the caller's data is never modified.
    datelist = data_edges[in_window].sort_values(time_to_study)
    datelist = datelist.assign(cumulative_count=range(1, len(datelist) + 1))

    fig = px.line(
        datelist,
        x=time_to_study,
        y='cumulative_count',
        title='Cumulative Number of Events Over Time',
    )
    fig.update_xaxes(rangeslider_visible=True)
    fig.show()

In [9]:
# Input files in JSON Lines format (one record per line).
node_file_path = 'data/bare_effects_filtered.jsonl'
edges_file_path = 'data/bare_edge_effects_filtered.jsonl'

# Only the edge file is loaded here; the node file path is kept for reference.
data_edges = pd.read_json(edges_file_path, lines=True)

# Flatten the nested 'timestamp' column into top-level columns placed next to
# the original ones, then discard the 'uuid' and 'type' columns.
# NOTE(review): the next cell expects '<field>.seconds' / '<field>.nanos'
# columns to come out of this flattening - confirm against the data.
data_edges = pd.concat(
    [
        data_edges.drop(columns='uuid'),
        pd.json_normalize(data_edges['timestamp']),
    ],
    axis='columns',
).drop(columns='type')

In [10]:
# Timestamp fields that may be present as a '<field>.seconds' /
# '<field>.nanos' pair of columns.
base_columns = ['timestamp', 'firstSeen', 'lastSeen']

# For every field whose two halves both exist, add a '<field>_iso' column
# holding the combined timestamp (computed on whole columns at once).
for base_col in base_columns:
    seconds_col, nanos_col = f'{base_col}.seconds', f'{base_col}.nanos'

    # Skip the field when either half is missing from the table.
    if seconds_col not in data_edges or nanos_col not in data_edges:
        continue

    new_col = f'{base_col}_iso'
    data_edges[new_col] = convert_to_iso_format(
        data_edges[seconds_col],
        data_edges[nanos_col],
    )


In [49]:
# Timestamp column analysed in the cells that follow.
time_to_study = 'firstSeen_iso'

# Preview: place the first ten values of that column on a one-dimensional timeline.
datelist = data_edges[time_to_study].head(10).to_list()
plot_timestamp(datelist, time_to_study)

In [50]:

    
# fig = px.line(datelist[:10], x=f'{time_to_study}', y='cumulative_count', title='Cumulative Number of Events Over Time')
# fig.update_xaxes(rangeslider_visible=True)
# fig.show()

# Time window (UTC) referred to as the "attack" window by the variable names.
startime_attack = datetime.datetime(
    year=2021, month=3, day=28, hour=22, minute=34, second=31,
    tzinfo=datetime.timezone.utc,
)
# Alternative, wider end of the window:
# endtime_attack = datetime.datetime(2021,3,29, 0,0,0, tzinfo=datetime.timezone.utc)
endtime_attack = datetime.datetime(
    year=2021, month=3, day=28, hour=22, minute=38, second=0,
    tzinfo=datetime.timezone.utc,
)

# Cumulative event count inside that window.
plot_nb_event(data_edges, time_to_study, startime_attack, endtime_attack)

In [51]:
def plot_event_rate(data_edges, time_to_study, start_time, end_time):
    """Plot the instantaneous event rate inside a time window.

    Parameters
    ----------
    data_edges : pandas.DataFrame
        Event table; must contain the column named by ``time_to_study``.
    time_to_study : str
        Name of the timestamp column used for filtering, ordering and as
        the x axis.
    start_time, end_time
        Exclusive lower / upper bounds of the window.

    The rate at each event is the change in cumulative count divided by
    the number of seconds since the previous event. Because the cumulative
    count grows by one per row, this is the reciprocal of the gap between
    consecutive events; the first event has no predecessor and gets NaN.

    The figure is displayed with ``fig.show()``; nothing is returned.
    """
    timestamps = data_edges[time_to_study]
    inside_window = (timestamps > start_time) & (timestamps < end_time)

    datelist = (
        data_edges[inside_window]
        .sort_values(time_to_study)
        # Running event number, starting at 1.
        .assign(cumulative_count=lambda frame: range(1, len(frame) + 1))
        .assign(
            # Seconds elapsed since the previous event.
            time_diff=lambda frame: frame[time_to_study].diff().dt.total_seconds(),
            # Increase of the running count since the previous event.
            count_diff=lambda frame: frame['cumulative_count'].diff(),
        )
        # Events per second between consecutive events.
        .assign(event_rate=lambda frame: frame['count_diff'] / frame['time_diff'])
    )

    fig = px.line(datelist, x=time_to_study, y='event_rate', title='Rate of Events Over Time')
    fig.update_xaxes(rangeslider_visible=True)
    fig.show()
    
# Unsmoothed event rate over the attack window. This call uses the
# four-argument plot_event_rate defined in this cell; the next cell redefines
# the same name with an additional required `window_size` argument.
plot_event_rate(data_edges,time_to_study, startime_attack, endtime_attack)

In [58]:
def plot_event_rate(data_edges, time_to_study, start_time, end_time, window_size):
    """Plot a rolling-window event rate for the events inside a time window.

    Parameters
    ----------
    data_edges : pandas.DataFrame
        Event table; must contain the column named by ``time_to_study``.
    time_to_study : str
        Name of the timestamp column used for filtering, ordering and as
        the x axis.
    start_time, end_time
        Exclusive lower / upper bounds of the window.
    window_size : int
        Number of consecutive events covered by each rolling window.

    The rate at each event is the number of events counted over the last
    ``window_size`` rows divided by the seconds elapsed over those rows.
    Rows without a full window behind them have no rate (NaN).

    If no event falls inside the window a message is printed and nothing is
    plotted. Otherwise the figure is displayed with ``fig.show()``. Nothing
    is returned in either case.
    """
    event_times = data_edges[time_to_study]
    edges_to_see = data_edges[(event_times > start_time) & (event_times < end_time)]

    if edges_to_see.empty:
        print("No data available in the specified time range.")
        return

    # ``assign`` builds a new frame, so the caller's data is never modified.
    datelist = edges_to_see.sort_values(time_to_study)
    datelist = datelist.assign(cumulative_count=range(1, len(datelist) + 1))

    # Seconds since the previous event; the first event has no predecessor and
    # is given a gap of 0. The filled result is assigned back explicitly:
    # calling ``fillna(..., inplace=True)`` on a column selection is deprecated
    # and has no effect on the frame under pandas copy-on-write.
    datelist['time_diff'] = datelist[time_to_study].diff().dt.total_seconds().fillna(0)

    # Events counted and seconds elapsed over the last `window_size` rows.
    datelist['rolling_count_diff'] = (
        datelist['cumulative_count'].diff().rolling(window=window_size).sum()
    )
    datelist['rolling_time_diff'] = datelist['time_diff'].rolling(window=window_size).sum()

    # Events per second over the window.
    datelist['event_rate'] = datelist['rolling_count_diff'] / datelist['rolling_time_diff']

    # The title reports the window size actually used.
    title = f'Rate of Events (Over {window_size} Events Window) from {start_time} to {end_time}'
    fig = px.line(datelist, x=time_to_study, y='event_rate', title=title)
    fig.update_xaxes(rangeslider_visible=True)
    fig.show()

# Example usage: rolling event rate over the attack window, using a rolling
# window of 15 events.
plot_event_rate(data_edges,time_to_study, startime_attack, endtime_attack, window_size=15)

In [61]:
# Time window (UTC) referred to as the "benign" window by the variable names.
# NOTE(review): the start is in 2020 while the end is in 2021, i.e. the window
# spans more than a year - confirm that the start year is intended.
startime_benign = datetime.datetime(
    year=2020, month=3, day=6, hour=0, minute=0, second=0,
    tzinfo=datetime.timezone.utc,
)
endtime_benign = datetime.datetime(
    year=2021, month=3, day=27, hour=22, minute=38, second=0,
    tzinfo=datetime.timezone.utc,
)

# Rolling event rate over that window, using a rolling window of 10 events.
plot_event_rate(
    data_edges,
    time_to_study,
    startime_benign,
    endtime_benign,
    window_size=10,
)