In [114]:
# Dataset-size labels to process, and the directory that holds the per-size
# log files read later in the notebook ("<dir>/<N>_top.log").
# NOTE(review): `dir` shadows the Python builtin of the same name; it is kept
# because later cells reference it.
N_values = "10k 100k 200k 400k 600k 800k 1m".split()
dir = "outputs/memory"

# Generate data

In [115]:
import os  # no longer used by this cell; kept in case other cells rely on it
import subprocess

# Run ./analysis_memory_usage.sh once per dataset size, passing the output
# directory and the size label as its two arguments.
# The command is given as an argument list (no shell involved), so both values
# reach the script verbatim, and a non-zero exit status is reported instead of
# being silently discarded.
for N in N_values:
    completed = subprocess.run(["./analysis_memory_usage.sh", dir, N])
    if completed.returncode != 0:
        print(f"analysis_memory_usage.sh exited with status "
              f"{completed.returncode} for N={N}")

NOTICE:  index "sift_base10k_index" does not exist, skipping


DROP INDEX
CREATE INDEX
DROP INDEX


NOTICE:  index "sift_base100k_index" does not exist, skipping


DROP INDEX
CREATE INDEX
DROP INDEX


NOTICE:  index "sift_base200k_index" does not exist, skipping


DROP INDEX
CREATE INDEX
DROP INDEX


NOTICE:  index "sift_base400k_index" does not exist, skipping


DROP INDEX
CREATE INDEX
DROP INDEX
DROP INDEX


NOTICE:  index "sift_base600k_index" does not exist, skipping


CREATE INDEX
DROP INDEX


NOTICE:  index "sift_base800k_index" does not exist, skipping


DROP INDEX
CREATE INDEX
DROP INDEX


NOTICE:  index "sift_base1m_index" does not exist, skipping


DROP INDEX
CREATE INDEX
DROP INDEX


# Parse data

In [116]:
import re
from datetime import datetime

# Patterns are compiled once instead of being rebuilt for every log file.
# Event marker line: "YYYY-MM-DD HH:MM:SS | <event name>".
EVENT_LINE_RE = re.compile(r'^\d{4}-\d{2}-\d{2} (\d{2}:\d{2}:\d{2}) \| (.+)')
# Header line of one `top` snapshot: "top - HH:MM:SS".
TOP_HEADER_RE = re.compile(r'top - (\d{2}:\d{2}:\d{2})')
# Memory summary line of a snapshot; the captured group is the "used" figure.
MEM_USED_RE = re.compile(r'MiB Mem :\s+\d+\.\d+ total,\s+\d+\.\d+ free,\s+(\d+\.\d+) used')


def parse_top_log(lines):
    """Extract event markers and memory samples from the lines of one log.

    Args:
        lines: iterable of text lines from a "<N>_top.log" file.

    Returns:
        A tuple ``(event_timestamps, memory_data)``:

        * ``event_timestamps`` -- list of ``(HH:MM:SS, event_name)`` tuples,
          one per event marker line, in file order. The event name is
          everything after the first " | " separator, stripped.
        * ``memory_data`` -- list of ``(HH:MM:SS, sample)`` tuples, one per
          "top - HH:MM:SS" header, in file order. ``sample`` is a dict with
          key ``'memory'`` holding the used-memory figure as a string; the
          dict is empty when no matching "MiB Mem" line followed the header.
    """
    event_timestamps = []
    memory_data = []

    current_timestamp = None
    # Always bound, so a "MiB Mem" line seen before any snapshot header cannot
    # raise NameError or write into a dict left over from a previous file.
    current_data = {}

    for line in lines:
        event_match = EVENT_LINE_RE.search(line)
        if event_match:
            event_timestamps.append(
                (event_match.group(1), event_match.group(2).strip())
            )

        top_match = TOP_HEADER_RE.search(line)
        if top_match:
            # A new snapshot starts: flush the previous one, if there was one.
            if current_timestamp is not None:
                memory_data.append((current_timestamp, current_data))
            current_timestamp = top_match.group(1)
            current_data = {}
        elif 'MiB Mem' in line:
            mem_match = MEM_USED_RE.search(line)
            if mem_match:
                current_data['memory'] = mem_match.group(1)

    # Flush the last snapshot; skipped when the log contained no header at
    # all, so no (None, ...) entry is ever emitted.
    if current_timestamp is not None:
        memory_data.append((current_timestamp, current_data))

    return event_timestamps, memory_data


# One (N, event_timestamps, memory_data) tuple per dataset size.
data = []
for N in N_values:

    # Define the log file path
    TOP_LOG = f"{dir}/{N}_top.log"

    # Read the log file
    with open(TOP_LOG, 'r') as file:
        lines = file.readlines()

    event_timestamps, memory_data = parse_top_log(lines)
    data.append((N, event_timestamps, memory_data))

# Plot data

In [117]:
import plotly.graph_objects as go

# One figure per dataset size: used memory over time, with a dotted vertical
# line and a text label at every event timestamp.
# NOTE(review): timestamps are parsed from a time-of-day only ('%H:%M:%S'), so
# every point lands on the same default date; a run that crosses midnight
# would not be ordered correctly -- confirm runs never span midnight.
for N, event_timestamps, memory_data in data:

    # Memory stuff

    # Keep only snapshots that have both a timestamp and a parsed 'memory'
    # value; an incomplete snapshot would otherwise raise KeyError/TypeError.
    samples = [
        (timestamp, sample['memory'])
        for timestamp, sample in memory_data
        if timestamp is not None and 'memory' in sample
    ]
    if not samples:
        # min()/max() below are undefined for an empty series.
        print(f"No memory samples found for {N}; skipping plot")
        continue

    x_values = [datetime.strptime(timestamp, '%H:%M:%S') for timestamp, _ in samples]
    y_values = [float(used) for _, used in samples]
    fig = go.Figure(data=go.Scatter(x=x_values, y=y_values, mode='lines+markers'))

    # Event stuff

    # Group events that share a timestamp so they get a single annotation.
    events_by_time = {}
    for timestamp, event_name in event_timestamps:
        events_by_time.setdefault(timestamp, []).append(event_name)

    # Plotly annotation text breaks lines on '<br>', not on '\n'.
    event_times = [
        (datetime.strptime(timestamp, '%H:%M:%S'), '<br>'.join(names))
        for timestamp, names in events_by_time.items()
    ]

    # Vertical extent of the trace, computed once and reused for the event
    # lines and for the two label heights (40% and 60% of the range).
    min_memory = min(y_values)
    max_memory = max(y_values)
    low_annotation_point = min_memory + 0.4 * (max_memory - min_memory)
    high_annotation_point = min_memory + 0.6 * (max_memory - min_memory)

    for index, (timestamp, event_name) in enumerate(event_times):
        fig.add_shape(type='line',
                      x0=timestamp, y0=min_memory, x1=timestamp, y1=max_memory,
                      line=dict(color='red', width=1, dash='dot'))

        # Alternate label heights so neighbouring labels overlap less.
        annotation_y = low_annotation_point if index % 2 else high_annotation_point
        fig.add_annotation(x=timestamp, y=annotation_y, text=event_name, showarrow=False)

    fig.update_layout(title=f"Memory Over Time for Index Creation on {N} rows",
                      xaxis_title='Time',
                      yaxis_title='Memory (MiB)')

    fig.show()