This Jupyter Notebook code compares the average execution time and the number of discovered patterns for TaSTSP with varying lengths of stq, starting from sequences containing a single event type up to sequences containing 10 event types. The experiments are repeated for four different theta values (0.15, 0.1, 0.05, 0.01), and the results are plotted using two line plots.

In [15]:
import os
import sys
import time
import random
import pandas as pd
import matplotlib.pyplot as plt

sys.path.append('path/to/src')

from src.TaSTSPM import tastsp_algorithm
from src.Sequence import Dataset, Sequence, Element

# Experiment configuration
dataset_path = '../data/collisions_Seattle_processed.csv'
# R and T are forwarded unchanged to tastsp_algorithm
# (presumably a spatial radius and a time window -- confirm against src.TaSTSPM).
R, T = 200.0, 8640
# Ten thresholds, 0.01 through 0.10; rounding strips floating-point noise.
thetas = [round(0.01 * step, 2) for step in range(1, 11)]
sequence_lengths = list(range(1, 4))
# Number of runs averaged for every (length, theta) combination.
num_repeats = 10

# Build the TaSTSP dataset from the CSV file, then snapshot the keys of its
# `dataset_dict` into F (F is later used as the pool of event types to draw from).
print("Loading dataset for TaSTSP...")
dataset_tastsp = Dataset(dataset_path)
F = [*dataset_tastsp.dataset_dict.keys()]
print("Dataset loaded.")

# Function to generate random sequences
def generate_random_sequences(event_types, num_sequences):
    """Return ``num_sequences`` single-element sequences of random events.

    Every sequence wraps exactly one ``Element`` whose event is picked with
    ``random.choice`` from ``event_types``; picks are independent, so the
    same event may appear in several sequences.

    Args:
        event_types: Non-empty indexable collection of candidate events
            (``random.choice`` raises ``IndexError`` on an empty one).
        num_sequences: How many sequences to build; ``0`` yields ``[]``.

    Returns:
        A list of ``Sequence`` objects, in the order they were drawn.
    """
    def _single_event_sequence():
        picked = random.choice(event_types)
        return Sequence([Element(picked)])

    return [_single_event_sequence() for _ in range(num_sequences)]

# Initialize results storage: one list per entry of `sequence_lengths`, filled
# in the same order as `thetas` so later code can pair them up positionally.
execution_times = {length: [] for length in sequence_lengths}
pattern_counts = {length: [] for length in sequence_lengths}

# Perform experiments: for every (length, theta) pair, average the elapsed
# time and the result size of tastsp_algorithm over `num_repeats` runs.
# Note that `length` is passed as `num_sequences`, i.e. it is the number of
# single-event sequences placed in stq.
for length in sequence_lengths:
    for theta in thetas:
        total_time = 0.0
        total_patterns = 0
        for _ in range(num_repeats):
            # A fresh random stq is drawn before the timer starts, so only
            # the algorithm call itself is measured.
            stq = generate_random_sequences(F, length)
            # perf_counter() is a monotonic, high-resolution clock meant for
            # measuring short durations; time.time() can jump when the system
            # clock is adjusted and may have coarser resolution.
            start_time = time.perf_counter()
            result_tastsp = tastsp_algorithm(dataset_tastsp, F, R, T, theta, stq)
            total_time += time.perf_counter() - start_time
            total_patterns += len(result_tastsp)
        average_time = total_time / num_repeats
        average_patterns = total_patterns / num_repeats
        execution_times[length].append(average_time)
        pattern_counts[length].append(average_patterns)
        print(f"Length: {length}, Theta: {theta}, Avg Time: {average_time}, Avg Patterns: {average_patterns}")


Loading dataset for TaSTSP...
Loading data from ../data/collisions_Seattle_processed.csv
Data read successfully: 15352 rows
Dataset loaded.


KeyboardInterrupt: 

In [None]:
# Line chart of the averaged run time against theta, one curve per entry of
# `sequence_lengths`.
fig, ax = plt.subplots(figsize=(10, 6))
for length in sequence_lengths:
    ax.plot(thetas, execution_times[length], label=f'Sequence Length={length}')
ax.set_xlabel('Theta')
ax.set_ylabel('Average Execution Time (seconds)')
ax.set_title('Average Execution Time for TaSTSP')
ax.legend()
ax.grid(True)
plt.show()

# Plot the number of discovered patterns as a grouped bar chart: one group per
# theta, one bar per entry of `sequence_lengths` inside each group.
plt.figure(figsize=(10, 6))
width = 0.2  # width of the bars
x = range(len(thetas))

for i, length in enumerate(sequence_lengths):
    # Shift the i-th series right by i bar widths so the bars sit side by side.
    plt.bar([p + width * i for p in x], pattern_counts[length], width, label=f'Sequence Length={length}')

plt.xlabel('Theta')
plt.ylabel('Average Number of Discovered Patterns')
plt.title('Average Number of Discovered Patterns for TaSTSP')
# Bars are center-aligned at p + width * i for i = 0 .. n-1, so the middle of
# each group is p + width * (n - 1) / 2. The previous fixed offset of
# 1.5 * width was only correct for four series and put the tick labels
# off-center for the three series plotted here.
group_center = width * (len(sequence_lengths) - 1) / 2
plt.xticks([p + group_center for p in x], thetas)
plt.legend()
plt.grid(True)
plt.show()