In [8]:
# Import necessary modules and algorithms
import os
import sys
import time
import pandas as pd
import matplotlib.pyplot as plt
import itertools
import random

In [10]:


# Make the project package importable from this notebook.
# NOTE(review): 'path/to/src' looks like a placeholder. The imports below are
# written as `src.<module>`, so the entry added to sys.path presumably has to be
# the directory that CONTAINS `src` (not `src` itself) — confirm and replace.
sys.path.append('path/to/src')

# run_algorithm drives the baseline miners (called below with 'CST_SPMiner' and
# 'STBFM'); tastsp_algorithm is the TaSTSP miner; Dataset/Sequence/Element are
# used to load the data and to build the TaSTSP queries.
# NOTE(review): print_sequences is not referenced in the cells shown here.
from src.other_algorithms.run_algorithm import run_algorithm, print_sequences
from src.TaSTSPM import tastsp_algorithm
from src.Sequence import Dataset, Sequence, Element

# Define parameters
dataset_path = '../data/crimes_Boston_reduced.csv'
# R, T and theta are forwarded unchanged to every miner below.
# NOTE(review): presumably R is a spatial radius, T a time window and theta a
# significance threshold; their units are not visible here — confirm.
R = 500
T = 43200
theta = 0.01


def _timed_call(func, *args):
    """Call ``func(*args)`` and return ``(result, elapsed_seconds)``.

    The duration is measured with ``time.perf_counter()``: unlike
    ``time.time()`` it is monotonic and high resolution, so the measurement is
    not distorted by system clock adjustments while a long run is in progress.
    """
    started = time.perf_counter()
    outcome = func(*args)
    return outcome, time.perf_counter() - started


# Load dataset for TaSTSP
print("Loading dataset for TaSTSP...")
dataset_tastsp = Dataset(dataset_path)
# The keys of dataset_dict are used as the list of event types (F).
F = list(dataset_tastsp.dataset_dict.keys())
print("Dataset loaded.")

# Run CST_SPMiner
# NOTE(review): the meaning of the trailing positional argument (2) is defined
# by run_algorithm and is not visible here.
print("Running CST_SPMiner...")
result_CST_SPMiner, cst_spminer_time = _timed_call(
    run_algorithm, dataset_path, R, T, theta, 'CST_SPMiner', 2
)
# The result is treated as a collection of groups; the pattern count is the
# total size of all groups.
cst_spminer_patterns = sum(len(level) for level in result_CST_SPMiner)
print("CST_SPMiner completed.")

# Run STBFM (same parameters and the same way of counting patterns)
print("Running STBFM...")
result_STBFM, stbfm_time = _timed_call(
    run_algorithm, dataset_path, R, T, theta, 'STBFM', 2
)
stbfm_patterns = sum(len(level) for level in result_STBFM)
print("STBFM completed.")

# Run TaSTSP for each event type (singular sequences).
# Both lists are aligned with F: entry i belongs to event type F[i].
tastsp_times_single = []
tastsp_patterns_single = []

print("Running TaSTSP for singular event types...")
for event_type in F:
    print(event_type)
    # The query is a list holding one sequence made of a single element.
    stq = [Sequence([Element(event_type)])]
    result_tastsp, elapsed = _timed_call(
        tastsp_algorithm, dataset_tastsp, F, R, T, theta, stq
    )
    tastsp_times_single.append(elapsed)
    tastsp_patterns_single.append(len(result_tastsp))

# Function to generate random sequences
def generate_random_sequences(event_types, num_sequences):
    """Build a list of ``num_sequences`` single-element sequences.

    For every entry one event type is drawn with ``random.choice`` (so the
    same event type may be drawn more than once) and wrapped as
    ``Sequence([Element(event_type)])``.

    Args:
        event_types: Non-empty sequence of event types to draw from.
        num_sequences: Number of sequences to create; zero or a negative
            value yields an empty list.

    Returns:
        list: The generated ``Sequence`` objects, in draw order.
    """
    return [
        Sequence([Element(random.choice(event_types))])
        for _ in range(num_sequences)
    ]

# Run TaSTSP for randomly selected sequences
num_sequences = 10

# Seed the global RNG so that the same random queries are drawn on every
# Restart & Run All; without a seed the timings and charts below describe a
# different set of queries on each run.
RANDOM_SEED = 42
random.seed(RANDOM_SEED)


def _benchmark_tastsp(queries):
    """Run TaSTSP once per query and collect timings and result sizes.

    Args:
        queries: Iterable of queries; each one is passed unchanged as the last
            argument of ``tastsp_algorithm``.

    Returns:
        tuple[list, list]: ``(times, pattern_counts)``, both aligned with
        ``queries``. Times are seconds measured with ``time.perf_counter()``
        (monotonic, unaffected by system clock adjustments); pattern counts
        are ``len()`` of each result.
    """
    times = []
    pattern_counts = []
    for query in queries:
        started = time.perf_counter()
        found = tastsp_algorithm(dataset_tastsp, F, R, T, theta, query)
        times.append(time.perf_counter() - started)
        pattern_counts.append(len(found))
    return times, pattern_counts


# Each query is a list of 2 (resp. 3) single-element sequences whose event
# types are drawn independently, so one query may repeat an event type.
print("Running TaSTSP for two randomly selected event types...")
random_sequences_two = [generate_random_sequences(F, 2) for _ in range(num_sequences)]
tastsp_times_two, tastsp_patterns_two = _benchmark_tastsp(random_sequences_two)

print("Running TaSTSP for three randomly selected event types...")
random_sequences_three = [generate_random_sequences(F, 3) for _ in range(num_sequences)]
tastsp_times_three, tastsp_patterns_three = _benchmark_tastsp(random_sequences_three)

# Calculate averages for TaSTSP
average_tastsp_time_two = sum(tastsp_times_two) / len(tastsp_times_two)
average_tastsp_patterns_two = sum(tastsp_patterns_two) / len(tastsp_patterns_two)
average_tastsp_time_three = sum(tastsp_times_three) / len(tastsp_times_three)
average_tastsp_patterns_three = sum(tastsp_patterns_three) / len(tastsp_patterns_three)
print("TaSTSP completed.")

Loading dataset for TaSTSP...
Loading data from ../data/crimes_Boston_reduced.csv
Data read successfully: 896 rows
Dataset loaded.
Running CST_SPMiner...
Starting CST_SPMiner Algorithm
Created 1-length sequences
Created 2-length sequences
Created 3-length sequences
Created 4-length sequences
Created 5-length sequences
Created 6-length sequences
Created 7-length sequences
Created 8-length sequences
Created 9-length sequences
Created 10-length sequences
Created 11-length sequences
Created 12-length sequences
Created 13-length sequences
Created 14-length sequences
Created 15-length sequences
Created 16-length sequences
Created 17-length sequences
Created 18-length sequences
Created 19-length sequences
Created 20-length sequences
Created 21-length sequences
Created 22-length sequences
CST_SPMiner Algorithm Completed
CST_SPMiner completed.
Running STBFM...
Starting STBFM Algorithm
Created 1-length sequences
Created 2-length sequences
Created 3-length sequences
Created 4-length sequences
Cre

KeyboardInterrupt: 

In [None]:
def _bar_chart(labels, values, color, xlabel, ylabel, title,
               figsize=(15, 6), rotation=90):
    """Draw and show one bar chart with one bar per entry of ``values``.

    Bars are placed at integer positions and the tick labels are set
    explicitly. Passing the strings straight to ``plt.bar`` would put equal
    labels on the same categorical position, so repeated labels (possible for
    the randomly drawn combinations) would be drawn on top of each other.
    """
    positions = list(range(len(labels)))
    plt.figure(figsize=figsize)
    plt.bar(positions, values, color=color)
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    plt.title(title)
    plt.xticks(positions, [str(label) for label in labels], rotation=rotation)
    plt.show()


def _query_label(query, separator):
    """Join the event types of one random TaSTSP query into a tick label.

    A query produced by generate_random_sequences is a plain list of
    single-element sequences, so the event type has to be read from every list
    item; the list itself has no ``elements`` attribute.
    """
    # NOTE(review): relies on Sequence.elements and Element.event_type, the
    # same attributes the original label code used — confirm against src.Sequence.
    return separator.join(str(seq.elements[0].event_type) for seq in query)


# Plot the execution times for all algorithms
algorithms = ['CST_SPMiner', 'STBFM', 'TaSTSP (Single Event)', 'TaSTSP (Two Events)', 'TaSTSP (Three Events)']
algorithm_colors = ['blue', 'green', 'orange', 'red', 'purple']
# The TaSTSP entries are averages over their runs; the baselines are single runs.
execution_times = [
    cst_spminer_time,
    stbfm_time,
    sum(tastsp_times_single) / len(tastsp_times_single),
    average_tastsp_time_two,
    average_tastsp_time_three,
]
_bar_chart(algorithms, execution_times, algorithm_colors,
           'Algorithms', 'Execution Time (seconds)',
           'Execution Times of Algorithms',
           figsize=(10, 6), rotation=0)

# Plot the number of discovered patterns for all algorithms
patterns_found = [
    cst_spminer_patterns,
    stbfm_patterns,
    sum(tastsp_patterns_single) / len(tastsp_patterns_single),
    average_tastsp_patterns_two,
    average_tastsp_patterns_three,
]
_bar_chart(algorithms, patterns_found, algorithm_colors,
           'Algorithms', 'Number of Discovered Patterns',
           'Number of Discovered Patterns by Algorithms',
           figsize=(10, 6), rotation=0)

# Plot specific times for sequences consisting of 1 event type
_bar_chart(F, tastsp_times_single, 'orange',
           'Event Types', 'Execution Time (seconds)',
           'Execution Times of TaSTSP for Each Single Event Type')

# Plot number of patterns for sequences consisting of 1 event type
_bar_chart(F, tastsp_patterns_single, 'orange',
           'Event Types', 'Number of Discovered Patterns',
           'Number of Discovered Patterns by TaSTSP for Each Single Event Type')

# Plot specific times for sequences consisting of 2 randomly selected event types
combination_labels_two = [_query_label(seq, " & ") for seq in random_sequences_two]
_bar_chart(combination_labels_two, tastsp_times_two, 'red',
           'Event Type Combinations', 'Execution Time (seconds)',
           'Execution Times of TaSTSP for Each Combination of Two Event Types')

# Plot number of patterns for sequences consisting of 2 randomly selected event types
_bar_chart(combination_labels_two, tastsp_patterns_two, 'red',
           'Event Type Combinations', 'Number of Discovered Patterns',
           'Number of Discovered Patterns by TaSTSP for Each Combination of Two Event Types')

# Plot specific times for sequences consisting of 3 randomly selected event types
combination_labels_three = [_query_label(seq, ", ") for seq in random_sequences_three]
_bar_chart(combination_labels_three, tastsp_times_three, 'purple',
           'Event Type Combinations', 'Execution Time (seconds)',
           'Execution Times of TaSTSP for Each Combination of Three Event Types')

# Plot number of patterns for sequences consisting of 3 randomly selected event types
_bar_chart(combination_labels_three, tastsp_patterns_three, 'purple',
           'Event Type Combinations', 'Number of Discovered Patterns',
           'Number of Discovered Patterns by TaSTSP for Each Combination of Three Event Types')