In [2]:
import numpy as np
import pandas as pd
import time
import sys

# DTW implementation using Euclidean distance
def compute_dtw(seq_a, seq_b):
    """Return the dynamic-time-warping (DTW) distance between two sequences.

    The local cost of aligning two samples is their squared difference
    (summed over any trailing feature axes). The result is the square root
    of the cheapest accumulated cost over all monotonic alignments that
    start at the first samples and end at the last samples.

    Args:
        seq_a: First sequence; any array-like of numbers, either 1-D or with
            the time axis first, i.e. shaped ``(length, features...)``.
        seq_b: Second sequence, laid out like ``seq_a``.

    Returns:
        The DTW distance as a NumPy float. Two empty sequences give ``0.0``;
        exactly one empty sequence gives ``inf`` because no alignment exists.
    """
    # Work in float64 so integer inputs cannot wrap around: differences of
    # unsigned values and squares of narrow integers overflow silently when
    # computed in their native dtype.
    a = np.asarray(seq_a, dtype=float)
    b = np.asarray(seq_b, dtype=float)
    n, m = len(a), len(b)

    # Squared differences for every (i, j) pair in one vectorized step
    # instead of recomputing them inside the Python loop.
    diff = a[:, None] - b[None, :]
    cost = diff * diff
    if cost.ndim > 2:
        # Multivariate samples: collapse the feature axes into one cost.
        cost = cost.sum(axis=tuple(range(2, cost.ndim)))

    # acc[i, j] is the cheapest cost of aligning the first i samples of
    # seq_a with the first j samples of seq_b; the inf border forbids paths
    # that leave the matrix.
    acc = np.full((n + 1, m + 1), np.inf)
    acc[0, 0] = 0.0

    for i in range(1, n + 1):
        for j in range(1, m + 1):
            acc[i, j] = cost[i - 1, j - 1] + min(
                acc[i - 1, j],      # advance in seq_a only
                acc[i, j - 1],      # advance in seq_b only
                acc[i - 1, j - 1],  # advance in both
            )

    return np.sqrt(acc[n, m])

# Load the dataset
input_file = "dtw_test.csv"
print(f"Loading dataset from {input_file}...")
data = pd.read_csv(input_file)

# Fail fast if any column read by the loop below is absent ('id' included),
# rather than failing part-way through the run.
required_columns = ('id', 'series_a', 'series_b')
missing_columns = [name for name in required_columns if name not in data.columns]
if missing_columns:
    raise KeyError(f"Required columns missing from the dataset: {missing_columns}")
print("Dataset loaded successfully.")

results = []

start_time = time.time()

# Compute DTW distances
print("Starting DTW computations...")
total_rows = len(data)
rows = zip(data['id'], data['series_a'], data['series_b'])
for rows_done, (row_id, text_a, text_b) in enumerate(rows, start=1):
    # Convert the string form of each series to an array.
    # NOTE(review): eval() executes whatever expression the CSV cell holds.
    # If the file can come from an untrusted source, switch to
    # ast.literal_eval (after confirming the cells are plain list literals).
    seq_a = np.array(eval(text_a))
    seq_b = np.array(eval(text_b))
    dtw_distance = compute_dtw(seq_a, seq_b)
    results.append({'id': row_id, 'DTW distance': dtw_distance})

    # Progress is based on the number of rows processed, so it stays
    # correct regardless of the DataFrame's index labels.
    percentage = (rows_done / total_rows) * 100
    sys.stdout.write(f"\rProgress: {percentage:.2f}%")
    sys.stdout.flush()

# Terminate the carriage-return progress line
print()

# Save results to the output CSV file
output_file = "dtw.csv"
results_df = pd.DataFrame(results)
results_df.to_csv(output_file, index=False)

end_time = time.time()

print("DTW computation completed.")
print(f"Results saved to {output_file}.")
print(f"Total time taken: {end_time - start_time:.2f} seconds")


Loading dataset from dtw_test.csv...
Dataset loaded successfully.
Starting DTW computations...
Progress: 100.00%
DTW computation completed.
Results saved to dtw.csv.
Total time taken: 1426.60 seconds
