In [None]:
!pip install fastdtw euclidean np

Collecting np
  Downloading np-1.0.2.tar.gz (7.4 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: np
  Building wheel for np (setup.py) ... [?25l[?25hdone
  Created wheel for np: filename=np-1.0.2-py3-none-any.whl size=13658 sha256=16323721db6e22caf70c66f1aac05839a9e32a2eb313f8c1527ca25614125395
  Stored in directory: /root/.cache/pip/wheels/19/20/42/6ee214e617f78123903f603524d662ac6fa14154c3027fd992
Successfully built np
Installing collected packages: np
Successfully installed np-1.0.2


In [None]:
import numpy as np
from fastdtw import fastdtw

def distance_func(x, y):
    """Pointwise DTW cost: absolute difference between two scalars."""
    return abs(x - y)


# Fix the global NumPy seed so the synthetic series are reproducible.
np.random.seed(42)
num_pocs = 5       # Number of Points of Connection (POCs)
time_points = 100  # Number of time steps

# Synthetic data with shape (time_points, num_pocs): one column per POC.
data = np.random.rand(time_points, num_pocs)

# Pairwise DTW distances, filled in below.
distance_matrix = np.zeros((num_pocs, num_pocs))

# Walk the upper triangle (diagonal included) in row-major order and mirror
# each value into the lower triangle.
upper_rows, upper_cols = np.triu_indices(num_pocs)
for row, col in zip(upper_rows, upper_cols):
    distance = fastdtw(data[:, row], data[:, col], dist=distance_func)[0]
    distance_matrix[row, col] = distance
    distance_matrix[col, row] = distance

print("DTW Distance Matrix:")
print(distance_matrix)


DTW Distance Matrix:
[[ 0.         23.61640928 21.36818676 18.53723285 25.08882841]
 [23.61640928  0.         21.54035057 24.29120754 21.2258704 ]
 [21.36818676 21.54035057  0.         24.02886081 20.68632799]
 [18.53723285 24.29120754 24.02886081  0.         20.09840659]
 [25.08882841 21.2258704  20.68632799 20.09840659  0.        ]]


In [None]:
import os
import pandas as pd
import numpy as np
from fastdtw import fastdtw

# Path to the featured dataset CSV file
FEATURED_DATA_PATH = "/content/drive/MyDrive/WattGrid/data/processed/featured_data.csv"

# Fail fast when the input file is missing; otherwise load it and preview it.
if not os.path.exists(FEATURED_DATA_PATH):
    raise FileNotFoundError("Error: Featured data not found!")
print(" Featured data exists!")
df = pd.read_csv(FEATURED_DATA_PATH)
print("Sample data:")
print(df.head())

# Convert TradingDate to datetime so it can be formatted consistently.
df['TradingDate'] = pd.to_datetime(df['TradingDate'])

# Build a sortable string time index from TradingDate and TradingPeriod.
# The period is zero-padded to two digits because the index is sorted as a
# string: without padding, "..._10" would sort before "..._2" and every series
# would be out of chronological order before DTW is applied.
# NOTE(review): assumes TradingPeriod is a non-negative integer below 100 - confirm.
df['TimeIndex'] = (
    df['TradingDate'].dt.strftime('%Y-%m-%d')
    + '_'
    + df['TradingPeriod'].astype(str).str.zfill(2)
)

# Pivot so that each column is one PointOfConnection and each row one time
# step, with DollarsPerMegawattHour as the values, then sort by time index.
pivot_df = df.pivot(
    index='TimeIndex',
    columns='PointOfConnection',
    values='DollarsPerMegawattHour',
).sort_index()

print("\nPivoted DataFrame (first few rows):")
print(pivot_df.head())

# Fill gaps by carrying the previous value forward, then back-filling any
# leading gaps. `.ffill()` / `.bfill()` replace the deprecated
# `fillna(method=...)` form.
pivot_df = pivot_df.ffill().bfill()

# One time series per POC (one per column of the pivot).
poc_names = list(pivot_df.columns)
num_pocs = pivot_df.shape[1]
distance_matrix = np.zeros((num_pocs, num_pocs))


def distance_func(x, y):
    """Pointwise DTW cost: absolute difference between two scalars."""
    return abs(x - y)


# Extract each column's values once instead of once per inner-loop iteration.
poc_series = [pivot_df.iloc[:, k].values for k in range(num_pocs)]

# Compute DTW distances for each pair of POCs (upper triangle incl. diagonal)
# and mirror them, treating the DTW distance as symmetric.
for i in range(num_pocs):
    ts_i = poc_series[i]
    for j in range(i, num_pocs):
        ts_j = poc_series[j]
        distance, _ = fastdtw(ts_i, ts_j, dist=distance_func)
        distance_matrix[i, j] = distance
        distance_matrix[j, i] = distance

# Label rows and columns with the POC names for readability.
distance_df = pd.DataFrame(distance_matrix, index=poc_names, columns=poc_names)

print("\nDTW Distance Matrix:")
print(distance_df)


✅ Featured data exists!
Sample data:
  TradingDate  TradingPeriod                PublishDateTime PointOfConnection  \
0  2022-12-03              1  2022-12-02T23:33:02.000+13:00           ALB0331   
1  2022-12-03              1  2022-12-02T23:33:02.000+13:00           ALB1101   
2  2022-12-03              1  2022-12-02T23:33:02.000+13:00           ARA2201   
3  2022-12-03              1  2022-12-02T23:33:02.000+13:00           ARI1101   
4  2022-12-03              1  2022-12-02T23:33:02.000+13:00           ARI1102   

  Island IsProxyPriceFlag  DollarsPerMegawattHour  \
0     NI                N                    0.54   
1     NI                N                    0.54   
2     NI                N                    0.49   
3     NI                N                    0.47   
4     NI                N                    0.52   

                                       source_file  RollingMean_7  \
0  data/raw/2022/20221203_DispatchEnergyPrices.csv       0.512857   
1  data/raw/2022/20

  pivot_df = pivot_df.fillna(method='ffill').fillna(method='bfill')



DTW Distance Matrix:
            ABY0111     ALB0331     ALB1101    APS0111    ARA2201     ARG1101  \
ABY0111        0.00  1024198.70  1021853.60  432165.85  899262.25   786016.80   
ALB0331  1024198.70        0.00     8074.25  893849.57  657819.24   895950.62   
ALB1101  1021853.60     8074.25        0.00  893336.40  640778.88   896377.81   
APS0111   432165.85   893849.57   893336.40       0.00  977660.66   323645.30   
ARA2201   899262.25   657819.24   640778.88  977660.66       0.00  1012517.99   
...             ...         ...         ...        ...        ...         ...   
WTU0331   831516.08   469644.50   460344.96  948444.10  134055.95   972879.44   
WVY0111   687234.26   700048.77   691436.31  894414.29  353804.96   981392.91   
WVY1101   677879.25   734796.36   708774.83  898606.64  352598.96   985665.24   
WWD1102   477388.73   607240.90   602871.35  591480.24  443669.44   910871.21   
WWD1103   477638.87   607001.28   602683.32  591444.81  443484.87   909673.33   

     

In [None]:
# Destination CSV for the pairwise DTW matrix held in `distance_df`.
OUTPUT_PATH = "/content/drive/MyDrive/WattGrid/data/processed/dtw_adjacency_matrix.csv"

# Persist the matrix; row and column labels are written along with the values.
distance_df.to_csv(path_or_buf=OUTPUT_PATH)

print("DTW Adjacency Matrix saved to:", OUTPUT_PATH)


DTW Adjacency Matrix saved to: /content/drive/MyDrive/WattGrid/data/processed/dtw_adjacency_matrix.csv


In [None]:
import numpy as np
import pandas as pd

# Take the DTW distances from `distance_df` itself, so the values and the
# row/column labels applied below are guaranteed to come from the same object
# (rather than from a separate `distance_matrix` global that may be stale).
dtw_distances = distance_df.to_numpy(dtype=float)

# Kernel bandwidth: standard deviation over every entry of the distance matrix.
sigma = np.std(dtw_distances)

# A zero or non-finite sigma would turn the kernel below into NaN/inf values;
# stop with a clear error instead of producing a meaningless adjacency matrix.
if not np.isfinite(sigma) or sigma == 0:
    raise ValueError(
        f"Cannot build Gaussian kernel: sigma={sigma!r} "
        "(distances are constant, empty, or contain NaN)."
    )

# Gaussian (RBF) kernel: maps distance 0 to similarity 1 and large distances
# towards 0.
adjacency_matrix = np.exp(-(dtw_distances ** 2) / (2 * sigma ** 2))

# Wrap the result in a DataFrame carrying the same labels as the distances.
adjacency_df = pd.DataFrame(adjacency_matrix, index=distance_df.index, columns=distance_df.columns)

print("Adjacency Matrix (Gaussian Kernel Transformation):")
print(adjacency_df.head())


Adjacency Matrix (Gaussian Kernel Transformation):
          ABY0111   ALB0331   ALB1101   APS0111   ARA2201   ARG1101   ARI1101  \
ABY0111  1.000000  0.285198  0.286840  0.799818  0.380160  0.477635  0.453504   
ALB0331  0.285198  1.000000  0.999922  0.384599  0.595988  0.382873  0.545713   
ALB1101  0.286840  0.999922  1.000000  0.385021  0.611972  0.382523  0.575213   
APS0111  0.799818  0.384599  0.385021  1.000000  0.318814  0.882254  0.347501   
ARA2201  0.380160  0.595988  0.611972  0.318814  1.000000  0.293430  0.938637   

          ARI1102   ASB0661   ASY0111  ...   WRK0331   WRK2201   WTK0111  \
ABY0111  0.426958  0.938929  0.473272  ...  0.435478  0.436377  0.955956   
ALB0331  0.904267  0.433111  0.374822  ...  0.677116  0.688401  0.335769   
ALB1101  0.906610  0.434562  0.376887  ...  0.684517  0.702725  0.336040   
APS0111  0.479712  0.942057  0.868611  ...  0.378169  0.379783  0.719422   
ARA2201  0.864559  0.329263  0.276082  ...  0.992963  0.993601  0.421575   

     

In [None]:
import numpy as np
import pandas as pd

# Take the DTW distances from `distance_df` itself, so the values and the
# row/column labels applied below are guaranteed to come from the same object
# (rather than from a separate `distance_matrix` global that may be stale).
dtw_distances = distance_df.to_numpy(dtype=float)

# Kernel bandwidth: standard deviation over every entry of the distance matrix.
sigma = np.std(dtw_distances)

# A zero or non-finite sigma would turn the kernel below into NaN/inf values;
# stop before anything is written to disk.
if not np.isfinite(sigma) or sigma == 0:
    raise ValueError(
        f"Cannot build Gaussian kernel: sigma={sigma!r} "
        "(distances are constant, empty, or contain NaN)."
    )

# Gaussian (RBF) kernel: maps distance 0 to similarity 1 and large distances
# towards 0.
adjacency_matrix = np.exp(-(dtw_distances ** 2) / (2 * sigma ** 2))

# Wrap the result in a DataFrame carrying the same labels as the distances.
adjacency_df = pd.DataFrame(adjacency_matrix, index=distance_df.index, columns=distance_df.columns)

print("Adjacency Matrix (Gaussian Kernel Transformation):")
print(adjacency_df.head())

# Define the output CSV file path
output_path = "/content/drive/MyDrive/WattGrid/data/processed/dtw_adjacency2_matrix.csv"

# Save the adjacency matrix DataFrame (with its labels) to a CSV file
adjacency_df.to_csv(output_path)

print("Adjacency matrix saved to:", output_path)


Adjacency Matrix (Gaussian Kernel Transformation):
          ABY0111   ALB0331   ALB1101   APS0111   ARA2201   ARG1101   ARI1101  \
ABY0111  1.000000  0.285198  0.286840  0.799818  0.380160  0.477635  0.453504   
ALB0331  0.285198  1.000000  0.999922  0.384599  0.595988  0.382873  0.545713   
ALB1101  0.286840  0.999922  1.000000  0.385021  0.611972  0.382523  0.575213   
APS0111  0.799818  0.384599  0.385021  1.000000  0.318814  0.882254  0.347501   
ARA2201  0.380160  0.595988  0.611972  0.318814  1.000000  0.293430  0.938637   

          ARI1102   ASB0661   ASY0111  ...   WRK0331   WRK2201   WTK0111  \
ABY0111  0.426958  0.938929  0.473272  ...  0.435478  0.436377  0.955956   
ALB0331  0.904267  0.433111  0.374822  ...  0.677116  0.688401  0.335769   
ALB1101  0.906610  0.434562  0.376887  ...  0.684517  0.702725  0.336040   
APS0111  0.479712  0.942057  0.868611  ...  0.378169  0.379783  0.719422   
ARA2201  0.864559  0.329263  0.276082  ...  0.992963  0.993601  0.421575   

     