In [None]:
# Install the `tree` CLI non-interactively. `apt-get` is the script-stable
# front end (plain `apt` warns that its CLI is not meant for scripts), and
# `-y` guarantees the cell never blocks on a confirmation prompt.
!sudo apt-get install -y -qq tree

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
The following NEW packages will be installed:
  tree
0 upgraded, 1 newly installed, 0 to remove and 41 not upgraded.
Need to get 47.9 kB of archives.
After this operation, 116 kB of additional disk space will be used.
Get:1 http://archive.ubuntu.com/ubuntu jammy/universe amd64 tree amd64 2.0.2-1 [47.9 kB]
Fetched 47.9 kB in 0s (175 kB/s)
debconf: unable to initialize frontend: Dialog
debconf: (No usable dialog-like program is installed, so the dialog based frontend cannot be used. at /usr/share/perl5/Debconf/FrontEnd/Dialog.pm line 78, <> line 1.)
debconf: falling back to frontend: Readline
debconf: unable to initialize frontend: Readline
debconf: (This frontend requires a controlling tty.)
debconf: falling back to frontend: Teletype
dpkg-preconfigure: unable to re-open stdin: 
Selecting previously unselected package tree.
(Reading database ... 121689 files and directories currently install

In [None]:
!tree

/bin/bash: line 1: tree: command not found


**Hierarchical Symbolic-Quantized Patching (HSQP) Implementation**

This notebook implements the HSQP approach for time-series tokenization,
which integrates patching, ABBA symbolic aggregation, and quantization.

The pipeline follows these steps:
1. Raw Time Series Input
2. Initial Patching (Coarse-Graining)
3. ABBA Symbolic Aggregation (Pattern Extraction)
4. Quantization of ABBA-Derived Features
5. LLM Tokenization and Embedding
6. (Optional) Inverse Transformation for forecasting/regression

In [None]:
# Create the project skeleton. `mkdir -p` keeps the cell re-runnable after a
# kernel restart: plain `mkdir` errors out once the directories exist.
!mkdir -p HSQP
%cd HSQP
!mkdir -p data config library models utils
# Empty package markers so each folder is importable.
!touch data/__init__.py library/__init__.py config/__init__.py models/__init__.py utils/__init__.py

/content/HSQP


In [None]:
# Quote the URL: it contains `?`, which the shell would otherwise treat as a glob character.
!gdown "https://drive.google.com/drive/folders/1jJF4eyd6jOhtDdBxsBqZxADycebXLKi8?usp=sharing" -O ./data --folder

Retrieving folder contents
Processing file 1eBBuh0tErhOi8pn8MbZSdZsLheWQlJmF electricity.csv
Processing file 10YIsPPBXP67AIYrRV3L5R1zP7qyuSAcu ETTh1.csv
Processing file 1bc2152E8Kz7zT1RUJdEnvfYnaeNSu3so ETTh2.csv
Processing file 1pM6fCIjuOYs572Rxh17hS-TyyGp1vGjZ ETTm1.csv
Processing file 1CRfjDX4Cgb15Y372j--cnjZQ-ONYMRXE ETTm2.csv
Processing file 1O22tzJo80P4CdhF3s3OWvjehV1wARRtC national_illness.csv
Processing file 1X2ICEGDQRUR0GbiOcCvE80lrDGYRtj3F timeseries.csv
Processing file 1VK13chaIDj8bmSajDfmWGECeZBwx3NEt traffic.csv
Processing file 1dcK4jg9qpeEYH7383s1YH6RXKK3cFzBX weather.csv
Retrieving folder contents completed
Building directory structure
Building directory structure completed
Downloading...
From: https://drive.google.com/uc?id=1eBBuh0tErhOi8pn8MbZSdZsLheWQlJmF
To: /content/HSQP/data/electricity.csv
100% 36.4M/36.4M [00:01<00:00, 22.4MB/s]
Downloading...
From: https://drive.google.com/uc?id=10YIsPPBXP67AIYrRV3L5R1zP7qyuSAcu
To: /content/HSQP/data/ETTh1.csv
100% 2.59M/2.59M 

In [None]:
#@title requirements.txt
%%writefile requirements.txt
scikit-learn
scipy
dtw-python
fastdtw
torch
pandas
matplotlib
numpy

Writing requirements.txt


In [None]:
# `%pip` installs into the environment of the running kernel (a bare `!pip`
# may resolve to a different interpreter); `-q` trims the install log.
%pip install -q -r requirements.txt

Collecting dtw-python (from -r requirements.txt (line 2))
  Downloading dtw_python-1.7.4-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl.metadata (7.5 kB)
Collecting fastdtw (from -r requirements.txt (line 3))
  Downloading fastdtw-0.3.4.tar.gz (133 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m133.4/133.4 kB[0m [31m14.6 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Downloading dtw_python-1.7.4-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl (825 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m825.0/825.0 kB[0m [31m38.7 MB/s[0m eta [36m0:00:00[0m
[?25hBuilding wheels for collected packages: fastdtw
  Building wheel for fastdtw (setup.py) ... [?25l[?25hdone
  Created wheel for fastdtw: filename=fastdtw-0.3.4-cp312-cp312-linux_x86_64.whl size=567859 sha256=ab402eaa04c14b27d7b9dbf79ecbbfaf052e5887ff22793b9f763abf15dc7b47
  Stored in d

In [None]:
#@title hsqp_utils.py

%%writefile utils/hsqp_utils.py
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from sklearn.cluster import KMeans
from typing import List, Tuple, Dict, Union, Optional
import matplotlib.pyplot as plt
import os
from sklearn.metrics import mean_squared_error, mean_absolute_error
# --- Code from hsqp_utils.py ---

class HSQPUtils:
    """
    Namespace of static helpers for loading a series, scoring a
    reconstruction and plotting results. The class holds no state.
    """

    @staticmethod
    def load_time_series_data(file_path: str) -> np.ndarray:
        """
        Loads a single time series from a CSV file.

        A column named 'date' is dropped when present; the first remaining
        numeric column is returned with NaN entries removed.

        Args:
            file_path: Path to the CSV file.

        Returns:
            NumPy array with the values of the selected column.

        Raises:
            FileNotFoundError: If file_path does not exist.
            ValueError: If the CSV has no numeric column.
        """
        if not os.path.exists(file_path):
            raise FileNotFoundError(f"The file {file_path} does not exist.")
        df = pd.read_csv(file_path)

        # Explicitly drop the 'date' column so only data columns remain.
        if 'date' in df.columns:
            df = df.drop(columns=['date'])

        numeric_cols = df.select_dtypes(include=[np.number]).columns
        if len(numeric_cols) == 0:
            raise ValueError("CSV file does not contain any numerical columns for time series data.")

        # Coerce to numeric and drop NaN rows without mutating the frame's column in place.
        ts_series = pd.to_numeric(df[numeric_cols[0]], errors='coerce').dropna()

        return ts_series.values

    @staticmethod
    def calculate_compression_ratio(original_data_size: int, compressed_data_size: int) -> float:
        """
        Calculates the compression ratio original / compressed.

        Args:
            original_data_size: Size of the original data in bytes.
            compressed_data_size: Size of the compressed data in bytes.

        Returns:
            Compression ratio; float('inf') when compressed_data_size is 0.
        """
        if compressed_data_size == 0:
            return float('inf')  # Empty compressed output: avoid dividing by zero
        return original_data_size / compressed_data_size

    @staticmethod
    def calculate_rmse(original_ts: np.ndarray, reconstructed_ts: np.ndarray) -> float:
        """
        Calculates the Root Mean Squared Error (RMSE).

        The longer input is truncated to the length of the shorter one.

        Args:
            original_ts: Original time series.
            reconstructed_ts: Reconstructed time series.

        Returns:
            RMSE value.
        """
        min_len = min(len(original_ts), len(reconstructed_ts))
        return np.sqrt(mean_squared_error(original_ts[:min_len], reconstructed_ts[:min_len]))

    @staticmethod
    def calculate_mae(original_ts: np.ndarray, reconstructed_ts: np.ndarray) -> float:
        """
        Calculates the Mean Absolute Error (MAE).

        The longer input is truncated to the length of the shorter one.

        Args:
            original_ts: Original time series.
            reconstructed_ts: Reconstructed time series.

        Returns:
            MAE value.
        """
        min_len = min(len(original_ts), len(reconstructed_ts))
        return mean_absolute_error(original_ts[:min_len], reconstructed_ts[:min_len])

    @staticmethod
    def plot_error_series(original_ts: np.ndarray, reconstructed_ts: np.ndarray, title: str = "Reconstruction Error"):
        """
        Plots the reconstruction error (original - reconstructed) and saves it.

        The figure is written to '<title lower-cased, spaces as underscores>.png'
        in the current working directory and then closed.

        Args:
            original_ts: Original time series.
            reconstructed_ts: Reconstructed time series.
            title: Title of the plot (also determines the file name).
        """
        original_ts = np.asarray(original_ts)
        reconstructed_ts = np.asarray(reconstructed_ts)
        min_len = min(len(original_ts), len(reconstructed_ts))
        error_ts = original_ts[:min_len] - reconstructed_ts[:min_len]

        fig = plt.figure(figsize=(15, 4))
        try:
            plt.plot(error_ts, label="Error", color='red', alpha=0.7)
            plt.title(title)
            plt.xlabel("Time Step")
            plt.ylabel("Error")
            plt.legend()
            plt.grid(True)
            plt.tight_layout()
            plt.savefig(f"{title.replace(' ', '_').lower()}.png")
        finally:
            # Always release the figure, even if saving fails.
            plt.close(fig)

    @staticmethod
    def plot_time_series(original, reconstructed, title="Time Series Reconstruction",
                         save_path=None, figsize=(12, 4)):
        """
        Plot original vs reconstructed time series with an RMSE/MAE text box.

        This is the single definition of the method; an earlier duplicate with
        a different signature was shadowed by this one and has been removed.

        Args:
            original: Original time series (array-like).
            reconstructed: Reconstructed time series (array-like).
            title: Plot title.
            save_path: Path to save the plot (optional; nothing is saved when falsy).
            figsize: Figure size.
        """
        original = np.asarray(original)
        reconstructed = np.asarray(reconstructed)

        # Trim to same length
        min_len = min(len(original), len(reconstructed))
        time_index = np.arange(min_len)
        residual = original[:min_len] - reconstructed[:min_len]

        fig = plt.figure(figsize=figsize)
        try:
            plt.plot(time_index, original[:min_len], label='Original', alpha=0.7, linewidth=1.5)
            plt.plot(time_index, reconstructed[:min_len], label='Reconstructed', alpha=0.7, linewidth=1.5)

            plt.title(title, fontsize=14)
            plt.xlabel("Time Index", fontsize=12)
            plt.ylabel("Value", fontsize=12)
            plt.legend(fontsize=12)
            plt.grid(True, alpha=0.3)

            # Add statistics text box
            stats_text = f"RMSE: {np.sqrt(np.mean(residual**2)):.4f}\n"
            stats_text += f"MAE: {np.mean(np.abs(residual)):.4f}"

            plt.text(0.02, 0.98, stats_text, transform=plt.gca().transAxes,
                     fontsize=10, verticalalignment='top',
                     bbox=dict(boxstyle='round', facecolor='wheat', alpha=0.8))

            plt.tight_layout()

            if save_path:
                plt.savefig(save_path, dpi=150, bbox_inches='tight')

            plt.show()
        finally:
            # Release the figure so repeated calls do not accumulate open figures.
            plt.close(fig)

Overwriting utils/hsqp_utils.py


In [None]:
#@title __init__.py

%%writefile utils/__init__.py

from .hsqp_utils import HSQPUtils

__all__ = ['HSQPUtils']

Overwriting utils/__init__.py


In [None]:
#@title dataset.py

%%writefile data/dataset.py
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from sklearn.cluster import KMeans
from typing import List, Tuple, Dict, Union, Optional
import matplotlib.pyplot as plt
import os
from sklearn.metrics import mean_squared_error, mean_absolute_error

class ABBASymbolicAggregation:
    """
    Simplified ABBA-style symbolic aggregation (Step 3 in HSQP).

    A series is compressed into (length, increment) line pieces, the pieces
    are clustered with KMeans, and each cluster index is mapped to a character
    starting at 'a'. This is a simplified implementation based on the fABBA
    library concepts.
    """
    def __init__(self, tol: float = 0.1, alpha: float = 0.1, sorting: str = '2-norm', scl: float = 1, k: int = 10, seed: int = 42):
        """
        Initialize ABBA parameters.

        Args:
            tol: Maximum absolute deviation allowed between a line piece and the data
            alpha: Parameter for digitization (stored and exported in `parameters`;
                   not otherwise used by this class)
            sorting: Method for sorting ('2-norm', 'area', etc.); stored only
            scl: Scaling factor; features are divided by it before clustering
                 and cluster centers are multiplied by it on reconstruction
            k: Number of symbols/clusters
            seed: Random seed for KMeans reproducibility
        """
        self.tol = tol
        self.alpha = alpha
        self.sorting = sorting
        self.scl = scl
        self.k = k
        self.seed = seed
        self.parameters = None
        self.kmeans = None

    def compress(self, ts: np.ndarray) -> List[Tuple[float, float]]:
        """
        Compress time series into piecewise linear segments (polygonal chain).

        Each piece greedily extends from its start point for as long as the
        straight line between the two end points stays within `tol` of every
        sample in between.

        Args:
            ts: Time series data (flattened to 1-D float64)

        Returns:
            List of (len, inc) tuples representing the polygonal segments;
            empty when the series has fewer than two samples.
        """
        ts = np.asarray(ts, dtype=np.float64).flatten()
        n = len(ts)

        pieces = []
        start_idx = 0

        while start_idx < n - 1:
            end_idx = start_idx + 1
            while end_idx < n:
                if end_idx == start_idx + 1:
                    # Two points define the line exactly; avoids round-off from interpolation.
                    line_segment = np.array([ts[start_idx], ts[end_idx]])
                else:
                    t = np.linspace(0, 1, end_idx - start_idx + 1)
                    line_segment = ts[start_idx] + (ts[end_idx] - ts[start_idx]) * t

                if np.max(np.abs(line_segment - ts[start_idx:end_idx+1])) <= self.tol:
                    end_idx += 1
                else:
                    end_idx -= 1
                    break

            if end_idx >= n:
                end_idx = n - 1

            # Guarantee progress: when the very first check fails (NaN samples or a
            # negative tol) end_idx would fall back to start_idx and the outer loop
            # would never terminate. Every piece spans at least one step.
            end_idx = max(end_idx, start_idx + 1)

            length = end_idx - start_idx
            increment = ts[end_idx] - ts[start_idx]

            pieces.append((length, increment))

            start_idx = end_idx

        return pieces

    def fit_kmeans(self, features: np.ndarray):
        """
        Fit KMeans model to the extracted features.

        When fewer samples than clusters are supplied, `self.k` is reduced to
        the number of samples (and the model is cleared when that is zero).
        Features are divided by `scl` before fitting so that fitting,
        `predict_symbols` and `inverse_transform` operate in the same space.

        Args:
            features: Features (len, inc) from all pieces
        """
        if features.shape[0] < self.k:
            print(f"Warning: Number of samples ({features.shape[0]}) is less than n_clusters ({self.k}). Adjusting n_clusters to {features.shape[0]}.")
            self.k = features.shape[0]
            if self.k == 0:
                self.kmeans = None
                return

        if self.scl != 1:
            features = features / self.scl

        # Rebuild the estimator if k changed since it was created; reusing a
        # model with a stale n_clusters would fail on small inputs.
        if self.kmeans is None or self.kmeans.n_clusters != self.k:
            # Use the seed for reproducible KMeans clustering
            self.kmeans = KMeans(n_clusters=self.k, random_state=self.seed, n_init=10)
        self.kmeans.fit(features)
        self.parameters = {
            'centers': self.kmeans.cluster_centers_,
            'scl': self.scl,
            'alpha': self.alpha
        }

    def predict_symbols(self, features: np.ndarray) -> List[str]:
        """
        Predict symbols for given features using the fitted KMeans model.

        Cluster label i maps to chr(97 + i), so labels above 25 yield
        characters after 'z' rather than letters.

        Args:
            features: Features (len, inc) from pieces

        Returns:
            List of symbols (empty for empty input)

        Raises:
            ValueError: If the KMeans model has not been fitted.
        """
        if self.kmeans is None:
            raise ValueError("KMeans model not fitted. Call fit_kmeans() first.")

        if features.shape[0] == 0:
            return []

        if self.scl != 1:
            features = features / self.scl

        labels = self.kmeans.predict(features)

        return [chr(97 + label) for label in labels]

    def inverse_transform(self, string: str, initial_value: float) -> np.ndarray:
        """
        Convert symbolic representation back to time series.

        Each symbol is replaced by its cluster center (length, increment); the
        length is rounded to an integer of at least 1 and the increment is
        spread evenly over that many steps.

        Args:
            string: Symbolic representation
            initial_value: Initial value of the time series

        Returns:
            Reconstructed time series (starts with initial_value)

        Raises:
            ValueError: If the model is not fitted or a symbol does not map to
                a known cluster.
        """
        if self.parameters is None or self.kmeans is None:
            raise ValueError("ABBA model must be fitted before inverse transform")

        centers = self.parameters['centers']

        if self.scl != 1:
            centers = centers * self.scl

        ts_recon = [initial_value]
        for symbol in string:
            idx = ord(symbol) - 97
            # Reject out-of-range symbols explicitly; a negative index would
            # otherwise silently wrap around to another cluster.
            if idx < 0 or idx >= len(centers):
                raise ValueError(f"Symbol {symbol!r} does not map to a fitted cluster.")

            length, increment = centers[idx]
            steps = max(int(round(length)), 1)
            step = increment / steps
            for _ in range(steps):
                ts_recon.append(ts_recon[-1] + step)

        return np.array(ts_recon)

Writing data/dataset.py


In [None]:
%%writefile data/__init__.py

from .dataset import ABBASymbolicAggregation

__all__ = [
    'ABBASymbolicAggregation',
]


Overwriting data/__init__.py


In [None]:
#@title time_series.py

%%writefile library/time_series.py
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from sklearn.cluster import KMeans
from typing import List, Tuple, Dict, Union, Optional
import matplotlib.pyplot as plt
import os
from sklearn.metrics import mean_squared_error, mean_absolute_error

class TimeSeriesPatching:
    """
    Class for creating and merging patches from time series data (Step 2 in HSQP).
    """
    def __init__(self, patch_length: int = 60, stride: int = 32, overlap: bool = True):
        """
        Initialize the patching parameters.

        Args:
            patch_length: Length of each patch
            stride: Step size between patches (if overlap=True)
            overlap: Whether patches should overlap; when False the stride is
                     forced to patch_length

        Raises:
            ValueError: If patch_length or the effective stride is not positive.
        """
        self.patch_length = patch_length
        self.stride = stride if overlap else patch_length
        self.overlap = overlap

        # A zero stride would divide by zero in create_patches; fail early instead.
        if self.patch_length <= 0 or self.stride <= 0:
            raise ValueError(f"patch_length ({self.patch_length}) and stride ({self.stride}) must be positive.")

    def create_patches(self, time_series: np.ndarray) -> np.ndarray:
        """
        Create patches from a time series.

        Args:
            time_series: Time series data of shape [batch_size, seq_length, features]
                         or [seq_length, features] or [seq_length]

        Returns:
            Patches of shape [batch_size, num_patches, patch_length, features]
                         or [num_patches, patch_length, features]
                         or [num_patches, patch_length]

        Raises:
            ValueError: If the sequence is too short to produce a single patch.
        """
        time_series = np.asarray(time_series)
        original_shape = time_series.shape
        if len(original_shape) == 1:
            time_series = time_series.reshape(-1, 1)

        if time_series.ndim == 2:
            batch_size = None
            seq_length, features = time_series.shape
        else:
            batch_size, seq_length, features = time_series.shape

        num_patches = (seq_length - self.patch_length) // self.stride + 1

        if num_patches <= 0:
            raise ValueError(f"Number of patches ({num_patches}) is not positive. Ensure seq_length ({seq_length}) is greater than or equal to patch_length ({self.patch_length}) and stride ({self.stride}) allows at least one patch.")

        # Start offset of every patch along the time axis.
        starts = np.arange(num_patches) * self.stride

        if batch_size is None:
            patches = np.array([time_series[s:s + self.patch_length] for s in starts])
            if len(original_shape) == 1:
                patches = patches.reshape(num_patches, self.patch_length)
        else:
            patches = np.zeros((batch_size, num_patches, self.patch_length, features))
            for b in range(batch_size):
                patches[b] = np.array([time_series[b, s:s + self.patch_length] for s in starts])

        return patches

    def merge_patches(self, patches: np.ndarray, original_length: Optional[int] = None) -> np.ndarray:
        """
        Merge patches back into a time series.
        For overlapping regions, values are averaged. Positions not covered by
        any patch are left at zero; patch samples that would fall beyond the
        requested length are dropped.

        Args:
            patches: Patches of shape [batch_size, num_patches, patch_length, features]
                     or [num_patches, patch_length, features]
                     or [num_patches, patch_length]
            original_length: Original sequence length (optional); defaults to
                     the span covered by the patches

        Returns:
            Reconstructed time series with the batch/feature axes of the input
        """
        patches = np.asarray(patches)
        input_ndim = patches.ndim

        # Normalise every accepted layout to [batch, num_patches, patch_length, features]
        # so the accumulation below is written once.
        if input_ndim == 2:
            stacked = patches[np.newaxis, :, :, np.newaxis]
        elif input_ndim == 3:
            stacked = patches[np.newaxis]
        else:
            stacked = patches
        batch_size, num_patches, patch_length, features = stacked.shape

        if original_length is None:
            seq_length = (num_patches - 1) * self.stride + patch_length
        else:
            seq_length = original_length

        reconstructed = np.zeros((batch_size, seq_length, features))
        counts = np.zeros((batch_size, seq_length, features))

        for i in range(num_patches):
            start_idx = i * self.stride
            if start_idx >= seq_length:
                break
            # Clip the patch so it never writes past the requested length.
            end_idx = min(start_idx + patch_length, seq_length)
            reconstructed[:, start_idx:end_idx] += stacked[:, i, :end_idx - start_idx]
            # Count contributions per time step (not per patch index) so that
            # overlapping regions are averaged correctly.
            counts[:, start_idx:end_idx] += 1

        reconstructed = reconstructed / np.maximum(counts, 1)

        if input_ndim == 2:
            return reconstructed.reshape(seq_length)
        if input_ndim == 3:
            return reconstructed[0]
        return reconstructed



Writing library/time_series.py


In [None]:
#@title feature_quantization.py

%%writefile library/feature_quantization.py
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from sklearn.cluster import KMeans
from typing import List, Tuple, Dict, Union, Optional
import matplotlib.pyplot as plt
import os
from sklearn.metrics import mean_squared_error, mean_absolute_error

class FeatureQuantization:
    """
    Quantization of ABBA-derived features for efficiency optimization (Step 4 in HSQP).

    A single scale (and, for the affine method, a single zero point) is
    computed over the whole feature array on each `quantize` call and kept on
    the instance for `dequantize`.
    """
    def __init__(self, bit_width: int = 8, method: str = 'affine', block_size: int = 32):
        """
        Initialize quantization parameters.

        Args:
            bit_width: Target bit width (e.g., 8 for INT8, 4 for INT4)
            method: Quantization method ('affine', 'abs_max')
            block_size: Block size for block-wise quantization (stored only;
                        the methods of this class do not use it)
        """
        self.bit_width = bit_width
        self.method = method
        self.block_size = block_size
        self.scale = None
        self.zero_point = None

        # Signed integer range for the requested bit width.
        self.qmin = -(2 ** (bit_width - 1))
        self.qmax = 2 ** (bit_width - 1) - 1

    def _storage_dtype(self):
        """Smallest signed NumPy integer type that can hold [qmin, qmax]."""
        if self.bit_width <= 8:
            return np.int8
        if self.bit_width <= 16:
            return np.int16
        if self.bit_width <= 32:
            return np.int32
        return np.int64

    def quantize(self, features: np.ndarray) -> np.ndarray:
        """
        Quantize features to lower precision.

        Args:
            features: Input features

        Returns:
            Quantized features as a signed integer array; an empty array for
            empty input (in which case scale/zero_point are left untouched).

        Raises:
            ValueError: If `method` is not 'affine' or 'abs_max'.
        """
        features = np.asarray(features)
        if features.size == 0:
            return np.array([], dtype=self._storage_dtype())

        if self.method == 'abs_max':
            abs_max = np.max(np.abs(features))
            if abs_max == 0:
                abs_max = 1.0

            self.scale = self.qmax / abs_max
            self.zero_point = 0

            q_features = np.round(features * self.scale)
            q_features = np.clip(q_features, self.qmin, self.qmax)

        elif self.method == 'affine':
            f_min = np.min(features)
            f_max = np.max(features)

            if f_min == f_max:
                # Constant input: encode everything as 0 and keep the constant in
                # the offset, so values outside [qmin, qmax] are not clipped away.
                self.scale = 1.0
                self.zero_point = -float(f_min)
            else:
                self.scale = (self.qmax - self.qmin) / (f_max - f_min)
                self.zero_point = self.qmin - round(f_min * self.scale)

            q_features = np.round(features * self.scale + self.zero_point)
            q_features = np.clip(q_features, self.qmin, self.qmax)

        else:
            raise ValueError(f"Unknown quantization method: {self.method}")

        return q_features.astype(self._storage_dtype())

    def dequantize(self, q_features: np.ndarray) -> np.ndarray:
        """
        Dequantize features back to original precision.

        Args:
            q_features: Quantized features

        Returns:
            Dequantized features (float64)

        Raises:
            ValueError: If `quantize` has not set the parameters yet, or the
                method is unknown.
        """
        if self.scale is None or (self.method == 'affine' and self.zero_point is None):
            raise ValueError("Quantization parameters not set. Call quantize() first.")

        # Promote before any arithmetic: subtracting the zero point in int8/int16
        # wraps around (e.g. 127 - (-128)) or raises OverflowError.
        q_values = np.asarray(q_features, dtype=np.float64)

        if self.method == 'abs_max':
            return q_values / self.scale
        elif self.method == 'affine':
            return (q_values - self.zero_point) / self.scale
        else:
            raise ValueError(f"Unknown quantization method: {self.method}")


Writing library/feature_quantization.py


In [None]:
%%writefile library/__init__.py

from .feature_quantization import FeatureQuantization
from .time_series import TimeSeriesPatching

__all__ = [
    'FeatureQuantization',
    'TimeSeriesPatching',
]


Overwriting library/__init__.py


In [None]:
%%writefile models/__init__.py

from .hsqp import HSQP

__all__ = ['HSQP']

Overwriting models/__init__.py


In [None]:
#@title hsqp.py

%%writefile models/hsqp.py
# --- Updated models/hsqp.py ---
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from sklearn.cluster import KMeans
from typing import List, Tuple, Dict, Union, Optional
import matplotlib.pyplot as plt
import os
from sklearn.metrics import mean_squared_error, mean_absolute_error
from library import FeatureQuantization, TimeSeriesPatching
from data import ABBASymbolicAggregation

class HSQP:
    """
    Hierarchical Symbolic-Quantized Patching (HSQP) with Ablation support.

    The three stages (patching, ABBA aggregation, quantization) can be
    switched off individually through the `use_*` flags.
    """
    def __init__(self,
                 patch_length: int = 60,
                 stride: int = 22,
                 tol: float = 0.02,
                 alpha: float = 0.1,
                 k: int = 26,
                 bit_width: int = 8,
                 quant_method: str = 'affine',
                 embedding_dim: int = 128,
                 use_patching: bool = True,
                 use_abba: bool = True,
                 use_quant: bool = True,
                 seed: int = 42):
        """
        Build the pipeline components.

        Args:
            patch_length: Patch length passed to TimeSeriesPatching
            stride: Patch stride passed to TimeSeriesPatching
            tol: Compression tolerance passed to ABBASymbolicAggregation
            alpha: Digitization parameter passed to ABBASymbolicAggregation
            k: Number of clusters/symbols passed to ABBASymbolicAggregation
            bit_width: Bit width passed to FeatureQuantization
            quant_method: Quantization method passed to FeatureQuantization
            embedding_dim: Output size of the linear embedding layer
            use_patching: Ablation flag for the patching stage
            use_abba: Ablation flag for the ABBA stage
            use_quant: Ablation flag for the quantization stage
            seed: Seed used for KMeans and for the embedding initialization
        """
        # Store seed
        self.seed = seed

        # Ablation Flags
        self.use_patching = use_patching
        self.use_abba = use_abba
        self.use_quant = use_quant

        # Modules (pass seed to components that need it)
        self.patching = TimeSeriesPatching(patch_length=patch_length, stride=stride)
        self.abba = ABBASymbolicAggregation(tol=tol, alpha=alpha, k=k, seed=seed)
        self.quantization = FeatureQuantization(bit_width=bit_width, method=quant_method)

        self.embedding_dim = embedding_dim
        self.embedding = None

        # Filled by fit_transform; declared here so inverse_transform can detect
        # "not fitted yet" instead of failing with AttributeError.
        self.quantized_cache = None
        self.symbols_cache = None

    def fit_transform(self, time_series: np.ndarray) -> Tuple[List[str], np.ndarray, List[List[Tuple[float, float]]]]:
        """
        Run the forward pipeline on a time series.

        Args:
            time_series: Input series. When patches come out 4-D (batched
                input) only the first batch element is processed.

        Returns:
            Tuple (symbols_list, quantized_features, pieces_list):
            one symbol string per patch ("NOSYM" when ABBA is disabled or there
            are no features), the (optionally quantized) stacked features of all
            patches, and the per-patch list of pieces.
        """
        # 1. Patching Ablation
        if self.use_patching:
            patches = self.patching.create_patches(time_series)
        else:
            patches = time_series.reshape(1, -1)

        is_batched = len(patches.shape) == 4
        num_patches = patches.shape[1] if is_batched else patches.shape[0]

        # 2. ABBA Ablation: collect per-patch pieces and the flat list of all pieces
        all_pieces = []
        pieces_list = []

        for i in range(num_patches):
            patch = patches[0, i].flatten() if is_batched else patches[i].flatten()

            if self.use_abba:
                pieces = self.abba.compress(patch)
            else:
                # Bypass ABBA: Use raw (value, time_index) as "pieces"
                pieces = [(val, float(idx)) for idx, val in enumerate(patch)]

            pieces_list.append(pieces)
            all_pieces.extend(pieces)

        all_features = np.array(all_pieces)

        # 3. Quantization Ablation
        if self.use_quant and all_features.size > 0:
            quantized_features = self.quantization.quantize(all_features)
        else:
            quantized_features = all_features

        # Store the results in the object so inverse_transform can see them
        self.quantized_cache = quantized_features
        self.symbols_cache = pieces_list  # Useful for non-ABBA reconstruction

        # Fit K-Means only if using ABBA
        symbols_list = []
        if self.use_abba and all_features.shape[0] > 0:
            self.abba.fit_kmeans(all_features)
            for pieces in pieces_list:
                symbols = self.abba.predict_symbols(np.array(pieces))
                symbols_list.append(''.join(symbols))
        else:
            symbols_list = ["NOSYM"] * len(pieces_list)

        return symbols_list, quantized_features, pieces_list

    def inverse_transform(self, symbols_list: List[str], initial_values: List[float], original_length: Optional[int] = None) -> np.ndarray:
        """
        Convert symbolic representation back to time series with ablation handling.

        Args:
            symbols_list: One symbol string per patch (as returned by fit_transform)
            initial_values: Starting value of each patch, aligned with symbols_list
            original_length: Length to reconstruct/truncate to (optional)

        Returns:
            Reconstructed time series; an empty array when nothing can be rebuilt.

        Raises:
            ValueError: If ABBA is disabled and fit_transform has not been called.
        """
        # If ABBA was bypassed, we reconstruct directly from the cached features
        if not self.use_abba:
            if self.quantized_cache is None:
                raise ValueError("No cached features available. Call fit_transform() first.")

            cached = self.quantized_cache
            if cached.size == 0:
                return np.array([])

            features = self.quantization.dequantize(cached) if self.use_quant else cached
            # features is (N, 2) where col 0 is value. Extract values.
            values = features[:, 0].flatten()

            if self.use_patching:
                # The cached rows are the patches laid end to end. Overlapping
                # patches must be merged (averaged), not concatenated, to line up
                # with the original series. Only done when every patch holds
                # exactly patch_length samples (univariate input).
                patch_len = self.patching.patch_length
                n_patches = len(self.symbols_cache) if self.symbols_cache else 0
                if n_patches and values.size == n_patches * patch_len:
                    return self.patching.merge_patches(values.reshape(n_patches, patch_len), original_length)

            return values[:original_length]

        # Standard ABBA Reconstruction logic
        reconstructed_patches = []
        for symbol, initial_value in zip(symbols_list, initial_values):
            if symbol == "NOSYM":
                continue
            reconstructed_patches.append(self.abba.inverse_transform(symbol, initial_value))

        if not reconstructed_patches:
            return np.array([])

        if not self.use_patching:
            return np.concatenate(reconstructed_patches)[:original_length]

        # Standard Patch Merge: pad (repeating the last value) or truncate every
        # reconstructed patch to exactly patch_length before merging.
        target_patch_length = self.patching.patch_length
        uniform_patches = [np.pad(p, (0, max(0, target_patch_length - len(p))), 'edge')[:target_patch_length] for p in reconstructed_patches]
        return self.patching.merge_patches(np.array(uniform_patches), original_length)

    def create_llm_embeddings(self, quantized_features: np.ndarray) -> np.ndarray:
        """
        Project (quantized) features through a linear embedding layer.

        The layer is created on first use (after seeding torch with `self.seed`,
        which resets torch's global RNG) and reused on later calls.

        Args:
            quantized_features: Feature array; a 1-D array is treated as N
                samples with one feature each.

        Returns:
            Embeddings with last dimension `embedding_dim`; an empty array for
            empty input.
        """
        features = np.asarray(quantized_features)
        if features.size == 0:
            return np.array([])

        # A 1-D input means one feature per sample; give it an explicit feature
        # axis so it matches the Linear(1, embedding_dim) layer built below.
        if features.ndim == 1:
            features = features.reshape(-1, 1)

        input_dim = features.shape[-1]
        if self.embedding is None:
            # Set PyTorch seed for embedding initialization
            torch.manual_seed(self.seed)
            self.embedding = nn.Linear(input_dim, self.embedding_dim)

        q_features_tensor = torch.tensor(features, dtype=torch.float32)
        with torch.no_grad():
            return self.embedding(q_features_tensor).numpy()


Overwriting models/hsqp.py


In [None]:
#@title main.py

%%writefile main.py
import argparse
import numpy as np
import torch
import os
import random
import pandas as pd
import math
from pathlib import Path
import warnings
from collections import Counter
from scipy.stats import entropy as scipy_entropy
warnings.filterwarnings('ignore')

from models import HSQP
from utils import HSQPUtils

def set_seed(seed):
    """
    Seed Python's `random`, NumPy and PyTorch with the same value.

    When CUDA is available the CUDA generators are seeded as well and cuDNN is
    switched to deterministic mode with benchmarking disabled (this may cost
    performance). PYTHONHASHSEED is also written to the process environment.

    Args:
        seed: Integer seed value
    """
    # CPU-side generators, seeded in a fixed order.
    for seed_fn in (random.seed, np.random.seed, torch.manual_seed):
        seed_fn(seed)

    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)
        # Trade speed for run-to-run determinism in cuDNN kernels.
        cudnn = torch.backends.cudnn
        cudnn.deterministic = True
        cudnn.benchmark = False

    os.environ['PYTHONHASHSEED'] = str(seed)
    print(f"Random seed set to: {seed}")

def get_dataset_params(dataset_name):
    """
    Look up the preset configuration for a named dataset.

    The lookup is case-insensitive. Names that are not registered fall back
    to the 'electricity' preset.

    Returns:
        Dict with the keys 'file', 'patch_length', 'stride' and 'k'.
    """
    # One row per dataset: (name, csv path, patch_length, stride, k)
    presets = (
        ('timeseries', 'data/timeseries.csv', 60, 32, 26),
        ('electricity', 'data/electricity.csv', 60, 32, 26),
        ('national_illness', 'data/national_illness.csv', 96, 48, 30),
        ('etth1', 'data/ETTh1.csv', 144, 72, 20),
        ('etth2', 'data/ETTh2.csv', 144, 72, 20),
        ('ettm1', 'data/ETTm1.csv', 144, 72, 20),
        ('ettm2', 'data/ETTm2.csv', 144, 72, 20),
    )
    registry = {
        name: {
            'file': csv_path,
            'patch_length': patch_len,
            'stride': step,
            'k': alphabet,
        }
        for name, csv_path, patch_len, step, alphabet in presets
    }

    key = dataset_name.lower()
    if key in registry:
        return registry[key]
    return registry['electricity']

class TokenEntropyAnalyzer:
    """Analyzer for token/symbol entropy in HSQP.

    Every method is static; the class keeps no state. All entropies are
    expressed in bits (base-2 logarithm).
    """

    @staticmethod
    def _entropy_from_counts(counts):
        """
        Shannon entropy (bits) of the distribution described by raw counts.

        Args:
            counts: Iterable of non-negative occurrence counts

        Returns:
            Non-negative float entropy, or NaN when the counts sum to zero
        """
        freq = np.asarray(list(counts), dtype=float)
        total = freq.sum()
        if total <= 0:
            return np.nan

        probs = freq[freq > 0] / total
        # No epsilon inside the log: every retained probability is strictly
        # positive, and adding one would push the entropy of a one-symbol
        # distribution slightly below zero.
        entropy = -np.sum(probs * np.log2(probs))
        # Clamp so a degenerate distribution reports 0.0 rather than -0.0.
        return float(max(0.0, entropy))

    @staticmethod
    def calculate_symbol_entropy(symbols_list):
        """
        Calculate entropy of symbols generated by ABBA.

        All strings are concatenated and treated as one character sequence.

        Args:
            symbols_list: List of symbol strings

        Returns:
            Dictionary with various entropy metrics. For empty input the same
            keys are returned with NaN / None / 0 placeholder values.
        """
        empty_result = {
            'symbol_entropy': np.nan,
            'symbol_entropy_normalized': np.nan,
            'symbol_uniqueness': np.nan,
            'symbol_distribution': None,
            'effective_alphabet_size': np.nan,
            'max_possible_entropy': np.nan,
            'unique_symbol_count': 0,
            'total_symbol_count': 0
        }

        if not symbols_list:
            return empty_result

        # Flatten all symbols into a single sequence
        all_symbols = ''.join(symbols_list)
        if not all_symbols:
            return empty_result

        # Calculate symbol frequencies
        symbol_counts = Counter(all_symbols)
        total_symbols = len(all_symbols)
        unique_symbols = len(symbol_counts)

        shannon_entropy = TokenEntropyAnalyzer._entropy_from_counts(symbol_counts.values())

        # Maximum possible entropy (uniform distribution over observed symbols)
        max_entropy = np.log2(unique_symbols) if unique_symbols > 0 else 0

        # Normalized entropy (0 to 1); defined as 0 when only one symbol occurs
        normalized_entropy = shannon_entropy / max_entropy if max_entropy > 0 else 0

        # Symbol uniqueness ratio
        uniqueness_ratio = unique_symbols / total_symbols if total_symbols > 0 else 0

        # Effective alphabet size (2^H)
        effective_alphabet_size = 2 ** shannon_entropy if not np.isnan(shannon_entropy) else 0

        # Symbol distribution
        symbol_distribution = {
            'unique_symbols': unique_symbols,
            'total_symbols': total_symbols,
            'most_common': symbol_counts.most_common(10),  # Top 10 symbols
            'symbol_frequencies': {k: v/total_symbols for k, v in symbol_counts.items()}
        }

        return {
            'symbol_entropy': shannon_entropy,
            'symbol_entropy_normalized': normalized_entropy,
            'symbol_uniqueness': uniqueness_ratio,
            'symbol_distribution': symbol_distribution,
            'effective_alphabet_size': effective_alphabet_size,
            'max_possible_entropy': max_entropy,
            'unique_symbol_count': unique_symbols,
            'total_symbol_count': total_symbols
        }

    @staticmethod
    def calculate_n_gram_entropy(symbols_list, n=2):
        """
        Calculate n-gram entropy for symbols.

        The strings are concatenated first, so n-grams may span the boundary
        between two consecutive entries of ``symbols_list``.

        Args:
            symbols_list: List of symbol strings
            n: n-gram size (default: 2 for bigrams)

        Returns:
            n-gram entropy in bits, or NaN when there are fewer than n symbols
        """
        if not symbols_list:
            return np.nan

        all_symbols = ''.join(symbols_list)

        if len(all_symbols) < n:
            return np.nan

        # Extract overlapping n-grams with a sliding window of width n
        n_grams = [all_symbols[i:i+n] for i in range(len(all_symbols) - n + 1)]

        return TokenEntropyAnalyzer._entropy_from_counts(Counter(n_grams).values())

    @staticmethod
    def calculate_quantization_entropy(quantized_features):
        """
        Calculate entropy of quantized features.

        Values are min-max scaled and binned into integer levels 0..255; the
        entropy is computed over the histogram of those levels.

        Args:
            quantized_features: Tensor or array of quantized features

        Returns:
            Dictionary with quantization entropy metrics
        """
        if quantized_features is None:
            return {
                'quant_entropy': np.nan,
                'quant_value_distribution': None,
                'quant_sparsity': np.nan
            }

        # Flatten the features. Tensors are detached and moved to the CPU
        # first so grad-tracking or CUDA tensors can be converted to NumPy.
        if torch.is_tensor(quantized_features):
            features_flat = quantized_features.detach().cpu().flatten().numpy()
        else:
            features_flat = np.asarray(quantized_features).flatten()

        if len(features_flat) == 0:
            return {
                'quant_entropy': np.nan,
                'quant_value_distribution': None,
                'quant_sparsity': np.nan
            }

        # Discretize for entropy calculation (bin into 256 levels)
        min_val = np.min(features_flat)
        max_val = np.max(features_flat)

        if max_val > min_val:
            normalized = (features_flat - min_val) / (max_val - min_val)
            discretized = (normalized * 255).astype(int)
        else:
            # Constant input: everything falls into a single level
            discretized = np.zeros_like(features_flat, dtype=int)

        # Histogram of occupied levels
        _, level_counts = np.unique(discretized, return_counts=True)
        quant_entropy = TokenEntropyAnalyzer._entropy_from_counts(level_counts)

        # Calculate sparsity (fraction of zero or near-zero values)
        sparsity = float(np.sum(np.abs(features_flat) < 1e-6) / len(features_flat))

        # Value distribution statistics ('unique_values' counts occupied levels)
        value_distribution = {
            'unique_values': len(level_counts),
            'min_value': float(min_val),
            'max_value': float(max_val),
            'mean_value': float(np.mean(features_flat)),
            'std_value': float(np.std(features_flat))
        }

        return {
            'quant_entropy': quant_entropy,
            'quant_value_distribution': value_distribution,
            'quant_sparsity': sparsity
        }

    @staticmethod
    def calculate_joint_entropy(symbols_list, quantized_features):
        """
        Calculate a joint-entropy upper bound for symbols and quantized features.

        No joint distribution is estimated. The reported bound is the sum of
        the two marginal entropies, so the derived mutual-information
        approximation is always 0 whenever both entropies are defined.

        Args:
            symbols_list: List of symbol strings
            quantized_features: Tensor of quantized features

        Returns:
            Dictionary with 'joint_entropy_upper_bound',
            'mutual_information_approx' and 'normalized_mutual_information'
        """
        if not symbols_list or quantized_features is None:
            return {
                'joint_entropy_upper_bound': np.nan,
                'mutual_information_approx': np.nan,
                'normalized_mutual_information': np.nan,
                # Legacy key names previously used only on this path; kept so
                # existing callers that read them keep working.
                'joint_entropy': np.nan,
                'mutual_information': np.nan
            }

        # Get symbol entropy
        symbol_metrics = TokenEntropyAnalyzer.calculate_symbol_entropy(symbols_list)
        symbol_entropy = symbol_metrics['symbol_entropy']

        # Get quantization entropy
        quant_metrics = TokenEntropyAnalyzer.calculate_quantization_entropy(quantized_features)
        quant_entropy = quant_metrics['quant_entropy']

        # For joint entropy, we'd need joint distribution
        # This is a simplified version - in practice you'd need the joint probability distribution
        if not np.isnan(symbol_entropy) and not np.isnan(quant_entropy):
            # Upper bound for joint entropy (sum of individual entropies)
            joint_entropy_upper = symbol_entropy + quant_entropy

            # Simplified mutual information approximation
            # In practice, you'd calculate this from joint distribution
            mutual_info = max(0, symbol_entropy + quant_entropy - joint_entropy_upper)

            # Normalized mutual information
            smaller_entropy = min(symbol_entropy, quant_entropy)
            normalized_mi = mutual_info / smaller_entropy if smaller_entropy > 0 else np.nan
        else:
            joint_entropy_upper = np.nan
            mutual_info = np.nan
            normalized_mi = np.nan

        return {
            'joint_entropy_upper_bound': joint_entropy_upper,
            'mutual_information_approx': mutual_info,
            'normalized_mutual_information': normalized_mi
        }

    @staticmethod
    def print_entropy_report(entropy_metrics, dataset_name):
        """
        Print comprehensive entropy analysis report.

        Args:
            entropy_metrics: Dictionary of metrics; each section is printed
                only when its marker key is present
            dataset_name: Name shown (upper-cased) in the report header
        """
        print(f"\n{'='*80}")
        print(f"TOKEN ENTROPY ANALYSIS - {dataset_name.upper()}")
        print(f"{'='*80}")

        # Symbol Entropy
        if 'symbol_entropy' in entropy_metrics:
            print(f"\n SYMBOL ENTROPY:")
            print(f"  Shannon Entropy: {entropy_metrics.get('symbol_entropy', np.nan):.4f} bits")
            print(f"  Normalized Entropy: {entropy_metrics.get('symbol_entropy_normalized', np.nan):.4f} (0-1 scale)")
            print(f"  Max Possible Entropy: {entropy_metrics.get('max_possible_entropy', np.nan):.4f} bits")
            print(f"  Effective Alphabet Size: {entropy_metrics.get('effective_alphabet_size', np.nan):.2f}")
            print(f"  Unique Symbols: {entropy_metrics.get('unique_symbol_count', 0)}")
            print(f"  Total Symbols: {entropy_metrics.get('total_symbol_count', 0)}")
            print(f"  Symbol Uniqueness Ratio: {entropy_metrics.get('symbol_uniqueness', np.nan):.4f}")

            # N-gram entropies
            for n in [2, 3]:
                ngram_key = f'{n}_gram_entropy'
                if ngram_key in entropy_metrics:
                    print(f"  {n}-gram Entropy: {entropy_metrics[ngram_key]:.4f} bits")

        # Quantization Entropy
        if 'quant_entropy' in entropy_metrics:
            print(f"\n QUANTIZATION ENTROPY:")
            print(f"  Quantization Entropy: {entropy_metrics.get('quant_entropy', np.nan):.4f} bits")
            print(f"  Quantization Sparsity: {entropy_metrics.get('quant_sparsity', np.nan):.4%}")

            dist = entropy_metrics.get('quant_value_distribution')
            if dist:
                print(f"  Unique Quantized Values: {dist.get('unique_values', 0)}")
                print(f"  Value Range: [{dist.get('min_value', 0):.4f}, {dist.get('max_value', 0):.4f}]")
                print(f"  Mean ± Std: {dist.get('mean_value', 0):.4f} ± {dist.get('std_value', 0):.4f}")

        # Joint Entropy
        if 'joint_entropy_upper_bound' in entropy_metrics:
            print(f"\n JOINT ENTROPY & MUTUAL INFORMATION:")
            print(f"  Joint Entropy Upper Bound: {entropy_metrics.get('joint_entropy_upper_bound', np.nan):.4f} bits")
            print(f"  Mutual Information (approx): {entropy_metrics.get('mutual_information_approx', np.nan):.4f} bits")
            print(f"  Normalized Mutual Information: {entropy_metrics.get('normalized_mutual_information', np.nan):.4f}")

        print(f"\n ENTROPY INTERPRETATION:")
        symbol_entropy = entropy_metrics.get('symbol_entropy', np.nan)
        if not np.isnan(symbol_entropy):
            if symbol_entropy < 2:
                print(f"  Symbol Entropy ({symbol_entropy:.2f} bits): LOW - Highly repetitive patterns")
            elif symbol_entropy < 4:
                print(f"  Symbol Entropy ({symbol_entropy:.2f} bits): MODERATE - Some structure present")
            else:
                print(f"  Symbol Entropy ({symbol_entropy:.2f} bits): HIGH - Complex, information-rich")

        print(f"{'='*80}")

class MetricsCalculator:
    """Comprehensive metrics calculator for time series reconstruction.

    Every method is static. Error metrics compare the two series over their
    common leading length; when that overlap is empty they return NaN.
    """

    @staticmethod
    def _align(original, reconstructed):
        """
        Return both series as arrays truncated to their common length.

        Integer/boolean inputs are cast to float so later arithmetic cannot
        truncate or wrap; floating-point inputs keep their dtype.
        """
        def as_inexact(values):
            arr = np.asarray(values)
            return arr if np.issubdtype(arr.dtype, np.inexact) else arr.astype(float)

        orig_arr = as_inexact(original)
        recon_arr = as_inexact(reconstructed)
        common_len = min(len(orig_arr), len(recon_arr))
        return orig_arr[:common_len], recon_arr[:common_len]

    @staticmethod
    def calculate_mse(original, reconstructed):
        """Calculate Mean Squared Error."""
        original, reconstructed = MetricsCalculator._align(original, reconstructed)

        if len(original) == 0:
            return np.nan

        return np.mean((original - reconstructed) ** 2)

    @staticmethod
    def calculate_mae(original, reconstructed):
        """Calculate Mean Absolute Error."""
        original, reconstructed = MetricsCalculator._align(original, reconstructed)

        if len(original) == 0:
            return np.nan

        return np.mean(np.abs(original - reconstructed))

    @staticmethod
    def calculate_rmse(original, reconstructed):
        """Calculate Root Mean Squared Error."""
        mse = MetricsCalculator.calculate_mse(original, reconstructed)
        return np.sqrt(mse) if not np.isnan(mse) else np.nan

    @staticmethod
    def calculate_mape(original, reconstructed, epsilon=1e-10):
        """
        Calculate Mean Absolute Percentage Error.

        Args:
            original: Original time series
            reconstructed: Reconstructed time series
            epsilon: Small value to avoid division by zero

        Returns:
            MAPE as a percentage, or NaN when the overlap is empty
        """
        original, reconstructed = MetricsCalculator._align(original, reconstructed)

        if len(original) == 0:
            return np.nan

        # Floor the denominator at epsilon. _align has already produced a
        # non-integer dtype, so epsilon is not truncated to 0 for int inputs.
        denominator = np.maximum(np.abs(original), epsilon)

        mape = np.mean(np.abs((original - reconstructed) / denominator)) * 100
        return mape

    @staticmethod
    def calculate_rrmse(original, reconstructed):
        """Calculate Relative Root Mean Squared Error (RMSE / std of original)."""
        # Normalise by the same segment of the original that the error uses.
        original, reconstructed = MetricsCalculator._align(original, reconstructed)
        rmse = MetricsCalculator.calculate_rmse(original, reconstructed)
        if np.isnan(rmse) or np.std(original) == 0:
            return np.nan

        return rmse / np.std(original)

    @staticmethod
    def calculate_rmae(original, reconstructed):
        """Calculate Relative Mean Absolute Error (MAE / mean |original|)."""
        # Normalise by the same segment of the original that the error uses.
        original, reconstructed = MetricsCalculator._align(original, reconstructed)
        mae = MetricsCalculator.calculate_mae(original, reconstructed)
        if np.isnan(mae) or np.mean(np.abs(original)) == 0:
            return np.nan

        return mae / np.mean(np.abs(original))

    @staticmethod
    def calculate_compression_ratio(original_ts, symbols_list, quantized_features,
                                   use_abba=True, use_quant=True):
        """
        Calculate compression ratio.

        The ratio is ``original_ts.nbytes`` divided by an estimated compressed
        size: one byte per symbol character plus the raw byte size of the
        quantized features.

        Args:
            original_ts: Original time series
            symbols_list: List of symbols from ABBA
            quantized_features: Quantized features tensor
            use_abba: Whether ABBA was used
            use_quant: Whether quantization was used

        Returns:
            Original size over compressed size, or 1.0 when nothing was counted
        """
        # Original size in bytes
        original_size = original_ts.nbytes

        # Calculate compressed size
        compressed_size = 0

        if use_abba and symbols_list:
            # Estimate symbol storage size (1 byte per symbol if using ASCII)
            compressed_size += sum(len(s) for s in symbols_list)

        if use_quant and quantized_features is not None:
            # Quantized features size
            if torch.is_tensor(quantized_features):
                compressed_size += quantized_features.nelement() * quantized_features.element_size()
            else:
                compressed_size += quantized_features.nbytes

        # If nothing was compressed, ratio is 1
        if compressed_size == 0:
            return 1.0

        return original_size / compressed_size

    @staticmethod
    def calculate_all_metrics(original, reconstructed, symbols_list=None,
                             quantized_features=None, use_abba=True, use_quant=True):
        """
        Calculate all comprehensive metrics including token entropy.

        Args:
            original: Original time series
            reconstructed: Reconstructed time series
            symbols_list: Optional list of symbol strings
            quantized_features: Optional quantized features
            use_abba: Forwarded to the compression-ratio calculation
            use_quant: Forwarded to the compression-ratio calculation

        Returns:
            Dictionary containing all metrics
        """
        metrics = {}

        # Error metrics
        metrics['MSE'] = MetricsCalculator.calculate_mse(original, reconstructed)
        metrics['MAE'] = MetricsCalculator.calculate_mae(original, reconstructed)
        metrics['RMSE'] = MetricsCalculator.calculate_rmse(original, reconstructed)
        metrics['MAPE'] = MetricsCalculator.calculate_mape(original, reconstructed)
        metrics['RRMSE'] = MetricsCalculator.calculate_rrmse(original, reconstructed)
        metrics['RMAE'] = MetricsCalculator.calculate_rmae(original, reconstructed)

        # Compression metrics
        metrics['Compression_Ratio'] = MetricsCalculator.calculate_compression_ratio(
            original, symbols_list, quantized_features, use_abba, use_quant
        )

        # Token Entropy metrics
        entropy_metrics = {}
        if symbols_list:
            symbol_entropy = TokenEntropyAnalyzer.calculate_symbol_entropy(symbols_list)
            entropy_metrics.update(symbol_entropy)

            # Calculate n-gram entropies
            for n in [2, 3]:
                ngram_entropy = TokenEntropyAnalyzer.calculate_n_gram_entropy(symbols_list, n)
                entropy_metrics[f'{n}_gram_entropy'] = ngram_entropy

        if quantized_features is not None:
            quant_entropy = TokenEntropyAnalyzer.calculate_quantization_entropy(quantized_features)
            entropy_metrics.update(quant_entropy)

        if symbols_list and quantized_features is not None:
            joint_entropy = TokenEntropyAnalyzer.calculate_joint_entropy(symbols_list, quantized_features)
            entropy_metrics.update(joint_entropy)

        # Add entropy metrics to main metrics
        metrics.update(entropy_metrics)

        # Additional statistics
        metrics['Original_Length'] = len(original)
        metrics['Reconstructed_Length'] = len(reconstructed)
        metrics['Num_Symbols'] = sum(len(s) for s in symbols_list) if symbols_list else 0
        metrics['Quantized_Features_Shape'] = quantized_features.shape if quantized_features is not None else None

        return metrics

    @staticmethod
    def print_metrics_table(metrics, dataset_name, ablation_tag=""):
        """
        Print metrics in a formatted table.

        Error metrics whose value is NaN are omitted from the table.

        Args:
            metrics: Dictionary of metrics to display
            dataset_name: Name shown (upper-cased) in the header
            ablation_tag: Optional tag printed after the dataset name
        """
        print(f"\n{'='*80}")
        print(f"COMPREHENSIVE METRICS REPORT")
        print(f"Dataset: {dataset_name.upper()} {ablation_tag}")
        print(f"{'='*80}")

        print(f"{'Metric':<25} {'Value':<20} {'Unit/Description':<35}")
        print(f"{'-'*80}")

        # Error metrics
        error_metrics = ['MSE', 'MAE', 'RMSE', 'MAPE', 'RRMSE', 'RMAE']
        for metric in error_metrics:
            value = metrics.get(metric, np.nan)
            if not np.isnan(value):
                if metric == 'MAPE':
                    print(f"{metric:<25} {value:.4f}%{'':<12} Percentage error")
                else:
                    print(f"{metric:<25} {value:.6f}{'':<12} Absolute error")

        print(f"{'-'*80}")

        # Compression metric
        comp_ratio = metrics.get('Compression_Ratio', 1.0)
        print(f"{'Compression_Ratio':<25} {comp_ratio:.2f}x{'':<12} Size reduction factor")

        print(f"{'-'*80}")

        # Token Entropy metrics
        print(f"{'TOKEN ENTROPY METRICS':<25}")
        print(f"{'-'*25}")

        if 'symbol_entropy' in metrics:
            print(f"{'Symbol_Entropy':<25} {metrics['symbol_entropy']:.4f}{'':<12} bits")

        if 'symbol_entropy_normalized' in metrics:
            print(f"{'Symbol_Entropy_Norm':<25} {metrics['symbol_entropy_normalized']:.4f}{'':<12} (0-1 scale)")

        if 'quant_entropy' in metrics:
            print(f"{'Quantization_Entropy':<25} {metrics['quant_entropy']:.4f}{'':<12} bits")

        if 'effective_alphabet_size' in metrics:
            print(f"{'Effective_Alphabet_Size':<25} {metrics['effective_alphabet_size']:.2f}{'':<12} symbols")

        # N-gram entropies
        for n in [2, 3]:
            ngram_key = f'{n}_gram_entropy'
            if ngram_key in metrics and not np.isnan(metrics[ngram_key]):
                print(f"{f'{n}-Gram_Entropy':<25} {metrics[ngram_key]:.4f}{'':<12} bits")

        print(f"{'-'*80}")

        # Additional info
        print(f"{'Original_Length':<25} {metrics.get('Original_Length', 0):<20} Data points")
        print(f"{'Reconstructed_Length':<25} {metrics.get('Reconstructed_Length', 0):<20} Data points")
        print(f"{'Num_Symbols':<25} {metrics.get('Num_Symbols', 0):<20} Total symbols")

        if metrics.get('unique_symbol_count'):
            print(f"{'Unique_Symbols':<25} {metrics['unique_symbol_count']:<20} Distinct symbols")

        if metrics.get('Quantized_Features_Shape'):
            shape_str = str(metrics['Quantized_Features_Shape'])
            print(f"{'Quantized_Shape':<25} {shape_str:<20} (patches × features)")

        print(f"{'='*80}")

    @staticmethod
    def save_metrics_to_csv(metrics, dataset_name, ablation_tag, output_dir="results"):
        """
        Save metrics to CSV file.

        Writes one row to ``metrics_<dataset>_<tag>.csv`` and one row of key
        metrics to ``summary_<dataset>.csv`` inside ``output_dir``. If a file
        already exists, its rows are read back and the new row is appended.

        Args:
            metrics: Dictionary of metrics to persist
            dataset_name: Dataset name used in the file names and 'Dataset' column
            ablation_tag: Tag used in the file name and 'Ablation_Tag' column
            output_dir: Target directory (created if missing)
        """
        output_path = Path(output_dir)
        output_path.mkdir(parents=True, exist_ok=True)

        csv_file = output_path / f"metrics_{dataset_name}_{ablation_tag}.csv"

        # Prepare data for CSV
        metrics_data = {
            'Dataset': [dataset_name],
            'Ablation_Tag': [ablation_tag],
            'Timestamp': [pd.Timestamp.now().strftime('%Y-%m-%d %H:%M:%S')]
        }

        # Add all metrics; containers are stringified, arrays/tensors are
        # recorded by shape only so each value fits one CSV cell.
        for key, value in metrics.items():
            if isinstance(value, (list, tuple, dict)):
                metrics_data[key] = [str(value)]
            elif isinstance(value, np.ndarray):
                metrics_data[key] = [str(value.shape)]
            elif torch.is_tensor(value):
                metrics_data[key] = [str(value.shape)]
            else:
                metrics_data[key] = [value]

        # Create DataFrame and save
        df = pd.DataFrame(metrics_data)

        # Append to existing file or create new
        if csv_file.exists():
            existing_df = pd.read_csv(csv_file)
            df = pd.concat([existing_df, df], ignore_index=True)

        df.to_csv(csv_file, index=False)
        print(f"Metrics saved to: {csv_file}")

        # Also save a summary CSV with key metrics only
        summary_file = output_path / f"summary_{dataset_name}.csv"
        key_metrics = [
            'Dataset', 'Ablation_Tag', 'Timestamp',
            'MSE', 'MAE', 'RMSE', 'MAPE', 'Compression_Ratio',
            'symbol_entropy', 'symbol_entropy_normalized',
            'effective_alphabet_size', 'unique_symbol_count'
        ]

        # Metrics absent from this run are written as NaN
        summary_data = {k: metrics_data.get(k, [np.nan]) for k in key_metrics}
        summary_df = pd.DataFrame(summary_data)

        if summary_file.exists():
            existing_summary = pd.read_csv(summary_file)
            summary_df = pd.concat([existing_summary, summary_df], ignore_index=True)

        summary_df.to_csv(summary_file, index=False)

def main():
    parser = argparse.ArgumentParser(description="HSQP Multi-Dataset & Ablation Runner with Comprehensive Metrics")

    # Seed for reproducibility
    parser.add_argument('--seed', type=int, default=42,
                        help="Random seed for reproducibility (default: 42)")

    # Output control
    parser.add_argument('--output_dir', type=str, default='results',
                        help="Directory to save metrics and plots (default: 'results')")
    parser.add_argument('--save_metrics', action='store_true',
                        help="Save metrics to CSV file")
    parser.add_argument('--no_plots', action='store_true',
                        help="Disable plotting")
    parser.add_argument('--entropy_analysis', action='store_true',
                        help="Enable detailed entropy analysis")

    # Dataset Selection
    parser.add_argument('--dataset', type=str, default='timeseries',
                        choices=['electricity', 'timeseries', 'national_illness', 'etth1', 'etth2', 'ettm1', 'ettm2'],
                        help="Select predefined dataset or 'custom'")
    parser.add_argument('--data_path', type=str, help="Path to custom CSV file")

    # Model Hyperparameters (Defaults will be overridden by dataset choice if not provided)
    parser.add_argument('--patch_length', type=int)
    parser.add_argument('--stride', type=int)
    parser.add_argument('--k', type=int, help="Number of symbols")

    # Ablation Flags
    parser.add_argument('--no_patching', action='store_true')
    parser.add_argument('--no_abba', action='store_true')
    parser.add_argument('--no_quant', action='store_true')

    args = parser.parse_args()

    # Create output directory
    output_dir = Path(args.output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    # Set random seed for reproducibility
    set_seed(args.seed)

    # --- Load Dataset Configuration ---
    ds_config = get_dataset_params(args.dataset)

    # Use command line override if provided, else use dataset default
    data_file = args.data_path if args.data_path else ds_config['file']
    p_len = args.patch_length if args.patch_length else ds_config['patch_length']
    stride = args.stride if args.stride else ds_config['stride']
    k_val = args.k if args.k else ds_config['k']

    print(f"{'='*80}")
    print(f"HSQP EXPERIMENT: {args.dataset.upper()}")
    print(f"{'='*80}")
    print(f"Configuration:")
    print(f"  Dataset: {args.dataset}")
    print(f"  Patch Length: {p_len}")
    print(f"  Stride: {stride}")
    print(f"  K (symbols): {k_val}")
    print(f"  Patching: {'Disabled' if args.no_patching else 'Enabled'}")
    print(f"  ABBA: {'Disabled' if args.no_abba else 'Enabled'}")
    print(f"  Quantization: {'Disabled' if args.no_quant else 'Enabled'}")
    print(f"  Seed: {args.seed}")
    print(f"  Entropy Analysis: {'Enabled' if args.entropy_analysis else 'Disabled'}")
    print(f"{'='*80}")

    # --- Load Data ---
    try:
        original_ts = HSQPUtils.load_time_series_data(data_file)
        print(f"Loaded time series: {len(original_ts)} points from {data_file}")
    except Exception as e:
        print(f"Error: Could not load {data_file}. {e}")
        return

    # --- Initialize HSQP Model ---
    hsqp = HSQP(
        patch_length=p_len,
        stride=stride,
        k=k_val,
        use_patching=not args.no_patching,
        use_abba=not args.no_abba,
        use_quant=not args.no_quant,
        seed=args.seed  # Pass seed to HSQP model
    )

    # --- Apply HSQP Transformation ---
    print("\nApplying HSQP transformation...")
    try:
        symbols_list, quantized_features, pieces_list = hsqp.fit_transform(original_ts)
        print(f"✓ Transformation successful")
        print(f"  Quantized Features Shape: {quantized_features.shape}")
        print(f"  Number of symbol sequences: {len(symbols_list)}")

        # Print symbol statistics
        if symbols_list:
            total_symbols = sum(len(s) for s in symbols_list)
            unique_symbols = len(set(''.join(symbols_list)))
            print(f"  Total symbols generated: {total_symbols}")
            print(f"  Unique symbols: {unique_symbols}")
            print(f"  Symbol diversity: {unique_symbols/total_symbols:.2%}")
    except Exception as e:
        print(f"✗ Transformation failed: {e}")
        return

    # --- Reconstruction ---
    print("\nReconstructing time series...")
    try:
        if not args.no_patching:
            patches = hsqp.patching.create_patches(original_ts)
            initial_values = [p[0, 0] if len(p.shape) > 1 else p[0] for p in patches]
        else:
            initial_values = [original_ts[0]]

        reconstructed_ts = hsqp.inverse_transform(symbols_list, initial_values, len(original_ts))
        print(f"✓ Reconstruction successful: {len(reconstructed_ts)} points")
    except Exception as e:
        print(f"✗ Reconstruction failed: {e}")
        return

    # --- Calculate Comprehensive Metrics ---
    print("\n" + "="*80)
    print("CALCULATING METRICS")
    print("="*80)

    # Ensure same length for metrics calculation
    min_len = min(len(original_ts), len(reconstructed_ts))
    original_for_metrics = original_ts[:min_len]
    reconstructed_for_metrics = reconstructed_ts[:min_len]

    # Calculate all metrics
    all_metrics = MetricsCalculator.calculate_all_metrics(
        original=original_for_metrics,
        reconstructed=reconstructed_for_metrics,
        symbols_list=symbols_list,
        quantized_features=quantized_features,
        use_abba=not args.no_abba,
        use_quant=not args.no_quant
    )

    # Create ablation tag for identification
    ablation_tag = f"p{int(not args.no_patching)}_a{int(not args.no_abba)}_q{int(not args.no_quant)}"

    # Print metrics table
    MetricsCalculator.print_metrics_table(all_metrics, args.dataset, ablation_tag)

    # Detailed entropy analysis if requested
    if args.entropy_analysis and symbols_list:
        TokenEntropyAnalyzer.print_entropy_report(all_metrics, args.dataset)

    # Save metrics to CSV if requested
    if args.save_metrics:
        MetricsCalculator.save_metrics_to_csv(
            all_metrics, args.dataset, ablation_tag, args.output_dir
        )

    # --- Generate Summary Statistics ---
    print(f"\n{'='*80}")
    print(f"SUMMARY STATISTICS")
    print(f"{'='*80}")

    print(f"Original Time Series:")
    print(f"  Length: {len(original_ts)}")
    print(f"  Mean: {np.mean(original_ts):.4f}")
    print(f"  Std: {np.std(original_ts):.4f}")
    print(f"  Min: {np.min(original_ts):.4f}")
    print(f"  Max: {np.max(original_ts):.4f}")

    print(f"\nReconstructed Time Series:")
    print(f"  Length: {len(reconstructed_ts)}")
    print(f"  Mean: {np.mean(reconstructed_ts):.4f}")
    print(f"  Std: {np.std(reconstructed_ts):.4f}")
    print(f"  Min: {np.min(reconstructed_ts):.4f}")
    print(f"  Max: {np.max(reconstructed_ts):.4f}")

    # --- Key Performance Indicators with Entropy ---
    print(f"\n{'='*80}")
    print(f"KEY PERFORMANCE INDICATORS")
    print(f"{'='*80}")

    compression_ratio = all_metrics.get('Compression_Ratio', 1.0)
    rmse = all_metrics.get('RMSE', np.nan)
    mape = all_metrics.get('MAPE', np.nan)
    symbol_entropy = all_metrics.get('symbol_entropy', np.nan)
    effective_alphabet = all_metrics.get('effective_alphabet_size', np.nan)

    print(f"Compression Efficiency: {compression_ratio:.2f}x size reduction")
    print(f"Reconstruction Accuracy: RMSE = {rmse:.4f}")
    print(f"Relative Error: MAPE = {mape:.2f}%")

    if not np.isnan(symbol_entropy):
        print(f"Information Content: Symbol Entropy = {symbol_entropy:.2f} bits")
        print(f"Effective Alphabet Size: {effective_alphabet:.1f} symbols")

    # Qualitative assessment
    if not np.isnan(rmse):
        if rmse < 0.01:
            print(f"Quality: Excellent reconstruction (RMSE < 0.01)")
        elif rmse < 0.1:
            print(f"Quality: Good reconstruction (RMSE < 0.1)")
        elif rmse < 0.5:
            print(f"Quality: Acceptable reconstruction (RMSE < 0.5)")
        else:
            print(f"Quality: Poor reconstruction (RMSE ≥ 0.5)")

    # Entropy interpretation
    if not np.isnan(symbol_entropy):
        if symbol_entropy < 2:
            print(f"Symbol Complexity: Low entropy (repetitive patterns)")
        elif symbol_entropy < 3:
            print(f"Symbol Complexity: Moderate entropy (structured)")
        else:
            print(f"Symbol Complexity: High entropy (complex patterns)")

    # --- Plotting ---
    if not args.no_plots:
        print(f"\nGenerating plots...")
        plot_tag = f"{args.dataset}_{ablation_tag}"

        # Plot 1: Original vs Reconstructed
        HSQPUtils.plot_time_series(
            original_ts,
            reconstructed_ts[:len(original_ts)],
            f"Reconstruction - {args.dataset.upper()} ({ablation_tag})",
            save_path=output_dir / f"reconstruction_{plot_tag}.png"
        )

        # Plot 2: Error distribution
        errors = original_for_metrics - reconstructed_for_metrics
        import matplotlib.pyplot as plt

        fig, axes = plt.subplots(2, 2, figsize=(12, 8))

        # Error histogram
        axes[0, 0].hist(errors, bins=50, alpha=0.7, edgecolor='black', density=True)
        axes[0, 0].set_title(f"Error Distribution - {args.dataset}")
        axes[0, 0].set_xlabel("Reconstruction Error")
        axes[0, 0].set_ylabel("Density")
        axes[0, 0].grid(True, alpha=0.3)

        # Error sequence
        axes[0, 1].plot(errors[:min(500, len(errors))], alpha=0.7)
        axes[0, 1].set_title(f"Error Sequence (first 500 points)")
        axes[0, 1].set_xlabel("Time Index")
        axes[0, 1].set_ylabel("Error")
        axes[0, 1].grid(True, alpha=0.3)

        # Symbol distribution (if available)
        if symbols_list and 'symbol_distribution' in all_metrics:
            symbol_dist = all_metrics['symbol_distribution']
            if symbol_dist and 'most_common' in symbol_dist:
                symbols, counts = zip(*symbol_dist['most_common'])
                axes[1, 0].bar(range(len(symbols)), counts, alpha=0.7)
                axes[1, 0].set_title(f"Top {len(symbols)} Most Common Symbols")
                axes[1, 0].set_xlabel("Symbol")
                axes[1, 0].set_ylabel("Frequency")
                axes[1, 0].set_xticks(range(len(symbols)))
                axes[1, 0].set_xticklabels(symbols, rotation=45)
                axes[1, 0].grid(True, alpha=0.3)

        # Entropy metrics visualization
        axes[1, 1].axis('off')
        if not np.isnan(symbol_entropy):
            metrics_text = "Entropy Metrics:\n"
            metrics_text += f"Symbol Entropy: {symbol_entropy:.2f} bits\n"
            metrics_text += f"Normalized: {all_metrics.get('symbol_entropy_normalized', 0):.2f}\n"
            metrics_text += f"Effective Alphabet: {effective_alphabet:.1f}\n"
            metrics_text += f"Unique Symbols: {all_metrics.get('unique_symbol_count', 0)}\n"

            if 'quant_entropy' in all_metrics:
                metrics_text += f"\nQuant Entropy: {all_metrics['quant_entropy']:.2f} bits"

            axes[1, 1].text(0.1, 0.5, metrics_text,
                          fontsize=10, verticalalignment='center',
                          bbox=dict(boxstyle='round', facecolor='wheat', alpha=0.8))

        plt.suptitle(f"HSQP Analysis - {args.dataset.upper()} ({ablation_tag})", fontsize=14)
        plt.tight_layout()
        plt.savefig(output_dir / f"analysis_{plot_tag}.png", dpi=150, bbox_inches='tight')
        plt.show()

        print(f"Plots saved to: {output_dir}/")

    print(f"\n{'='*80}")
    print(f"EXPERIMENT COMPLETE")
    print(f"Ablation Tag: {ablation_tag}")
    print(f"Output Directory: {output_dir}")
    print(f"{'='*80}")

# Script entry point: run the experiment only when main.py is executed directly
# (e.g. `python main.py ...`), not when the module is imported.
if __name__ == "__main__":
    main()

Overwriting main.py


In [None]:
# Baseline run with default arguments (the recorded run used the timeseries
# dataset with patching, ABBA, and quantization all enabled, seed 42).
!python main.py

Random seed set to: 42
HSQP EXPERIMENT: TIMESERIES
Configuration:
  Dataset: timeseries
  Patch Length: 60
  Stride: 32
  K (symbols): 26
  Patching: Enabled
  ABBA: Enabled
  Quantization: Enabled
  Seed: 42
  Entropy Analysis: Disabled
Loaded time series: 7306 points from data/timeseries.csv

Applying HSQP transformation...
✓ Transformation successful
  Quantized Features Shape: (13112, 2)
  Number of symbol sequences: 227
  Total symbols generated: 13112
  Unique symbols: 26
  Symbol diversity: 0.20%

Reconstructing time series...
✓ Reconstruction successful: 7306 points

CALCULATING METRICS

COMPREHENSIVE METRICS REPORT
Dataset: TIMESERIES p1_a1_q1
Metric                    Value                Unit/Description                   
--------------------------------------------------------------------------------
MSE                       0.288126             Absolute error
MAE                       0.380585             Absolute error
RMSE                      0.536774             Abso

In [None]:
# Ablation: disable the patching stage (reported under tag p0_a1_q1).
# NOTE(review): the recorded run reconstructs 7295 points from a 7306-point
# series, so the reconstruction is shorter than the input in this mode.
!python main.py --no_patching

Random seed set to: 42
HSQP EXPERIMENT: TIMESERIES
Configuration:
  Dataset: timeseries
  Patch Length: 60
  Stride: 32
  K (symbols): 26
  Patching: Disabled
  ABBA: Enabled
  Quantization: Enabled
  Seed: 42
Loaded time series: 7306 points from data/timeseries.csv

Applying HSQP transformation...
✓ Transformation successful
  Quantized Features Shape: (7149, 2)
  Number of symbol sequences: 1

Reconstructing time series...
✓ Reconstruction successful: 7295 points

CALCULATING METRICS

COMPREHENSIVE METRICS REPORT
Dataset: TIMESERIES p0_a1_q1
Metric               Value                Unit/Description                        
--------------------------------------------------------------------------------
MSE                  16.615156                Absolute error
MAE                  3.293676                Absolute error
RMSE                 4.076169                Absolute error
MAPE                 104.9411%                Percentage error
RRMSE                2.098894             

In [None]:
# Ablation: disable the ABBA symbolic aggregation stage (reported under tag p1_a0_q1).
!python main.py --no_abba

Random seed set to: 42
HSQP EXPERIMENT: TIMESERIES
Configuration:
  Dataset: timeseries
  Patch Length: 60
  Stride: 32
  K (symbols): 26
  Patching: Enabled
  ABBA: Disabled
  Quantization: Enabled
  Seed: 42
  Entropy Analysis: Disabled
Loaded time series: 7306 points from data/timeseries.csv

Applying HSQP transformation...
✓ Transformation successful
  Quantized Features Shape: (13620, 2)
  Number of symbol sequences: 227
  Total symbols generated: 1135
  Unique symbols: 5
  Symbol diversity: 0.44%

Reconstructing time series...
✓ Reconstruction successful: 7306 points

CALCULATING METRICS

COMPREHENSIVE METRICS REPORT
Dataset: TIMESERIES p1_a0_q1
Metric                    Value                Unit/Description                   
--------------------------------------------------------------------------------
MSE                       7.329952             Absolute error
MAE                       2.163278             Absolute error
RMSE                      2.707388             Absol

In [None]:
# Ablation: disable the quantization stage (reported under tag p1_a1_q0).
# NOTE(review): the recorded feature shape and MSE/MAE/RMSE are identical to the
# baseline run (MSE 0.288126) — confirm that --no_quant actually changes the
# transformation/reconstruction path.
!python main.py --no_quant

Random seed set to: 42
HSQP EXPERIMENT: TIMESERIES
Configuration:
  Dataset: timeseries
  Patch Length: 60
  Stride: 32
  K (symbols): 26
  Patching: Enabled
  ABBA: Enabled
  Quantization: Disabled
  Seed: 42
  Entropy Analysis: Disabled
Loaded time series: 7306 points from data/timeseries.csv

Applying HSQP transformation...
✓ Transformation successful
  Quantized Features Shape: (13112, 2)
  Number of symbol sequences: 227
  Total symbols generated: 13112
  Unique symbols: 26
  Symbol diversity: 0.20%

Reconstructing time series...
✓ Reconstruction successful: 7306 points

CALCULATING METRICS

COMPREHENSIVE METRICS REPORT
Dataset: TIMESERIES p1_a1_q0
Metric                    Value                Unit/Description                   
--------------------------------------------------------------------------------
MSE                       0.288126             Absolute error
MAE                       0.380585             Absolute error
RMSE                      0.536774             Abs

In [None]:
# Select the dataset explicitly instead of relying on the default.
# NOTE(review): the recorded output below uses a different log layout and tag
# style (pTrue_aTrue_qTrue) than the other runs, so it appears to come from an
# earlier version of main.py — re-run to refresh.
!python main.py --dataset timeseries

Random seed set to: 42
--- Dataset: TIMESERIES ---
Config: Patch=60, Stride=32, K=26
Applying HSQP transformation...
Quantized Features Shape: (13112, 2)
Reconstructing time series...

--- Metrics ---
Compression Ratio: 1.49x
RMSE: 0.5368
MAE:  0.3806
Experiment complete. Plots saved with tag: pTrue_aTrue_qTrue
Results for timeseries: RMSE = 0.5368


In [None]:
# Same dataset with a different random seed to gauge run-to-run variation
# (recorded RMSE 0.5095 here vs 0.5368 with seed 42).
!python main.py --dataset timeseries --seed 123

Random seed set to: 123
--- Dataset: TIMESERIES ---
Config: Patch=60, Stride=32, K=26
Results for timeseries: RMSE = 0.5095


In [None]:
# Electricity dataset, full pipeline, with entropy analysis enabled.
# --save_metrics presumably persists the computed metrics under the output
# directory — confirm against main.py.
!python main.py --dataset electricity --entropy_analysis --save_metrics

Random seed set to: 42
HSQP EXPERIMENT: ELECTRICITY
Configuration:
  Dataset: electricity
  Patch Length: 60
  Stride: 32
  K (symbols): 26
  Patching: Enabled
  ABBA: Enabled
  Quantization: Enabled
  Seed: 42
  Entropy Analysis: Enabled
Loaded time series: 26304 points from data/electricity.csv

Applying HSQP transformation...
✓ Transformation successful
  Quantized Features Shape: (41672, 2)
  Number of symbol sequences: 821
  Total symbols generated: 41672
  Unique symbols: 26
  Symbol diversity: 0.06%

Reconstructing time series...
✓ Reconstruction successful: 26304 points

CALCULATING METRICS

COMPREHENSIVE METRICS REPORT
Dataset: ELECTRICITY p1_a1_q1
Metric                    Value                Unit/Description                   
--------------------------------------------------------------------------------
MSE                       138.689527             Absolute error
MAE                       6.752451             Absolute error
RMSE                      11.776652         

In [None]:
# Ablation on ETTh1: quantization disabled, entropy analysis enabled.
# Per the log below, this dataset runs with its own settings
# (patch length 144, stride 72, K=20) rather than the timeseries defaults.
!python main.py --dataset etth1 --no_quant --entropy_analysis

Random seed set to: 42
HSQP EXPERIMENT: ETTH1
Configuration:
  Dataset: etth1
  Patch Length: 144
  Stride: 72
  K (symbols): 20
  Patching: Enabled
  ABBA: Enabled
  Quantization: Disabled
  Seed: 42
  Entropy Analysis: Enabled
Loaded time series: 17420 points from data/ETTh1.csv

Applying HSQP transformation...
✓ Transformation successful
  Quantized Features Shape: (33161, 2)
  Number of symbol sequences: 240
  Total symbols generated: 33161
  Unique symbols: 20
  Symbol diversity: 0.06%

Reconstructing time series...
✓ Reconstruction successful: 17420 points

CALCULATING METRICS

COMPREHENSIVE METRICS REPORT
Dataset: ETTH1 p1_a1_q0
Metric                    Value                Unit/Description                   
--------------------------------------------------------------------------------
MSE                       12.591516             Absolute error
MAE                       2.456813             Absolute error
RMSE                      3.548453             Absolute error
MAPE 

In [None]:
# Full pipeline on the timeseries dataset with entropy analysis and metric
# saving enabled, directing outputs to results/full_analysis instead of the
# default output directory.
!python main.py --dataset timeseries --entropy_analysis --save_metrics --output_dir results/full_analysis

Random seed set to: 42
HSQP EXPERIMENT: TIMESERIES
Configuration:
  Dataset: timeseries
  Patch Length: 60
  Stride: 32
  K (symbols): 26
  Patching: Enabled
  ABBA: Enabled
  Quantization: Enabled
  Seed: 42
  Entropy Analysis: Enabled
Loaded time series: 7306 points from data/timeseries.csv

Applying HSQP transformation...
✓ Transformation successful
  Quantized Features Shape: (13112, 2)
  Number of symbol sequences: 227
  Total symbols generated: 13112
  Unique symbols: 26
  Symbol diversity: 0.20%

Reconstructing time series...
✓ Reconstruction successful: 7306 points

CALCULATING METRICS

COMPREHENSIVE METRICS REPORT
Dataset: TIMESERIES p1_a1_q1
Metric                    Value                Unit/Description                   
--------------------------------------------------------------------------------
MSE                       0.288126             Absolute error
MAE                       0.380585             Absolute error
RMSE                      0.536774             Absol

In [None]:
# Mount Google Drive at /content/drive so the following cells can copy the
# project and its results there (Colab-only; requires interactive authorization).
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Copy the whole project tree to Drive. The relative paths assume the working
# directory is /content/HSQP.
# NOTE(review): this copies everything (including any data/*.csv files), while
# the rsync cell that follows targets the same Drive folder and excludes *.csv —
# confirm whether both copies are needed. With standard `cp -r`, an already
# existing Ablation/ directory receives an HSQP/ subfolder instead of the
# project files directly, which differs from the layout rsync produces.
!cp -r ../HSQP ../drive/MyDrive/Ablation

In [None]:
# Mirror the project into Drive, skipping dataset CSVs and Python bytecode
# caches (__pycache__ is regenerated on import and only adds clutter on Drive).
# The source is given as an absolute path so the cell does not depend on the
# notebook's current working directory; the trailing slash copies the directory
# contents rather than the directory itself.
!rsync -av --exclude='*.csv' --exclude='__pycache__/' /content/HSQP/ /content/drive/MyDrive/Ablation/

sending incremental file list
./
HSQP.zip
electricity_reconstruction_error.png
electricity_time_series_reconstruction.png
main.py
reconstruction_(pfalse_atrue_qtrue).png
reconstruction_(ptrue_afalse_qtrue).png
reconstruction_(ptrue_atrue_qfalse).png
reconstruction_(ptrue_atrue_qtrue).png
requirements.txt
config/
config/__init__.py
data/
data/__init__.py
data/dataset.py
data/__pycache__/
data/__pycache__/__init__.cpython-312.pyc
data/__pycache__/dataset.cpython-312.pyc
library/
library/__init__.py
library/feature_quantization.py
library/time_series.py
library/__pycache__/
library/__pycache__/__init__.cpython-312.pyc
library/__pycache__/feature_quantization.cpython-312.pyc
library/__pycache__/time_series.cpython-312.pyc
models/
models/__init__.py
models/hsqp.py
models/__pycache__/
models/__pycache__/__init__.cpython-312.pyc
models/__pycache__/hsqp.cpython-312.pyc
utils/
utils/__init__.py
utils/hsqp_utils.py
utils/__pycache__/
utils/__pycache__/__init__.cpython-312.pyc
utils/__pycache__/h

In [None]:
# Archive the project with entries rooted at HSQP/ instead of the absolute-path
# prefix content/HSQP/, and skip dataset CSVs and bytecode caches so the archive
# matches what is synced to Drive. The `cd` only affects this shell invocation,
# so the notebook's working directory is unchanged; the archive is still written
# to /content/HSQP/HSQP.zip.
!cd /content && zip -r HSQP/HSQP.zip HSQP -x '*.csv' '*/__pycache__/*'

  adding: content/HSQP/config/ (stored 0%)
  adding: content/HSQP/config/__init__.py (stored 0%)
  adding: content/HSQP/data/ (stored 0%)
  adding: content/HSQP/data/__init__.py (deflated 23%)
  adding: content/HSQP/data/dataset.py (deflated 67%)
  adding: content/HSQP/library/ (stored 0%)
  adding: content/HSQP/library/time_series.py (deflated 76%)
  adding: content/HSQP/library/__init__.py (deflated 39%)
  adding: content/HSQP/library/feature_quantization.py (deflated 70%)
  adding: content/HSQP/main.py (deflated 64%)
  adding: content/HSQP/models/ (stored 0%)
  adding: content/HSQP/models/__init__.py (stored 0%)
  adding: content/HSQP/models/hsqp.py (deflated 67%)
  adding: content/HSQP/requirements.txt (deflated 16%)
  adding: content/HSQP/utils/ (stored 0%)
  adding: content/HSQP/utils/__init__.py (deflated 13%)
  adding: content/HSQP/utils/hsqp_utils.py (deflated 72%)
