In [None]:
!pip install pmdarima
!pip install pyts

Collecting pmdarima
[?25l  Downloading https://files.pythonhosted.org/packages/6d/e9/6587edeffba78fbed826c45d2e85edfba1fcb18f3d7d5347b20cdbdc7327/pmdarima-1.7.0-cp36-cp36m-manylinux1_x86_64.whl (1.5MB)
[K     |████████████████████████████████| 1.5MB 3.5MB/s 
Collecting statsmodels>=0.11
[?25l  Downloading https://files.pythonhosted.org/packages/cb/83/540fd83238a18abe6c2d280fa8e489ac5fcefa1f370f0ca1acd16ae1b860/statsmodels-0.11.1-cp36-cp36m-manylinux1_x86_64.whl (8.7MB)
[K     |████████████████████████████████| 8.7MB 25.4MB/s 
Collecting Cython<0.29.18,>=0.29
[?25l  Downloading https://files.pythonhosted.org/packages/e7/d7/510ddef0248f3e1e91f9cc7e31c0f35f8954d0af92c5c3fd4c853e859ebe/Cython-0.29.17-cp36-cp36m-manylinux1_x86_64.whl (2.1MB)
[K     |████████████████████████████████| 2.1MB 41.6MB/s 
Installing collected packages: statsmodels, Cython, pmdarima
  Found existing installation: statsmodels 0.10.2
    Uninstalling statsmodels-0.10.2:
      Successfully uninstalled statsmodel

In [None]:
# Mount Google Drive at /content/gdrive (Colab-only; prompts for an authorization code).
from google.colab import drive
drive.mount('/content/gdrive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly&response_type=code

Enter your authorization code:
··········
Mounted at /content/gdrive


# tsfile loader

In [None]:
import pandas as pd
import numpy as np

class TsFileParseException(Exception):
    """Signals that a .ts file being parsed does not follow the expected format."""


def load_from_tsfile_to_dataframe(path_to_file, return_separate_X_and_y=True,
                                  replace_missing_vals_with='NaN'):
    """Loads data from a .ts file into a Pandas DataFrame.

    Each data line of the file is one case; its dimensions are separated by
    ':' and, when the metadata declares labels, the last ':'-separated field
    is the label of the case. Every cell of the returned DataFrame holds a
    pandas Series with the values of one dimension of one case.

    Parameters
    ----------
    path_to_file: str
        The full pathname of the .ts file to read.
    return_separate_X_and_y: bool
        true if X and Y values should be returned as separate Data Frames (X) and a numpy array (y), false otherwise.
        This is only relevant for data that has associated label values
        (``@targetlabel true`` or ``@classlabel true ...``).
    replace_missing_vals_with: str
       The value that missing values in the text file should be replaced with prior to parsing.

    Returns
    -------
    DataFrame, ndarray
        If return_separate_X_and_y then a tuple containing a DataFrame and a numpy array containing the relevant time-series and corresponding label values.
        Target labels are returned as floats, class labels as (lower-cased) strings.
    DataFrame
        If not return_separate_X_and_y then a single DataFrame containing all time-series and (if relevant) a column "class_vals" the associated label values.
        A single DataFrame is also returned when the file declares no labels.

    Raises
    ------
    TsFileParseException
        If the file is empty, the metadata is malformed or incomplete, or a
        data line is structurally invalid.
    ValueError
        If a value (or a target label) cannot be converted to float.
    """

    # Initialize flags and variables used when parsing the file
    metadata_started = False
    data_started = False

    has_problem_name_tag = False
    has_timestamps_tag = False
    has_univariate_tag = False
    has_class_labels_tag = False
    has_target_labels_tag = False
    has_data_tag = False

    # Values carried by the metadata tags. They default to False so that a
    # file which only provides one of @classlabel / @targetlabel (both kinds
    # pass the metadata completeness check below) never hits an unbound name.
    timestamps = False
    class_labels = False
    target_labels = False
    # True when every case ends with a label (target value or class value)
    has_labels = False

    previous_timestamp_was_float = None
    previous_timestamp_was_int = None
    previous_timestamp_was_timestamp = None
    num_dimensions = None
    is_first_case = True
    instance_list = []
    class_val_list = []
    line_num = 0

    # Parse the file
    with open(path_to_file, 'r', encoding='utf-8') as file:
        for line in file:
            # Strip white space from start/end of line and change to lowercase for use below
            line = line.strip().lower()
            # Empty lines are valid at any point in a file
            if line:
                # Check if this line contains metadata
                # Please note that even though metadata is stored in this function it is not currently published externally
                if line.startswith("@problemname"):
                    # Check that the data has not started
                    if data_started:
                        raise TsFileParseException("metadata must come before data")
                    # Check that the associated value is valid
                    tokens = line.split(' ')
                    token_len = len(tokens)

                    if token_len == 1:
                        raise TsFileParseException("problemname tag requires an associated value")

                    problem_name = line[len("@problemname") + 1:]
                    has_problem_name_tag = True
                    metadata_started = True
                elif line.startswith("@timestamps"):
                    # Check that the data has not started
                    if data_started:
                        raise TsFileParseException("metadata must come before data")

                    # Check that the associated value is valid
                    tokens = line.split(' ')
                    token_len = len(tokens)

                    if token_len != 2:
                        raise TsFileParseException("timestamps tag requires an associated Boolean value")
                    elif tokens[1] == "true":
                        timestamps = True
                    elif tokens[1] == "false":
                        timestamps = False
                    else:
                        raise TsFileParseException("invalid timestamps value")
                    has_timestamps_tag = True
                    metadata_started = True
                elif line.startswith("@univariate"):
                    # Check that the data has not started
                    if data_started:
                        raise TsFileParseException("metadata must come before data")

                    # Check that the associated value is valid
                    tokens = line.split(' ')
                    token_len = len(tokens)
                    if token_len != 2:
                        raise TsFileParseException("univariate tag requires an associated Boolean value")
                    elif tokens[1] == "true":
                        univariate = True
                    elif tokens[1] == "false":
                        univariate = False
                    else:
                        raise TsFileParseException("invalid univariate value")

                    has_univariate_tag = True
                    metadata_started = True
                elif line.startswith("@classlabel"):
                    # Check that the data has not started
                    if data_started:
                        raise TsFileParseException("metadata must come before data")

                    # Check that the associated value is valid
                    tokens = line.split(' ')
                    token_len = len(tokens)

                    if token_len == 1:
                        raise TsFileParseException("classlabel tag requires an associated Boolean value")

                    if tokens[1] == "true":
                        class_labels = True
                    elif tokens[1] == "false":
                        class_labels = False
                    else:
                        raise TsFileParseException("invalid classLabel value")

                    # Check if we have any associated class values
                    if token_len == 2 and class_labels:
                        raise TsFileParseException("if the classlabel tag is true then class values must be supplied")

                    has_class_labels_tag = True
                    class_label_list = [token.strip() for token in tokens[2:]]
                    has_labels = target_labels or class_labels
                    metadata_started = True
                elif line.startswith("@targetlabel"):
                    # Check that the data has not started
                    if data_started:
                        raise TsFileParseException("metadata must come before data")

                    # Check that the associated value is valid
                    tokens = line.split(' ')
                    token_len = len(tokens)

                    if token_len == 1:
                        raise TsFileParseException("targetlabel tag requires an associated Boolean value")

                    if tokens[1] == "true":
                        target_labels = True
                    elif tokens[1] == "false":
                        target_labels = False
                    else:
                        raise TsFileParseException("invalid targetLabel value")

                    has_target_labels_tag = True
                    class_val_list = []
                    has_labels = target_labels or class_labels
                    metadata_started = True
                # Check if this line contains the start of data
                elif line.startswith("@data"):
                    if line != "@data":
                        raise TsFileParseException("data tag should not have an associated value")

                    if data_started and not metadata_started:
                        raise TsFileParseException("metadata must come before data")
                    else:
                        has_data_tag = True
                        data_started = True
                # If the 'data tag has been found then metadata has been parsed and data can be loaded
                elif data_started:
                    # Check that a full set of metadata has been provided
                    incomplete_regression_meta_data = not has_problem_name_tag or not has_timestamps_tag or not has_univariate_tag or not has_target_labels_tag or not has_data_tag
                    incomplete_classification_meta_data = not has_problem_name_tag or not has_timestamps_tag or not has_univariate_tag or not has_class_labels_tag or not has_data_tag
                    if incomplete_regression_meta_data and incomplete_classification_meta_data:
                        raise TsFileParseException("a full set of metadata has not been provided before the data")

                    # Replace any missing values with the value specified
                    line = line.replace("?", replace_missing_vals_with)

                    # Check if we dealing with data that has timestamps
                    if timestamps:
                        # We're dealing with timestamps so cannot just split line on ':' as timestamps may contain one
                        has_another_value = False
                        has_another_dimension = False

                        timestamps_for_dimension = []
                        values_for_dimension = []

                        this_line_num_dimensions = 0
                        line_len = len(line)
                        char_num = 0

                        while char_num < line_len:
                            # Move through any spaces
                            while char_num < line_len and str.isspace(line[char_num]):
                                char_num += 1

                            # See if there is any more data to read in or if we should validate that read thus far

                            if char_num < line_len:

                                # See if we have an empty dimension (i.e. no values)
                                if line[char_num] == ":":
                                    if len(instance_list) < (this_line_num_dimensions + 1):
                                        instance_list.append([])

                                    # Explicit dtype keeps empty dimensions numeric on every pandas version
                                    instance_list[this_line_num_dimensions].append(pd.Series(dtype=np.float32))
                                    this_line_num_dimensions += 1

                                    has_another_value = False
                                    has_another_dimension = True

                                    timestamps_for_dimension = []
                                    values_for_dimension = []

                                    char_num += 1
                                else:
                                    # Check if we have reached a label (anything not starting a tuple)
                                    if line[char_num] != "(" and has_labels:
                                        class_val = line[char_num:].strip()

                                        # Target labels are numeric; class labels are kept as strings.
                                        # Class values are deliberately not validated against the
                                        # values listed in the metadata.
                                        class_val_list.append(float(class_val) if target_labels else class_val)
                                        char_num = line_len

                                        has_another_value = False
                                        has_another_dimension = False

                                        timestamps_for_dimension = []
                                        values_for_dimension = []

                                    else:

                                        # Read in the data contained within the next tuple

                                        if line[char_num] != "(" and not has_labels:
                                            raise TsFileParseException(
                                                "dimension " + str(this_line_num_dimensions + 1) + " on line " + str(
                                                    line_num + 1) + " does not start with a '('")

                                        char_num += 1
                                        tuple_data = ""

                                        while char_num < line_len and line[char_num] != ")":
                                            tuple_data += line[char_num]
                                            char_num += 1

                                        if char_num >= line_len or line[char_num] != ")":
                                            raise TsFileParseException(
                                                "dimension " + str(this_line_num_dimensions + 1) + " on line " + str(
                                                    line_num + 1) + " does not end with a ')'")

                                        # Read in any spaces immediately after the current tuple

                                        char_num += 1

                                        while char_num < line_len and str.isspace(line[char_num]):
                                            char_num += 1

                                        # Check if there is another value or dimension to process after this tuple

                                        if char_num >= line_len:
                                            has_another_value = False
                                            has_another_dimension = False

                                        elif line[char_num] == ",":
                                            has_another_value = True
                                            has_another_dimension = False

                                        elif line[char_num] == ":":
                                            has_another_value = False
                                            has_another_dimension = True

                                        char_num += 1

                                        # Get the numeric value for the tuple by reading from the end of the tuple data backwards to the last comma

                                        last_comma_index = tuple_data.rfind(',')

                                        if last_comma_index == -1:
                                            raise TsFileParseException(
                                                "dimension " + str(this_line_num_dimensions + 1) + " on line " + str(
                                                    line_num + 1) + " contains a tuple that has no comma inside of it")

                                        try:
                                            value = tuple_data[last_comma_index + 1:]
                                            value = float(value)

                                        except ValueError:
                                            raise TsFileParseException(
                                                "dimension " + str(this_line_num_dimensions + 1) + " on line " + str(
                                                    line_num + 1) + " contains a tuple that does not have a valid numeric value")

                                        # Check the type of timestamp that we have.
                                        # NOTE: timestamp_is_float is intentionally only assigned when the
                                        # integer conversion fails; the checks below short-circuit before
                                        # reading it in every other case.

                                        timestamp = tuple_data[0: last_comma_index]

                                        try:
                                            timestamp = int(timestamp)
                                            timestamp_is_int = True
                                            timestamp_is_timestamp = False
                                        except ValueError:
                                            timestamp_is_int = False

                                        if not timestamp_is_int:
                                            try:
                                                timestamp = float(timestamp)
                                                timestamp_is_float = True
                                                timestamp_is_timestamp = False
                                            except ValueError:
                                                timestamp_is_float = False

                                        if not timestamp_is_int and not timestamp_is_float:
                                            # Neither int nor float: keep the text for date parsing later
                                            timestamp = timestamp.strip()
                                            timestamp_is_timestamp = True

                                        # Make sure that the timestamps in the file (not just this dimension or case) are consistent

                                        if not timestamp_is_timestamp and not timestamp_is_int and not timestamp_is_float:
                                            raise TsFileParseException(
                                                "dimension " + str(this_line_num_dimensions + 1) + " on line " + str(
                                                    line_num + 1) + " contains a tuple that has an invalid timestamp '" + timestamp + "'")

                                        if previous_timestamp_was_float is not None and previous_timestamp_was_float and not timestamp_is_float:
                                            raise TsFileParseException(
                                                "dimension " + str(this_line_num_dimensions + 1) + " on line " + str(
                                                    line_num + 1) + " contains tuples where the timestamp format is inconsistent")

                                        if previous_timestamp_was_int is not None and previous_timestamp_was_int and not timestamp_is_int:
                                            raise TsFileParseException(
                                                "dimension " + str(this_line_num_dimensions + 1) + " on line " + str(
                                                    line_num + 1) + " contains tuples where the timestamp format is inconsistent")

                                        if previous_timestamp_was_timestamp is not None and previous_timestamp_was_timestamp and not timestamp_is_timestamp:
                                            raise TsFileParseException(
                                                "dimension " + str(this_line_num_dimensions + 1) + " on line " + str(
                                                    line_num + 1) + " contains tuples where the timestamp format is inconsistent")

                                        # Store the values

                                        timestamps_for_dimension += [timestamp]
                                        values_for_dimension += [value]

                                        #  If this was our first tuple then we store the type of timestamp we had

                                        if previous_timestamp_was_timestamp is None and timestamp_is_timestamp:
                                            previous_timestamp_was_timestamp = True
                                            previous_timestamp_was_int = False
                                            previous_timestamp_was_float = False

                                        if previous_timestamp_was_int is None and timestamp_is_int:
                                            previous_timestamp_was_timestamp = False
                                            previous_timestamp_was_int = True
                                            previous_timestamp_was_float = False

                                        if previous_timestamp_was_float is None and timestamp_is_float:
                                            previous_timestamp_was_timestamp = False
                                            previous_timestamp_was_int = False
                                            previous_timestamp_was_float = True

                                        # See if we should add the data for this dimension

                                        if not has_another_value:
                                            if len(instance_list) < (this_line_num_dimensions + 1):
                                                instance_list.append([])

                                            if timestamp_is_timestamp:
                                                timestamps_for_dimension = pd.DatetimeIndex(timestamps_for_dimension)

                                            instance_list[this_line_num_dimensions].append(
                                                pd.Series(index=timestamps_for_dimension, data=values_for_dimension))
                                            this_line_num_dimensions += 1

                                            timestamps_for_dimension = []
                                            values_for_dimension = []

                            elif has_another_value:
                                raise TsFileParseException(
                                    "dimension " + str(this_line_num_dimensions + 1) + " on line " + str(
                                        line_num + 1) + " ends with a ',' that is not followed by another tuple")

                            elif has_another_dimension and has_labels:
                                raise TsFileParseException(
                                    "dimension " + str(this_line_num_dimensions + 1) + " on line " + str(
                                        line_num + 1) + " ends with a ':' while it should list a class value")

                            elif has_another_dimension and not has_labels:
                                if len(instance_list) < (this_line_num_dimensions + 1):
                                    instance_list.append([])

                                instance_list[this_line_num_dimensions].append(pd.Series(dtype=np.float32))
                                this_line_num_dimensions += 1
                                num_dimensions = this_line_num_dimensions

                            # If this is the 1st line of data we have seen then note the dimensions

                            if not has_another_value and not has_another_dimension:
                                if num_dimensions is None:
                                    num_dimensions = this_line_num_dimensions

                                if num_dimensions != this_line_num_dimensions:
                                    raise TsFileParseException("line " + str(
                                        line_num + 1) + " does not have the same number of dimensions as the previous line of data")

                        # Check that we are not expecting some more data, and if not, store that processed above

                        if has_another_value:
                            raise TsFileParseException(
                                "dimension " + str(this_line_num_dimensions + 1) + " on line " + str(
                                    line_num + 1) + " ends with a ',' that is not followed by another tuple")

                        elif has_another_dimension and has_labels:
                            raise TsFileParseException(
                                "dimension " + str(this_line_num_dimensions + 1) + " on line " + str(
                                    line_num + 1) + " ends with a ':' while it should list a class value")

                        elif has_another_dimension and not has_labels:
                            if len(instance_list) < (this_line_num_dimensions + 1):
                                instance_list.append([])

                            instance_list[this_line_num_dimensions].append(pd.Series(dtype=np.float32))
                            this_line_num_dimensions += 1
                            num_dimensions = this_line_num_dimensions

                        # If this is the 1st line of data we have seen then note the dimensions

                        if not has_another_value and num_dimensions != this_line_num_dimensions:
                            raise TsFileParseException("line " + str(
                                line_num + 1) + " does not have the same number of dimensions as the previous line of data")

                        # Check if we should have label values, and if so that at least one has been read

                        if has_labels and len(class_val_list) == 0:
                            raise TsFileParseException("the cases have no associated class values")
                    else:
                        dimensions = line.split(":")
                        # If first row then note the number of dimensions (that must be the same for all cases)
                        if is_first_case:
                            num_dimensions = len(dimensions)

                            # The trailing field is the label, not a dimension
                            if has_labels:
                                num_dimensions -= 1

                            for dim in range(0, num_dimensions):
                                instance_list.append([])
                            is_first_case = False

                        # See how many dimensions that the case whose data in represented in this line has
                        this_line_num_dimensions = len(dimensions)

                        if has_labels:
                            this_line_num_dimensions -= 1

                        # All dimensions should be included for all series, even if they are empty
                        if this_line_num_dimensions != num_dimensions:
                            raise TsFileParseException("inconsistent number of dimensions. Expecting " + str(
                                num_dimensions) + " but have read " + str(this_line_num_dimensions))

                        # Process the data for each dimension
                        for dim in range(0, num_dimensions):
                            dimension = dimensions[dim].strip()

                            if dimension:
                                data_series = dimension.split(",")
                                data_series = [float(i) for i in data_series]
                                instance_list[dim].append(pd.Series(data_series))
                            else:
                                instance_list[dim].append(pd.Series(dtype=np.float32))

                        if has_labels:
                            label = dimensions[num_dimensions].strip()
                            # Target labels are numeric; class labels are kept as strings
                            class_val_list.append(float(label) if target_labels else label)

            line_num += 1

    # Check that the file was not empty
    if line_num:
        # Check that the file contained both metadata and data
        complete_regression_meta_data = has_problem_name_tag and has_timestamps_tag and has_univariate_tag and has_target_labels_tag and has_data_tag
        complete_classification_meta_data = has_problem_name_tag and has_timestamps_tag and has_univariate_tag and has_class_labels_tag and has_data_tag

        if metadata_started and not complete_regression_meta_data and not complete_classification_meta_data:
            raise TsFileParseException("metadata incomplete")
        elif metadata_started and not data_started:
            raise TsFileParseException("file contained metadata but no data")
        elif metadata_started and data_started and len(instance_list) == 0:
            raise TsFileParseException("file contained metadata but no data")

        # Create a DataFrame from the data parsed above
        data = pd.DataFrame(dtype=np.float32)

        for dim in range(0, num_dimensions):
            data['dim_' + str(dim)] = instance_list[dim]

        # Check if we should return any associated labels separately
        if has_labels:
            if return_separate_X_and_y:
                return data, np.asarray(class_val_list)
            else:
                data['class_vals'] = pd.Series(class_val_list)
                return data
        else:
            return data
    else:
        raise TsFileParseException("empty file")

# Data Loader


In [None]:
import numpy as np
import pmdarima as pm
from pyts.approximation import PiecewiseAggregateApproximation
from pyts.image import RecurrencePlot
import time


class DataLoader:
    """Loads a .ts dataset and turns it into fixed-size numpy arrays.

    The nested DataFrame returned by ``load_from_tsfile_to_dataframe`` (one
    pandas Series per cell) is kept in ``self.data`` until ``load_data`` is
    called. The label array returned by the parser is discarded.

    Attributes set by ``__init__``: ``data``, ``num_instances``,
    ``num_variables`` and ``ts_length`` (the capped series length, taken from
    the first instance).
    """

    # Only the first MAX_INSTANCES cases are kept, to save memory
    MAX_INSTANCES = 20000
    # Series longer than this are truncated to their first MAX_TS_LENGTH observations
    MAX_TS_LENGTH = 2000

    def __init__(self, path_to_file):
        """Parse ``path_to_file`` and record the dataset dimensions."""
        self.data, _ = load_from_tsfile_to_dataframe(path_to_file=path_to_file)
        self.num_instances, self.num_variables = self.data.shape

        # Use first MAX_INSTANCES samples only to save memory
        if self.num_instances > self.MAX_INSTANCES:
            self.data = self.data.iloc[:self.MAX_INSTANCES]
            self.num_instances = self.MAX_INSTANCES

        # Get (max) time series length, capped at MAX_TS_LENGTH.
        # Handle case where variables do not have the same lengths by taking the max
        # Shorter ones are padded
        self.ts_length = min(self._first_instance_length(), self.MAX_TS_LENGTH)

    def _first_instance_length(self):
        """Return the longest series length among the variables of the first instance."""
        first_instance = self.data.iloc[0]
        # .iloc: the row is labelled 'dim_0', 'dim_1', ... so access must be positional
        return max(len(first_instance.iloc[i]) for i in range(self.num_variables))

    @staticmethod
    def _front_pad(values, length):
        """Left-pad a 1-D array with zeros up to ``length``; return it unchanged if already that size."""
        if values.size != length:
            values = np.pad(values, (length - values.size, 0), 'constant', constant_values=0.)
        return values

    def load_data(self):
        """
        Prepare numpy array X with shape (num_instances, ts_length - 1, num_variables)
        and Y with shape (num_instances, num_variables)

        Each series s_1..s_t is split into X_i = [s_1,...,s_t-1] and Y_i = s_t.
        Missing values are linearly interpolated, series longer than
        ``ts_length`` are truncated and shorter ones are zero-padded at the
        beginning.

        Note: ``self.data`` is set to None afterwards to free memory, so this
        method (and ``get_residuals``) cannot be called again on the same
        object.
        """
        X = np.empty((self.num_instances, self.ts_length - 1, self.num_variables))
        Y = np.empty((self.num_instances, self.num_variables))

        # For all instances
        for idx, (_, row) in enumerate(self.data.iterrows()):
            for i in range(self.num_variables):
                # Get current variable's series
                # Apply linear interpolation on missing values
                # Handle case when no. observations > ts_length by enforcing a length slice
                s = row.iloc[i].interpolate(limit_direction='both').to_numpy()[:self.ts_length]

                # Case when a variable's series has a shorter length: pad beginning with zeros
                s = self._front_pad(s, self.ts_length)

                X[idx, :, i] = s[:-1]
                Y[idx, i] = s[-1]

        # Free data variable
        self.data = None

        return X, Y

    def get_residuals(self):
        """
        Get ARIMA residuals of each variable. Used for BDS tests

        Only the first instance is used. Returns an array of shape
        (num_variables, full_length) where full_length is the longest
        (uncapped) series length of that instance; shorter residual vectors
        are zero-padded at the beginning.

        Must be called before ``load_data``, which releases ``self.data``.
        """
        # Uncapped length of the sample. Kept local so that the capped
        # self.ts_length used by load_data is not overwritten.
        full_length = self._first_instance_length()

        residuals = np.empty((self.num_variables, full_length))
        first_instance = self.data.iloc[0]

        # Take a sample. For each variable
        for i in range(self.num_variables):
            # Obtain variable's time series
            sample = first_instance.iloc[i].interpolate(limit_direction='both').to_numpy()

            # Fit arima and obtain residuals
            model = pm.auto_arima(sample, seasonal=False, start_p=2, max_p=10, max_d=10, max_q=10)
            res = np.array(model.resid())

            # Case when a variable's series has a shorter length: pad beginning with zeros
            residuals[i] = self._front_pad(res, full_length)

        return residuals


class CNNDataLoader(DataLoader):
    """
    Data loader that turns every variable's series into a square image so the
    data can be fed to a 2-D CNN.

    Relies on ``self.data``, ``self.num_instances``, ``self.num_variables`` and
    ``self.ts_length`` being available on the instance (presumably set by
    ``DataLoader.__init__`` -- not visible here, confirm).
    """

    def __init__(self, path_to_file, img_size):
        """
        Args:
            path_to_file: forwarded unchanged to ``DataLoader.__init__``.
            img_size: side length of the generated images; also the number of
                points the series is reduced to by PAA.
        """
        super().__init__(path_to_file)
        self.img_size = img_size

    def load_data(self):
        """
        Prepare numpy array X with shape (num_instances, img_size, img_size, num_variables)
        and Y with shape (num_instances, num_variables).

        For each instance and variable the series is interpolated, cut or
        zero-padded to ``ts_length``, then all but its last value go through
        Piecewise Aggregate Approximation followed by a recurrence plot to
        produce one image channel. The last value becomes the target.

        Returns:
            tuple (X, Y) of numpy arrays.
        """
        X = np.empty((self.num_instances, self.img_size, self.img_size, self.num_variables))
        Y = np.empty((self.num_instances, self.num_variables))

        # PAA shrinks a series to img_size points; RP turns it into an image
        paa = PiecewiseAggregateApproximation(window_size=None, output_size=self.img_size, overlapping=False)
        rp = RecurrencePlot()

        # For all instances
        for idx, (_, row) in enumerate(self.data.iterrows()):
            for i in range(self.num_variables):
                # Positional cell access via .iloc: `row[i]` would rely on pandas'
                # deprecated integer-key positional fallback for labelled columns.
                # Apply linear interpolation on missing values, then enforce the
                # maximum length with a slice.
                s = row.iloc[i].interpolate(limit_direction='both').to_numpy()[:self.ts_length]

                # Case when a variable's series has a shorter length
                if s.size != self.ts_length:
                    # Pad beginning with zeros
                    s = np.pad(s, (self.ts_length - s.size, 0), 'constant', constant_values=0.)

                # Apply PAA and RP on everything except the last observation.
                # The transformers work on batches, hence the added leading axis
                # and the [0] to take the single result back out.
                X[idx, :, :, i] = rp.transform(paa.transform(np.expand_dims(s[:-1], axis=0)))[0]
                # The last observation is the regression target
                Y[idx, i] = s[-1]

        return X, Y

# Models

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Dropout, Input
from tensorflow.keras.layers import Conv1D, Conv2D
from tensorflow.keras.layers import Activation, BatchNormalization, Concatenate, Add, add
from tensorflow.keras.layers import GlobalAveragePooling1D, MaxPool1D, MaxPooling2D
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping
from tensorflow.keras.metrics import RootMeanSquaredError
from tensorflow.keras.models import Model
import tensorflow as tf
import os

# Set to False to skip the "a TPU must be attached" sanity check below
use_tpu = True

if use_tpu:
    # Colab exposes the TPU address through this environment variable
    assert 'COLAB_TPU_ADDR' in os.environ, 'Missing TPU; did you request a TPU in Notebook Settings?'

# gRPC endpoint of the TPU worker, or an empty string when no TPU is attached
TF_MASTER = 'grpc://{}'.format(os.environ['COLAB_TPU_ADDR']) if 'COLAB_TPU_ADDR' in os.environ else ''

# Connect to the TPU cluster, initialise it, and create the distribution
# strategy under whose scope every model below is built and compiled.
resolver = tf.distribute.cluster_resolver.TPUClusterResolver(TF_MASTER)
tf.config.experimental_connect_to_cluster(resolver)
tf.tpu.experimental.initialize_tpu_system(resolver)
strategy = tf.distribute.experimental.TPUStrategy(resolver)

class BaseModel:
    """
    Common state and training routine shared by all regression networks.

    Subclasses are expected to override ``build_model`` and to store the
    compiled network in ``self.model``; the base class leaves it as ``None``.
    """

    def __init__(self, ts_length, num_variables, loss, epochs, batch_size, optimizer):
        """
        Store the hyper-parameters; no network is built here.

        Args:
            ts_length: series length used by subclasses for their input layer.
            num_variables: number of variables (input channels and outputs).
            loss: loss passed to ``compile`` by subclasses.
            epochs: maximum number of training epochs.
            batch_size: base batch size (multiplied by 8 in ``fit``).
            optimizer: optimizer passed to ``compile`` by subclasses.
        """
        # Identity and network placeholder (subclasses overwrite both)
        self.name = 'Base'
        self.model = None

        # Data geometry
        self.ts_length = ts_length
        self.num_variables = num_variables

        # Training configuration
        self.loss = loss
        self.optimizer = optimizer
        self.epochs = epochs
        self.batch_size = batch_size

    def fit(self, dataset):
        """
        Train ``self.model`` and evaluate it on the test split.

        Args:
            dataset: mapping with the keys 'X_train', 'Y_train', 'X_test'
                and 'Y_test'.

        Returns:
            The second value returned by ``self.model.evaluate`` on the test
            split (the RMSE metric for the models compiled in this file).
        """
        X_train, Y_train, X_test, Y_test = (
            dataset[key] for key in ('X_train', 'Y_train', 'X_test', 'Y_test')
        )

        # Halve the learning rate after 50 stagnant epochs (floor 1e-4) and
        # stop altogether after 200 epochs without val_loss improvement.
        callbacks = [
            ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=50, min_lr=0.0001),
            EarlyStopping(monitor='val_loss', patience=200),
        ]

        # Train model. The last 20% of the training data is held out for validation.
        # NOTE(review): the factor 8 presumably matches the 8 TPU cores reported
        # in the initialisation log -- confirm.
        self.model.fit(X_train, Y_train,
                       epochs=self.epochs,
                       batch_size=self.batch_size * 8,
                       verbose=0,
                       validation_split=0.2,
                       callbacks=callbacks)

        # evaluate() yields (loss, metric); only the metric is reported
        _loss, rmse = self.model.evaluate(X_test, Y_test)
        return rmse

    def build_model(self):
        """Placeholder; concrete architectures override this."""
        pass


class CNNModel(BaseModel):
    """
    2-D convolutional regressor working on square images with one channel
    per variable. The network is built and compiled on construction.
    """

    def __init__(self, img_size, num_variables, loss, epochs, batch_size, optimizer):
        """
        Args:
            img_size: height and width of the input images.
            num_variables: number of image channels and of regression outputs.
            loss, epochs, batch_size, optimizer: forwarded to ``BaseModel``.
        """
        # ts_length is not read by this model, so None is handed to the base class
        super().__init__(ts_length=None, num_variables=num_variables, loss=loss,
                         epochs=epochs, batch_size=batch_size, optimizer=optimizer)
        self.img_size = img_size
        self.name = 'CNN'
        self.model = self.build_model()

    def build_model(self):
        """
        Build and compile the network inside the global ``strategy`` scope:
        two conv / batch-norm / ReLU / max-pool stages, then a dense head.

        Returns:
            The compiled Keras model (RMSE is tracked as a metric).
        """
        input_shape = (self.img_size, self.img_size, self.num_variables)

        with strategy.scope():
            # Only the first convolution declares the input shape
            per_stage_conv_kwargs = ({'input_shape': input_shape}, {})

            layers = []
            for extra_kwargs in per_stage_conv_kwargs:
                layers.extend([
                    Conv2D(filters=32, kernel_size=3, padding='same', **extra_kwargs),
                    BatchNormalization(),
                    Activation('relu'),
                    MaxPooling2D(pool_size=2),
                ])

            # Regression head: one linear output per variable
            layers.extend([
                Dropout(0.25),
                Flatten(),
                Dense(256, activation='relu'),
                Dropout(0.25),
                Dense(self.num_variables, activation='linear'),
            ])

            model = Sequential(layers)
            model.compile(loss=self.loss, optimizer=self.optimizer, metrics=[RootMeanSquaredError()])

        return model


class FCNModel(BaseModel):
    """
    Fully convolutional 1-D regressor: three conv / batch-norm / ReLU stages,
    global average pooling and a linear output per variable. The network is
    built and compiled on construction.
    """

    def __init__(self, ts_length, num_variables, loss, epochs, batch_size, optimizer):
        """
        Args:
            ts_length: accepted for interface parity (see note below).
            num_variables: number of input channels and of regression outputs.
            loss, epochs, batch_size, optimizer: forwarded to ``BaseModel``.
        """
        # NOTE(review): the ts_length argument is not forwarded -- the base class
        # receives None, so the Input layer below has an unspecified time
        # dimension. Confirm whether this is intentional.
        super().__init__(ts_length=None, num_variables=num_variables, loss=loss,
                         epochs=epochs, batch_size=batch_size, optimizer=optimizer)
        self.name = 'FCN'
        self.model = self.build_model()

    def build_model(self):
        """
        Build and compile the network inside the global ``strategy`` scope.

        Returns:
            The compiled Keras model (RMSE is tracked as a metric).
        """
        # (filters, kernel_size) of the three consecutive convolution stages
        stage_specs = ((128, 8), (256, 5), (128, 3))

        with strategy.scope():
            inputs = Input((self.ts_length, self.num_variables))

            features = inputs
            for n_filters, kernel_width in stage_specs:
                features = Conv1D(filters=n_filters, kernel_size=kernel_width, padding='same')(features)
                features = BatchNormalization()(features)
                features = Activation('relu')(features)

            # Average over the time axis, then regress one value per variable
            pooled = GlobalAveragePooling1D()(features)
            outputs = Dense(self.num_variables, activation='linear')(pooled)

            model = Model(inputs=inputs, outputs=outputs)
            model.compile(loss=self.loss, optimizer=self.optimizer, metrics=[RootMeanSquaredError()])

        return model


class InceptionTimeModel(BaseModel):
    """
    Inception-style 1-D regressor: a stack of ``depth`` inception modules with
    a residual shortcut closing every third module, followed by global average
    pooling and a linear output per variable. Built and compiled on construction.
    """

    def __init__(self, ts_length, num_variables, loss, epochs, batch_size, optimizer):
        """
        Args:
            ts_length: accepted for interface parity (see note below).
            num_variables: number of input channels and of regression outputs.
            loss, epochs, batch_size, optimizer: forwarded to ``BaseModel``.
        """
        # NOTE(review): the ts_length argument is not forwarded -- the base class
        # receives None, so the Input layer has an unspecified time dimension.
        # Confirm whether this is intentional.
        super().__init__(ts_length=None, num_variables=num_variables, loss=loss,
                         epochs=epochs, batch_size=batch_size, optimizer=optimizer)
        self.name = 'InceptionTime'

        # Architecture settings; they must exist before build_model() runs
        self.depth = 6
        self.nb_filters = 32
        self.kernel_size = 40
        self.use_bottleneck = True
        self.bottleneck_size = 32
        self.use_residual = True

        self.model = self.build_model()

    def _inception_module(self, input_tensor, stride=1, activation='linear'):
        """
        One inception module: optional 1x1 bottleneck, three parallel
        convolutions with kernel sizes kernel_size, kernel_size // 2 and
        kernel_size // 4, plus a max-pool -> 1x1 convolution branch. All
        branches are concatenated on the channel axis, batch-normalised and
        passed through ReLU.

        Args:
            input_tensor: tensor to transform.
            stride: stride of the parallel convolutions and of the pooling.
            activation: activation of the convolutions inside the module.

        Returns:
            The module's output tensor.
        """
        # The bottleneck is skipped for single-channel input
        if self.use_bottleneck and int(input_tensor.shape[-1]) > 1:
            branch_input = Conv1D(filters=self.bottleneck_size, kernel_size=1, padding='same',
                                  activation=activation, use_bias=False)(input_tensor)
        else:
            branch_input = input_tensor

        # Parallel convolutions with successively halved kernel widths
        branches = [
            Conv1D(filters=self.nb_filters, kernel_size=self.kernel_size // (2 ** power),
                   strides=stride, padding='same', activation=activation,
                   use_bias=False)(branch_input)
            for power in range(3)
        ]

        # Pooling branch reads the module input directly, not the bottleneck output
        pooled = MaxPool1D(pool_size=3, strides=stride, padding='same')(input_tensor)
        branches.append(Conv1D(filters=self.nb_filters, kernel_size=1, padding='same',
                               activation=activation, use_bias=False)(pooled))

        merged = Concatenate(axis=2)(branches)
        merged = BatchNormalization()(merged)
        return Activation(activation='relu')(merged)

    def _shortcut_layer(self, input_tensor, out_tensor):
        """
        Residual connection: project ``input_tensor`` with a 1x1 convolution to
        the channel count of ``out_tensor``, batch-normalise it, add it to
        ``out_tensor`` and apply ReLU.
        """
        n_channels = int(out_tensor.shape[-1])
        projected = Conv1D(filters=n_channels, kernel_size=1, padding='same',
                           use_bias=False)(input_tensor)
        projected = BatchNormalization()(projected)

        summed = Add()([projected, out_tensor])
        return Activation('relu')(summed)

    def build_model(self):
        """
        Build and compile the network inside the global ``strategy`` scope.

        Returns:
            The compiled Keras model (RMSE is tracked as a metric).
        """
        with strategy.scope():
            input_layer = Input((self.ts_length, self.num_variables))

            x = input_layer
            residual_source = input_layer

            for d in range(self.depth):
                x = self._inception_module(x)
                # Every third module (d = 2, 5, ...) closes a residual group; the
                # result becomes the starting point of the next shortcut.
                if self.use_residual and d % 3 == 2:
                    x = self._shortcut_layer(residual_source, x)
                    residual_source = x

            pooled = GlobalAveragePooling1D()(x)
            output_layer = Dense(self.num_variables, activation='linear')(pooled)

            model = Model(inputs=input_layer, outputs=output_layer)
            model.compile(loss=self.loss, optimizer=self.optimizer, metrics=[RootMeanSquaredError()])

        return model


class ResNetModel(BaseModel):
    """
    1-D residual network regressor made of three residual blocks followed by
    global average pooling and a linear output per variable. The network is
    built and compiled on construction.
    """

    def __init__(self, ts_length, num_variables, loss, epochs, batch_size, optimizer):
        """
        Args:
            ts_length: accepted for interface parity (see note below).
            num_variables: number of input channels and of regression outputs.
            loss, epochs, batch_size, optimizer: forwarded to ``BaseModel``.
        """
        # NOTE(review): the ts_length argument is not forwarded -- the base class
        # receives None, so the Input layer has an unspecified time dimension.
        # Confirm whether this is intentional.
        super().__init__(ts_length=None, num_variables=num_variables, loss=loss,
                         epochs=epochs, batch_size=batch_size, optimizer=optimizer)
        self.name = 'ResNet'
        self.model = self.build_model()

    def build_model(self):
        """
        Build and compile the network inside the global ``strategy`` scope.

        Each residual block stacks three convolutions (kernel sizes 8, 5, 3),
        each followed by batch normalisation, with ReLU after the first two.
        The block input is added back through a shortcut before a final ReLU.

        Returns:
            The compiled Keras model (RMSE is tracked as a metric).
        """
        n_feature_maps = 64

        # (filters, project_shortcut) for blocks 1-3. The shortcut needs a 1x1
        # convolution whenever the block is given one; block 3 keeps the channel
        # count of block 2, so its shortcut is batch-normalised only.
        block_specs = (
            (n_feature_maps, True),
            (n_feature_maps * 2, True),
            (n_feature_maps * 2, False),
        )
        kernel_widths = (8, 5, 3)

        with strategy.scope():
            input_layer = Input((self.ts_length, self.num_variables))

            block_input = input_layer
            for n_filters, project_shortcut in block_specs:
                # Main branch: conv -> BN (-> ReLU, except after the last conv)
                branch = block_input
                for position, width in enumerate(kernel_widths):
                    branch = Conv1D(filters=n_filters, kernel_size=width, padding='same')(branch)
                    branch = BatchNormalization()(branch)
                    if position < len(kernel_widths) - 1:
                        branch = Activation('relu')(branch)

                # Shortcut branch: optionally expand channels for the sum
                shortcut = block_input
                if project_shortcut:
                    shortcut = Conv1D(filters=n_filters, kernel_size=1, padding='same')(shortcut)
                shortcut = BatchNormalization()(shortcut)

                block_output = add([shortcut, branch])
                block_input = Activation('relu')(block_output)

            # FINAL
            pooled = GlobalAveragePooling1D()(block_input)
            output_layer = Dense(self.num_variables, activation='linear')(pooled)

            model = Model(inputs=input_layer, outputs=output_layer)
            model.compile(loss=self.loss, optimizer=self.optimizer, metrics=[RootMeanSquaredError()])

        return model

INFO:tensorflow:Initializing the TPU system: grpc://10.86.43.42:8470


INFO:tensorflow:Initializing the TPU system: grpc://10.86.43.42:8470


INFO:tensorflow:Clearing out eager caches


INFO:tensorflow:Clearing out eager caches


INFO:tensorflow:Finished initializing TPU system.


INFO:tensorflow:Finished initializing TPU system.


INFO:tensorflow:Found TPU system:


INFO:tensorflow:Found TPU system:


INFO:tensorflow:*** Num TPU Cores: 8


INFO:tensorflow:*** Num TPU Cores: 8


INFO:tensorflow:*** Num TPU Workers: 1


INFO:tensorflow:*** Num TPU Workers: 1


INFO:tensorflow:*** Num TPU Cores Per Worker: 8


INFO:tensorflow:*** Num TPU Cores Per Worker: 8


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:localhost/replica:0/task:0/device:CPU:0, CPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:localhost/replica:0/task:0/device:CPU:0, CPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:localhost/replica:0/task:0/device:XLA_CPU:0, XLA_CPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:localhost/replica:0/task:0/device:XLA_CPU:0, XLA_CPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:CPU:0, CPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:CPU:0, CPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:0, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:0, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:1, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:1, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:2, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:2, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:3, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:3, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:4, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:4, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:5, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:5, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:6, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:6, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:7, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:7, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU_SYSTEM:0, TPU_SYSTEM, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU_SYSTEM:0, TPU_SYSTEM, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:XLA_CPU:0, XLA_CPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:XLA_CPU:0, XLA_CPU, 0, 0)


# Run Models

In [None]:
from tensorflow.keras.optimizers import Adam
import tensorflow.keras.backend as K
import numpy as np
import logging
import csv
import gc

# Number of independent training runs per (dataset, image size) pair
NUM_ITERS = 5
# Image side lengths to evaluate
IMG_SIZE = [16, 32, 64]

if __name__ == '__main__':
    regression_datasets = ["PPGDalia", "BIDMC32HR"]

    for name in regression_datasets:
        for curr_size in IMG_SIZE:
            # The image size is baked into the loaded arrays, so the data has
            # to be reloaded for every size.
            data_train = CNNDataLoader(path_to_file="/content/gdrive/My Drive/rcnn/dataset/"
                                                    f"{name}/{name}_TRAIN.ts", img_size=curr_size)

            data_test = CNNDataLoader(path_to_file="/content/gdrive/My Drive/rcnn/dataset/"
                                                   f"{name}/{name}_TEST.ts", img_size=curr_size)

            X_train, Y_train = data_train.load_data()
            X_test, Y_test = data_test.load_data()

            dataset = {
                "X_train": X_train, "Y_train": Y_train,
                "X_test": X_test, "Y_test": Y_test
            }

            # Results are appended, so earlier runs in the CSV are preserved
            with open('/content/gdrive/My Drive/rcnn/results/results.csv', 'a', newline='') as file:
                writer = csv.writer(file)
                for i in range(NUM_ITERS):
                    # Start every run from a clean Keras state
                    K.clear_session()
                    cnn = CNNModel(img_size=curr_size, num_variables=data_train.num_variables,
                                   loss="mean_squared_error", epochs=1000, batch_size=128,
                                   optimizer=Adam())
                    result = np.mean(cnn.fit(dataset))

                    # Release the model before the next run allocates a new one
                    del cnn
                    gc.collect()

                    print(f"{name},CNN-{curr_size},{i + 1},{result}")
                    writer.writerow([name, f"CNN-{curr_size}", i+1, result])
                    # Push the row out of Python's buffer right away: each run can
                    # take a long time, and a runtime disconnect before the file is
                    # closed would otherwise lose already-finished results.
                    file.flush()

Instructions for updating:
Use `tf.data.Iterator.get_next_as_optional()` instead.


Instructions for updating:
Use `tf.data.Iterator.get_next_as_optional()` instead.






PPGDalia,CNN-16,1,54.97922897338867




PPGDalia,CNN-16,2,54.714752197265625




PPGDalia,CNN-16,3,54.989158630371094




PPGDalia,CNN-16,4,55.278831481933594




PPGDalia,CNN-16,5,55.084716796875




PPGDalia,CNN-32,1,51.40248107910156




PPGDalia,CNN-32,2,51.364444732666016




PPGDalia,CNN-32,3,51.744354248046875




PPGDalia,CNN-32,4,52.24946594238281




PPGDalia,CNN-32,5,51.674537658691406




PPGDalia,CNN-64,1,43.71049880981445




PPGDalia,CNN-64,2,43.49440383911133




PPGDalia,CNN-64,3,43.44133758544922




PPGDalia,CNN-64,4,44.0413703918457




PPGDalia,CNN-64,5,43.619384765625




BIDMC32HR,CNN-16,1,0.3491670489311218




BIDMC32HR,CNN-16,2,0.37040603160858154




BIDMC32HR,CNN-16,3,0.3555140793323517




BIDMC32HR,CNN-16,4,0.3522086441516876




BIDMC32HR,CNN-16,5,0.3537915349006653




BIDMC32HR,CNN-32,1,0.343741774559021




BIDMC32HR,CNN-32,2,0.34539490938186646




BIDMC32HR,CNN-32,3,0.3415064811706543




BIDMC32HR,CNN-32,4,0.33947911858558655




BIDMC32HR,CNN-32,5,0.3447003960609436




BIDMC32HR,CNN-64,1,0.4374832808971405




BIDMC32HR,CNN-64,2,0.31123000383377075




BIDMC32HR,CNN-64,3,0.31233420968055725




BIDMC32HR,CNN-64,4,0.32552996277809143




BIDMC32HR,CNN-64,5,0.43781137466430664
