## Importing libraries

In [2]:
# Fix randomness and hide warnings
# Single seed value reused by every RNG that is seeded in this notebook.
seed = 42

import os
# Environment variables are set here, before TensorFlow and matplotlib are
# imported in the following cells.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
# NOTE(review): PYTHONHASHSEED is read when the interpreter starts, so assigning
# it here presumably only affects child processes, not this kernel — confirm.
os.environ['PYTHONHASHSEED'] = str(seed)
# Point matplotlib's config/cache directory at ./configs/ under the current
# working directory (the path is built by plain string concatenation).
os.environ['MPLCONFIGDIR'] = os.getcwd()+'/configs/'

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
# `Warning` is the base class of every warning category, so this filter hides
# all warnings (including NumPy RuntimeWarnings such as divide-by-zero) and
# makes the FutureWarning filter above redundant.
warnings.simplefilter(action='ignore', category=Warning)

import numpy as np
# Seed NumPy's legacy global RNG.
np.random.seed(seed)

# Used by the TensorFlow cell to set the logger level.
import logging

import random
# Seed Python's built-in RNG.
random.seed(seed)

In [3]:
# Import tensorflow
# This cell relies on `logging` and `seed` defined in the previous cell.
import tensorflow as tf
from tensorflow import keras as tfk
from tensorflow.keras import layers as tfkl
# Reduce TensorFlow's Python-side log output to errors only.
tf.autograph.set_verbosity(0)
tf.get_logger().setLevel(logging.ERROR)
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)
# Seed TensorFlow's global RNG with the notebook-wide seed (both the current
# and the compat.v1 entry points are called).
tf.random.set_seed(seed)
tf.compat.v1.set_random_seed(seed)
# Record the TensorFlow version in the notebook output.
print(tf.__version__)

2.13.0


In [4]:
import pandas as pd
import seaborn as sns

from datetime import datetime
import matplotlib.pyplot as plt
plt.rc('font', size=16)
from sklearn.preprocessing import MinMaxScaler

Matplotlib is building the font cache; this may take a moment.


In [6]:
# Read the three preprocessed .npy arrays from disk, in this order:
# categories, training data, valid periods.
categories, data_train, valid_periods = (
    np.load(npy_path)
    for npy_path in (
        'training_dataset_preprocessed/categories_preprocessed.npy',
        'training_dataset_preprocessed/training_data_preprocessed.npy',
        'training_dataset_preprocessed/valid_periods_preprocessed.npy',
    )
)

In [49]:
def normalize_window(windows_batch, return_minmax=False):
    """Min-max scale every window to the [0, 1] range, one sample at a time.

    Parameters
    ----------
    windows_batch : array-like
        Either a batch with 3 or more dimensions, e.g. ``(batch, window, 1)``,
        where axis 0 indexes the samples, or a single window with at most
        2 dimensions, e.g. ``(window, 1)``. Only a trailing axis of size 1 is
        dropped, so a batch holding one sample (``(1, window, 1)``) or windows
        of length 1 keep their batch axis. Each sample is scaled with the min
        and max taken over all of its own values.
    return_minmax : bool, default False
        When True, also return the min/max values used for the scaling.

    Returns
    -------
    scaled_batch : np.ndarray
        The scaled windows. For a batch this is the input shape without the
        trailing singleton axis, e.g. ``(batch, window)``; a single window is
        returned with a leading axis of size 1, e.g. ``(1, window)``.
    minmax_batch_array : np.ndarray
        Only returned when ``return_minmax`` is True. For a batch the shape is
        ``(batch, 2)`` with one ``[min, max]`` row per sample. For a single
        window the shape is ``(2, 1)``, laid out as ``[[min], [max]]``; this
        layout differs from the batch one and is kept for backward
        compatibility.

    Notes
    -----
    A window whose values are all equal has a zero range; it is scaled to all
    zeros instead of producing NaN/inf from a division by zero.
    """
    windows = np.asarray(windows_batch)
    single_sample = windows.ndim <= 2

    # Drop only the trailing singleton axis. A bare np.squeeze would also
    # collapse a batch axis of size 1 (or a window of length 1), after which
    # the per-sample statistics would be computed on scalars.
    if windows.ndim >= 2 and windows.shape[-1] == 1:
        windows = windows[..., 0]

    # Handle a single window as a batch of one so both cases share one code path.
    if single_sample:
        windows = windows[np.newaxis, ...]

    # Per-sample min and max over every axis except the batch axis.
    reduce_axes = tuple(range(1, windows.ndim))
    mins = windows.min(axis=reduce_axes, keepdims=True)
    maxs = windows.max(axis=reduce_axes, keepdims=True)

    # Guard against constant windows: dividing by 1 leaves their zero numerator at 0.
    value_range = maxs - mins
    safe_range = np.where(value_range == 0, 1, value_range)
    scaled_batch = (windows - mins) / safe_range

    if return_minmax:
        flat_mins = mins.reshape(-1)
        flat_maxs = maxs.reshape(-1)
        if single_sample:
            # Legacy (2, 1) layout: [[min], [max]].
            minmax_batch_array = np.array([[flat_mins[0]], [flat_maxs[0]]])
        else:
            # One [min, max] row per sample.
            minmax_batch_array = np.stack([flat_mins, flat_maxs], axis=1)
        return scaled_batch, minmax_batch_array
    return scaled_batch

In [14]:
# Inspect the dimensions of the loaded training array.
data_train.shape

(47974, 2776, 1)

In [19]:
# Build a small test batch: the first 3 entries along axis 0, restricted to
# the last 10 positions along axis 1, keeping the trailing axis.
data_try = data_train[:3, -10:, :]
data_try.shape

(3, 10, 1)

In [27]:
# Remove every size-1 axis from the test array.
# NOTE(review): this rebinds `data_try`, so the result depends on which slicing
# cell ran last; binding the squeezed array to a new name would keep the cell
# idempotent.
data_try =np.squeeze(data_try)

In [28]:
# Check the shape after squeezing.
# NOTE(review): the recorded output (10,) does not match squeezing a
# (3, 10, 1) slice, which would give (3, 10); the cells appear to have been
# executed out of order — re-run the notebook top to bottom to refresh.
data_try.shape

(10,)

In [14]:
# Number of entries along the first axis of the test array.
len(data_try)

10

In [16]:
# First entry along axis 0 of the test array.
data_try[0]

0.9955514636286977

In [30]:
# Pack the min and max taken over the whole of `data_try` into a single
# [min, max] row.
min_max_batch_array = np.array([[np.min(data_try), np.max(data_try)]])
min_max_batch_array

array([[0.60750486, 0.99555146]])

In [31]:
# Confirm the layout of the min/max array.
min_max_batch_array.shape

(1, 2)

In [73]:
# Shape check of the current test array.
data_try.shape

(3, 10)

In [78]:
# Show the third entry along axis 0 of the test array.
data_try[2]

array([1.        , 0.54478243, 0.50786404, 0.47132041, 0.36748344,
       0.65839054, 0.3196237 , 0.54485458, 0.53497663, 0.45541526])

In [79]:
# Column 0 of row 2 of the min/max array.
# NOTE(review): indexing row 2 needs an array with at least 3 rows, but the
# only visible assignment of `min_max_batch_array` builds a single row; the
# definition that produced this output is not in the notebook as shown, so
# this cell depends on hidden kernel state.
min_max_batch_array[2,0]

0.31962369545603175

In [77]:
# Subtract column 0 of row 2 (presumably that entry's min) from the third
# entry: the numerator of the min-max formula.
data_try[2] - min_max_batch_array[2,0]

array([0.6803763 , 0.22515874, 0.18824034, 0.15169672, 0.04785975,
       0.33876684, 0.        , 0.22523088, 0.21535293, 0.13579156])

In [81]:
# Manual row-wise scaling: for each entry i compute
# (x - col0) / (col1 - col0) using row i of `min_max_batch_array`.
# NOTE(review): a row whose two columns are equal divides by zero here.
scaled_batch = np.array([(data_try[i] - min_max_batch_array[i,0]) / (min_max_batch_array[i,1] - min_max_batch_array[i,0]) for i in range(len(data_try))])
scaled_batch

array([[1.        , 0.57004745, 0.52020045, 0.57004745, 0.34990465,
        0.52020045, 0.        , 0.34990465, 0.04867178, 0.37653554],
       [1.        , 0.58326807, 0.33853142, 0.58326807, 0.33751611,
        0.33853142, 0.        , 0.33751611, 0.20274657, 0.34324533],
       [1.        , 0.33093266, 0.27667093, 0.22296002, 0.07034305,
        0.497911  , 0.        , 0.33103869, 0.31652033, 0.19958303]])

In [82]:
# Shape of the manually scaled batch.
scaled_batch.shape

(3, 10)

In [50]:
# Single-window test input: the last 10 positions along axis 1 of entry 0,
# keeping the trailing axis (2-D result).
data_try = data_train[0, -10:, :]
data_try.shape

(10, 1)

In [51]:
# Exercise normalize_window on the 2-D single-window input and keep the
# min/max values it reports.
# NOTE(review): the recorded output (True / 10) is not produced by the function
# as currently defined, which contains no print calls — stale output.
new_data, minmax = normalize_window(data_try, return_minmax= True)

True
10


In [52]:
# Display the unscaled input window for comparison with the scaled result.
data_try

array([[0.99555146],
       [0.82870984],
       [0.80936688],
       [0.82870984],
       [0.74328417],
       [0.80936688],
       [0.60750486],
       [0.74328417],
       [0.62639178],
       [0.7536182 ]])

In [53]:
# Display the scaled window returned by normalize_window.
new_data

array([[1.        , 0.57004745, 0.52020045, 0.57004745, 0.34990465,
        0.52020045, 0.        , 0.34990465, 0.04867178, 0.37653554]])

In [92]:
# Display the min/max values returned by normalize_window.
# NOTE(review): the recorded output has 3 rows, whereas the visible call passes
# a single window; this output presumably comes from a later batch call —
# re-run the notebook in order to refresh it.
minmax

array([[0.60750486, 0.99555146],
       [0.27479158, 1.        ],
       [0.3196237 , 1.        ]])