In [1]:
import pickle
import pandas as pd

# Number of feature columns in each temperature frame (minimum and maximum).
NUM_FEATURES = 2 # minimum and maximum temp

# Function to load data
def load_data(picklefile):
    """Load the Perth Airport and Perth Metro entries from a pickle file.

    Parameters:
    - picklefile: Path to a pickle file containing a mapping that has the
      keys "Perth_Airport" and "Perth_Metro".

    Returns:
    - A tuple ``(perth_airport, perth_metro)`` holding the objects stored
      under those two keys.

    Raises:
    - OSError: If the file cannot be opened.
    - KeyError: If either expected key is missing from the mapping.
    """
    # NOTE(security): pickle.load can execute arbitrary code while
    # deserializing, so only call this on files from a trusted source.
    # The context manager closes the file even when unpickling fails.
    with open(picklefile, "rb") as f:
        data = pickle.load(f)
    return data["Perth_Airport"], data["Perth_Metro"]
# Load both frames once at notebook level: PerthA_df is the "Perth_Airport"
# entry and PerthM_df is the "Perth_Metro" entry of the pickle file.
PerthA_df, PerthM_df = load_data("temperatures.pkl")

In [2]:
PerthA_df.columns = ["min", "max"] # renaming columns for ease of use
PerthM_df.columns = ["min", "max"] # same short labels so both frames line up
# Display the first rows of the airport frame as a quick sanity check.
PerthA_df.head()

Unnamed: 0_level_0,min,max
date,Unnamed: 1_level_1,Unnamed: 2_level_1
1944-06,8.1,20.4
1944-07,7.7,18.3
1944-08,7.1,19.5
1944-09,7.9,20.3
1944-10,10.5,24.6


In [3]:
import tensorflow as tf

def split_data(df, inputs_length, targets_length, batch_size=32, shuffle=True, seed=42):
    """Window a time series into batched (inputs, targets) pairs.

    Each window spans ``inputs_length + targets_length`` consecutive rows of
    ``df``. The first ``inputs_length`` rows of a window become the inputs and
    the remaining ``targets_length`` rows become the targets.

    Parameters:
    - df: Data convertible to a float32 tensor (e.g. a DataFrame); must
      expose ``shape`` when ``batch_size`` is -1.
    - inputs_length: Number of time steps in each input sequence.
    - targets_length: Number of time steps in each target sequence.
    - batch_size: Windows per batch. Pass -1 to use ``df.shape[0]``, which is
      large enough to hold every window in a single batch.
    - shuffle: Whether the windows are shuffled.
    - seed: Seed forwarded to the dataset builder for shuffling.

    Returns:
    - A ``tf.data.Dataset`` yielding ``(inputs, targets)`` tuples.
    """
    def split_inputs_and_targets(batch):
        # Slice from the front by inputs_length instead of from the back by
        # -targets_length: a negative-zero slice (targets_length == 0) would
        # otherwise return empty inputs and the whole window as targets.
        return batch[:, :inputs_length], batch[:, inputs_length:]

    if batch_size == -1:
        batch_size = df.shape[0]

    windows = tf.keras.utils.timeseries_dataset_from_array(
        tf.convert_to_tensor(df, dtype=tf.float32),
        targets=None,  # targets are carved out of each window by the map below
        sequence_length=inputs_length + targets_length,
        batch_size=batch_size,
        shuffle=shuffle,
        seed=seed,
    )
    return windows.map(split_inputs_and_targets)

In [4]:
import numpy as np
import matplotlib.pyplot as plt
import random

In [5]:
from sklearn.preprocessing import MinMaxScaler

# Initialize the scaler
scaler = MinMaxScaler()

# Fit the scaler on the Perth Airport frame (used as training data) and
# transform it, then rewrap the scaled values in a DataFrame that keeps the
# original index and column labels.
PerthA_scaled = scaler.fit_transform(PerthA_df)
PerthA_df_scaled = pd.DataFrame(PerthA_scaled, index=PerthA_df.index, columns=PerthA_df.columns)

# Apply the same, already-fitted transformation to the Perth Metro frame
# (source of the validation and test data); the scaler is not refitted here.
PerthM_scaled = scaler.transform(PerthM_df)
PerthM_df_scaled = pd.DataFrame(PerthM_scaled, index=PerthM_df.index, columns=PerthM_df.columns)


In [6]:
def prepare_datasets(inputs_length, targets_length, batch_size_train=32, model_type='model1'):
    """
    Build the training, validation, and test datasets for a given window size.

    The training set is built from the scaled Perth Airport frame. The
    validation and test sets are date slices of the scaled Perth Metro frame
    ('1994-01' to '2013-12', and '2014-01' onwards respectively).

    Parameters:
    - inputs_length: Length of input sequences.
    - targets_length: Length of target sequences.
    - batch_size_train: Batch size for the training dataset.
    - model_type: 'model1' selects split_data (Models 1 and 2); 'model3'
      selects prepare_encoder_decoder_data_internal (Encoder-Decoder model).

    Returns:
    - train_ds, val_ds, test_ds: TensorFlow datasets.

    Raises:
    - ValueError: If model_type is neither 'model1' nor 'model3'.
    """
    # Resolve the builder lazily so that only the selected function has to
    # exist in the notebook namespace.
    if model_type == 'model1':
        builder = split_data
    elif model_type == 'model3':
        builder = prepare_encoder_decoder_data_internal
    else:
        raise ValueError("Invalid model_type. Use 'model1' or 'model3'.")

    # Window sizes are shared by all three datasets.
    window = {"inputs_length": inputs_length, "targets_length": targets_length}

    def _held_out(frame):
        # Validation/test data: one full-size batch, original order.
        return builder(df=frame, batch_size=-1, shuffle=False, **window)

    train_ds = builder(
        df=PerthA_df_scaled,
        batch_size=batch_size_train,
        shuffle=True,
        seed=42,
        **window,
    )
    val_ds = _held_out(PerthM_df_scaled.loc['1994-01':'2013-12'])
    test_ds = _held_out(PerthM_df_scaled.loc['2014-01':])

    return train_ds, val_ds, test_ds
