In [None]:
from utils_deepcog import *
import multiprocessing
from multiprocessing import Pool
from datetime import datetime

os.environ['CUDA_VISIBLE_DEVICES'] = "1"  # Set the GPU card to use
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'  # Or '3' for FATAL logs only
np.set_printoptions(suppress=True)
physical_devices = tf.config.list_physical_devices('GPU')
for gpu_instance in physical_devices:
    tf.config.experimental.set_memory_growth(gpu_instance, True)


In [None]:
def load_test_data(city, output, CELL):
    test_X = np.load(f'../../../../oracle-data/serly/TMC_data/Trained_models/{city}/Data_reshaped/test_{CELL}.npy') #TO DO change the cell for multiple cells
    test_Y = np.load(f'../../../../oracle-data/serly/TMC_data/Trained_models/{city}/Data_reshaped/test_{CELL}_Y.npy') #TO DO change the cell for multiple cells
    return test_X, test_Y

city = 'Milan'
nr = 21
lookback = 3
epochs = 20
batchsize = 32
# Load cells based on the city
cells = get_rows_Milan(5060, nr)

output_directory = '../../../../oracle-data/serly/Scalable_dnn/PerBS/'
model_dir = f'../../../../oracle-data/serly/Scalable_dnn/Trained_models/Per_BS/LSTM/{city}/'
os.makedirs(model_dir, exist_ok=True)

In [None]:
def preprocess_data(cell, output_directory, city, lookback, data_type="train"):
    """
    Preprocesses the time series data for a cell, applying normalization and 
    constructing the input X with the given lookback window.

    Args:
        cell (int): Identifier for the cell.
        output_directory (str): Directory where the data is stored.
        city (str): City name to structure the path.
        lookback (int): Number of past time steps to include as input features.
        nr (int): Not used but can be removed or modified as needed.
        data_type (str): Either "train" or "test" to specify data type.

    Returns:
        X (np.ndarray): Preprocessed inputs with shape (n_samples, lookback, 1).
        y (np.ndarray): Corresponding targets with shape (n_samples, 1).
        scaler (MinMaxScaler): Fitted scaler object for inverse normalization.
    """
    X, y = [], []

    # Load the time series data for the cell (train or test depending on data_type)
    file_path = os.path.join(output_directory, f'{city}/{data_type}_{cell}.npy')
    ts_data = np.load(file_path)

    # Normalize the time series data using MinMaxScaler
    scaler = MinMaxScaler(feature_range=(0, 1))
    ts_data = ts_data.reshape(-1, 1)  # Reshape to 2D array for the scaler
    ts_data_normalized = scaler.fit_transform(ts_data).flatten()

    # Create the lookback data
    for t in range(len(ts_data_normalized) - lookback):
        X.append(ts_data_normalized[t:t + lookback])
        y.append(ts_data_normalized[t + lookback])

    X = np.array(X).reshape(-1, lookback, 1)  # Reshape to (n_samples, lookback, 1)
    y = np.array(y).reshape(-1, 1)  # Reshape to (n_samples, 1)

    return X, y, scaler


Train

In [None]:
for cell in cells:
    test_X, test_Y, scaler = preprocess_data(cell, output_directory, city, lookback, data_type="train")
    print(test_X.shape, test_Y.shape)
    # train lstm model
    model = model_lstm(lookback, 1, 1)
    model.fit(test_X, test_Y, epochs=epochs, batch_size=batchsize, verbose=0)
    # save model
    model.save(f'{model_dir}/model_{cell}.h5')

Evaluate

In [6]:
MAE_errors = []
for cell in cells:
    test_X, test_Y, scaler = preprocess_data(cell, output_directory, city, lookback, data_type="test")
    length = 1780 if city == 'Milan' else 400
    test_X = test_X[:length]
    test_Y = test_Y[:length]
    # load model
    model = tf.keras.models.load_model(f'{model_dir}/model_{cell}.h5')
    # evaluate model
    predicted = model.predict(test_X)
    # invert normalization
    predicted = scaler.inverse_transform(predicted)
    error = mean_absolute_error(test_Y, predicted)
    print(f'Cell {cell} MAE: {error}')
    # print(f'Cell {cell} MAE: {error}')
    MAE_errors = np.append(MAE_errors, error)

print(f'MAE: {(MAE_errors)}')

Cell 4050 MAE: 89.54975066533576
Cell 4051 MAE: 50.124027435375844
Cell 4052 MAE: 75.54171878020459
Cell 4053 MAE: 132.2836965091157
Cell 4054 MAE: 134.67332462503276
Cell 4055 MAE: 139.2565536129272
Cell 4056 MAE: 163.15047318725985
Cell 4057 MAE: 148.78507439449007
Cell 4058 MAE: 141.66240676841414
Cell 4059 MAE: 149.15837818403062
Cell 4060 MAE: 153.65989773649488
Cell 4061 MAE: 103.95494929577188
Cell 4062 MAE: 121.85878572322298
Cell 4063 MAE: 126.29539761943802
Cell 4064 MAE: 87.23561591041847
Cell 4065 MAE: 44.822639707237855
Cell 4066 MAE: 22.69729307700464
Cell 4067 MAE: 60.60095270331902
Cell 4068 MAE: 249.75028090744752
Cell 4069 MAE: 232.76509584753197
Cell 4070 MAE: 53.19058262891535
Cell 4150 MAE: 109.2593061671139
Cell 4151 MAE: 90.10898621466826
Cell 4152 MAE: 93.26711035709222
Cell 4153 MAE: 122.61156361836301
Cell 4154 MAE: 193.93068436365002
Cell 4155 MAE: 237.0892613467798
Cell 4156 MAE: 247.1747840402438
Cell 4157 MAE: 198.9817046023857
Cell 4158 MAE: 145.657238015

In [7]:
MAE_errors_sum = np.mean(MAE_errors)
print(f'Total MAE: {MAE_errors_sum}')

MAE_errors_sum = "{:.2e}".format(MAE_errors_sum)
print(f'Total MAE: {MAE_errors_sum}')

Total MAE: 204.82752643549637
Total MAE: 2.05e+02
