In [1]:
from IPython.display import display, HTML
display(HTML("<style>.container { width:80% !important; }</style>"))

import os

# Python ≥3.5 is required
import sys
assert sys.version_info >= (3, 5)

# Scikit-Learn ≥0.20 is required
import sklearn
assert sklearn.__version__ >= "0.20"

import pandas as pd
import numpy as np
pd.set_option('display.max_columns', None)

# To plot pretty figures
%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt

import scipy.stats as stats

import seaborn as sns

from datetime import datetime

import re

from tqdm.notebook import tqdm

import dask.dataframe as dd
from dask.distributed import Client

sys.path.insert(0, 'tools/')

from tools import * 

In [2]:
client = Client()
client

0,1
Connection method: Cluster object,Cluster type: distributed.LocalCluster
Dashboard: http://127.0.0.1:8787/status,

0,1
Dashboard: http://127.0.0.1:8787/status,Workers: 5
Total threads: 20,Total memory: 31.04 GiB
Status: running,Using processes: True

0,1
Comm: tcp://127.0.0.1:41131,Workers: 5
Dashboard: http://127.0.0.1:8787/status,Total threads: 20
Started: Just now,Total memory: 31.04 GiB

0,1
Comm: tcp://127.0.0.1:44837,Total threads: 4
Dashboard: http://127.0.0.1:44335/status,Memory: 6.21 GiB
Nanny: tcp://127.0.0.1:45909,
Local directory: /tmp/dask-scratch-space/worker-l05x2rhz,Local directory: /tmp/dask-scratch-space/worker-l05x2rhz

0,1
Comm: tcp://127.0.0.1:40797,Total threads: 4
Dashboard: http://127.0.0.1:33565/status,Memory: 6.21 GiB
Nanny: tcp://127.0.0.1:33445,
Local directory: /tmp/dask-scratch-space/worker-bkkvbaj3,Local directory: /tmp/dask-scratch-space/worker-bkkvbaj3

0,1
Comm: tcp://127.0.0.1:33237,Total threads: 4
Dashboard: http://127.0.0.1:33337/status,Memory: 6.21 GiB
Nanny: tcp://127.0.0.1:42455,
Local directory: /tmp/dask-scratch-space/worker-wbuof2kt,Local directory: /tmp/dask-scratch-space/worker-wbuof2kt

0,1
Comm: tcp://127.0.0.1:43767,Total threads: 4
Dashboard: http://127.0.0.1:37575/status,Memory: 6.21 GiB
Nanny: tcp://127.0.0.1:43607,
Local directory: /tmp/dask-scratch-space/worker-e9uaf7w8,Local directory: /tmp/dask-scratch-space/worker-e9uaf7w8

0,1
Comm: tcp://127.0.0.1:40999,Total threads: 4
Dashboard: http://127.0.0.1:42501/status,Memory: 6.21 GiB
Nanny: tcp://127.0.0.1:36985,
Local directory: /tmp/dask-scratch-space/worker-8t237xj7,Local directory: /tmp/dask-scratch-space/worker-8t237xj7


# Merge

In [3]:
config = pd.Series({
    'path': os.path.abspath('dades'),
    'file_type':'csv',
    'years':[2019, 2021, 2022],
    'dataset': 'BicingNou_ESTACIONS_MOD'
})

In [4]:
%%time

def read_data_all(cnfg:dict):

    data = dict()

    for year in cnfg.years:
        cnfg['year'] = year
        data[year] = load_checkpoint(cnfg)
    
    return dd.concat(list(data.values()), interleave_partitions=False)
    
bbdd_completa = read_data_all(config)   

checkpoint reloaded.
checkpoint reloaded.
checkpoint reloaded.
CPU times: user 53 ms, sys: 9.01 ms, total: 62 ms
Wall time: 59.2 ms


In [5]:
%%time

index=0
before = get_ddf_shape(bbdd_completa)
print(before)

index+=1
print(index)
# 1 Error fix. # 2019 gener, febrer and marc have the status reversed
cond = (bbdd_completa.year == 2019) & (bbdd_completa.month.isin([1,2,3]))
bbdd_completa['status'] = bbdd_completa['status'].mask(cond, ((bbdd_completa['status'] + 1) %2))
index+=1
print(index)
# 2 Remove not needed status
cond = (bbdd_completa['status'].isin([2.0,3.0]))
bbdd_completa = bbdd_completa.mask(cond, np.nan)
bbdd_completa = bbdd_completa.dropna(subset=['status'])
index+=1
print(index)
# 3 Remove data from 2020
cond = (bbdd_completa['year'].isin([2020]))
bbdd_completa = bbdd_completa.mask(cond, np.nan)
bbdd_completa = bbdd_completa.dropna(subset=['year'])
index+=1
print(index)
# 4 2019 to 2020 to have continuos data 
cond = (bbdd_completa['year'].isin([2019]))
bbdd_completa['year'] = bbdd_completa['year'].mask(cond, bbdd_completa['year']+1)
index+=1
print(index)
# 5 status reversed 
bbdd_completa['status'] = (bbdd_completa['status'] + 1)%2
index+=1
print(index)
# 6 Column has all ones
bbdd_completa = bbdd_completa.drop(columns=['is_installed'])
index+=1
print(index)
# 7 Column has all ones
bbdd_completa = bbdd_completa.drop(columns=['is_charging_station'])
index+=1
print(index)
# 8 Remove row where status = closed and is renting and is returning
cond = (bbdd_completa['status'].isin([0.0])) & (bbdd_completa['is_renting'].isin([1.0])) & (bbdd_completa['is_returning'].isin([1.0]))
bbdd_completa = bbdd_completa.mask(cond, np.nan)
bbdd_completa = bbdd_completa.dropna(subset=['status'])

after = get_ddf_shape(bbdd_completa)
print(after)

print('Changes to dataframe durinf preprocessing')
print(f'dropeed {(after[0]-before[0]):02d} rows')
print(f'dropped {(after[1]-before[1]):02d} columns')

(12419994, 23)
1
2
3
4
5
6
7
8
(11984228, 21)
Changes to dataframe durinf preprocessing
dropeed -435766 rows
dropped -2 columns
CPU times: user 4.23 s, sys: 146 ms, total: 4.38 s
Wall time: 17.7 s


In [6]:
%%time

unique_ids_by_year = bbdd_completa.groupby('year')['station_id'].unique().compute()
print(unique_ids_by_year)

# Calcula los station_id comunes en todos los años
common_ids = set(unique_ids_by_year[2020])
for year, ids in unique_ids_by_year.items():
    print(year, ids.shape)
    common_ids = common_ids.intersection(set(ids))
# common_ids
print(len(common_ids))

filtered_bbdd = bbdd_completa[bbdd_completa['station_id'].isin(common_ids)]
nonfiltered_bbdd = bbdd_completa[~bbdd_completa['station_id'].isin(common_ids)]

year
2020.0    [1.0, 320.0, 319.0, 318.0, 317.0, 316.0, 315.0...
2021.0    [1.0, 271.0, 400.0, 510.0, 26.0, 270.0, 269.0,...
2022.0    [1.0, 380.0, 28.0, 379.0, 378.0, 377.0, 376.0,...
Name: station_id, dtype: object
2020.0 (410,)
2021.0 (509,)
2022.0 (510,)
408
CPU times: user 1.45 s, sys: 111 ms, total: 1.56 s
Wall time: 10.4 s


In [7]:
%%time

before = get_ddf_shape(filtered_bbdd)
print(before)

filtered_bbdd = filtered_bbdd.drop_duplicates(subset=['timestamp', 'station_id'], keep='last')

after = get_ddf_shape(filtered_bbdd)
print(after)

print('Changes to dataframe durinf preprocessing')
print(f'dropeed {(after[0]-before[0]):02d} rows')
print(f'dropped {(after[1]-before[1]):02d} columns')

(10194539, 21)
(10100285, 21)
Changes to dataframe durinf preprocessing
dropeed -94254 rows
dropped 00 columns
CPU times: user 7.15 s, sys: 555 ms, total: 7.7 s
Wall time: 28.2 s


In [8]:
%%time

before = get_ddf_shape(nonfiltered_bbdd)
print(before)

nonfiltered_bbdd = nonfiltered_bbdd.drop_duplicates(subset=['timestamp', 'station_id'], keep='last')

after = get_ddf_shape(nonfiltered_bbdd)
print(after)

print('Changes to dataframe durinf preprocessing')
print(f'dropeed {(after[0]-before[0]):02d} rows')
print(f'dropped {(after[1]-before[1]):02d} columns')

(1789689, 21)
(1769381, 21)
Changes to dataframe durinf preprocessing
dropeed -20308 rows
dropped 00 columns
CPU times: user 7.02 s, sys: 455 ms, total: 7.47 s
Wall time: 25.4 s


In [9]:
bbdd_completa = None

In [10]:
filtered_bbdd.year.unique().compute()

0    2020.0
1    2021.0
2    2022.0
Name: year, dtype: float64

# Predicción__________________________________
a) Regresión lineal: relación lineal entre las variables de entrada y la variable de salida. 

b) Regresión Redes Neuronales (RNN -redes neuronales recurrentes-): pueden capturar relaciones no lineales entre las variables de entrada y salida.

In [11]:
import os
import csv
import pandas as pd
import random
import numpy as np
import tensorflow as tf
import warnings
warnings.simplefilter('ignore')

2023-06-04 18:37:43.715710: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2023-06-04 18:37:44.011976: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [12]:
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import FunctionTransformer
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import OrdinalEncoder
from sklearn.compose import ColumnTransformer
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import cross_val_score
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.model_selection import validation_curve
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.metrics import mean_absolute_error

In [13]:
print(tf.config.list_physical_devices('GPU'))

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


In [14]:
class Config:
    num_attribs0 = ['ctx1', 'ctx2', 'ctx3', 'ctx4']
    cat_attribs0 = ['station_id', 'month', 'dayofweek', 'day', 'dayofyear', 'hour', 'status', 'is_renting', 'is_returning']
    target_col = ['ctx0']
    
    epochs=5
    batch_size=128
    seed=42
    embedding_dim=8
    lr=1e-4

def seed_everything(seed=42):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    tf.random.set_seed(seed)
    os.environ['TF_DETERMINISTIC_OPS'] = '1'
    os.environ['TF_CUDNN_DETERMINISTIC'] = '1'

    tf.config.threading.set_inter_op_parallelism_threads(1)
    tf.config.threading.set_intra_op_parallelism_threads(1)
    

In [15]:
config=Config()
seed_everything(config.seed)

In [16]:
filtered_bbdd.columns

Index(['station_id', 'num_docks_available', 'num_bikes_available', 'status',
       'timestamp', 'is_renting', 'is_returning',
       'num_bikes_available_types.mechanical',
       'num_bikes_available_types.ebike', 'year', 'month', 'dayofweek', 'day',
       'dayofyear', 'hour', 'capacity', 'ctx0', 'ctx1', 'ctx2', 'ctx3',
       'ctx4'],
      dtype='object')

In [17]:
%%time 

x_train = filtered_bbdd.compute().copy()
x_test = nonfiltered_bbdd.compute().copy()

y_train, y_test = x_train[config.target_col[0]].copy(), x_test[config.target_col[0]].copy()


CPU times: user 4.63 s, sys: 2.15 s, total: 6.78 s
Wall time: 29.8 s


In [18]:
import ctypes

def trim_memory() -> int:
    libc = ctypes.CDLL("libc.so.6")
    return libc.malloc_trim(0)

client.run(trim_memory)

{'tcp://127.0.0.1:33237': 1,
 'tcp://127.0.0.1:40797': 1,
 'tcp://127.0.0.1:40999': 1,
 'tcp://127.0.0.1:43767': 1,
 'tcp://127.0.0.1:44837': 1}

## NN 

In [19]:
import tensorflow as tf
from tensorflow import keras
print(keras.__version__)
print(tf.__version__)
print(tf.test.gpu_device_name())

2.12.0
2.12.0
/device:GPU:0


2023-06-04 18:38:18.745349: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1635] Created device /device:GPU:0 with 9870 MB memory:  -> device: 0, name: NVIDIA RTX A2000 12GB, pci bus id: 0000:68:00.0, compute capability: 8.6


In [20]:
from tensorflow.python.client import device_lib
device_lib.list_local_devices()

2023-06-04 18:38:18.761462: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1635] Created device /device:GPU:0 with 9870 MB memory:  -> device: 0, name: NVIDIA RTX A2000 12GB, pci bus id: 0000:68:00.0, compute capability: 8.6


[name: "/device:CPU:0"
 device_type: "CPU"
 memory_limit: 268435456
 locality {
 }
 incarnation: 13181032581964556710
 xla_global_id: -1,
 name: "/device:GPU:0"
 device_type: "GPU"
 memory_limit: 10349838336
 locality {
   bus_id: 1
   links {
   }
 }
 incarnation: 11707757449223719088
 physical_device_desc: "device: 0, name: NVIDIA RTX A2000 12GB, pci bus id: 0000:68:00.0, compute capability: 8.6"
 xla_global_id: 416903419]

In [21]:
# clear previous session
keras.backend.clear_session()
np.random.seed(42)
tf.random.set_seed(42)

In [22]:
from keras import backend as K
# Configuring a session
session_conf = tf.compat.v1.ConfigProto(
    intra_op_parallelism_threads=3,
    inter_op_parallelism_threads=3
)
sess = tf.compat.v1.Session(graph=tf.compat.v1.get_default_graph(), config=session_conf)
#K.set_session(sess)
tf.compat.v1.keras.backend.set_session(sess)

2023-06-04 18:38:18.798062: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1635] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 9870 MB memory:  -> device: 0, name: NVIDIA RTX A2000 12GB, pci bus id: 0000:68:00.0, compute capability: 8.6





In [23]:
from keras.callbacks_v1 import TensorBoard
from keras.callbacks import ReduceLROnPlateau

In [24]:
def root_mean_squared_error(y_true, y_pred):
    y = K.cast(y_true, K.np.float32)
    y_hat = K.cast(y_pred, K.np.float32)
    return K.sqrt(K.mean(K.square(y_hat - y)))

In [25]:

def build_preprocessor(config): 
    
    num_attribs0 = config.num_attribs0 # ['year', 'ctx1', 'ctx2', 'ctx3', 'ctx4']
    cat_attribs0 = config.cat_attribs0 #['month', 'dayofweek', 'day', 'dayofyear', 'hour' ]
    
    num_transformer0 = Pipeline(steps=[
        ("imputer", SimpleImputer(strategy="mean")),
        ('std_scaler', (StandardScaler())),
    ])

    categorical_transformer0 = Pipeline(steps=[
        ("imputer", SimpleImputer(strategy="constant",fill_value=np.nan)),
        ('ordinal_encoder', (OrdinalEncoder(handle_unknown='use_encoded_value', unknown_value=np.nan))),
    ])
    
    preprocessor = ColumnTransformer(
        transformers=[
            ("num0", num_transformer0, num_attribs0),
            ("cat0", categorical_transformer0, cat_attribs0),
        ],
        remainder="drop"
    )
    
    return preprocessor

In [26]:
# Build pipeline
pp = build_preprocessor(config)
pp.fit(x_train)

In [27]:
from tensorflow.keras.layers import Input, Embedding, Dense, Flatten, add, Activation, dot
from tensorflow.keras.models import Model
from tensorflow.keras.regularizers import l2 as l2_reg
from tensorflow.python.keras.utils.vis_utils import plot_model
from tensorflow.keras.callbacks import EarlyStopping

import itertools

def build_model(category_num, category_cols, num_cols, K=8, l2=0.0, l2_fm=0.0):

    # Numerical features
    num_inputs = [Input(shape=(1,), name=col,) for col in num_cols]
    # Categorical features
    cat_inputs = [Input(shape=(1,), name=col,) for col in category_cols]

    inputs = num_inputs + cat_inputs

    flatten_layers=[]
    # Numerical featrue embedding
    for enc_inp, col in zip(num_inputs, num_cols):
        # num feature dense
        x = Dense(K, name = f'embed_{col}',kernel_regularizer=l2_reg(l2_fm))(enc_inp)
        flatten_layers.append(x)

    # Category feature embedding
    for enc_inp, col in zip(cat_inputs, category_cols):
        num_c = category_num[col]
        embed_c = Embedding(input_dim=num_c,
                            output_dim=K,
                            input_length=1,
                            name=f'embed_{col}',
                            embeddings_regularizer=l2_reg(l2_fm))(enc_inp)
        flatten_c = Flatten()(embed_c)
        flatten_layers.append(flatten_c)
                
    # Feature interaction term
    fm_layers = []
    for emb1,emb2 in itertools.combinations(flatten_layers, 2):
        dot_layer = dot([emb1,emb2], axes=1)
        fm_layers.append(dot_layer)        

    # Linear term
    for enc_inp,col in zip(cat_inputs, category_cols):
        # embedding
        num_c = category_num[col]
        embed_c = Embedding(input_dim=num_c,
                            output_dim=1,
                            input_length=1,
                            name=f'linear_{col}',
                            embeddings_regularizer=l2_reg(l2_fm))(enc_inp)
        flatten_c = Flatten()(embed_c)
        fm_layers.append(flatten_c)
                
    for enc_inp, col in zip(num_inputs, num_cols):
        x = Dense(1, name = f'linear_{col}',kernel_regularizer=l2_reg(l2_fm))(enc_inp)
        fm_layers.append(x)

    # Add all terms
    flatten = add(fm_layers)
    outputs = Activation('linear',name='outputs')(flatten)
    
    model = Model(inputs=inputs, outputs=outputs)
    return model  

In [28]:
category_num = {col: x_train[col].nunique() for col in config.cat_attribs0}

model =  build_model(category_num, config.cat_attribs0, config.num_attribs0, config.embedding_dim)# instance the model here

# compile the model here
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=config.lr, beta_1=0.9, beta_2=0.999),
    loss=root_mean_squared_error,
    metrics=[
        'RootMeanSquaredError',
        'MeanAbsoluteError',
        'MeanSquaredError'       
    ]
)

2023-06-04 18:38:28.525800: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1635] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 9870 MB memory:  -> device: 0, name: NVIDIA RTX A2000 12GB, pci bus id: 0000:68:00.0, compute capability: 8.6


In [29]:
# cb = [EarlyStopping(monitor='val_loss', min_delta=1e-4, patience=2, verbose=0,)]
# tensorboard=TensorBoard(log_dir="Model_log")
# define a call back
checkpoint_cb = keras.callbacks.ModelCheckpoint("./checkpoints/model.h5", verbose=2, monitor='val_loss', save_best_only=True)
early_stopping_cb = keras.callbacks.EarlyStopping(monitor='val_loss', min_delta=1e-4, patience=3, verbose=2, restore_best_weights=True)

In [30]:
# Set a learning rate annealer - to have a decreasing learning rate during the training to reach efficiently the global minimum of the loss function.
# The LR is decreased dynamically when the score is not improved. This keeps the advantage of the fast computation time with a high LR at the start.
# learning_rate_reduction = ReduceLROnPlateau(
#     monitor='val_loss',  # Track the score on the validation set
#     patience=3,  # Number of epochs in which no improvement is seen.
#     verbose=2,
#     factor=0.4,  # Factor by which the LR is multiplied.
#     min_lr=0.0000001  # Don't go below this value for LR.
# )

In [31]:
feature_num = len(config.cat_attribs0 + config.num_attribs0)

In [32]:
tra_inputs = [pp.transform(x_train)[:, i] for i in range(feature_num)]

In [33]:
val_inputs = [pp.transform(x_test)[:, i] for i in range(feature_num)]

In [34]:
model.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 station_id (InputLayer)        [(None, 1)]          0           []                               
                                                                                                  
 month (InputLayer)             [(None, 1)]          0           []                               
                                                                                                  
 dayofweek (InputLayer)         [(None, 1)]          0           []                               
                                                                                                  
 day (InputLayer)               [(None, 1)]          0           []                               
                                                                                              

 linear_hour (Embedding)        (None, 1, 1)         24          ['hour[0][0]']                   
                                                                                                  
 linear_status (Embedding)      (None, 1, 1)         2           ['status[0][0]']                 
                                                                                                  
 linear_is_renting (Embedding)  (None, 1, 1)         2           ['is_renting[0][0]']             
                                                                                                  
 linear_is_returning (Embedding  (None, 1, 1)        2           ['is_returning[0][0]']           
 )                                                                                                
                                                                                                  
 dot (Dot)                      (None, 1)            0           ['embed_ctx1[0][0]',             
          

                                                                                                  
 dot_25 (Dot)                   (None, 1)            0           ['embed_ctx3[0][0]',             
                                                                  'flatten_1[0][0]']              
                                                                                                  
 dot_26 (Dot)                   (None, 1)            0           ['embed_ctx3[0][0]',             
                                                                  'flatten_2[0][0]']              
                                                                                                  
 dot_27 (Dot)                   (None, 1)            0           ['embed_ctx3[0][0]',             
                                                                  'flatten_3[0][0]']              
                                                                                                  
 dot_28 (D

                                                                  'flatten_4[0][0]']              
                                                                                                  
 dot_53 (Dot)                   (None, 1)            0           ['flatten_1[0][0]',              
                                                                  'flatten_5[0][0]']              
                                                                                                  
 dot_54 (Dot)                   (None, 1)            0           ['flatten_1[0][0]',              
                                                                  'flatten_6[0][0]']              
                                                                                                  
 dot_55 (Dot)                   (None, 1)            0           ['flatten_1[0][0]',              
                                                                  'flatten_7[0][0]']              
          

 flatten_12 (Flatten)           (None, 1)            0           ['linear_day[0][0]']             
                                                                                                  
 flatten_13 (Flatten)           (None, 1)            0           ['linear_dayofyear[0][0]']       
                                                                                                  
 flatten_14 (Flatten)           (None, 1)            0           ['linear_hour[0][0]']            
                                                                                                  
 flatten_15 (Flatten)           (None, 1)            0           ['linear_status[0][0]']          
                                                                                                  
 flatten_16 (Flatten)           (None, 1)            0           ['linear_is_renting[0][0]']      
                                                                                                  
 flatten_1

                                                                  'dot_63[0][0]',                 
                                                                  'dot_64[0][0]',                 
                                                                  'dot_65[0][0]',                 
                                                                  'dot_66[0][0]',                 
                                                                  'dot_67[0][0]',                 
                                                                  'dot_68[0][0]',                 
                                                                  'dot_69[0][0]',                 
                                                                  'dot_70[0][0]',                 
                                                                  'dot_71[0][0]',                 
                                                                  'dot_72[0][0]',                 
          

In [35]:
keras.utils.plot_model(model, "model.png", show_shapes=True)

You must install pydot (`pip install pydot`) and install graphviz (see instructions at https://graphviz.gitlab.io/download/) for plot_model to work.


In [None]:
history = model.fit(
    x=tra_inputs,
    y=y_train,
    epochs= config.epochs+5,
    batch_size=config.batch_size ,
    validation_data=(val_inputs ,y_test),
    callbacks=[checkpoint_cb, early_stopping_cb]
)


Epoch 1/10


2023-06-04 18:39:56.194288: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_18' with dtype float and shape [10100285]
	 [[{{node Placeholder/_18}}]]
2023-06-04 18:39:56.194801: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_18' with dtype float and shape [10100285]
	 [[{{node Placeholder/_18}}]]
2023-06-04 18:40:06.678999: I tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:637] TensorFloat-32 will be used for the matrix multiplication. This will only be logged once.
2023-06-04 18:40:06.704347: I tensorflow/compiler/xla/service/service.cc:169] XLA service 0x7fc51b5067a0 initialized fo



In [None]:
import matplotlib.pyplot as plt
def plot_history(history):
    # Plot training & validation accuracy values
    plt.plot(history.history['root_mean_squared_error'])
    plt.plot(history.history['val_root_mean_squared_error'])
    plt.title('Model accuracy')
    plt.ylabel('Accuracy')
    plt.xlabel('Epoch')
    plt.legend(['Train', 'Valid'], loc='upper left')
    plt.show()

    # Plot training & validation loss values
    plt.plot(history.history['loss'])
    plt.plot(history.history['val_loss'])
    plt.title('Model loss')
    plt.ylabel('Loss')
    plt.xlabel('Epoch')
    plt.legend(['Train', 'Valid'], loc='upper left')
    plt.show()    


In [None]:
plot_history(history)

In [None]:
# It can be used to reconstruct the model identically.
best_model = keras.models.load_model("checkpoints/model.h5", custom_objects={'root_mean_squared_error': root_mean_squared_error}, compile=True)

# Let's check:
np.testing.assert_allclose(
    model.predict(val_inputs), best_model.predict(val_inputs)
)

In [None]:
%%time 

cross_val_evaluation(best_model,val_inputs, y_train,'NN')

In [None]:
%%time 

test_model(best_model, tra_inputs, y_train, val_inputs, y_test)