In [323]:
import sys
import random
import operator
import pandas as pd
import numpy as np
from sklearn import preprocessing, metrics, ensemble, neighbors, linear_model, tree, model_selection
from sklearn.model_selection import KFold, train_test_split
from sklearn import manifold, decomposition
from sklearn.metrics import mean_absolute_error, mean_absolute_percentage_error, r2_score, mean_squared_error
import warnings
import os
warnings.filterwarnings("ignore")

data_path = r'C:\\Users\\Sunil\\Projects\\Machine Hack\\Wipro\\Data\\'

train = pd.read_csv(data_path + 'train.csv')
test = pd.read_csv(data_path + 'test.csv')
sample_sub = pd.read_csv(data_path + 'sample_submission.csv')

# train.loc[ (train['Fill Flag'] == 0) | (train['Fill Flag'] > 5), 'Fill Flag'] = 0
# test.loc[ (test['Fill Flag'] == 0) | (test['Fill Flag'] > 5), 'Fill Flag'] = 0

In [324]:
df = pd.DataFrame()
df[['Year', 'Month', 'Day', 'Hour', 'Minute', 'Clearsky_DHI',
       'Clearsky_DNI', 'Clearsky_GHI', 'Cloud_Type', 'Dew_Point',
       'Temperature', 'Pressure', 'Relative_Humidity', 'Solar_Zenith_Angle',
       'Precipitable_Water', 'Wind_Direction', 'Wind_Speed', 'Fill_Flag']] = train[['Year', 'Month', 'Day', 'Hour', 'Minute', 'Clearsky DHI', 'Clearsky DNI', 'Clearsky GHI', 'Cloud Type', 'Dew Point', 'Temperature', 'Pressure', 'Relative Humidity', 'Solar Zenith Angle', 'Precipitable Water', 'Wind Direction', 'Wind Speed', 'Fill Flag']].copy()

train = df.copy()

df = pd.DataFrame()
df[['Year', 'Month', 'Day', 'Hour', 'Minute', 'Clearsky_DHI',
       'Clearsky_DNI', 'Clearsky_GHI', 'Cloud_Type', 'Dew_Point',
       'Temperature', 'Pressure', 'Relative_Humidity', 'Solar_Zenith_Angle',
       'Precipitable_Water', 'Wind_Direction', 'Wind_Speed', 'Fill_Flag']] = test[['Year', 'Month', 'Day', 'Hour', 'Minute', 'Clearsky DHI', 'Clearsky DNI', 'Clearsky GHI', 'Cloud Type', 'Dew Point', 'Temperature', 'Pressure', 'Relative Humidity', 'Solar Zenith Angle', 'Precipitable Water', 'Wind Direction', 'Wind Speed', 'Fill Flag']].copy()
test = df.copy()

In [325]:
from sklearn import metrics, preprocessing
from tensorflow.keras import layers
from tensorflow.keras import optimizers
from tensorflow.keras.models import Model, load_model
from tensorflow.keras import callbacks
from tensorflow.keras import backend as K
from tensorflow.keras import utils
from tensorflow.keras.utils import plot_model
import tensorflow as tf

In [326]:
from numpy.random import seed
seed(1)
tf.random.set_seed(2)

In [327]:
import matplotlib.pyplot as plt 
import seaborn as sns 
plt.style.use('ggplot')

In [328]:
# def coeff_determination(y_true, y_pred):
#     from keras import backend as K
#     SS_res =  K.sum(K.square( y_true-y_pred ))
#     SS_tot = K.sum(K.square( y_true - K.mean(y_true) ) )
#     return ( 1 - SS_res/(SS_tot + K.epsilon()) )

In [329]:
def rmse(y_true, y_pred):
    def fallback_rmse(y_true, y_pred):
        try:
            return np.sqrt(metrics.mean_squared_error(y_true, y_pred))
        except:
            return 0
    return tf.py_function(fallback_rmse, (y_true, y_pred), tf.double)

In [330]:
def create_model(data, catcols):    
    inputs = []
    outputs = []
    for c in catcols:
        num_unique_values = int(data[c].nunique())
        embed_dim = int(min(np.ceil((num_unique_values)/2),50000000000))
        print(embed_dim)
        inp = layers.Input(shape=(1,))
        out = layers.Embedding(num_unique_values, embed_dim, name=c+'_embed')(inp)
        out = layers.SpatialDropout1D(0.3)(out)
        out = layers.Reshape(target_shape=(embed_dim, ))(out)
        inputs.append(inp)
        outputs.append(out)
    
    x = layers.Concatenate()(outputs)
    x = layers.BatchNormalization()(x)
    
    x = layers.Dense(300, activation="relu")(x)
    x = layers.Dropout(0.3)(x)
    x = layers.BatchNormalization()(x)
    
    x = layers.Dense(300, activation="relu")(x)
    x = layers.Dropout(0.3)(x)
    x = layers.BatchNormalization()(x)
    
    x = layers.Dense(300, activation="relu")(x)
    x = layers.Dropout(0.3)(x)
    x = layers.BatchNormalization()(x)

    y = layers.Dense(1)(x)

    model = Model(inputs=inputs, outputs=y)
    return model

In [331]:
# Apply LabelEncoder to categorical features
le_dict= {}
le = preprocessing.LabelEncoder()
for col in train.columns:
    if col not in ['Clearsky_DHI', 'Clearsky_DNI', 'Clearsky_GHI']:
        le.fit(list(train[col].values) + list(test[col].values))
        train['le_'+col] = le.transform(list(train[col].values))
        test['le_' +col] = le.transform(list(test[col].values))
        le_dict[col] = dict(zip(le.classes_, le.transform(le.classes_)))

In [332]:
test.columns

Index(['Year', 'Month', 'Day', 'Hour', 'Minute', 'Clearsky_DHI',
       'Clearsky_DNI', 'Clearsky_GHI', 'Cloud_Type', 'Dew_Point',
       'Temperature', 'Pressure', 'Relative_Humidity', 'Solar_Zenith_Angle',
       'Precipitable_Water', 'Wind_Direction', 'Wind_Speed', 'Fill_Flag',
       'le_Year', 'le_Month', 'le_Day', 'le_Hour', 'le_Minute',
       'le_Cloud_Type', 'le_Dew_Point', 'le_Temperature', 'le_Pressure',
       'le_Relative_Humidity', 'le_Solar_Zenith_Angle',
       'le_Precipitable_Water', 'le_Wind_Direction', 'le_Wind_Speed',
       'le_Fill_Flag'],
      dtype='object')

In [333]:
features = ['Year', 'Month', 'Day', 'Hour', 'Minute', 'Cloud_Type', 'Dew_Point',
       'Temperature', 'Pressure', 'Relative_Humidity', 'Solar_Zenith_Angle',
       'Precipitable_Water', 'Wind_Direction', 'Wind_Speed', 'Fill_Flag']

train_X = train[features]
train_y = train['Clearsky_DHI'] 

test_X = [test.loc[:, features].values[:, k] for k in range(test.loc[:, features].values.shape[1])]

cat_cols = ['Month', 'Fill_Flag']

In [335]:
num_fold=5

In [336]:
oof_preds = np.zeros(train.shape[0])
test_preds = np.zeros(test.shape[0])

kf = model_selection.KFold(n_splits=num_fold, shuffle=True, random_state=2020)

for dev_index, val_index in kf.split(train_X):
    print(len(dev_index))
    dev_X, val_X = train_X.iloc[dev_index, :], train_X.iloc[val_index, :]
    dev_X = dev_X.reset_index(drop=True)
    val_X = val_X.reset_index(drop=True)
    dev_y, val_y = train_y[dev_index], train_y[val_index]
    
    
    model = create_model(train, features)
    model.compile(loss='mean_squared_error', optimizer='adam', metrics=[coeff_determination])
    dev_X = [dev_X.loc[:, features].values[:, k] for k in range(dev_X.loc[:, features].values.shape[1])]
    val_X = [val_X.loc[:, features].values[:, k] for k in range(val_X.loc[:, features].values.shape[1])]
    
    print(len(dev_X), len(val_X))
    es = callbacks.EarlyStopping(monitor='val_coeff_determination', min_delta=0.001, patience=10, 
                                 verbose=1, mode='max', baseline=None, restore_best_weights=True)

    rlr = callbacks.ReduceLROnPlateau(monitor='val_coeff_determination', factor=0.5, patience=5, min_lr=1e-6, 
                                      mode='max', verbose=1)


    history = model.fit(dev_X, dev_y)
    
    # summarize history for accuracy
    # plt.plot(history.history['coeff_determination'])
    # plt.plot(history.history['val_coeff_determination'])
    # plt.title('loss')
    # plt.ylabel('R2 Score')
    # plt.xlabel('epoch')
    # plt.legend(['train', 'val'], loc='upper right')
    # plt.show()

    valid_fold_preds = model.predict(val_X)
    test_fold_preds = model.predict(test_X)
    oof_preds[val_index] = valid_fold_preds.ravel()
    test_preds += test_fold_preds.ravel()
    print("R2: ", metrics.r2_score(val_y, valid_fold_preds))
    print("RMSE: ", np.sqrt(metrics.mean_squared_error(val_y, valid_fold_preds)))
    K.clear_session()

140236
5
6
16
12
1
5
200
218
25
3962
8111
3134
1801
59
16
15 15


InvalidArgumentError: Graph execution error:

Detected at node 'model_36/Day_embed/embedding_lookup' defined at (most recent call last):
    File "C:\Users\Sunil\AppData\Local\Programs\Python\Python310\lib\runpy.py", line 196, in _run_module_as_main
      return _run_code(code, main_globals, None,
    File "C:\Users\Sunil\AppData\Local\Programs\Python\Python310\lib\runpy.py", line 86, in _run_code
      exec(code, run_globals)
    File "C:\Users\Sunil\AppData\Local\Programs\Python\Python310\lib\site-packages\ipykernel_launcher.py", line 16, in <module>
      app.launch_new_instance()
    File "C:\Users\Sunil\AppData\Local\Programs\Python\Python310\lib\site-packages\traitlets\config\application.py", line 846, in launch_instance
      app.start()
    File "C:\Users\Sunil\AppData\Local\Programs\Python\Python310\lib\site-packages\ipykernel\kernelapp.py", line 677, in start
      self.io_loop.start()
    File "C:\Users\Sunil\AppData\Local\Programs\Python\Python310\lib\site-packages\tornado\platform\asyncio.py", line 199, in start
      self.asyncio_loop.run_forever()
    File "C:\Users\Sunil\AppData\Local\Programs\Python\Python310\lib\asyncio\base_events.py", line 595, in run_forever
      self._run_once()
    File "C:\Users\Sunil\AppData\Local\Programs\Python\Python310\lib\asyncio\base_events.py", line 1881, in _run_once
      handle._run()
    File "C:\Users\Sunil\AppData\Local\Programs\Python\Python310\lib\asyncio\events.py", line 80, in _run
      self._context.run(self._callback, *self._args)
    File "C:\Users\Sunil\AppData\Local\Programs\Python\Python310\lib\site-packages\ipykernel\kernelbase.py", line 461, in dispatch_queue
      await self.process_one()
    File "C:\Users\Sunil\AppData\Local\Programs\Python\Python310\lib\site-packages\ipykernel\kernelbase.py", line 450, in process_one
      await dispatch(*args)
    File "C:\Users\Sunil\AppData\Local\Programs\Python\Python310\lib\site-packages\ipykernel\kernelbase.py", line 357, in dispatch_shell
      await result
    File "C:\Users\Sunil\AppData\Local\Programs\Python\Python310\lib\site-packages\ipykernel\kernelbase.py", line 652, in execute_request
      reply_content = await reply_content
    File "C:\Users\Sunil\AppData\Local\Programs\Python\Python310\lib\site-packages\ipykernel\ipkernel.py", line 353, in do_execute
      res = shell.run_cell(code, store_history=store_history, silent=silent)
    File "C:\Users\Sunil\AppData\Local\Programs\Python\Python310\lib\site-packages\ipykernel\zmqshell.py", line 532, in run_cell
      return super().run_cell(*args, **kwargs)
    File "C:\Users\Sunil\AppData\Local\Programs\Python\Python310\lib\site-packages\IPython\core\interactiveshell.py", line 2768, in run_cell
      result = self._run_cell(
    File "C:\Users\Sunil\AppData\Local\Programs\Python\Python310\lib\site-packages\IPython\core\interactiveshell.py", line 2814, in _run_cell
      return runner(coro)
    File "C:\Users\Sunil\AppData\Local\Programs\Python\Python310\lib\site-packages\IPython\core\async_helpers.py", line 129, in _pseudo_sync_runner
      coro.send(None)
    File "C:\Users\Sunil\AppData\Local\Programs\Python\Python310\lib\site-packages\IPython\core\interactiveshell.py", line 3012, in run_cell_async
      has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
    File "C:\Users\Sunil\AppData\Local\Programs\Python\Python310\lib\site-packages\IPython\core\interactiveshell.py", line 3191, in run_ast_nodes
      if await self.run_code(code, result, async_=asy):
    File "C:\Users\Sunil\AppData\Local\Programs\Python\Python310\lib\site-packages\IPython\core\interactiveshell.py", line 3251, in run_code
      exec(code_obj, self.user_global_ns, self.user_ns)
    File "C:\Users\Sunil\AppData\Local\Temp\ipykernel_8280\1180447202.py", line 27, in <module>
      history = model.fit(dev_X, dev_y)
    File "C:\Users\Sunil\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\utils\traceback_utils.py", line 64, in error_handler
      return fn(*args, **kwargs)
    File "C:\Users\Sunil\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\engine\training.py", line 1384, in fit
      tmp_logs = self.train_function(iterator)
    File "C:\Users\Sunil\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\engine\training.py", line 1021, in train_function
      return step_function(self, iterator)
    File "C:\Users\Sunil\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\engine\training.py", line 1010, in step_function
      outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "C:\Users\Sunil\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\engine\training.py", line 1000, in run_step
      outputs = model.train_step(data)
    File "C:\Users\Sunil\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\engine\training.py", line 859, in train_step
      y_pred = self(x, training=True)
    File "C:\Users\Sunil\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\utils\traceback_utils.py", line 64, in error_handler
      return fn(*args, **kwargs)
    File "C:\Users\Sunil\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\engine\base_layer.py", line 1096, in __call__
      outputs = call_fn(inputs, *args, **kwargs)
    File "C:\Users\Sunil\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\utils\traceback_utils.py", line 92, in error_handler
      return fn(*args, **kwargs)
    File "C:\Users\Sunil\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\engine\functional.py", line 451, in call
      return self._run_internal_graph(
    File "C:\Users\Sunil\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\engine\functional.py", line 589, in _run_internal_graph
      outputs = node.layer(*args, **kwargs)
    File "C:\Users\Sunil\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\utils\traceback_utils.py", line 64, in error_handler
      return fn(*args, **kwargs)
    File "C:\Users\Sunil\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\engine\base_layer.py", line 1096, in __call__
      outputs = call_fn(inputs, *args, **kwargs)
    File "C:\Users\Sunil\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\utils\traceback_utils.py", line 92, in error_handler
      return fn(*args, **kwargs)
    File "C:\Users\Sunil\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\layers\embeddings.py", line 197, in call
      out = tf.nn.embedding_lookup(self.embeddings, inputs)
Node: 'model_36/Day_embed/embedding_lookup'
indices[14,0] = 31 is not in [0, 31)
	 [[{{node model_36/Day_embed/embedding_lookup}}]] [Op:__inference_train_function_120496]