In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import math
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import seaborn as sns
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
from keras import layers
from keras.callbacks import EarlyStopping
from sklearn.model_selection import train_test_split,KFold
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found anywhere below the input directory.
for root, _subdirs, files in os.walk('/kaggle/input'):
    for fname in files:
        print(os.path.join(root, fname))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Load the train and test CSV files, using the `id` column as the index of each frame.
train, test = (
    pd.read_csv(csv_path, index_col='id')
    for csv_path in (
        '/kaggle/input/playground-series-s5e5/train.csv',
        '/kaggle/input/playground-series-s5e5/test.csv',
    )
)

In [None]:
# Encode Sex as a float32 indicator (male -> 1, female -> 0) in both frames.
# The dtype guard keeps this cell idempotent: mapping an already-encoded column
# a second time would match none of the string keys and silently turn every
# value into NaN.
for frame in (train, test):
    if not pd.api.types.is_numeric_dtype(frame['Sex']):
        frame['Sex'] = frame['Sex'].map({'male': 1, 'female': 0}).astype('float32')

In [None]:
# Print pandas' summary of the training frame (DataFrame.info).
train.info()

In [None]:
# Coarse 10-bin histogram of the target column.
plt.hist(train['Calories'], bins=10)
plt.show()

In [None]:
# Count how many times each distinct target value occurs.
train['Calories'].value_counts()

In [None]:
# Draw one histogram per numeric column of `train`, four subplots per row.
# Selecting by dtype keeps non-numeric columns out of the plots and out of
# anything else that iterates over `numeric_cols`.
numeric_cols = train.select_dtypes(include='number').columns
num_cols = len(numeric_cols)

cols_per_row = 4
rows_needed = math.ceil(num_cols / cols_per_row)

# squeeze=False makes plt.subplots always return a 2-D array of Axes, so the
# flatten() below also works when the grid collapses to a single subplot.
fig, axes = plt.subplots(
    nrows=rows_needed,
    ncols=min(num_cols, cols_per_row),
    figsize=(15, 5 * rows_needed),
    squeeze=False,
)

# Flatten the axes array for easy indexing
axes = axes.flatten()

for i, col in enumerate(numeric_cols):
    ax = axes[i]
    train[col].hist(ax=ax)
    ax.set_title(col)
    ax.set_xlabel(col)
    ax.set_ylabel('Frequency')
    ax.grid(True, linestyle='--', alpha=0.6)

# Remove the unused subplots in the last row when the number of columns
# is not a multiple of cols_per_row (the range is empty otherwise).
for i in range(num_cols, len(axes)):
    fig.delaxes(axes[i])

# Compute the layout after drawing so that titles and axis labels are taken into account.
fig.tight_layout(pad=3.0)

plt.show()

In [None]:
# Report the skewness of each column listed in numeric_cols.
for col_name in numeric_cols:
    skewness = train[col_name].skew()
    print(f'{col_name} :{skewness}')

In [None]:
# Separate the features from the target column.
X = train.drop(columns='Calories')
y = train['Calories']

# Hold out 10% of the rows for validation; the fixed random_state makes the split repeatable.
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=0.1,
    random_state=42,
)

In [None]:
# Number of feature columns in the training split (second entry of X_train.shape).
X_train.shape[1]

In [None]:
def rmsle(y_true, y_pred):
    """
    Calculates the Root Mean Squared Logarithmic Error (RMSLE).

    RMSLE = sqrt(mean((log(1 + y_pred) - log(1 + y_true)) ** 2))

    Args:
        y_true: The ground truth values.
        y_pred: The predicted values. May have a different rank than y_true
            (e.g. (n, 1) against (n,)) as long as both hold the same number
            of elements in the same order.

    Returns:
        A scalar tensor holding the RMSLE value.
    """
    # Flatten both inputs and align their dtypes so that an (n,) target and an
    # (n, 1) prediction are compared element-wise instead of broadcasting to (n, n).
    y_pred = tf.reshape(tf.convert_to_tensor(y_pred), [-1])
    y_true = tf.reshape(tf.cast(y_true, y_pred.dtype), [-1])
    # Clamp from below only: log1p is undefined for values <= -1, and the
    # metric is meant for non-negative quantities.
    floor = tf.keras.backend.epsilon()
    y_true = tf.maximum(y_true, floor)
    y_pred = tf.maximum(y_pred, floor)
    # Logarithmic error: log(1 + prediction) - log(1 + target)
    log_diff = tf.math.log1p(y_pred) - tf.math.log1p(y_true)
    # Mean of the squared logarithmic errors
    msle = tf.reduce_mean(tf.square(log_diff))
    # Root of the mean squared logarithmic error
    return tf.math.sqrt(msle)
# Seed the Python, NumPy and TensorFlow random number generators in one call
# so that runs are as repeatable as possible (tf.random.set_seed alone only
# covers TensorFlow's own generator).
keras.utils.set_random_seed(42)
input_shape = (X_train.shape[1],)  # Enclose X_train.shape[1] in a tuple

# Create Input layer
inputs = keras.Input(shape=input_shape)

# Fully connected regressor: three swish-activated hidden layers, each followed
# by batch normalization, and a single linear output unit.
model = keras.Sequential([
    inputs,  # Use Input layer as the first layer
    layers.Dense(32, activation='swish'),
    layers.BatchNormalization(),
    layers.Dense(64, activation='swish'),
    layers.BatchNormalization(),
    layers.Dense(32, activation='swish'),
    layers.BatchNormalization(),
    layers.Dense(1, activation='linear'),
])

# Stop once val_loss has not improved for 10 epochs and restore the best weights.
Early = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

optimizer = tf.keras.optimizers.Adam(clipvalue=1.0)  # Clip gradients to [-1, 1]
model.compile(optimizer=optimizer, loss=rmsle)

In [None]:
# Train for at most 100 epochs with mini-batches of 512 rows, validating on the
# held-out split after every epoch.
his = model.fit(
    X_train,
    y_train,
    epochs=100,
    batch_size=512,
    validation_data=(X_val, y_val),
    callbacks=[Early],  # Keras documents `callbacks` as a list of Callback instances
)

In [None]:
# Turn the training history into a DataFrame with one column per logged metric.
# getattr keeps this cell re-runnable: after the first run `his` is already a
# DataFrame (which has no `.history` attribute) and is simply copied into a new frame.
his = pd.DataFrame(getattr(his, 'history', his))
# Lowest validation loss reached during training
his['val_loss'].min()

In [None]:
# Plot every column of the history frame against its row index.
his.plot()

In [None]:
# Score the validation split with RMSLE, i.e. the RMSE of the log1p-transformed values.
y_pred = model.predict(X_val)
# Negative outputs are clipped to 0 (rather than mirrored with abs) so the
# logarithm is defined; ravel() flattens the (n, 1) prediction array to match y_val.
msle = mean_squared_error(
    np.log1p(y_val),
    np.log1p(np.clip(y_pred.ravel(), 0, None)),
)
np.sqrt(msle)

In [None]:
# Run the model on the test frame; y_test holds the model's predictions, not ground-truth labels.
y_test = model.predict(test)

In [None]:
# Write the predictions under the name of the target column ('Calories'),
# indexed by the test frame's index. Negative model outputs are clipped to 0
# because the target is a non-negative quantity.
submission = pd.DataFrame(
    np.clip(y_test, 0, None),
    index=test.index,
    columns=['Calories'],
)
submission.to_csv('submission.csv')