In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import KFold
from sklearn.metrics import mean_squared_error as mse
from sklearn.linear_model import Ridge
from sklearn.linear_model import Lasso
from sklearn.svm import SVR

In [None]:
# Read the three competition CSVs in one pass: training table, test table
# and the sample-submission template.
train, test, sub = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../input/tabular-playground-series-aug-2021/train.csv',
        '../input/tabular-playground-series-aug-2021/test.csv',
        '../input/tabular-playground-series-aug-2021/sample_submission.csv',
    )
)

In [None]:
# Preview the first five rows of the training table.
train.iloc[:5]

In [None]:
# Preview the first five rows of the test table.
test.iloc[:5]

In [None]:
# Report (rows, columns) for both tables.
for label, frame in (("Training data shape:", train), ("Test data shape:", test)):
    print(label, frame.shape)

In [None]:
# Total number of missing cells in each table (train first, then test).
for frame in (train, test):
    print(frame.isna().sum().sum())

In [None]:
# Remove the 'id' column from both tables.
# Rebinding the result (instead of inplace=True) together with errors='ignore'
# makes this cell idempotent: running it a second time no longer raises a
# KeyError because 'id' is already gone.
train = train.drop(columns='id', errors='ignore')
test = test.drop(columns='id', errors='ignore')

In [None]:
# Plain Python list of the training table's column labels.
cols = list(train.columns)

In [None]:
# Histogram of every column in `train`, four plots per row.
# The grid size is derived from the actual number of columns rather than the
# hard-coded 26x4 layout, and columns are iterated by their real labels rather
# than assuming they are named f0..f99 followed by 'loss'.
n_grid_cols = 4
plot_columns = train.columns.tolist()
# Ceiling division; at least one row so the figure height is never zero.
n_grid_rows = max(1, -(-len(plot_columns) // n_grid_cols))

plt.figure(figsize=(24, 6 * n_grid_rows))
for position, column in enumerate(plot_columns, start=1):
    plt.subplot(n_grid_rows, n_grid_cols, position)
    plt.hist(train[column])
    # Keep the capitalised axis label used for the target column.
    plt.xlabel('Loss' if column == 'loss' else column)
plt.show()

As we can see from the plots above, many of the features are left- or right-skewed.
Looking at the plots, we can also say that none of the features is categorical; all of them are continuous.

In [None]:
# Heatmap of the pairwise correlation matrix of all columns in `train`.
plt.figure(figsize=(20, 20))
corr = train.corr()
sns.heatmap(corr)
plt.show()

The correlation between the features and the loss is very low.

In [None]:
# Report the table shapes again now that the 'id' column has been removed.
for message, frame in (
    ("Training data shape after droping ID colunmn:", train),
    ("Test data shape after droppig ID column:", test),
):
    print(message, frame.shape)

In [None]:
# Column labels of the test table; the next cell uses them to select the
# feature columns out of `train`.
cols = test.columns

In [None]:
# Features: the columns of `train` that the test table also has.
# Target: the 'loss' column.
# (The former `test = test` self-assignment was a no-op and has been removed.)
X = train[cols]
y = train['loss']

In [None]:
# Preview the first five rows of the feature matrix.
X.iloc[:5]

In [None]:
# Preview the first five rows of the test features.
test.iloc[:5]

In [None]:
# Preview the first five target values.
y.iloc[:5]

In [None]:
# Standardise the features.
# The scaler is fitted on the training features only; the test features are
# transformed with those same fitted statistics. Re-fitting on the test set
# (fit_transform) would scale it with a different mean/variance, so models
# trained on X_scaled would see test inputs in a different feature space.
ss = StandardScaler()
X_scaled = ss.fit_transform(X)
test_scaled = ss.transform(test)

#### Model Training 

In [None]:
# Zero-filled 1-D buffers: one slot per training row and one per test row.
train_oof = np.zeros(len(train))
test_preds = np.zeros(len(test))

In [None]:
# Shape of the out-of-fold buffer.
np.shape(train_oof)

In [None]:
# Shape of the test-prediction buffer.
np.shape(test_preds)

Linear Regression Model

In [None]:
# 5-fold cross-validated linear regression.
# Each fold's model writes its validation predictions into `train_oof`
# (out-of-fold predictions) and adds its test predictions to `test_preds`,
# which is averaged over the folds at the end.
n_splits = 5
kf = KFold(n_splits=n_splits, random_state=137, shuffle=True)

# (Re)create the buffers inside this cell so that re-running it starts from
# zero instead of adding to already-averaged predictions from a previous run.
train_oof = np.zeros(X_scaled.shape[0])
test_preds = np.zeros(test_scaled.shape[0])

for j, (train_index, val_index) in enumerate(kf.split(X_scaled)):
    print("Fitting fold", j+1)
    train_features = X_scaled[train_index]
    # KFold yields positional indices, so select target rows by position.
    train_target = y.iloc[train_index]
    val_features = X_scaled[val_index]

    model = LinearRegression()
    model.fit(train_features, train_target)

    train_oof[val_index] = model.predict(val_features)
    test_preds += model.predict(test_scaled)
test_preds = test_preds/n_splits

In [None]:
# Root-mean-squared error between the targets and the linear model's
# out-of-fold predictions.
rmse_linear = np.sqrt(mse(y, train_oof))
print("RMSE for Linear Regression model", rmse_linear)

Ridge Regression Model

In [None]:
# Zero-filled 1-D buffers for the ridge model: one slot per training row and
# one per test row.
train_oof_ridge = np.zeros(len(train))
test_preds_ridge = np.zeros(len(test))

In [None]:
# Cross-validated ridge regression (alpha = 0.25).
# Each fold's model writes its validation predictions into `train_oof_ridge`
# and adds its test predictions to `test_preds_ridge`, which is averaged over
# the folds at the end.
kf = KFold(n_splits=n_splits, random_state=137, shuffle=True)

# (Re)create the buffers inside this cell so that re-running it starts from
# zero instead of adding to already-averaged predictions from a previous run.
train_oof_ridge = np.zeros(X_scaled.shape[0])
test_preds_ridge = np.zeros(test_scaled.shape[0])

for j, (train_index, val_index) in enumerate(kf.split(X_scaled)):
    print("Fitting fold", j+1)
    train_features = X_scaled[train_index]
    # KFold yields positional indices, so select target rows by position.
    train_target = y.iloc[train_index]
    val_features = X_scaled[val_index]

    model = Ridge(alpha = 0.25)
    model.fit(train_features, train_target)

    train_oof_ridge[val_index] = model.predict(val_features)
    test_preds_ridge += model.predict(test_scaled)
test_preds_ridge = test_preds_ridge / n_splits

In [None]:
# Root-mean-squared error between the targets and the ridge model's
# out-of-fold predictions.
rmse_ridge = np.sqrt(mse(y, train_oof_ridge))
print("RMSE for Ridge Regression model", rmse_ridge)

Lasso Regression Model

In [None]:
# Zero-filled 1-D buffers for the lasso model: one slot per training row and
# one per test row.
train_oof_lasso = np.zeros(len(train))
test_preds_lasso = np.zeros(len(test))

In [None]:
# Cross-validated lasso regression (alpha = 0.25).
# Each fold's model writes its validation predictions into `train_oof_lasso`
# and adds its test predictions to `test_preds_lasso`, which is averaged over
# the folds at the end.
kf = KFold(n_splits=n_splits, random_state=137, shuffle=True)

# (Re)create the buffers inside this cell so that re-running it starts from
# zero instead of adding to already-averaged predictions from a previous run.
train_oof_lasso = np.zeros(X_scaled.shape[0])
test_preds_lasso = np.zeros(test_scaled.shape[0])

for j, (train_index, val_index) in enumerate(kf.split(X_scaled)):
    print("Fitting fold", j+1)
    train_features = X_scaled[train_index]
    # KFold yields positional indices, so select target rows by position.
    train_target = y.iloc[train_index]
    val_features = X_scaled[val_index]

    model = Lasso(alpha = 0.25)
    model.fit(train_features, train_target)

    train_oof_lasso[val_index] = model.predict(val_features)
    test_preds_lasso += model.predict(test_scaled)
test_preds_lasso = test_preds_lasso / n_splits

In [None]:
# Root-mean-squared error between the targets and the lasso model's
# out-of-fold predictions.
rmse_lasso = np.sqrt(mse(y, train_oof_lasso))
print("RMSE for lasso Regression model", rmse_lasso)

Neural Network Model

In [None]:
import tensorflow as tf
from tensorflow import keras

In [None]:
# Fully-connected network built layer by layer:
# Dense 1024 -> 512 -> 256 -> 128 -> 64 -> 1, all ReLU, with batch
# normalisation after each of the first four Dense layers.
model = tf.keras.Sequential()
model.add(
    tf.keras.layers.Dense(
        1024,
        input_dim=X_scaled.shape[1],
        kernel_initializer='normal',
        activation='relu',
    )
)
model.add(tf.keras.layers.BatchNormalization())
for units in (512, 256, 128):
    model.add(tf.keras.layers.Dense(units, activation='relu'))
    model.add(tf.keras.layers.BatchNormalization())
model.add(tf.keras.layers.Dense(64, activation='relu'))
# NOTE(review): ReLU on the single output unit restricts predictions to >= 0;
# confirm this is intended for the regression target.
model.add(tf.keras.layers.Dense(1, activation='relu'))
model.summary()

In [None]:
# Compile the network with a default-configured Adam optimizer and
# mean-squared-error loss.
adam = keras.optimizers.Adam()
model.compile(optimizer=adam, loss='mean_squared_error')

In [None]:
# Train for 5 epochs, holding out 25% of the rows for validation.
model.fit(
    X_scaled,
    y,
    epochs=5,
    validation_split=0.25,
    shuffle=True,
    verbose=1,
)

In [None]:
# Predictions of the fitted network on the full scaled training matrix.
train_pred = model.predict(x=X_scaled)

In [None]:
# Shape of the network's training-set predictions.
np.shape(train_pred)

In [None]:
# Root-mean-squared error between the targets and `train_pred`.
# `train_pred` comes from predicting on all of X_scaled, i.e. the same matrix
# that was passed to model.fit, so this is not an out-of-fold score.
rmse_nn = np.sqrt(mse(y, train_pred))
print("RMSE for Neural Network Model", rmse_nn)

In [None]:
# Predictions of the fitted network on the scaled test matrix.
y_pred = model.predict(x=test_scaled)

In [None]:
# Display the sample-submission frame as loaded, before its 'loss' column is
# overwritten in the next cell.
sub

In [None]:
# Store the network's test predictions in the submission frame.
# The predictions are flattened to 1-D explicitly so that exactly one value
# per row is assigned, rather than relying on pandas to accept a 2-D array
# for a single column.
sub['loss'] = np.ravel(y_pred)

In [None]:
# Display the submission frame again, now that 'loss' holds the predictions.
sub

In [None]:
# Write the submission table to disk without the DataFrame index column.
sub.to_csv(path_or_buf='submission.csv', index=False)

A lot more can be done with the neural network:
* Increasing the no. of layers.
* Increasing no. of neurons in the layers.
* Using dropout to handle overfitting.

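So far, the NN works better than the basic regression models such as Linear Regression, Ridge, or Lasso. Note, however, that the NN's RMSE above is computed on the full training set (including the rows it was fitted on), whereas the RMSE of the linear models is out-of-fold, so this comparison favours the NN.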

Let me know in the comments if you have any ideas to improve the model.