# **TPS-08: EDA + AutoKeras neural network**

# **Importing Required Libraries:**

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.metrics import mean_squared_error
# Compact NumPy output: three decimals, no scientific notation for small values.
np.set_printoptions(suppress=True, precision=3)


In [None]:
# Install AutoKeras into the kernel's environment (imported as `ak` further down).
# NOTE(review): the version is unpinned — consider pinning it for reproducible runs.
%pip install autokeras

In [None]:
import tensorflow as tf
import autokeras as ak
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.layers.experimental import preprocessing

# Record the TensorFlow version this run used.
print(tf.__version__)


# **Basic Data Exploration:**

In [None]:
# Load the competition files: training rows, test rows and the sample submission.
train_data = pd.read_csv(
    '../input/tabular-playground-series-aug-2021/train.csv'
)
test_data = pd.read_csv(
    '../input/tabular-playground-series-aug-2021/test.csv'
)
submisssion_data = pd.read_csv(
    '../input/tabular-playground-series-aug-2021/sample_submission.csv'
)

In [None]:
# Preview the first five training rows.
train_data.head(n=5)

In [None]:
# Preview the first five test rows.
test_data.head(n=5)

In [None]:
# Count missing values per training column.
train_data.isna().sum()

In [None]:
# Count missing values per test column.
test_data.isna().sum()

In [None]:
# Report (rows, columns) for both frames.
for label, frame in (("Train", train_data), ("Test", test_data)):
    print(f'{label} Shape :  {frame.shape}')

In [None]:
# Drop the row identifier so it is not treated as a feature.
# errors='ignore' keeps this cell re-runnable: once 'id' has been removed,
# running the cell again is a no-op instead of raising a KeyError.
train_data = train_data.drop(columns='id', errors='ignore')
test_data = test_data.drop(columns='id', errors='ignore')

# Simple EDA

In [None]:
# Histograms (with a KDE overlay) for every column of the training frame.
# The grid is sized from the actual columns instead of hard-coded counts and
# 'f{i}' names, so the cell keeps working if columns are added, removed or renamed.
hist_columns = train_data.columns.tolist()
n_hist_cols = 4
# Ceiling division; at least one row so the figure height is never zero.
n_hist_rows = max(1, -(-len(hist_columns) // n_hist_cols))
plt.figure(figsize=(24, 6 * n_hist_rows))
for position, column in enumerate(hist_columns, start=1):
    plt.subplot(n_hist_rows, n_hist_cols, position)
    sns.histplot(train_data[column], kde=True)
plt.show()

In [None]:
# Pairwise correlation of all training columns, rendered as a heat map.
corr = train_data.corr()
fig, ax = plt.subplots(figsize=(20, 20))
sns.heatmap(corr, ax=ax)
plt.show()


In [None]:
# For every column, print the ten other columns with the highest (signed)
# correlation to it.
# The column itself is removed by label rather than by skipping the first
# sorted entry: with ties at 1.0 (or NaN correlations) the self-correlation
# is not guaranteed to sort first, and a real feature would be skipped instead.
cols = train_data.columns.tolist()
for col in cols:
    print(col)
    print(corr[col].drop(labels=col).sort_values(ascending=False).head(10))
    print('****************************************')



In [None]:
# Separate the predictors from the 'loss' target.
trainX = train_data.drop(columns=["loss"])
trainy = train_data["loss"]

# No copy is made here: testX and test_data are the same DataFrame object, so
# in-place changes made through testX are also visible through test_data.
testX = test_data

# Feature column labels, displayed as the cell output.
features = trainX.columns
features

In [None]:
from sklearn.preprocessing import StandardScaler

# Learn the scaling statistics from the training features, then apply the same
# transform to both the training and the test features.
# NOTE(review): the scaler is fitted on all of trainX, i.e. before the
# train/validation split, so validation rows contribute to the statistics.
scaler = StandardScaler()
scaler.fit(trainX[features])
trainX[features] = scaler.transform(trainX[features])
testX[features] = scaler.transform(testX[features])

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the training rows for validation; the fixed random_state
# makes the split repeatable.
X_train, X_valid, y_train, y_valid = train_test_split(
    trainX,
    trainy,
    train_size=0.8,
    test_size=0.2,
    random_state=0,
)

# Basic dense NN with TensorFlow and Keras

In [None]:
# Normalization layer whose statistics are adapted to the training features.
normalizer = preprocessing.Normalization(axis=-1)
normalizer.adapt(X_train.to_numpy())



In [None]:
# Feed-forward regressor:
#   normalizer -> Dense(128, relu) -> Dense(64, relu) -> Dense(64, relu) -> Dense(1)
# (Despite the variable name, the ReLU hidden layers make this a non-linear model.)
hidden_layers = [layers.Dense(units, activation='relu') for units in (128, 64, 64)]
linear_model = tf.keras.Sequential([normalizer, *hidden_layers, layers.Dense(1)])

In [None]:
# Optimise mean squared error with Adam.
adam = tf.optimizers.Adam(learning_rate=0.05)
linear_model.compile(optimizer=adam, loss='mean_squared_error')


In [None]:

# Train for 40 epochs in mini-batches of 64, evaluating on the held-out split
# after each epoch; the returned History object feeds the loss plot.
fit_options = dict(
    validation_data=(X_valid, y_valid),
    batch_size=64,
    epochs=40,
    verbose=1,
)
history = linear_model.fit(X_train, y_train, **fit_options)


In [None]:
# Plot training and validation error per epoch.
def plot_loss(history):
    """Plot the square root of the recorded loss for each epoch.

    The curves show ``sqrt(loss)`` (RMSE when the loss is mean squared
    error), and the labels say so instead of calling the values "loss".

    Args:
        history: Object exposing a ``history`` dict that maps ``'loss'`` and,
            optionally, ``'val_loss'`` to per-epoch values (such as the
            object returned by ``model.fit``).
    """
    recorded = history.history
    plt.plot(np.sqrt(recorded['loss']), label='sqrt(loss)')
    # 'val_loss' only exists when validation data was supplied to fit();
    # skip the curve instead of raising a KeyError.
    if 'val_loss' in recorded:
        plt.plot(np.sqrt(recorded['val_loss']), label='sqrt(val_loss)')
    plt.xlabel('Epoch')
    plt.ylabel('Error [sqrt(loss)]')
    plt.legend()
    plt.grid(True)


In [None]:
# Predict with the dense network and write its submission file.
# testX is referenced explicitly because it is the frame the scaler was applied
# to. It is currently the same object as test_data, but using testX keeps this
# cell correct if that alias is ever replaced by a copy.
prdct1 = linear_model.predict(testX)
final_pred1 = pd.read_csv("../input/tabular-playground-series-aug-2021/sample_submission.csv")
# The model ends in Dense(1), so predictions carry a trailing axis of size 1;
# flatten to 1-D before assigning to a single column.
final_pred1["loss"] = np.asarray(prdct1).ravel()
final_pred1.to_csv('submission_basicnn1.csv', index=False)

In [None]:
# Draw the per-epoch training/validation curves recorded while fitting linear_model.
plot_loss(history)


# **AutoKeras model tuning**

In [None]:
# AutoKeras search over structured-data regressors: at most 5 trials,
# trained on mean squared error and ranked by validation loss.
search = ak.StructuredDataRegressor(
    objective="val_loss",
    loss='mean_squared_error',
    max_trials=5,
)

In [None]:
# Run the architecture search: 10 epochs per fit, scored on the held-out split.
search.fit(
    X_train,
    y_train,
    validation_data=(X_valid, y_valid),
    epochs=10,
    verbose=1,
    use_multiprocessing=True,
)

In [None]:
# Predict with the best AutoKeras model and write its submission file.
# testX is referenced explicitly because it is the frame the scaler was applied
# to. It is currently the same object as test_data, but using testX keeps this
# cell correct if that alias is ever replaced by a copy.
lm = search.predict(testX)
final_pred = pd.read_csv("../input/tabular-playground-series-aug-2021/sample_submission.csv")
# Flatten the predictions to 1-D before assigning them to a single column.
final_pred["loss"] = np.asarray(lm).ravel()
# NOTE(review): the file name says "basicnn" but this holds the AutoKeras
# predictions; kept unchanged so existing references to the file still work.
final_pred.to_csv('submission_basicnn.csv', index=False)

In [None]:
# #save model
# mdl=search.export_model()
# try:
#     mdl.save("model_autokeras", save_format="tf")
# except Exception:
#     mdl.save("model_autokeras.h5")

In [None]:
# import shutil
# shutil.make_archive('model_autokeras', 'zip', './')