In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file in it.
for root, _subdirs, files in os.walk('/kaggle/input'):
    for path in (os.path.join(root, name) for name in files):
        print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Load the three competition files: training data, test features, and the
# sample submission (its date_time column is reused when building the output).
INPUT_FILES = (
    '/kaggle/input/tabular-playground-series-jul-2021/train.csv',
    '/kaggle/input/tabular-playground-series-jul-2021/test.csv',
    '/kaggle/input/tabular-playground-series-jul-2021/sample_submission.csv',
)
df, test, submit = (pd.read_csv(path) for path in INPUT_FILES)

In [None]:
# Preview the first rows of the raw training frame.
df.head()

In [None]:
# Names of all columns whose dtype is not the generic object dtype ('O').
features = [name for name, dtype in df.dtypes.items() if dtype != 'O']
features

In [None]:
import matplotlib.pyplot as plt
# One line plot per non-object column, each on its own figure and titled
# with the column name.
for column in features:
    fig, ax = plt.subplots()
    ax.plot(df[column])
    ax.set_title(column)
    plt.show()


In [None]:
import datetime
# Parse the timestamp column, then expand it into calendar features.
# The vectorised `.dt` accessor replaces the per-row Python lambdas: it yields
# the same values without calling a Python function once per row.
df['date_time'] = pd.to_datetime(df['date_time'])
df['day'] = df['date_time'].dt.day
df['month'] = df['date_time'].dt.month
df['year'] = df['date_time'].dt.year
df['hour'] = df['date_time'].dt.hour

In [None]:
# Preview the training frame with the added day/month/year/hour columns.
df.head()

In [None]:
import seaborn as sns
# Correlate the numeric columns only. `date_time` is datetime64 by this point,
# and pandas >= 2.0 no longer drops non-numeric columns from `corr()` on its
# own, so the selection is made explicit here (works on older pandas too).
sns.heatmap(df.select_dtypes(include='number').corr())

In [None]:
# Select the target columns by name instead of by position (9:12), so the
# selection keeps working if columns are added or reordered. This mirrors the
# 'target' substring convention used to build X_features.
target = df.loc[:, [column for column in df.columns if 'target' in column]]
target

In [None]:
# Preview the training frame again before building the feature matrix.
df.head()

In [None]:
# Feature columns: every column whose name does not contain 'target'.
X_features = [column for column in df.columns if 'target' not in column]

In [None]:
# Feature matrix: the non-target columns of the training frame.
X = df.loc[:, X_features]
X.head()

In [None]:
# Remove the raw timestamp; its information is carried by day/month/year/hour.
# X is rebuilt from `df` rather than from itself so that re-running this cell
# is safe (the original `X = X.drop(...)` raised KeyError on a second run).
X = df[X_features].drop(columns='date_time')
X.head()

In [None]:
# One target Series per pollutant: carbon monoxide, benzene, nitrogen oxides.
y_co, y_be, y_no = (
    df[name]
    for name in (
        'target_carbon_monoxide',
        'target_benzene',
        'target_nitrogen_oxides',
    )
)

In [None]:
# Creating Train test split
from sklearn.model_selection import train_test_split
# 70/30 split for the carbon monoxide target with a fixed seed.
X_train, X_test, yco_train, yco_test = train_test_split(
    X,
    y_co,
    random_state=0,
    test_size=0.3,
)

In [None]:
import datetime
# Apply the same timestamp expansion to the competition test frame as was
# applied to the training frame. The vectorised `.dt` accessor replaces the
# per-row Python lambdas and yields the same values.
test['date_time'] = pd.to_datetime(test['date_time'])
test['day'] = test['date_time'].dt.day
test['month'] = test['date_time'].dt.month
test['year'] = test['date_time'].dt.year
test['hour'] = test['date_time'].dt.hour

In [None]:
# Preview the test frame with the added day/month/year/hour columns.
test.head()

In [None]:
from sklearn.preprocessing import StandardScaler
# Fit the scaler on the training split only, then apply that same fit to the
# training split, the validation split and the competition test features.
# From here on X_train, X_test and test are scaled NumPy arrays.
sc = StandardScaler()
sc.fit(X_train)
X_train = sc.transform(X_train)
X_test = sc.transform(X_test)
test = sc.transform(test.drop(columns='date_time'))

In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow import keras
!pip install -q -U keras-tuner
import keras_tuner as kt

In [None]:
def model_builder(hp):
    """Build and compile a tunable fully connected regression network.

    Hyperparameters requested from ``hp``:

    * ``num_layers``    -- number of hidden Dense layers, an integer in 2..20.
    * ``units_<i>``     -- width of hidden layer ``i``: 32 to 512, step 32.
    * ``learning_rate`` -- Adam learning rate: one of 1e-2, 1e-3 or 1e-4.

    Args:
        hp: Hyperparameter container supplied by the tuner. Only its ``Int``
            and ``Choice`` methods are used.

    Returns:
        A compiled ``keras.Sequential`` model with ReLU hidden layers and one
        linear output unit, using mean squared error as both the loss and
        the reported metric.
    """
    network = keras.Sequential()

    # Hidden stack: depth and per-layer width are both searched.
    depth = hp.Int('num_layers', 2, 20)
    for layer_index in range(depth):
        width = hp.Int('units_' + str(layer_index),
                       min_value=32,
                       max_value=512,
                       step=32)
        network.add(keras.layers.Dense(units=width, activation='relu'))

    # Single linear unit for scalar regression.
    network.add(keras.layers.Dense(1, activation='linear'))

    learning_rate = hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4])
    optimizer = keras.optimizers.Adam(learning_rate=learning_rate)
    network.compile(optimizer=optimizer,
                    loss='mean_squared_error',
                    metrics=['mean_squared_error'])
    return network

In [None]:
from keras_tuner import RandomSearch
# Random-search tuner for the carbon monoxide model: at most 5 trials with
# 5 executions per trial, ranked by validation MSE. Tuner state is written
# under ./co/Co.
co_search_settings = dict(
    objective='val_mean_squared_error',
    max_trials=5,
    executions_per_trial=5,
    directory='co',
    project_name='Co',
)
tuner = RandomSearch(model_builder, **co_search_settings)

In [None]:
# Print the hyperparameter search space. The original referenced the method
# without calling it, which only displayed the bound-method repr.
tuner.search_space_summary()

In [None]:
# Run the CO hyperparameter search: 50 epochs per run, scored on the
# validation split.
tuner.search(
    X_train,
    yco_train,
    validation_data=(X_test, yco_test),
    epochs=50,
)

In [None]:
# Print the tuner's summary of the CO search results.
tuner.results_summary()

In [None]:
# Take the top-ranked CO model from the search and evaluate it on the
# validation split.
top_co_models = tuner.get_best_models(num_models=1)
best_model = top_co_models[0]
loss, mse = best_model.evaluate(X_test, yco_test)

In [None]:
# Predict CO on the held-out validation split (X_test), not on the
# competition test features.
Y_CO=best_model.predict(X_test)

In [None]:
# Shape of the CO predictions for the validation split.
Y_CO.shape

In [None]:
# Shape of the true CO validation targets, for comparison with the predictions.
yco_test.shape

In [None]:
#Calculating rmse on test set
from sklearn.metrics import mean_squared_error
from math import sqrt
# RMSE of the CO model on the validation split.
co_mse = mean_squared_error(yco_test, Y_CO)
rms_co = sqrt(co_mse)
rms_co  # author's note: 1.38 with 10 epochs, 5 trials, 3 executions

In [None]:
# Predict CO for the competition test features (`test` was scaled earlier)
# using the best CO model; these predictions feed the submission file.
co_sub=best_model.predict(test)
co_sub

In [None]:
#Creating a dir for Benzene in the similar way
from keras_tuner import RandomSearch
# Random-search tuner for the benzene model, configured like the CO tuner but
# with its own state directory (./be/Be).
be_search_settings = dict(
    objective='val_mean_squared_error',
    max_trials=5,
    executions_per_trial=5,
    directory='be',
    project_name='Be',
)
tuner_be = RandomSearch(model_builder, **be_search_settings)

In [None]:
# Creating Train test split
from sklearn.model_selection import train_test_split
# 70/30 split for the benzene target (seed 4, so a different split than CO).
X_train, X_test, ybe_train, ybe_test = train_test_split(X, y_be, test_size=0.3, random_state=4)
# Reuse the ALREADY-FITTED scaler instead of refitting it on this split.
# `test` was transformed with the earlier fit, so refitting here would train
# the benzene model on inputs scaled with different statistics than the
# `test` array it later predicts on.
X_train = sc.transform(X_train)
X_test = sc.transform(X_test)

In [None]:
# Run the benzene hyperparameter search (50 epochs per run, scored on the
# validation split), then print the results.
tuner_be.search(
    X_train,
    ybe_train,
    validation_data=(X_test, ybe_test),
    epochs=50,
)
tuner_be.results_summary()

In [None]:
# Show the benzene tuning results again (the search cell prints this summary too).
tuner_be.results_summary()

In [None]:
# Take the top-ranked benzene model and evaluate it on the validation split.
top_be_models = tuner_be.get_best_models(num_models=1)
best_model_be = top_be_models[0]
loss, mse = best_model_be.evaluate(X_test, ybe_test)

In [None]:
# Predict benzene on the held-out validation split (X_test).
Y_benzene=best_model_be.predict(X_test)

In [None]:
# RMSE of the benzene model on the validation split (an error measure,
# not an accuracy).
be_mse = mean_squared_error(ybe_test, Y_benzene)
rms_be = sqrt(be_mse)
rms_be  # author's note: 1.12 with 50 epochs, 5 trials, 5 executions

In [None]:
# Predict benzene for the competition test features with the best benzene
# model; these predictions feed the submission file.
be_sub=best_model_be.predict(test)
be_sub

In [None]:
#Creating a dir for Nitrogen in the similar way
from keras_tuner import RandomSearch
# Random-search tuner for the nitrogen oxides model, configured like the other
# two tuners but with its own state directory (./no/No).
no_search_settings = dict(
    objective='val_mean_squared_error',
    max_trials=5,
    executions_per_trial=5,
    directory='no',
    project_name='No',
)
tuner_no = RandomSearch(model_builder, **no_search_settings)

In [None]:
# Creating Train test split
from sklearn.model_selection import train_test_split
# 70/30 split for the nitrogen oxides target, using the same seed (0) as the
# CO split, followed by refitting the scaler on the new training split.
X_train, X_test, yno_train, yno_test = train_test_split(
    X,
    y_no,
    random_state=0,
    test_size=0.3,
)
sc.fit(X_train)
X_train = sc.transform(X_train)
X_test = sc.transform(X_test)

In [None]:
# Run the nitrogen oxides hyperparameter search (50 epochs per run, scored on
# the validation split), then print the results.
tuner_no.search(
    X_train,
    yno_train,
    validation_data=(X_test, yno_test),
    epochs=50,
)
tuner_no.results_summary()

In [None]:
# Take the top-ranked nitrogen oxides model and evaluate it on the
# validation split.
top_no_models = tuner_no.get_best_models(num_models=1)
best_model_no = top_no_models[0]
loss, mse = best_model_no.evaluate(X_test, yno_test)

In [None]:
# Predict nitrogen oxides on the validation split, then compute the RMSE
# (an error measure, not an accuracy).
Y_NO = best_model_no.predict(X_test)
no_mse = mean_squared_error(yno_test, Y_NO)
rms_no = sqrt(no_mse)
rms_no  # author's note: 83.93 with 10 epochs, 5 trials, 3 executions

In [None]:
# Predict nitrogen oxides for the competition test features with the best
# nitrogen oxides model; these predictions feed the submission file.
no_sub=best_model_no.predict(test)
no_sub


## Creating Submission

In [None]:
# Inspect the sample submission to see the expected output layout.
submit.head()

In [None]:
# Wrap each prediction array in a single-column DataFrame named after its
# target, then show the benzene and nitrogen oxides frames.
y_sub_co = pd.DataFrame(co_sub, columns=['target_carbon_monoxide'])
y_sub_benz = pd.DataFrame(be_sub, columns=['target_benzene'])
y_sub_NO = pd.DataFrame(no_sub, columns=['target_nitrogen_oxides'])
print(y_sub_benz)
y_sub_NO

In [None]:
# Assemble the submission: timestamps from the sample file followed by one
# column per predicted target, joined side by side on the row index.
submission_parts = [submit['date_time'], y_sub_co, y_sub_benz, y_sub_NO]
sub = pd.concat(submission_parts, axis='columns')
sub

In [None]:
# Write the submission to submission.csv in the current directory, without
# the index column, then print a confirmation message.
sub.to_csv('submission.csv', index=False)
print("Your submission was successfully saved!")