In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import tensorflow as tf
from tensorflow import keras
from keras.utils.np_utils import to_categorical
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten,BatchNormalization, Conv2D, MaxPool2D
from keras.optimizers import Adam

from sklearn.model_selection import train_test_split

Data check

In [None]:
# Read the competition's training and test tables from the Kaggle input folder.
input_dir = "/kaggle/input/tabular-playground-series-jul-2021"
train_data = pd.read_csv(f"{input_dir}/train.csv")
test_data = pd.read_csv(f"{input_dir}/test.csv")

In [None]:
# Preview the first five training rows.
train_data.head(n=5)

In [None]:
# Preview the first five test rows.
test_data.head(n=5)

In [None]:
# The five sensor columns are the model inputs; the three target_* columns
# are the values to predict.
feature_cols = ["sensor_1", "sensor_2", "sensor_3", "sensor_4", "sensor_5"]
target_cols = ["target_carbon_monoxide", "target_benzene", "target_nitrogen_oxides"]

X = train_data[feature_cols]
y = train_data[target_cols]

In [None]:
# X holds the explanatory (independent) variables.
print("Shape of X:", X.shape)
X.head(n=3)

In [None]:
# y holds the target variables.
print("Shape of y:", y.shape)
y.head(n=3)

In [None]:
# Hold out 20% of the rows for validation. shuffle=False keeps the original
# row order (train_test_split shuffles the data by default).
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=0.2,
    shuffle=False,
)

In [None]:
# Build the model: 5 inputs -> Dense(8) -> Dense(4) -> Dense(8) -> Dense(3),
# with batch normalisation after each hidden Dense layer.
# Author's note: switching to swish made results slightly worse than relu;
# relu is generally said to be the better choice.
hidden_units = (2**3, 2**2, 2**3)

model = Sequential()
model.add(keras.Input(shape=(5,)))  # 5 = number of columns in X_train
for units in hidden_units:
    model.add(Dense(units, activation="swish"))
    model.add(BatchNormalization())
model.add(Dense(3, activation="swish"))  # one output per target column
# NOTE(review): the output layer also uses swish; confirm this is intended
# for a regression output rather than a linear activation.

# Author's note: it would be better to reduce the number of Dense layers.

In [None]:
# Custom RMSLE function (passed to model.compile() as a metric).
import keras.backend as K


def RMSLE(y_true, y_pred):
    """Return the square root of the mean squared logarithmic error.

    A ``keras.losses.MeanSquaredLogarithmicError`` object is created on each
    call and applied to ``(y_true, y_pred)``; the square root of its result
    is returned.
    """
    mean_sq_log_err = keras.losses.MeanSquaredLogarithmicError()(y_true, y_pred)
    return K.sqrt(mean_sq_log_err)

In [None]:
# Configure the training process.
# `metrics` accepts user-defined functions such as RMSLE as well as built-in
# names like "mae".
model.compile(
    optimizer="adam",
    loss="MeanSquaredLogarithmicError",
    metrics=[RMSLE],
)

In [None]:
# Author's note: 300 epochs worked better than 400.
epochs = 300

history = model.fit(
    X_train,
    y_train,
    validation_data=(X_val, y_val),
    epochs=epochs,
    batch_size=256,
    verbose=0,
)

In [None]:
# Learning curves: loss on the top panel, the RMSLE metric on the bottom one.
# The x-axis comes from the recorded history instead of `epochs`, so the plot
# still lines up if fewer epochs were actually run.
epoch_axis = range(len(history.history['loss']))

fig, ax = plt.subplots(2, 1)

ax[0].plot(epoch_axis, history.history['loss'], color='b', label="training loss")
ax[0].plot(epoch_axis, history.history['val_loss'], color='r', label="validation loss")
ax[0].set_ylabel("MSLE loss")
# Each Axes needs its own legend() call; labels alone are never drawn.
ax[0].legend(loc="best", shadow=True)

# RMSLE is an error metric (lower is better), so it is labelled as RMSLE
# rather than "accuracy".
ax[1].plot(epoch_axis, history.history['RMSLE'], color='b', label="training RMSLE")
ax[1].plot(epoch_axis, history.history['val_RMSLE'], color='r', label="validation RMSLE")
ax[1].set_xlabel("epoch")
ax[1].set_ylabel("RMSLE")

legend = ax[1].legend(loc="best", shadow=True)

modelは確かに学習できたみたいね

In [None]:
# Select the five sensor columns from the test set.
test_feature_cols = ["sensor_1", "sensor_2", "sensor_3", "sensor_4", "sensor_5"]
Xtest = test_data[test_feature_cols]

In [None]:
# Predict the three targets for every test row, then wrap the raw output in a
# DataFrame that carries the target column names and Xtest's index.
raw_preds = model.predict(Xtest)
preds = pd.DataFrame(
    data=raw_preds,
    columns=["target_carbon_monoxide", "target_benzene", "target_nitrogen_oxides"],
    index=Xtest.index,
)

In [None]:
# Preview the first five predicted rows.
preds.head(n=5)

In [None]:
# Load the sample submission; its date_time column is reused when the
# submission frame is assembled.
sample_submission = pd.read_csv(
    "/kaggle/input/tabular-playground-series-jul-2021/sample_submission.csv"
)
sample_submission

In [None]:
# Place the date_time column next to the predicted targets
# (column-wise concatenation, rows aligned on the index).
submission_parts = [sample_submission["date_time"], preds]
a = pd.concat(submission_parts, axis=1)
a

In [None]:
# Write the submission file without the index column.
a.to_csv("my_submission#2.csv", index=False)