In [None]:
import numpy as np
import pandas as pd

import tensorflow as tf
import tensorflow.keras as keras

from sklearn.model_selection import train_test_split
from imblearn.over_sampling import SMOTE

# Load the water-potability CSV (path is relative to the notebook's working
# directory) into the module-level frame `df` used by every later cell.
df = pd.read_csv('../input/water-potability/water_potability.csv')

In [None]:
# Print the frame's column summary (dtypes and non-null counts) so missing
# values can be spotted before cleaning.
df.info()

In [None]:
# Target range for the min-max scaling applied to every feature column.
MIN = -1
MAX = 1

# Drop every row that has a missing value. `.copy()` makes the result an
# independent frame, so the column assignments below never write into a
# view of the raw data (avoids pandas' SettingWithCopyWarning).
df = df.dropna().copy()

# Linearly rescale each feature column onto [MIN, MAX]; the label column
# "Potability" is left untouched.
# NOTE(review): min/max are computed over the whole dataset, i.e. before any
# train/test split, so test rows influence the scaling — confirm this is
# acceptable for the reported accuracy.
for c in df.columns:
    if c == "Potability":
        continue

    col_min = df[c].min()
    col_range = df[c].max() - col_min

    if col_range == 0:
        # Constant column: the scaling formula would be 0/0 and fill the
        # column with NaN, so place it at the middle of the target range.
        df[c] = (MIN + MAX) / 2
        continue

    # Plain vectorized arithmetic on the Series (no `transform` + lambda).
    df[c] = (df[c] - col_min) / col_range * (MAX - MIN) + MIN

In [None]:
def get_data(test_size=0.1, random_state=0):
    """Return a train/test split of the module-level frame `df`.

    The "Potability" column is used as the label and every other column as a
    feature. The data is split first; SMOTE oversampling is then applied to
    the training portion only, so the test set contains nothing but original
    rows and no synthetic sample is interpolated from a test row.

    Args:
        test_size: Forwarded to `train_test_split` (share of rows held out).
        random_state: Seed used for both the split and the SMOTE resampling,
            so a given value always yields the same arrays.

    Returns:
        Tuple `(X_train, X_test, Y_train, Y_test)` of NumPy arrays, where the
        training arrays have been oversampled with SMOTE.
    """
    df_c = df.copy(deep=True)

    # `pop` removes the label column from the copy, leaving only features.
    Y = df_c.pop("Potability").values
    X = df_c.values

    X_train, X_test, Y_train, Y_test = train_test_split(
        X, Y, test_size=test_size, random_state=random_state
    )

    # Oversample after splitting: resampling the full dataset first would
    # leak information about held-out rows into the training set.
    X_train, Y_train = SMOTE(random_state=random_state).fit_resample(X_train, Y_train)

    return X_train, X_test, Y_train, Y_test

In [None]:
def get_model(n_features=9):
    """Build the uncompiled 1-D convolutional binary classifier.

    The flat feature vector is reshaped to `(n_features, 1)` so it can be fed
    through three `Conv1D` layers, followed by dropout, a flatten step and
    three dense layers. The final layer is a single sigmoid unit, so the
    model outputs probabilities rather than logits; pair it with a loss that
    expects probabilities.

    Args:
        n_features: Number of input features per sample. Defaults to 9, the
            value that was previously hard-coded.

    Returns:
        A `keras.Sequential` model that still needs to be compiled.
    """
    model = keras.Sequential([
        keras.layers.Input(shape=(n_features,)),
        # Add a trailing channel axis: Conv1D expects (steps, channels).
        keras.layers.Reshape(target_shape=(n_features, 1)),

        keras.layers.Conv1D(16*2, 3, strides=1, padding='same', activation='relu', data_format="channels_last"),
        keras.layers.Conv1D(16*2, 3, strides=1, padding='same', activation='relu', data_format="channels_last"),
        keras.layers.Conv1D(16*2, 3, strides=1, padding='same', activation='relu', data_format="channels_last"),
        keras.layers.Dropout(0.1),

        keras.layers.Flatten(),
        keras.layers.Dense(100, activation='relu'),
        keras.layers.Dense(100, activation='relu'),
        keras.layers.Dense(100, activation='relu'),
        keras.layers.Dense(1, activation='sigmoid'),
    ])
    return model

In [None]:
# Build a throwaway model instance only to print its layer/parameter summary.
get_model().summary()

In [None]:
# Test accuracy of each independent training run.
accs = []

# Train a fresh model on 10 different train/test splits (the loop index is
# the split seed) and record the accuracy on each held-out set.
for i in range(10):
    X_train, X_test, Y_train, Y_test = get_data(random_state=i)

    optimizer = keras.optimizers.Adam(learning_rate=0.001)

    model = get_model()
    model.compile(optimizer=optimizer,
                  # The model's last layer is a sigmoid, so it already emits
                  # probabilities; from_logits=True would apply a second
                  # sigmoid inside the loss.
                  loss=tf.keras.losses.BinaryCrossentropy(from_logits=False, label_smoothing=0.1),
                  metrics=["accuracy"])
    model.fit(X_train, Y_train, shuffle=True, epochs=100, batch_size=10, validation_data=(X_test, Y_test), validation_batch_size=1, validation_freq=1, verbose=0)

    # `evaluate` returns [loss, accuracy]; only the accuracy is kept.
    _, acc = model.evaluate(X_test, Y_test)
    accs.append(acc)

In [None]:
# Report the mean of the per-run test accuracies in `accs` as a percentage.
print(f"Accuracy: {np.mean(accs)*100:.2f}%")