In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os
# Walk the Kaggle input directory and print the full path of every file found.
for root, _, files in os.walk('/kaggle/input'):
    for name in files:
        full_path = os.path.join(root, name)
        print(full_path)
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import tensorflow as tf
from sklearn.metrics import accuracy_score
import optuna

import warnings
warnings.filterwarnings('ignore')

# Greetings! :)
This is my first public notebook, so please be patient and let me know if you find any errors/bugs or have any ideas for improving it. Important remark: this notebook won't find the optimal hyperparameter values for a neural network (because of time and resource constraints), but it can be a good start.

In [None]:
# Read the competition's training and test tables into DataFrames.
train_csv_path = '/kaggle/input/tabular-playground-series-feb-2022/train.csv'
test_csv_path = '/kaggle/input/tabular-playground-series-feb-2022/test.csv'
df_train = pd.read_csv(train_csv_path)
df_test = pd.read_csv(test_csv_path)

In [None]:
# Drop the row identifier column from the training frame.
# errors='ignore' keeps this cell idempotent: re-running it after 'row_id' has
# already been removed no longer raises a KeyError.
df_train = df_train.drop(columns='row_id', errors='ignore')

# Removal of duplicated rows:
**The method for removing the duplicated rows is based on AmbrosM's notebook**: https://www.kaggle.com/ambrosm/tpsfeb22-02-postprocessing-against-the-mutants

In [None]:
# Count how often each distinct row occurs in the training frame.
vc = df_train.value_counts()

# Rebuild a frame with one row per distinct combination (taken from the index
# of the counts) and attach the occurrence count as 'sample_weight'.
dedup_train = pd.DataFrame(
    [list(row) for row in vc.index.values],
    columns=df_train.columns,
).assign(sample_weight=vc.values)
dedup_train.head()

In [None]:
# Map each target label to an integer class id; the id is the label's position
# in the list below.
target_dictionary = {
    label: class_id
    for class_id, label in enumerate([
        'Streptococcus_pyogenes',
        'Salmonella_enterica',
        'Enterococcus_hirae',
        'Escherichia_coli',
        'Campylobacter_jejuni',
        'Streptococcus_pneumoniae',
        'Staphylococcus_aureus',
        'Escherichia_fergusonii',
        'Bacteroides_fragilis',
        'Klebsiella_pneumoniae',
    ])
}

In [None]:
# Encode the string labels as integer class ids using target_dictionary.
# The explicit astype('int64') does two things:
#   * it guarantees an integer column instead of relying on replace() silently
#     downcasting an object column (deprecated behaviour in recent pandas), and
#   * it fails loudly if any label was not covered by target_dictionary.
# replace() (rather than map()) keeps the cell safe to re-run on encoded data.
dedup_train['target'] = dedup_train['target'].replace(target_dictionary).astype('int64')

In [None]:
# Every training column except the identifier and the label is a model input.
excluded_columns = ('row_id', 'target')
features = [column for column in df_train.columns if column not in excluded_columns]

In [None]:
# Separate the de-duplicated frame into inputs, labels and per-row weights.
X, y, sample_weight = (
    dedup_train[features],
    dedup_train['target'],
    dedup_train['sample_weight'],
)

In [None]:
# Hold out 20% of the de-duplicated rows for validation; the features, labels
# and sample weights are split together so they stay aligned.
# train_test_split is already imported in the notebook's first cell, so the
# redundant mid-notebook import was removed.
(
    X_train, X_valid,
    y_train, y_valid,
    sample_weight_train, sample_weight_valid,
) = train_test_split(X, y, sample_weight, test_size=0.2, random_state=0)

In [None]:
# Fit the scaler on the training split only, then apply the same
# transformation to both the training and validation splits.
sc = StandardScaler().fit(X_train)
X_train, X_valid = sc.transform(X_train), sc.transform(X_valid)

In [None]:
# Early-stopping callback shared by every training run in this notebook.
early_stopping_options = dict(
    patience=30,
    min_delta=0.001,
    restore_best_weights=True,
)
early_stopping = tf.keras.callbacks.EarlyStopping(**early_stopping_options)

In [None]:
# Pick a tf.distribute strategy: a TPUStrategy when the TPU setup inside the
# try block succeeds, otherwise whatever tf.distribute.get_strategy() returns.
try: # attempt to connect to a TPU cluster
    tpu = tf.distribute.cluster_resolver.TPUClusterResolver.connect() # TPU detection
    strategy = tf.distribute.TPUStrategy(tpu)
except ValueError: # a ValueError from the TPU setup above triggers the fallback
    strategy = tf.distribute.get_strategy() # default strategy that works on CPU and single GPU
# Report how many replicas the chosen strategy keeps in sync.
print("Number of accelerators: ", strategy.num_replicas_in_sync)

In [None]:
def runNN(trial):
    """Optuna objective: train one dense network and score it on the validation split.

    Hyperparameters sampled from ``trial``: batch size, number of hidden
    layers, width of each hidden layer, dropout rate, optimizer and hidden-layer
    activation.

    Reads the notebook-level names ``X_train``/``X_valid``, ``y_train``/``y_valid``,
    ``sample_weight_train``/``sample_weight_valid``, ``early_stopping`` and
    ``target_dictionary``.

    Args:
        trial: the Optuna trial used to sample the hyperparameters.

    Returns:
        Sample-weighted accuracy of the trained model on the validation split.
    """
    batch_size = trial.suggest_int("batch_size", 1024, 4096)
    n_layers = trial.suggest_int("n_layers", 1, 10)
    # suggest_float replaces the deprecated suggest_uniform (same range).
    dropout = trial.suggest_float('dropout', 0.0, 0.5)
    optimizer = trial.suggest_categorical("optimizer", ['adam', 'rmsprop'])
    activation = trial.suggest_categorical("activation", ['relu', 'sigmoid'])

    model = tf.keras.Sequential()
    for i in range(n_layers):
        num_hidden = trial.suggest_int("n_units_l{}".format(i), 512, 2048, log=True)
        # Use the sampled activation; it was previously sampled but a
        # hard-coded "relu" was used, so the hyperparameter had no effect.
        model.add(tf.keras.layers.Dense(num_hidden, activation=activation))
        model.add(tf.keras.layers.BatchNormalization())
        model.add(tf.keras.layers.Dropout(dropout))

    # The output layer must actually be added to the model. Previously it was
    # constructed and discarded, leaving the last Dropout as the model output.
    n_classes = len(target_dictionary)
    model.add(tf.keras.layers.Dense(n_classes, activation='softmax'))

    model.compile(optimizer=optimizer, loss="sparse_categorical_crossentropy", metrics=['accuracy'])

    model.fit(
        X_train,
        y_train,
        # Pass the validation weights too, so the loss monitored by early
        # stopping is weighted the same way as the score returned below.
        validation_data=(X_valid, y_valid, sample_weight_valid),
        batch_size=batch_size,
        epochs=200,
        sample_weight=sample_weight_train,
        callbacks=[early_stopping],
        verbose=0,
    )

    # Convert the per-class probabilities into predicted class ids.
    valid_pred = model.predict(X_valid)
    valid_pred = np.argmax(valid_pred, axis=-1)
    valid_score = accuracy_score(y_valid, valid_pred, sample_weight=sample_weight_valid)
    return valid_score

# Final remarks:
Unfortunately, it takes too much time to find the optimal values, and TPU time is limited, so I set the number of trials (n_trials) to 30.

In [None]:
# Run the hyperparameter search inside the distribution strategy's scope, so
# the models built by runNN are created under `strategy`.
with strategy.scope():
    study = optuna.create_study(direction="maximize")
    study.optimize(runNN, n_trials=30)

# Kept outside the `with` block: only a bare expression at the end of the cell
# is rendered as the cell's output, so inside the block it was never displayed.
study.best_params