In [None]:
import os
import numpy as np
import pandas as pd
import tensorflow as tf

import matplotlib.pyplot as plt

from tensorflow import keras
from tensorflow.keras import layers
from keras.layers import Dropout, Input, Dense
%matplotlib inline

import warnings
warnings.filterwarnings("ignore")

# tf.test.gpu_device_name() yields the GPU device name, or a string without
# "GPU" in it when no GPU is visible; report exactly one of the two outcomes.
device_name = tf.test.gpu_device_name()
if "GPU" not in device_name:
    print("GPU device not found")
else:
    print('Found GPU at: {}'.format(device_name))

In [None]:
# Show the entries of the '../input/lish-moa' directory as the cell output.
os.listdir('../input/lish-moa')

In [None]:
# Read the three CSV files used by this notebook, in the order
# train features -> scored train targets -> test features.
train_features, train_targets, test_features = map(
    pd.read_csv,
    (
        '../input/lish-moa/train_features.csv',
        '../input/lish-moa/train_targets_scored.csv',
        '../input/lish-moa/test_features.csv',
    ),
)
# Additional files that are currently not loaded:
#train_targets_nonscored = pd.read_csv('../input/lish-moa/train_targets_nonscored.csv')
#submission = pd.read_csv('../input/lish-moa/sample_submission.csv')

In [None]:
# Report the (rows, columns) shape of every loaded frame.
print(f'train_features.shape: {train_features.shape}')
print(f'train_targets.shape: {train_targets.shape}')
print(f'test_features.shape: {test_features.shape}')

In [None]:
# Join features and targets on the shared sample id.
train = pd.merge(train_features, train_targets, on='sig_id')

# Every target column, i.e. all columns of train_targets except the id.
target_cols = [col for col in train_targets.columns if col != 'sig_id']
cols = target_cols + ['cp_type']

# Per cp_type group: sum each target column, then add those sums across
# all target columns to get one total per group.
train[cols].groupby('cp_type').sum().sum(axis=1)

In [None]:
# Shapes before filtering.
print(train_features.shape, test_features.shape)

# Keep only the rows whose cp_type is not 'ctl_vehicle' and renumber them from 0.
x_train = train.loc[train['cp_type'].ne('ctl_vehicle')].reset_index(drop=True)
x_test = test_features.loc[test_features['cp_type'].ne('ctl_vehicle')].reset_index(drop=True)

# Shapes after filtering.
print(x_train.shape, x_test.shape)

In [None]:
# Build the training and validation splits: the first 80% of the rows are
# used for training, the remaining rows for validation.
num_train_samples = int(len(train_features) * 0.8)

# Detach the 'sig_id' column from each frame. This mutates the frames in
# place, so the cell cannot be re-run without reloading the data first.
full_train_features_ids = train_features.pop('sig_id')
full_test_features_ids = test_features.pop('sig_id')
del train_targets['sig_id']

# Independent copies of the complete (id-free) training data.
full_train_features_df = train_features.copy()
full_train_targets_df = train_targets.copy()

# Positional split of features and targets at the same row boundary.
train_features_df = train_features.iloc[:num_train_samples]
val_features_df = train_features.iloc[num_train_samples:]
train_targets_df = train_targets.iloc[:num_train_samples]
val_targets_df = train_targets.iloc[num_train_samples:]

print(f'Total training samples: {len(full_train_features_df)}')
print(f'Training split samples: {len(train_features_df)}')
print(f'Validation split samples: {len(val_features_df)}')

In [None]:
# The two string-valued columns are handled as categorical features;
# every other column of the training frame is treated as numerical.
categorical_feature_names = ['cp_type', 'cp_dose']
feature_names = train_features_df.columns.tolist()
numerical_feature_names = [
    column for column in feature_names
    if column not in categorical_feature_names
]

In [None]:
# Merge the numerical features
def merge_numerical_features(feature_dict):
    """Collapse the numerical entries of a feature dict into one stacked tensor.

    Args:
        feature_dict: mapping from feature name to value; it must contain every
            name listed in the module-level `categorical_feature_names` and
            `numerical_feature_names`.

    Returns:
        A new dict holding the categorical entries unchanged plus a single
        'numerical_features' entry: the numerical values cast to float32 and
        stacked in the order given by `numerical_feature_names`.
    """
    stacked = tf.stack(
        [tf.cast(feature_dict[column], 'float32') for column in numerical_feature_names]
    )
    merged = {column: feature_dict[column] for column in categorical_feature_names}
    merged['numerical_features'] = stacked
    return merged

In [None]:
# Training split as a tf.data pipeline of (features, targets) pairs.
# Features: one dict per row, with the numerical columns merged into one tensor.
train_features_ds = (
    tf.data.Dataset
    .from_tensor_slices(dict(train_features_df))
    .map(merge_numerical_features)
)
# Targets: one row of the target matrix per element.
train_targets_ds = tf.data.Dataset.from_tensor_slices(train_targets_df.to_numpy())
train_ds = tf.data.Dataset.zip((train_features_ds, train_targets_ds))

In [None]:
# Complete training data (no rows held out) as a tf.data pipeline of
# (features, targets) pairs.
full_train_features_ds = (
    tf.data.Dataset
    .from_tensor_slices(dict(full_train_features_df))
    .map(merge_numerical_features)
)
full_train_targets_ds = tf.data.Dataset.from_tensor_slices(full_train_targets_df.to_numpy())
full_train_ds = tf.data.Dataset.zip((full_train_features_ds, full_train_targets_ds))

In [None]:
# Validation split as a tf.data pipeline of (features, targets) pairs.
val_features_ds = (
    tf.data.Dataset
    .from_tensor_slices(dict(val_features_df))
    .map(merge_numerical_features)
)
val_targets_ds = tf.data.Dataset.from_tensor_slices(val_targets_df.to_numpy())
val_ds = tf.data.Dataset.zip((val_features_ds, val_targets_ds))

In [None]:
# Both training pipelines are shuffled through a 1024-element buffer; the
# validation pipeline keeps its order. All three use batches of 64 and a
# prefetch buffer of 8.
# NOTE: this cell rebinds the datasets, so running it twice batches them twice.
train_ds = (
    train_ds
    .shuffle(buffer_size=1024)
    .batch(batch_size=64)
    .prefetch(buffer_size=8)
)
full_train_ds = (
    full_train_ds
    .shuffle(buffer_size=1024)
    .batch(batch_size=64)
    .prefetch(buffer_size=8)
)
val_ds = (
    val_ds
    .batch(batch_size=64)
    .prefetch(buffer_size=8)
)

In [None]:
# Display the dataset object itself (its repr), not its elements.
full_train_ds

In [None]:
from tensorflow.keras.layers.experimental.preprocessing import Normalization
from tensorflow.keras.layers.experimental.preprocessing import CategoryEncoding
from tensorflow.keras.layers.experimental.preprocessing import StringLookup

def encode_numerical_feature(feature, name, dataset):
    """Normalize `feature` with statistics learned from `dataset`.

    Args:
        feature: the input that the fitted Normalization layer is applied to.
        name: key used to pick this feature out of each dataset element.
        dataset: dataset yielding (features, targets) pairs, where `features`
            can be indexed by `name`.

    Returns:
        The output of the adapted Normalization layer called on `feature`.
    """
    scaler = Normalization()

    # Restrict the dataset to the one feature the layer should learn from.
    column_ds = dataset.map(lambda inputs, targets: inputs[name])

    # Fit the layer's statistics, then apply it to the given input.
    scaler.adapt(column_ds)
    return scaler(feature)


def encode_categorical_feature(feature, name, dataset):
    """Encode a string feature: vocabulary lookup followed by category encoding.

    Args:
        feature: the string input that the fitted layers are applied to.
        name: key used to pick this feature out of each dataset element.
        dataset: dataset yielding (features, targets) pairs, where `features`
            can be indexed by `name`.

    Returns:
        The output of a CategoryEncoding layer (output_mode="binary") applied
        to the integer indices that a StringLookup layer produces for `feature`.
    """
    # Stage 1: string -> integer index, with the vocabulary learned from the data.
    lookup = StringLookup()
    strings_ds = dataset.map(lambda inputs, targets: inputs[name])
    lookup.adapt(strings_ds)
    indices = lookup(feature)

    # Stage 2: integer index -> encoded vector. The encoder is adapted on the
    # indices of the same data so that it learns the range of possible indices.
    encoder = CategoryEncoding(output_mode="binary")
    indices_ds = strings_ds.map(lookup)
    encoder.adapt(indices_ds)

    return encoder(indices)

In [None]:
# Parallel lists: the raw Keras inputs and their encoded counterparts.
all_inputs, all_encoded_features = [], []

print('Processing categorical features...')
for name in categorical_feature_names:
    # One string-typed input per categorical column, encoded using the
    # training split.
    inputs = keras.Input(name=name, shape=(1,), dtype='string')
    encoded = encode_categorical_feature(inputs, name, train_ds)
    all_inputs += [inputs]
    all_encoded_features += [encoded]

print('Processing numerical features...')
# All numerical columns arrive as a single vector under 'numerical_features'.
numerical_inputs = keras.Input(
    name='numerical_features',
    shape=(len(numerical_feature_names),),
)
encoded_numerical_features = encode_numerical_feature(
    numerical_inputs, 'numerical_features', train_ds
)
all_inputs += [numerical_inputs]
all_encoded_features += [encoded_numerical_features]

# Join every encoded feature into one tensor that feeds the model body.
features = layers.Concatenate()(all_encoded_features)

In [None]:
# Inspect the model inputs, the encoded features and their concatenation,
# one per output line.
print(all_inputs, all_encoded_features, features, sep='\n')

In [None]:
# Display the shape of the concatenated encoded-feature tensor.
features.shape

In [None]:
# Build the neural network model and print its structure to the console.
# Architecture: dropout (rate 0.2) on the encoded features, followed by one
# dense layer with 206 sigmoid outputs.
# NOTE(review): 206 presumably equals the number of target columns — confirm.
x = layers.Dropout(rate=0.2)(features)
outputs = layers.Dense(units=206, activation='sigmoid')(x)
model = keras.Model(inputs=all_inputs, outputs=outputs)

model.summary()

In [None]:
# Compile the network with the Adam optimizer and categorical cross-entropy as the loss criterion (disabled variant, kept for reference):
#model.compile(optimizer='adam',
#loss='categorical_crossentropy',
 #              metrics=['accuracy'])

In [None]:
# Second variant: binary cross-entropy loss optimized with RMSprop,
# both with their default settings.
model.compile(
    loss=keras.losses.BinaryCrossentropy(),
    optimizer=keras.optimizers.RMSprop(),
)

In [None]:
# Fit on the 80% training split only. val_ds holds the trailing 20% of the
# same frame that full_train_ds is built from, so fitting on full_train_ds
# would train on the validation rows and make the reported val_loss
# meaningless. Use full_train_ds (without validation_data) only for a final
# refit once the configuration is settled.
history = model.fit(train_ds, epochs=10, validation_data=val_ds)

In [None]:
# Loss on the complete training data. full_train_ds contains the rows of
# val_ds as well, so this is not a held-out score.
model.evaluate(full_train_ds)

In [None]:
# Plot the per-epoch loss curves recorded by model.fit().
fig, ax = plt.subplots()
ax.plot(history.history['loss'], label='loss')
# 'val_loss' is only present when fit() was given validation data.
if 'val_loss' in history.history:
    ax.plot(history.history['val_loss'], label='val_loss')
ax.set_title('Loss per epoch')
ax.set_xlabel('epoch')
ax.set_ylabel('loss')
ax.legend()
ax.grid(True)
plt.show()