In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Recursively list every file Kaggle mounted under the input directory,
# printing one full path per line.
for root, _, names in os.walk('/kaggle/input'):
    for name in names:
        print(os.path.join(root, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import random
import warnings
import gc

from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

warnings.filterwarnings("ignore")


In [None]:
# Single seed shared by every randomised step in this notebook.
seed = 47

# Seed the Python and NumPy global generators up front so that any code
# drawing from them gives the same values after "Restart & Run All".
random.seed(seed)
np.random.seed(seed)

In [None]:
def evaluate_model(model, x, y):
    """Compute the classification accuracy of ``model`` on ``(x, y)``.

    Args:
        model: Any object exposing ``predict(x)``. It may return class labels
            directly, or a 2-D array of per-class scores (e.g. softmax output).
        x: Inputs forwarded unchanged to ``model.predict``.
        y: Ground-truth targets in the format ``accuracy_score`` expects.

    Returns:
        dict: ``{'accuracy': <float>}``.
    """
    y_pred = np.asarray(model.predict(x))
    # A per-class score matrix cannot be compared against 1-D labels by
    # accuracy_score, so collapse it to the highest-scoring class index first.
    # The 1-D check on y leaves multilabel / one-hot targets untouched.
    if np.ndim(y) == 1 and y_pred.ndim > 1 and y_pred.shape[-1] > 1:
        y_pred = y_pred.argmax(axis=-1)
    acc = accuracy_score(y, y_pred)
    return {'accuracy' : acc}

In [None]:
import numpy as np

def split_sequences(sequences, n_steps):
    """Cut a 2-D array into overlapping windows of ``n_steps`` rows.

    For every window, the inputs are all columns except the last, and the
    target is the last column of the window's final row.

    Args:
        sequences: 2-D array; the last column holds the target.
        n_steps: Number of consecutive rows per window.

    Returns:
        tuple: ``(X, y)`` as NumPy arrays, one entry per window. Both are
        empty when ``n_steps`` exceeds the number of rows.
    """
    total = len(sequences)
    # A window starting at `start` must end at or before `total`; the start
    # index itself never goes past the last row.
    n_windows = min(total, total - n_steps + 1)
    starts = range(n_windows)
    windows = [sequences[start:start + n_steps, :-1] for start in starts]
    targets = [sequences[start + n_steps - 1, -1] for start in starts]
    return np.array(windows), np.array(targets)

In [None]:
import tensorflow as tf

from tensorflow.keras import layers
from tensorflow import keras

# source https://keras.io/examples/audio/transformer_asr/

class TransformerEncoder(layers.Layer):
    """One Transformer encoder block: self-attention followed by a feed-forward net.

    Each of the two sub-layers is followed by dropout, a residual connection
    and layer normalization (normalization applied after the residual sum).

    Args:
        embed_dim: Width of the block. It is passed as ``key_dim`` to the
            attention layer and is the output width of the feed-forward net,
            so the last axis of the inputs must equal ``embed_dim`` for the
            second residual sum to be valid.
        num_heads: Number of attention heads.
        feed_forward_dim: Hidden width of the feed-forward net.
        dropout_rate: Dropout rate applied after each sub-layer.
        activation: Activation of the feed-forward hidden layer.
        **kwargs: Forwarded to ``layers.Layer`` (e.g. ``name``, ``dtype``).
    """

    def __init__(self, embed_dim, num_heads, feed_forward_dim, dropout_rate=0.1, activation="selu", **kwargs):
        super().__init__(**kwargs)
        # Keep the constructor arguments so get_config() can reproduce the layer.
        self.embed_dim = embed_dim
        self.num_heads = num_heads
        self.feed_forward_dim = feed_forward_dim
        self.dropout_rate = dropout_rate
        self.activation = activation

        self.attn = layers.MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim)
        self.ffn = keras.Sequential(
            [
                layers.Dense(feed_forward_dim, activation=activation),
                layers.Dense(embed_dim),
            ]
        )
        self.layernorm1 = layers.LayerNormalization(epsilon=1e-6)
        self.layernorm2 = layers.LayerNormalization(epsilon=1e-6)
        self.dropout1 = layers.Dropout(dropout_rate)
        self.dropout2 = layers.Dropout(dropout_rate)

    def call(self, inputs, training=None):
        """Apply the block to ``inputs``.

        Args:
            inputs: Tensor used as both query and value of the attention layer.
            training: Forwarded to the dropout layers. Defaults to ``None`` so
                the layer can be called without an explicit flag, in which
                case Keras decides the mode.

        Returns:
            Tensor produced by the second layer normalization.
        """
        attn_output = self.attn(inputs, inputs)
        attn_output = self.dropout1(attn_output, training=training)
        out1 = self.layernorm1(inputs + attn_output)
        ffn_output = self.ffn(out1)
        ffn_output = self.dropout2(ffn_output, training=training)
        return self.layernorm2(out1 + ffn_output)

    def get_config(self):
        """Return the constructor arguments so the layer can be serialized and rebuilt."""
        config = super().get_config()
        config.update(
            {
                "embed_dim": self.embed_dim,
                "num_heads": self.num_heads,
                "feed_forward_dim": self.feed_forward_dim,
                "dropout_rate": self.dropout_rate,
                "activation": self.activation,
            }
        )
        return config


class Transformer(keras.Model):
    """Encoder-only Transformer classifier.

    Inputs of shape ``(batch, time_steps, num_hid)`` go through a stack of
    ``TransformerEncoder`` blocks, are pooled with ``GlobalAveragePooling1D``
    and are mapped to ``nb_classes`` softmax outputs.

    Args:
        num_hid: Size of the last input axis; also the ``embed_dim`` of every
            encoder block.
        time_steps: Length of the input sequence axis.
        nb_classes: Number of output units of the softmax layer.
        num_head: Attention heads per encoder block.
        num_feed_forward: Hidden width of each block's feed-forward net.
        num_layers_enc: Number of encoder blocks to stack.
        dropout_rate: Dropout rate used inside each encoder block.
        activation: Activation of each block's feed-forward hidden layer.
    """

    def __init__(
            self,
            num_hid=64,
            time_steps=1,
            nb_classes=7,
            num_head=2,
            num_feed_forward=128,
            num_layers_enc=4,
            dropout_rate=0.1,
            activation="relu"
    ):
        super().__init__()
        self.numlayers_enc = num_layers_enc
        self.enc_input = layers.Input((time_steps, num_hid))

        # The Sequential stack is: the input spec, then the encoder blocks in order.
        encoder_stack = [self.enc_input]
        for _ in range(num_layers_enc):
            encoder_stack.append(
                TransformerEncoder(
                    embed_dim=num_hid,
                    num_heads=num_head,
                    feed_forward_dim=num_feed_forward,
                    dropout_rate=dropout_rate,
                    activation=activation,
                )
            )
        self.encoder = keras.Sequential(encoder_stack)

        self.GlobalAveragePooling1D = layers.GlobalAveragePooling1D(data_format='channels_last')
        self.out = layers.Dense(units=nb_classes, activation='softmax')

    def call(self, inputs):
        """Return the softmax output for ``inputs`` (encoder -> pooling -> dense)."""
        encoded = self.encoder(inputs)
        pooled = self.GlobalAveragePooling1D(encoded)
        return self.out(pooled)

In [None]:
# Load the competition training set (comma-separated, the pandas default).
train_df = pd.read_csv('/kaggle/input/tabular-playground-series-dec-2021/train.csv')

In [None]:
# Features: everything except the row id, the two dropped soil columns and the
# target. Labels: Cover_Type shifted down by one so they start at 0.
# 80/20 shuffled split, reproducible through `seed`.
x_train, x_test, y_train, y_test = train_test_split(
    train_df.drop(columns=['Id', 'Soil_Type7', 'Soil_Type15', 'Cover_Type']),
    train_df['Cover_Type'] - 1,
    test_size=0.2,
    random_state=seed,
    shuffle=True,
)

In [None]:
# https://www.kaggle.com/c/tabular-playground-series-dec-2021/discussion/293612

def r(x):
    """Shift ``x`` by 180: return ``x - 180`` if ``x + 180`` exceeds 360, otherwise ``x + 180``."""
    shifted = x + 180
    return x - 180 if shifted > 360 else shifted

def fe(df):
    """Add engineered features to ``df`` in place and return the same frame.

    Existing columns ``Aspect`` and ``Hillshade_*`` are cleaned (wrapped /
    clipped); all other changes append new columns. The order in which columns
    are added is kept stable so train and test frames line up.

    Args:
        df: DataFrame holding at least ``Elevation``, ``Aspect``,
            ``Horizontal_Distance_To_Hydrology``,
            ``Vertical_Distance_To_Hydrology``,
            ``Horizontal_Distance_To_Roadways``,
            ``Horizontal_Distance_To_Fire_Points`` and the three
            ``Hillshade_*`` columns.

    Returns:
        The same ``df`` object, mutated.
    """
    df['EHiElv'] = df['Horizontal_Distance_To_Roadways'] * df['Elevation']
    df['EViElv'] = df['Vertical_Distance_To_Hydrology'] * df['Elevation']

    # Aspect shifted by 180, computed from the raw (not yet wrapped) Aspect:
    # x - 180 where x + 180 > 360, otherwise x + 180. Same rule as mapping the
    # helper `r` element-wise, but vectorized.
    shifted = df['Aspect'] + 180
    df['Aspect2'] = shifted.where(shifted <= 360, df['Aspect'] - 180)

    ### source: https://www.kaggle.com/c/tabular-playground-series-dec-2021/discussion/293373
    # Wrap Aspect with a single +/-360 correction. `.loc` writes to `df`
    # itself; chained indexing (`df["Aspect"][mask] += ...`) writes to a
    # temporary and has no effect under pandas Copy-on-Write.
    df.loc[df['Aspect'] < 0, 'Aspect'] += 360
    df.loc[df['Aspect'] > 359, 'Aspect'] -= 360

    # Clamp the hillshade columns to [0, 255].
    for col in ('Hillshade_9am', 'Hillshade_Noon', 'Hillshade_3pm'):
        df[col] = df[col].clip(lower=0, upper=255)
    ########

    df['Highwater'] = (df.Vertical_Distance_To_Hydrology < 0).astype(int)
    df['EVDtH'] = df.Elevation - df.Vertical_Distance_To_Hydrology
    df['EHDtH'] = df.Elevation - df.Horizontal_Distance_To_Hydrology * 0.2
    # The column names below keep the original "Hydrolody" spelling on purpose:
    # they are runtime keys and must match between train and test frames.
    df['Euclidean_Distance_to_Hydrolody'] = (df['Horizontal_Distance_To_Hydrology']**2 + df['Vertical_Distance_To_Hydrology']**2)**0.5
    df['Manhattan_Distance_to_Hydrolody'] = df['Horizontal_Distance_To_Hydrology'] + df['Vertical_Distance_To_Hydrology']
    df['Hydro_Fire_1'] = df['Horizontal_Distance_To_Hydrology'] + df['Horizontal_Distance_To_Fire_Points']
    df['Hydro_Fire_2'] = abs(df['Horizontal_Distance_To_Hydrology'] - df['Horizontal_Distance_To_Fire_Points'])
    df['Hydro_Road_1'] = abs(df['Horizontal_Distance_To_Hydrology'] + df['Horizontal_Distance_To_Roadways'])
    df['Hydro_Road_2'] = abs(df['Horizontal_Distance_To_Hydrology'] - df['Horizontal_Distance_To_Roadways'])
    df['Fire_Road_1'] = abs(df['Horizontal_Distance_To_Fire_Points'] + df['Horizontal_Distance_To_Roadways'])
    df['Fire_Road_2'] = abs(df['Horizontal_Distance_To_Fire_Points'] - df['Horizontal_Distance_To_Roadways'])
    # Evaluated after clamping, so originally negative values count as zero too.
    df['Hillshade_3pm_is_zero'] = (df.Hillshade_3pm == 0).astype(int)
    return df

In [None]:
# Engineered features for both splits.
x_train = fe(x_train)
x_test = fe(x_test)

# Summed features pointed out by @craigmthomas (https://www.kaggle.com/c/tabular-playground-series-dec-2021/discussion/292823)
# Column lists are taken from the training frame before any count column is
# added, and are reused later for the competition test set.
soil_features = [col for col in x_train.columns if col.startswith("Soil_Type")]
wilderness_features = [col for col in x_train.columns if col.startswith("Wilderness_Area")]

# Per row: number of active soil / wilderness flags, then the standard
# deviation across all columns present at that point (count columns included).
for frame in (x_train, x_test):
    frame["soil_type_count"] = frame[soil_features].sum(axis=1)
    frame["wilderness_area_count"] = frame[wilderness_features].sum(axis=1)
    frame['std'] = np.std(frame, axis=1)
# Drop the loop alias so it does not keep a DataFrame alive later on.
del frame

In [None]:
sc = StandardScaler()

# Fit the scaling statistics on the training split only, then reuse them.
x_train = sc.fit_transform(x_train)
x_test = sc.transform(x_test)
# Insert a length-1 sequence axis: (samples, features) -> (samples, 1, features).
x_train = x_train[:, np.newaxis, :]
x_test = x_test[:, np.newaxis, :]
# The labels are Cover_Type - 1, so the softmax needs one unit per id up to
# the largest Cover_Type. nunique() would be too small if any id were absent,
# giving labels outside the output range.
nb_classes = int(train_df['Cover_Type'].max())
time_steps = 1
num_features = x_train.shape[-1]

In [None]:
# Encoder hyper-parameters.
num_heads = 2
num_layers_enc = 1
num_feed_forward = 64

# Fix TensorFlow's global seed before any layer is created so TensorFlow-side
# randomness starts from the same state on every run.
tf.random.set_seed(seed)

model = Transformer(num_hid=num_features,
                        time_steps=time_steps,
                        nb_classes=nb_classes,
                        num_head=num_heads,
                        num_layers_enc=num_layers_enc,
                        num_feed_forward=num_feed_forward)

opt = tf.keras.optimizers.Adam()
# Sparse loss: targets are integer class ids, not one-hot vectors.
loss = tf.keras.losses.SparseCategoricalCrossentropy()
model.compile(optimizer=opt, loss=loss, metrics=['accuracy'])
model.fit(x_train, y_train, epochs=400, batch_size=1024, verbose=1)
print()
# evaluate() returns [loss, accuracy] for the metrics compiled above; keep the accuracy.
results = model.evaluate(x_test, y_test)[1]
print(results)

# Submission

In [None]:
# Drop the training-phase objects before the competition test set is loaded,
# then ask the garbage collector to reclaim them right away.
del train_df
del x_train, y_train
del x_test, y_test
gc.collect()

In [None]:
# Competition test set: same column drops, engineered features, row
# aggregates and scaling (with the already-fitted scaler) as the training data.
test_df = pd.read_csv('/kaggle/input/tabular-playground-series-dec-2021/test.csv')
x_test = fe(test_df.drop(columns=['Id', 'Soil_Type7', 'Soil_Type15']))
x_test["soil_type_count"] = x_test[soil_features].sum(axis=1)
x_test["wilderness_area_count"] = x_test[wilderness_features].sum(axis=1)
x_test['std'] = np.std(x_test, axis=1)
# Scale, then add the length-1 sequence axis the model expects.
x_test = np.expand_dims(sc.transform(x_test), axis=1)

In [None]:
# Per-class scores for every test row.
target = model.predict(x_test)
# Highest-scoring class index, shifted back by +1 to undo the label shift
# applied before training.
class_preds = target.argmax(axis=-1) + 1
ids = test_df['Id'].to_numpy()
submission = pd.DataFrame({'Id': ids, 'Cover_Type': class_preds})

In [None]:
# Preview the first five rows of the submission.
submission.head(n=5)

In [None]:
# Write the submission to the working directory without the index column.
submission.to_csv(path_or_buf='submission.csv', index=False)