In [None]:
import os,gc,pickle
import numpy as np
from numba import njit 
import pandas as pd 
import matplotlib.pyplot as plt 

from sklearn.model_selection import GroupKFold
from sklearn.metrics import roc_auc_score,roc_curve 
from sklearn.preprocessing import StandardScaler

from tqdm.notebook import tqdm 

import tensorflow as tf 
import tensorflow_addons as tfa
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import Callback,ReduceLROnPlateau,ModelCheckpoint,EarlyStopping

# Configuration

In [None]:
# Experiment identifiers and global settings.
VERSION = 1
NFOLD = 5
SEED = 42

# Model input columns: feature_0 ... feature_129.
FEATURES = [f"feature_{i}" for i in range(130)]
# Binary targets: action1..action4 followed by the main "action" column.
TARGET = [*(f"action{i}" for i in range(1, 5)), "action"]

# Proportion handed to NeutralizeTransform during preprocessing.
PROP = 0.25
# group_gap handed to PurgedGroupTimeSeriesSplit during cross-validation.
GAP = 20

# Utils

In [None]:
def set_seed(seed=200):
    """Seed the random number generators used by this notebook.

    Seeds Python's ``random`` module, NumPy's legacy global generator and
    TensorFlow's global generator, and exports ``PYTHONHASHSEED``.

    Parameters
    ----------
    seed : int, default=200
        Seed value applied to every generator.
    """
    # Function-scope import keeps this cell runnable on its own.
    import random

    # NOTE: PYTHONHASHSEED is read at interpreter start-up, so setting it here
    # can only influence subprocesses, not the running kernel.
    os.environ["PYTHONHASHSEED"] = str(seed)
    random.seed(seed)
    np.random.seed(seed)
    tf.random.set_seed(seed)
    
@njit(fastmath = True)
def utility_score_numba(date, weight, resp, action):
    """Compute the Jane Street utility score for a set of trades.

    Parameters
    ----------
    date : 1-D integer array
        Non-negative day index of every row (used as ``np.bincount`` bins).
    weight, resp : 1-D float arrays
        Per-row weight and realised return.
    action : 1-D array of 0/1
        Decision taken for every row.

    Returns
    -------
    float
        ``min(max(t, 0), 6) * sum(p)`` where ``p`` is the daily P&L and
        ``t = sum(p) / sqrt(sum(p**2)) * sqrt(250 / n_days)``.
        Returns 0.0 when every daily P&L is zero.
    """
    # Daily P&L; bins for dates that never occur are simply 0.
    Pi = np.bincount(date, weight * resp * action)
    sum_sq = np.sum(Pi ** 2)
    # No trade taken (or all P&L exactly zero): avoid 0/0 and score it as 0.
    if sum_sq == 0.0:
        return 0.0
    # The annualisation term uses the number of *distinct* dates present.
    # len(Pi) would be max(date) + 1, which overcounts whenever the data does
    # not start at date 0 (e.g. a validation fold).
    n_dates = len(np.unique(date))
    t = np.sum(Pi) / np.sqrt(sum_sq) * np.sqrt(250 / n_dates)
    u = min(max(t, 0.0), 6.0) * np.sum(Pi)
    return u

# CV

CV Strategy
- 5-fold PurgedGroupTimeSeriesSplit with a group gap of 20 (`GAP = 20`)
- Remove the first 85 days (only rows with `date > 85` are kept)
- Remove rows with `weight == 0`

In [None]:
def weighted_average(a):
    """Weighted mean of per-fold scores, weighting later folds more heavily.

    This is the weighting scheme used together with the purged time-series
    split: fold ``j`` (1-based) of ``n`` gets weight ``1 / 2**(n + 1 - j)``,
    except that the first fold shares the weight of the second one.

    Parameters
    ----------
    a : sequence of float
        One score per fold, in fold order.

    Returns
    -------
    float
        ``np.average(a, weights=...)`` with the weights described above.
    """
    n_folds = len(a)
    # max(position, 2) makes fold 1 reuse fold 2's exponent.
    fold_weights = [
        1 / (2 ** (n_folds + 1 - max(position, 2)))
        for position in range(1, n_folds + 1)
    ]
    return np.average(a, weights=fold_weights)
    
from sklearn.model_selection._split import _BaseKFold, indexable, _num_samples
from sklearn.utils.validation import _deprecate_positional_args

# https://github.com/getgaurav2/scikit-learn/blob/d4a3af5cc9da3a76f0266932644b884c99724c57/sklearn/model_selection/_split.py#L2243
class GroupTimeSeriesSplit(_BaseKFold):
    """Time Series cross-validator variant with non-overlapping groups.

    Provides train/test indices to split time series data samples
    that are observed at fixed time intervals according to a
    third-party provided group.
    In each split, test indices must be higher than before, and thus shuffling
    in cross validator is inappropriate.
    In the kth split, it returns the first k folds as train set and the
    (k+1)th fold as test set.
    The same group will not appear in two different folds (the number of
    distinct groups has to be at least equal to the number of folds).
    Note that unlike standard cross-validation methods, successive
    training sets are supersets of those that come before them.

    Parameters
    ----------
    n_splits : int, default=5
        Number of splits. Must be at least 2.
    max_train_size : int, default=None
        Maximum size (in samples) for a single training set; when set, only
        the last ``max_train_size`` training indices are kept.

    Examples
    --------
    >>> import numpy as np
    >>> groups = np.array(['a'] * 6 + ['b'] * 5 + ['c'] * 4 + ['d'] * 3)
    >>> gtss = GroupTimeSeriesSplit(n_splits=3)
    >>> for train_idx, test_idx in gtss.split(groups, groups=groups):
    ...     print("TRAIN:", train_idx, "TEST:", test_idx)
    TRAIN: [0, 1, 2, 3, 4, 5] TEST: [6, 7, 8, 9, 10]
    TRAIN: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] TEST: [11, 12, 13, 14]
    TRAIN: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14] TEST: [15, 16, 17]
    """
    @_deprecate_positional_args
    def __init__(self,
                 n_splits=5,
                 *,
                 max_train_size=None
                 ):
        super().__init__(n_splits, shuffle=False, random_state=None)
        self.max_train_size = max_train_size

    @staticmethod
    def _indices_for(group_dict, group_labels):
        """Return the sorted sample indices that belong to ``group_labels``."""
        if len(group_labels) == 0:
            return np.array([], dtype=int)
        # One concatenate + unique instead of re-sorting after every group.
        return np.unique(
            np.concatenate([group_dict[label] for label in group_labels]))

    def split(self, X, y=None, groups=None):
        """Generate indices to split data into training and test set.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training data, where n_samples is the number of samples
            and n_features is the number of features.
        y : array-like of shape (n_samples,)
            Always ignored, exists for compatibility.
        groups : array-like of shape (n_samples,)
            Group labels for the samples used while splitting the dataset into
            train/test set. Groups are ordered by first appearance.

        Yields
        ------
        train : list of int
            The training set indices for that split.
        test : list of int
            The testing set indices for that split.

        Raises
        ------
        ValueError
            If ``groups`` is None or there are fewer groups than folds.
        """
        if groups is None:
            raise ValueError(
                "The 'groups' parameter should not be None")
        X, y, groups = indexable(X, y, groups)
        # Positional access below must not be confused with label-based
        # indexing (e.g. a pandas Series with a non-default index).
        groups = np.asarray(groups)
        n_samples = _num_samples(X)
        n_splits = self.n_splits
        n_folds = n_splits + 1
        # Unique group labels in order of first appearance.
        u, ind = np.unique(groups, return_index=True)
        unique_groups = u[np.argsort(ind)]
        n_groups = _num_samples(unique_groups)
        # Map every group label to the positions of its samples.
        group_dict = {}
        for idx in range(n_samples):
            group_dict.setdefault(groups[idx], []).append(idx)
        if n_folds > n_groups:
            raise ValueError(
                ("Cannot have number of folds={0} greater than"
                 " the number of groups={1}").format(n_folds,
                                                     n_groups))
        group_test_size = n_groups // n_folds
        group_test_starts = range(n_groups - n_splits * group_test_size,
                                  n_groups, group_test_size)
        for group_test_start in group_test_starts:
            train_array = self._indices_for(
                group_dict, unique_groups[:group_test_start])
            train_end = train_array.size
            if self.max_train_size and self.max_train_size < train_end:
                # Keep only the most recent max_train_size samples.
                train_array = train_array[train_end -
                                          self.max_train_size:train_end]
            test_array = self._indices_for(
                group_dict,
                unique_groups[group_test_start:
                              group_test_start + group_test_size])
            yield train_array.tolist(), test_array.tolist()

import numpy as np
from sklearn.model_selection import KFold
from sklearn.model_selection._split import _BaseKFold, indexable, _num_samples
from sklearn.utils.validation import _deprecate_positional_args

# modified code for group gaps; source
# https://github.com/getgaurav2/scikit-learn/blob/d4a3af5cc9da3a76f0266932644b884c99724c57/sklearn/model_selection/_split.py#L2243
class PurgedGroupTimeSeriesSplit(_BaseKFold):
    """Time Series cross-validator variant with non-overlapping groups.

    Allows for a gap in groups to avoid potentially leaking info from
    train into test if the model has windowed or lag features.
    Provides train/test indices to split time series data samples
    that are observed at fixed time intervals according to a
    third-party provided group.
    In each split, test indices must be higher than before, and thus shuffling
    in cross validator is inappropriate.
    In the kth split, it returns the first k folds as train set and the
    (k+1)th fold as test set.
    The same group will not appear in two different folds (the number of
    distinct groups has to be at least equal to the number of folds).

    Parameters
    ----------
    n_splits : int, default=5
        Number of splits. Must be at least 2.
    max_train_group_size : int, default=Inf
        Maximum number of groups in a single training set.
    max_test_group_size : int, default=Inf
        Maximum number of groups in a single test set.
    group_gap : int, default=None
        Number of groups discarded from the end of each train split, i.e. the
        gap between train and test. ``None`` is treated as 0.
    verbose : bool or int, default=False
        Stored on the instance; it currently has no effect on ``split``.
    """

    @_deprecate_positional_args
    def __init__(self,
                 n_splits=5,
                 *,
                 max_train_group_size=np.inf,
                 max_test_group_size=np.inf,
                 group_gap=None,
                 verbose=False
                 ):
        super().__init__(n_splits, shuffle=False, random_state=None)
        self.max_train_group_size = max_train_group_size
        self.group_gap = group_gap
        self.max_test_group_size = max_test_group_size
        self.verbose = verbose

    @staticmethod
    def _indices_for(group_dict, group_labels):
        """Return the sorted sample indices that belong to ``group_labels``."""
        if len(group_labels) == 0:
            return np.array([], dtype=int)
        # One concatenate + unique instead of re-sorting after every group.
        return np.unique(
            np.concatenate([group_dict[label] for label in group_labels]))

    def split(self, X, y=None, groups=None):
        """Generate indices to split data into training and test set.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training data, where n_samples is the number of samples
            and n_features is the number of features.
        y : array-like of shape (n_samples,)
            Always ignored, exists for compatibility.
        groups : array-like of shape (n_samples,)
            Group labels for the samples used while splitting the dataset into
            train/test set. Groups are ordered by first appearance.

        Yields
        ------
        train : list of int
            The training set indices for that split.
        test : list of int
            The testing set indices for that split.

        Raises
        ------
        ValueError
            If ``groups`` is None, there are fewer groups than folds, the
            test size or gap is invalid, or the gap leaves no training group
            before the first test fold.
        """
        if groups is None:
            raise ValueError(
                "The 'groups' parameter should not be None")
        X, y, groups = indexable(X, y, groups)
        # Positional access below must not be confused with label-based
        # indexing (e.g. a pandas Series with a non-default index).
        groups = np.asarray(groups)
        n_samples = _num_samples(X)
        n_splits = self.n_splits
        # The constructor default is None; treat it as "no gap".
        group_gap = 0 if self.group_gap is None else int(self.group_gap)
        if group_gap < 0:
            raise ValueError(
                "group_gap must be non-negative, got {0}".format(group_gap))
        max_test_group_size = self.max_test_group_size
        max_train_group_size = self.max_train_group_size
        n_folds = n_splits + 1
        # Unique group labels in order of first appearance.
        u, ind = np.unique(groups, return_index=True)
        unique_groups = u[np.argsort(ind)]
        n_groups = _num_samples(unique_groups)
        # Map every group label to the positions of its samples.
        group_dict = {}
        for idx in range(n_samples):
            group_dict.setdefault(groups[idx], []).append(idx)
        if n_folds > n_groups:
            raise ValueError(
                ("Cannot have number of folds={0} greater than"
                 " the number of groups={1}").format(n_folds,
                                                     n_groups))

        group_test_size = n_groups // n_folds
        if max_test_group_size is not None and not np.isinf(max_test_group_size):
            group_test_size = min(group_test_size, int(max_test_group_size))
        if group_test_size < 1:
            raise ValueError(
                "max_test_group_size must allow at least one group per "
                "test fold")
        group_test_starts = range(n_groups - n_splits * group_test_size,
                                  n_groups, group_test_size)
        # A gap that reaches back past the first group would make the slice
        # end <= 0: an end of 0 gives an empty train set, and a negative end
        # wraps around and pulls *future* groups into the train set.
        if group_test_starts[0] - group_gap <= 0:
            raise ValueError(
                ("group_gap={0} leaves no training groups before the first "
                 "test fold (which starts at group {1})").format(
                     group_gap, group_test_starts[0]))
        unlimited_train = (max_train_group_size is None
                           or np.isinf(max_train_group_size))
        for group_test_start in group_test_starts:
            train_stop = group_test_start - group_gap
            if unlimited_train:
                train_start = 0
            else:
                train_start = max(0, train_stop - int(max_train_group_size))
            train_array = self._indices_for(
                group_dict, unique_groups[train_start:train_stop])
            test_array = self._indices_for(
                group_dict,
                unique_groups[group_test_start:
                              group_test_start + group_test_size])

            # NOTE(review): this drops the first `group_gap` *samples* (not
            # groups) of the test fold. It is kept exactly as in the original
            # implementation so fold contents do not change - confirm that
            # this is intended.
            test_array = test_array[group_gap:]

            yield train_array.tolist(), test_array.tolist()

# Preprocess

- Fill NaN with 0
- I used feature neutralization (proportion = 0.25)
- feature_0 was not neutralized (I think it is a categorical feature and should not be neutralized)
- Then, an autoencoder is used to create features

In [None]:
class NeutralizeTransform:
    """Partially remove the linear exposure of features to a target.

    For every feature column a least-squares model ``feature ~ [y, bias]`` is
    fitted, where the bias column is filled with the feature's mean. At
    transform time the model's prediction at the *mean* target value, scaled
    by ``proportion``, is subtracted from the feature.

    Parameters
    ----------
    proportion : float, default=1.0
        Fraction of the fitted correction that is subtracted.
    """

    def __init__(self,proportion=1.0):
        self.proportion = proportion

    def fit(self,X,y):
        """Fit one least-squares model per column of ``X``.

        Parameters
        ----------
        X : ndarray of shape (n_samples, n_features)
        y : ndarray of shape (n_samples, n_targets)
            Exposures to neutralise against; must be 2-D.

        Returns
        -------
        self
        """
        self.lms = []
        self.mean_exposure = np.mean(y,axis=0)
        self.y_shape = y.shape[-1]
        for x in X.T:
            scores = x.reshape((-1,1))
            # Bias column holds the feature mean (not 1), as in the original.
            bias = np.full((len(y),1),np.mean(scores))
            exposures = np.hstack((y,bias))
            transform = np.linalg.lstsq(exposures, scores, rcond=None)[0]
            self.lms.append(transform)
        return self

    def transform(self,X,y=None):
        """Subtract the scaled correction from every column of ``X``.

        Parameters
        ----------
        X : ndarray of shape (n_samples, n_features)
            Must have the same number of columns as the array passed to ``fit``.
        y : ignored
            Present for API symmetry; the mean exposure stored by ``fit`` is
            used instead.

        Returns
        -------
        ndarray of shape (n_samples, n_features)
        """
        out = []
        # One row of mean exposures, shape (1, n_targets).
        mean_row = np.asarray(self.mean_exposure).reshape((1,self.y_shape))
        for i,transform in enumerate(self.lms):
            x = X[:,i]
            scores = x.reshape((-1,1))
            # Every sample gets the same row of mean exposures. np.tile keeps
            # the target columns aligned; repeat + reshape would interleave
            # them whenever there is more than one target.
            exposures = np.tile(mean_row,(len(x),1))
            # The bias column uses the mean of the data being transformed.
            bias = np.full((len(x),1),np.mean(scores))
            exposures = np.concatenate([exposures,bias],axis=1)
            correction = self.proportion * exposures.dot(transform)
            out.append(x - correction.ravel())
        return np.asarray(out).T

    def fit_transform(self,X,y):
        """Fit on ``(X, y)`` and return the transformed ``X``."""
        self.fit(X,y)
        return self.transform(X,y)

In [None]:
def preprocess(X_tr,X_val):
    """Turn train/validation frames into neutralised feature matrices.

    NaNs are replaced via ``np.nan_to_num`` and every feature column except
    the first is passed through a ``NeutralizeTransform`` that is fitted on
    the training rows (against ``resp``) and then applied to both sets.

    Parameters
    ----------
    X_tr : DataFrame
        Training rows; must contain the ``FEATURES`` columns and ``resp``.
    X_val : DataFrame
        Validation rows; must contain the ``FEATURES`` columns.

    Returns
    -------
    tuple of ndarray
        ``(train_matrix, validation_matrix)`` with one column per feature.
    """
    resp_column = X_tr["resp"].values.reshape(-1,1)
    train_matrix = np.nan_to_num(X_tr[FEATURES].values)
    valid_matrix = np.nan_to_num(X_val[FEATURES].values)
    gc.collect()
    # Column 0 is deliberately left untouched.
    for col in range(1,len(FEATURES)):
        neutralizer = NeutralizeTransform(proportion=PROP)
        train_column = train_matrix[:,col].reshape(-1,1)
        train_matrix[:,col] = neutralizer.fit_transform(train_column,resp_column).ravel()
        valid_column = valid_matrix[:,col].reshape(-1,1)
        valid_matrix[:,col] = neutralizer.transform(valid_column).ravel()
    return train_matrix,valid_matrix

In [None]:
def create_autoencoder(input_dim,output_dim,hidden_units,dropout_rate,learning_rate,noise_ratio = 0.05):
    """Build the supervised denoising autoencoder and its feature extractor.

    Parameters
    ----------
    input_dim : int
        Number of input features; also the width of the ``decoded`` layer.
    output_dim : int
        Number of sigmoid outputs of the ``label_output`` head.
    hidden_units : sequence of int
        ``[bottleneck_units, classifier_units]``.
    dropout_rate : sequence of float
        ``[rate_before_decoder, rate_before_label_output]``.
    learning_rate : float
        Learning rate of the Adam optimizer.
    noise_ratio : float, default=0.05
        Standard deviation passed to the ``GaussianNoise`` layer.

    Returns
    -------
    autoencoder : Model
        Compiled model with outputs ``[decoded, label_output]`` (MSE and
        binary cross-entropy losses respectively).
    encoder : Model
        Uncompiled model sharing the same layers whose output is the
        ``decoded`` tensor.
    """
    layers = tf.keras.layers

    inputs = layers.Input(input_dim)

    # Normalise, corrupt with noise, compress, then reconstruct the input.
    hidden = layers.BatchNormalization()(inputs)
    hidden = layers.GaussianNoise(noise_ratio)(hidden)
    hidden = layers.Dense(hidden_units[0],activation = "relu")(hidden)
    reconstruction = layers.Dropout(dropout_rate[0])(hidden)
    reconstruction = layers.Dense(input_dim,name = "decoded")(reconstruction)

    # Classification head stacked on top of the reconstruction.
    head = layers.Dense(hidden_units[1],activation="relu")(reconstruction)
    head = layers.BatchNormalization()(head)
    head = layers.Dropout(dropout_rate[1])(head)
    labels = layers.Dense(output_dim,activation = "sigmoid",name = "label_output")(head)

    encoder = Model(inputs = inputs,outputs = reconstruction)
    autoencoder = Model(inputs = inputs,outputs = [reconstruction,labels])

    losses = {"decoded" : "mse","label_output":"binary_crossentropy"}
    autoencoder.compile(optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate),
                        loss = losses)
    return autoencoder,encoder

# Model

- I used a DenseNet-style network. [This notebook was helpful](https://www.kaggle.com/snippsy/jane-street-densenet-neutralizing-features).

In [None]:
def mish(x):
    """Apply the Mish activation ``x * tanh(softplus(x))`` via a Lambda layer."""
    backend = tf.keras.backend

    def _mish_op(tensor):
        return tensor * backend.tanh(backend.softplus(tensor))

    return tf.keras.layers.Lambda(_mish_op)(x)

# Register the activation so layers can refer to it as activation='mish'.
tf.keras.utils.get_custom_objects()['mish'] = tf.keras.layers.Activation(mish)


def create_densenet(input_dim,output_dim,n_layers,hidden_units,dropout_rate,learning_rate,label_smoothing,encoder):
    """Build and compile the densely connected classifier.

    The raw input is concatenated with ``encoder(input)`` and batch
    normalised. Each of the ``n_layers`` blocks (Dense with 'mish' ->
    BatchNormalization -> Dropout) receives the concatenation of that stem
    and all previous block outputs; the sigmoid output layer sees all of them.

    Parameters
    ----------
    input_dim : int
        Number of input features.
    output_dim : int
        Number of sigmoid outputs.
    n_layers : int
        Number of dense blocks.
    hidden_units, dropout_rate : sequences
        Per-block width and dropout rate; entries ``0 .. n_layers - 1`` are read.
    learning_rate : float
        Learning rate of the RectifiedAdam optimizer.
    label_smoothing : float
        Label smoothing of the binary cross-entropy loss.
    encoder : Model
        Model applied to the input to produce extra features.

    Returns
    -------
    Model
        Compiled Keras model reporting an ``auc`` metric.
    """
    layers = tf.keras.layers

    inp = layers.Input(input_dim)
    encoded = encoder(inp)
    stem = layers.Concatenate()([inp,encoded])
    stem = layers.BatchNormalization()(stem)

    collected = [stem]
    for depth in range(n_layers):
        # The first block has a single input, so no concatenation is needed.
        if len(collected) == 1:
            block = collected[0]
        else:
            block = layers.Concatenate(axis=-1)(collected)
        block = layers.Dense(hidden_units[depth],activation='mish')(block)
        block = layers.BatchNormalization()(block)
        block = layers.Dropout(dropout_rate[depth])(block)
        collected.append(block)

    output = layers.Dense(output_dim,activation='sigmoid')(layers.Concatenate()(collected))
    model = tf.keras.models.Model(inp,output)
    optimizer = tfa.optimizers.RectifiedAdam(learning_rate = learning_rate)
    bce = tf.keras.losses.BinaryCrossentropy(label_smoothing=label_smoothing)
    model.compile(optimizer, loss=bce,
                    metrics=[tf.keras.metrics.AUC(name='auc')])
    return model

# Training

In [None]:
# Load the data, drop the early period, downcast float64 -> float32, then
# drop zero-weight rows (same order of operations as before).
train = (
    pd.read_csv("../input/jane-street-market-prediction/train.csv")
    .query("date > 85")
    .reset_index(drop = True)
    .pipe(lambda frame: frame.astype(
        dict.fromkeys(frame.select_dtypes(include = "float64").columns, np.float32)))
    .query("weight > 0")
    .reset_index(drop = True)
)

# Binary targets: 1 when the corresponding response is positive.
train["action"] = train["resp"].gt(0).astype("int")
for horizon in range(1,5):
    train[f"action{horizon}"] = train[f"resp_{horizon}"].gt(0).astype('int')

## Training Encoder 
- The encoder is trained with all of the data
- After the competition, I noticed that this approach caused leakage...

In [None]:
def encode_feature(params):
    """Train the supervised autoencoder and return its encoder model.

    Rows with ``date <= 433`` are used for training with early stopping on
    the remaining rows; the model is then fine-tuned for
    ``params["tune_epochs"]`` epochs on those remaining rows with a 100x
    smaller learning rate, and the encoder weights are written to
    ``weights/encoder_weight_{version}.hdf5``.

    Parameters
    ----------
    params : dict
        Must provide ``hidden_units0``, ``hidden_units1``, ``dropout_rate0``,
        ``dropout_rate1``, ``learning_rate``, ``seed``, ``noise_ratio``,
        ``epochs``, ``tune_epochs``, ``batch_size`` and ``version``.

    Returns
    -------
    Model
        The encoder returned by ``create_autoencoder``.
    """
    input_dim = len(FEATURES)
    output_dim = len(TARGET)
    hidden_units = [params[f"hidden_units{i}"] for i in range(2)]
    dropout_rate = [params[f"dropout_rate{i}"] for i in range(2)]
    learning_rate = params["learning_rate"]
    set_seed(params["seed"])

    # Create the output directory up front so a missing folder cannot make
    # the final save fail after the whole training run.
    path = f"weights/encoder_weight_{params['version']}.hdf5"
    os.makedirs(os.path.dirname(path), exist_ok=True)

    # Time-based hold-out: the last dates are used for validation.
    split_date = 433
    in_train = (train.date <= split_date).values
    X_tr = train.loc[in_train,FEATURES + ["resp"]]
    X_val = train.loc[~in_train,FEATURES + ["resp"]]

    X_tr,X_val = preprocess(X_tr,X_val)
    y_tr = train.loc[in_train,TARGET].values
    y_val = train.loc[~in_train,TARGET].values
    gc.collect()

    losses = {"decoded" : "mse","label_output":"binary_crossentropy"}
    autoencoder,encoder = create_autoencoder(input_dim,output_dim,hidden_units,dropout_rate,learning_rate,noise_ratio=params["noise_ratio"])
    es = EarlyStopping("val_loss",patience=10,restore_best_weights=True)
    autoencoder.fit(X_tr,(X_tr,y_tr),
                    validation_data = (X_val,(X_val,y_val)),
                    epochs = params["epochs"],
                    batch_size = params["batch_size"],
                    callbacks = [es])

    # Fine-tune on the hold-out rows with a much smaller learning rate.
    # Recompiling also resets the optimizer state.
    autoencoder.compile(tf.keras.optimizers.Adam(learning_rate/100),loss = losses)
    autoencoder.fit(X_val,(X_val,y_val),
                    epochs = params["tune_epochs"],
                    batch_size = params["batch_size"])
    encoder.save_weights(path)
    return encoder

In [None]:
# Hyper-parameters consumed by encode_feature().
param_encode = dict(
    seed = SEED,
    noise_ratio = 0.05,
    epochs = 200,
    batch_size = 4096,
    hidden_units0 = 64,
    hidden_units1 = 32,
    dropout_rate0 = 0.2,
    dropout_rate1 = 0.2,
    learning_rate = 0.0001,
    tune_epochs = 3,
    version = VERSION,
)
#encoder = encode_feature(param_encode)

## Training CV

In [None]:
def cv_tuner(params,encoder):
    """Run purged, grouped time-series cross-validation of the DenseNet.

    For every fold the features are preprocessed, a model is trained with
    early stopping on validation AUC, out-of-fold predictions are stored and
    scored, and the model is then fine-tuned on the validation rows and saved
    to ``weights/nn{version}_{fold}tune.hdf5``. Finally the weighted average
    AUC and the utility score over all validated rows are printed.

    Parameters
    ----------
    params : dict
        Must provide ``n_layers``, ``hidden_units{i}``, ``dropout_rate{i}``,
        ``learning_rate``, ``label_smoothing``, ``seed``, ``nfold``,
        ``epochs``, ``tune_epochs``, ``batch_size``, ``th`` and ``version``.
    encoder : Model
        Encoder passed on to ``create_densenet``.
        NOTE(review): the same encoder object is reused by every fold, so any
        weight updates it receives in one fold carry over to the next -
        confirm whether that is intended.
    """
    print(f"params : {params}")
    input_dim = len(FEATURES)
    output_dim = len(TARGET)
    n_layers = params["n_layers"]
    hidden_units = [params[f"hidden_units{i}"] for i in range(n_layers)]
    dropout_rate = [params[f"dropout_rate{i}"] for i in range(n_layers + 1)]
    learning_rate = params["learning_rate"]
    label_smoothing = params["label_smoothing"]
    set_seed(params["seed"])

    # Checkpoints are written below this folder; make sure it exists.
    os.makedirs("weights", exist_ok=True)

    gkf = PurgedGroupTimeSeriesSplit(n_splits = params["nfold"],group_gap = GAP)
    oof = np.zeros(shape = (len(train),output_dim))
    # Rows that belong to at least one validation fold.
    validated = np.zeros(len(train),dtype = bool)
    scores = []
    util_scores = []

    for fold,(tr,val) in enumerate(gkf.split(train.date.values,groups = train.date.values)):
        print("-"*50)
        print(f"FOLD : {fold}")
        X_tr,X_val = train.loc[tr,FEATURES + ["resp"]],train.loc[val,FEATURES + ["resp"]]
        y_tr,y_val = train.loc[tr,TARGET].values,train.loc[val,TARGET].values

        X_tr,X_val = preprocess(X_tr,X_val)
        gc.collect()

        ## Training
        model = create_densenet(input_dim,output_dim,n_layers,hidden_units,dropout_rate,learning_rate,label_smoothing,encoder)

        ckp_path = f"weights/nn{params['version']}_{fold}"
        # NOTE(review): with patience=10 here and patience=5 on early
        # stopping, the learning-rate reduction can never trigger.
        rlr = ReduceLROnPlateau(monitor = "val_auc",factor = 0.1,patience = 10,verbose = 0,
                            min_delta = 1e-5,mode = "max")
        es = EarlyStopping(monitor = "val_auc",min_delta = 1e-5,patience = 5,mode = 'max',
                    baseline = None,restore_best_weights = True,verbose = 0)
        ckp = ModelCheckpoint(ckp_path+".hdf5",monitor = "val_auc",verbose = 0,
                        save_best_only = True,save_weights_only = True,mode = 'max')

        model.fit(X_tr,y_tr,
                validation_data = (X_val,y_val),
                epochs = params["epochs"],
                batch_size = params["batch_size"],
                callbacks = [rlr,es,ckp],
                verbose = 1
                )

        ## Validation
        val_pred = model.predict(X_val,batch_size = 4*params["batch_size"])
        oof[val,:] += val_pred
        validated[val] = True
        # Decision score: median over the five target outputs.
        val_score = np.median(val_pred,axis = 1)
        action = np.where(val_score > params["th"],1,0)
        # AUC needs the continuous score; thresholded 0/1 actions would
        # throw away the ranking information.
        score = roc_auc_score(y_val[:,-1],val_score)
        date = train.loc[val,"date"].values
        weight = train.loc[val,"weight"].values
        resp = train.loc[val,"resp"].values
        utility_score = utility_score_numba(date,weight,resp,action)
        scores.append(score)
        util_scores.append(utility_score)
        print(f"ROC AUC : {score}")
        print(f"UTILITY SCORE : {utility_score}")

        ## Fine Tuning
        # Continue on the validation rows with a 100x smaller learning rate.
        model.compile(tf.keras.optimizers.Adam(learning_rate/100),loss = "binary_crossentropy")
        model.fit(X_val,y_val,
                  epochs = params["tune_epochs"],
                  batch_size = params["batch_size"],
                  verbose = 1)
        model.save_weights(ckp_path+"tune.hdf5")

        del X_tr,X_val,y_tr,y_val,model
        gc.collect()

    score_avg = weighted_average(scores)
    # Overall utility: same median decision rule as the per-fold score, and
    # only rows that actually received an out-of-fold prediction.
    oof_action = np.where(np.median(oof[validated],axis = 1) > params["th"],1,0)
    utility_score = utility_score_numba(train.date.values[validated],
                                        train.weight.values[validated],
                                        train.resp.values[validated],
                                        oof_action)
    print(f"utility score : {utility_score}")
    print(f"score : {score_avg}")

In [None]:
# Hyper-parameters consumed by cv_tuner() and train_all().
params_predict_best = dict(
    n_layers = 10,
    learning_rate = 0.001,
    label_smoothing = 0.056961346402280545,
    seed = SEED,
    epochs = 1000,
    batch_size = 16382,
    tune_epochs = 3,
    nfold = 5,
    th = 0.505,
    version = VERSION,
)

# Per-layer widths and dropout rates, expanded into hidden_units{i} /
# dropout_rate{i} keys (one more dropout entry than there are layers).
hidden = [64]*10
drop = [0.10] + [0.2]*10
params_predict_best.update(
    {f"hidden_units{i}" : hidden[i] for i in range(params_predict_best["n_layers"])}
)
params_predict_best.update(
    {f"dropout_rate{i}" : drop[i] for i in range(params_predict_best["n_layers"]+1)}
)
#cv_tuner(params_predict_best,encoder)

## Training All data

In [None]:
def train_all(params,encoder):
    """Train the DenseNet on the whole training set.

    Features are NaN-filled and neutralised against ``resp`` (every column
    except the first); the model is then fitted on the binary ``TARGET``
    columns. The weights of the epoch with the best training AUC are saved to
    ``weights/nn_trainall{version}.hdf5``.

    Parameters
    ----------
    params : dict
        Must provide ``n_layers``, ``hidden_units{i}``, ``dropout_rate{i}``,
        ``learning_rate``, ``label_smoothing``, ``seed``, ``batch_size`` and
        ``version``. The optional key ``train_all_epochs`` (default 20) sets
        the number of epochs.
    encoder : Model
        Encoder passed on to ``create_densenet``.

    Returns
    -------
    Model
        The fitted Keras model.
    """
    print(f"params : {params}")
    input_dim = len(FEATURES)
    output_dim = len(TARGET)
    n_layers = params["n_layers"]
    hidden_units = [params[f"hidden_units{i}"] for i in range(n_layers)]
    dropout_rate = [params[f"dropout_rate{i}"] for i in range(n_layers + 1)]
    learning_rate = params["learning_rate"]
    label_smoothing = params["label_smoothing"]
    set_seed(params["seed"])

    # Preprocess
    X_tr = np.nan_to_num(train[FEATURES].values)
    # `resp` is only the neutralisation target; the labels are the binary
    # action columns, matching the model's sigmoid outputs.
    resp = train["resp"].values.reshape(-1,1)
    y_tr = train[TARGET].values
    for i in range(1,len(FEATURES)):
        nt = NeutralizeTransform(proportion=PROP)
        X_tr[:,i] = nt.fit_transform(X_tr[:,i].reshape(-1,1),resp).ravel()

    model = create_densenet(input_dim,output_dim,n_layers,hidden_units,dropout_rate,learning_rate,label_smoothing,encoder)
    ckp_path =  f"weights/nn_trainall{params['version']}.hdf5"
    os.makedirs(os.path.dirname(ckp_path), exist_ok=True)
    # There is no validation data here, so "val_auc" never exists and
    # save_best_only would skip every save; monitor the training AUC instead.
    ckp = ModelCheckpoint(ckp_path,
                          monitor = "auc",
                          verbose = 0,
                          save_best_only = True,
                          save_weights_only = True,
                          mode = 'max')
    model.fit(X_tr,y_tr,
              epochs = params.get("train_all_epochs",20),
              batch_size = params["batch_size"],
              verbose = 1,
              callbacks = [ckp])
    return model

In [None]:
#train_all(params_predict_best,encoder)