<a href="https://colab.research.google.com/github/mhdSharuk/Kaggle-Ion-Switching/blob/master/Ion_Switching_dum1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Importing the libraries

In [0]:
import numpy as np 
import numba
import pandas as pd
import os
import librosa
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = 'all'
import sklearn
from collections import Counter
pd.set_option('display.max_rows',None)
pd.set_option('display.max_columns',None)
import pywt
from scipy import stats,signal
import matplotlib.pyplot as plt
plt.rc('figure',figsize=(19,6))
import seaborn as sns
from sklearn.model_selection import KFold, train_test_split
from sklearn.metrics import f1_score
from sklearn import metrics
import lightgbm as lgb
from tqdm import tqdm_notebook
from functools import partial
from scipy import optimize
from sklearn.metrics import f1_score
import gc
import warnings 
warnings.filterwarnings('ignore')
from sklearn.linear_model import LinearRegression
from copy import deepcopy
import logging
logger = logging.getLogger('matplotlib.pyplot')
logger.setLevel(logging.CRITICAL)

import tensorflow as tf
from keras.layers import Dense,Dropout, Conv1D, BatchNormalization, Activation, AveragePooling1D, GlobalAveragePooling1D, Lambda, Input, Concatenate, UpSampling1D, Multiply
from keras.models import Model, Sequential
from keras import backend as K
from keras.losses import categorical_crossentropy
from keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau, LearningRateScheduler
from keras.initializers import random_normal
from keras.optimizers import Adam, SGD, RMSprop
from keras.callbacks import Callback

# Connect to Kaggle CLI

In [0]:
os.chdir('./drive/My Drive/Ion Switching')
os.listdir()

In [0]:
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle
!chmod 600 ~/.kaggle/kaggle.json

# Helper Functions

In [0]:
def reduce_mem_usage(df: pd.DataFrame,verbose: bool = True) -> pd.DataFrame:
    """Downcast the numeric columns of ``df`` to the smallest dtype that holds their range.

    Integer columns are tried against int8/int16/int32/int64 and float columns
    against float16/float32 (falling back to float64). Columns are replaced on
    ``df`` itself, which is also returned.

    Args:
        df: Frame whose numeric columns are downcast.
        verbose: When True, print the resulting memory usage and reduction.

    Returns:
        The same ``df`` object with downcast columns.
    """
    numerics = ['int16', 'int32', 'int64', 'float16', 'float32', 'float64']
    int_candidates = (np.int8, np.int16, np.int32, np.int64)
    float_candidates = (np.float16, np.float32)

    start_mem = df.memory_usage().sum() / 1024**2
    for col in df.columns:
        col_type = df[col].dtypes
        if col_type not in numerics:
            continue

        c_min = df[col].min()
        c_max = df[col].max()

        if str(col_type)[:3] == 'int':
            # Bounds are inclusive: a value equal to a dtype's limit still fits.
            for candidate in int_candidates:
                limits = np.iinfo(candidate)
                if limits.min <= c_min and c_max <= limits.max:
                    df[col] = df[col].astype(candidate)
                    break
        else:
            # NOTE: only the value range is checked; float16/float32 keep
            # fewer significant digits than float64.
            for candidate in float_candidates:
                limits = np.finfo(candidate)
                if limits.min <= c_min and c_max <= limits.max:
                    df[col] = df[col].astype(candidate)
                    break
            else:
                # Also reached when min/max are NaN (all comparisons are False).
                df[col] = df[col].astype(np.float64)

    end_mem = df.memory_usage().sum() / 1024**2
    # Guard the ratio so a frame reporting zero memory cannot divide by zero.
    reduction = (start_mem - end_mem) / start_mem if start_mem else 0.0

    msg = f'Mem. usage decreased to {end_mem:5.2f} MB ({reduction * 100:.1f} % reduction)'
    if verbose:
        print(msg)

    return df


In [0]:
def MacroF1Metric(preds, dtrain):
    """Custom evaluation function: macro F1 of predictions rounded to classes 0..10.

    ``preds`` are clipped to [0, 10] and rounded to integers before being
    compared with the labels read from ``dtrain.get_label()``.

    Returns:
        A ``(name, score, is_higher_better)`` tuple.
    """
    y_true = dtrain.get_label()
    clipped = np.clip(preds, 0, 10)
    y_pred = np.round(clipped).astype(int)
    macro_f1 = f1_score(y_true, y_pred, average='macro')
    return ('MacroF1Metric', macro_f1, True)

In [0]:
def mean_abs_dev(val):
  """Return the mean absolute deviation of ``val`` around its own mean."""
  center = np.mean(val)
  deviations = np.absolute(val - center)
  return np.mean(deviations)

In [0]:
def trend(df,abs_value=False):
  """Return the slope of a least-squares line fitted to ``df`` against its position.

  Args:
    df: One-dimensional sequence of values; element ``k`` is paired with x = k.
    abs_value: When True, fit the absolute values instead of the raw values.

  Returns:
    The first fitted coefficient (``lr.coef_[0]``) of the linear regression.
  """
  idx = np.arange(len(df))
  # The flag must be tested, not the builtin ``abs`` (which is always truthy).
  if abs_value:
    df = np.abs(df)
  lr = LinearRegression()
  lr.fit(idx.reshape(-1,1),df)
  return lr.coef_[0]

In [0]:
def change_rate(df):
    """Return the mean relative change between consecutive values of ``df``.

    The relative change is ``diff(df) / df[:-1]``. Zero changes and non-finite
    results (NaN, +inf, -inf from divisions by zero) are excluded from the mean.

    Args:
        df: One-dimensional numeric sequence (list, ndarray or pandas Series).

    Returns:
        The mean of the remaining relative changes, or NaN when none remain.
    """
    # Work on a plain array so positional indexing is never interpreted as
    # label-based lookup on a pandas index.
    values = np.asarray(df, dtype=float)
    if values.size < 2:
        return np.nan

    # Divisions by zero are expected and filtered out just below.
    with np.errstate(divide='ignore', invalid='ignore'):
        change = np.diff(values) / values[:-1]

    change = change[(change != 0) & np.isfinite(change)]
    if change.size == 0:
        return np.nan
    return np.mean(change)

In [0]:
def get_whole_rolling_feat(df,window):
  """Add rolling-window statistics of ``df['signal']`` for every size in ``window``.

  For each window size ``i`` the columns ``rolling_{i}_signal_mean/std/variance/
  min/max``, ``rolling_{i}_abs_max_min_distance`` and ``rolling_{i}_maxtomin_ratio``
  are written to ``df``. NaNs in the five rolling statistics (the warm-up rows)
  are replaced by the matching statistic of the first batch's signal; any NaN
  left in the created columns is then set to 0.

  Args:
    df: Frame with ``batch`` and ``signal`` columns; modified in place.
    window: Iterable of rolling-window sizes.

  Returns:
    The same ``df`` with the new columns.
  """
  first_batch = df['batch'].values[0]
  first_signal = df[df['batch'] == first_batch]['signal']
  mean_ = first_signal.mean()
  std_ = first_signal.std()
  var_ = first_signal.var()
  min_ = first_signal.min()
  max_ = first_signal.max()

  created = []
  for i in tqdm_notebook(window):
    rolled = df['signal'].rolling(i)
    prefix = f'rolling_{i}'
    df[f'{prefix}_signal_mean'] = rolled.mean().fillna(mean_)
    df[f'{prefix}_signal_std'] = rolled.std().fillna(std_)
    df[f'{prefix}_signal_variance'] = rolled.var().fillna(var_)
    df[f'{prefix}_signal_min'] = rolled.min().fillna(min_)
    df[f'{prefix}_signal_max'] = rolled.max().fillna(max_)
    df[f'{prefix}_abs_max_min_distance'] = np.absolute(df[f'{prefix}_signal_max'] - df[f'{prefix}_signal_min'])
    # A rolling minimum of 0 makes this ratio inf (or NaN for 0/0).
    df[f'{prefix}_maxtomin_ratio'] = np.absolute(np.divide(df[f'{prefix}_signal_max'],df[f'{prefix}_signal_min']))
    created.extend([
      f'{prefix}_signal_mean', f'{prefix}_signal_std', f'{prefix}_signal_variance',
      f'{prefix}_signal_min', f'{prefix}_signal_max',
      f'{prefix}_abs_max_min_distance', f'{prefix}_maxtomin_ratio',
    ])

  # Fill by assignment on exactly the created columns: calling
  # fillna(inplace=True) on a column selection only touches a temporary copy.
  if created:
    df[created] = df[created].fillna(0)
  return df

In [0]:
def create_whole_3_window_feat(df):
    """Add summary statistics over the three following and three preceding shifted signals.

    Reads ``signal_shift_-1/-2/-3`` (forward) and ``signal_shift_1/2/3``
    (backward) and writes ``signal_whole_{forward|backward}_{mean,sum,median,
    min,max,var,std}``. Mean, sum, min, max, var and std skip NaNs; var and std
    use ``ddof=0``; the median is NaN whenever any of the three inputs is NaN.
    NaNs in the 14 created columns are then replaced by 0.

    Args:
        df: Frame holding the six shifted-signal columns; modified in place.

    Returns:
        The same ``df`` with the new columns.
    """
    windows = (
        ('forward', ['signal_shift_-1', 'signal_shift_-2', 'signal_shift_-3']),
        ('backward', ['signal_shift_1', 'signal_shift_2', 'signal_shift_3']),
    )

    created = []
    for direction, shift_cols in windows:
        block = df[shift_cols]
        summaries = {
            'mean': block.mean(axis=1),
            'sum': block.sum(axis=1),
            'median': np.median(block.to_numpy(), axis=1),
            'min': block.min(axis=1),
            'max': block.max(axis=1),
            'var': block.var(axis=1, ddof=0),
            'std': block.std(axis=1, ddof=0),
        }
        for stat, values in summaries.items():
            name = f'signal_whole_{direction}_{stat}'
            df[name] = values
            created.append(name)

    # Assign the filled frame back: fillna(inplace=True) on a column selection
    # would only modify a temporary copy and leave ``df`` untouched.
    df[created] = df[created].fillna(0)

    return df

In [0]:
def get_whole_shift_window_feat(df,shift):
  """Add a ``signal_shift_{s}`` column for every offset ``s`` in ``shift``.

  Each new column is ``df['signal']`` shifted by ``s`` rows over the whole
  frame. ``df`` is modified in place and returned.
  """
  signal_col = df['signal']
  for offset in tqdm_notebook(shift):
    df[f'signal_shift_{offset}'] = signal_col.shift(offset)

  return df

In [0]:
def update_dict(dict1,dict2):
  """Overwrite values in ``dict1`` with those from ``dict2`` for keys present in both.

  Keys that exist only in ``dict2`` are not added. ``dict1`` is modified in place.

  Returns:
    ``dict1`` itself (``dict.update`` returns None, so it cannot be returned directly).
  """
  shared_keys = dict1.keys() & dict2.keys()
  dict1.update((k, dict2[k]) for k in shared_keys)
  return dict1

In [0]:
def update_df(df,keyval,columns):
  """Create each column in ``columns`` by mapping a key column through ``keyval``.

  A column whose name contains ``'batch_slices'`` is built from
  ``df['batch_slices']``; every other column is built from ``df['batch']``.
  ``keyval[col]`` supplies the mapping for column ``col``.
  """
  for col in tqdm_notebook(columns):
    key_column = 'batch_slices' if 'batch_slices' in str(col) else 'batch'
    df[col] = df[key_column].map(keyval[col])
  return df

In [0]:
def get_feat(df, replace=True, create_3_state_feat=True, rolling_feat=True, shift_feat=True):
  """Add three-step backward/forward neighbour features within each grouping column.

  For each of ``batch``, ``batch_slices`` and ``group`` the signal is shifted by
  1, 2 and 3 rows inside the group (``{x}_backward_s1..s3``) and by -1, -2, -3
  (``{x}_forward_s1..s3``), followed by the row-wise mean, sum, min and max of
  each triple. ``rolling_feat`` and ``shift_feat`` are accepted but not used.

  Args:
    df: Frame with ``signal`` and the three grouping columns; modified in place.
    replace: When True, fill every NaN in ``df`` with 0 before returning.
    create_3_state_feat: When False, no neighbour features are created.

  Returns:
    The same ``df``.
  """
  if create_3_state_feat:
    for x in tqdm_notebook(['batch','batch_slices','group']):
      for direction, sign in (('backward', 1), ('forward', -1)):
        lag_cols = []
        for step in (1, 2, 3):
          lag_name = f'{x}_{direction}_s{step}'
          df[lag_name] = df.groupby(x)['signal'].shift(sign * step)
          lag_cols.append(lag_name)

        df[f'{x}_{direction}_mean'] = np.mean(df[lag_cols],axis=1)
        df[f'{x}_{direction}_sum'] = np.sum(df[lag_cols],axis=1)
        df[f'{x}_{direction}_min'] = np.min(df[lag_cols],axis=1)
        df[f'{x}_{direction}_max'] = np.max(df[lag_cols],axis=1)

  # Rolling-mean features (windows 50/100/5000/10000) are currently disabled.
  if replace:
    df.fillna(0,inplace=True)
  return df

# Helper Classes

In [0]:
class FastSignal():
  """Adds per-batch signal descriptors as columns of the wrapped frame.

  Every feature is computed over the ``signal`` values of one ``batch`` and
  broadcast to all rows of that batch. The frame passed to the constructor is
  modified in place.
  """

  def __init__(self,df):
    # Frame with ``batch`` and ``signal`` columns; features are written onto it.
    self.df = df

  def create_mav(self):
    """Add ``batch_mav``: mean of the absolute signal per batch."""
    for x in ['batch']:
      self.df[f'{x}_mav'] = self.df.groupby(x)['signal'].transform(lambda s:np.mean(np.abs(s)))

  def create_iav(self):
    """Add ``batch_iav``: sum of the absolute signal per batch."""
    for x in ['batch']:
      self.df[f'{x}_iav'] = self.df.groupby(x)['signal'].transform(lambda s: np.sum(np.abs(s)))

  def create_mav_slope(self):
    """Add ``batch_mav_slope``: next row's ``batch_mav`` minus this row's.

    Requires ``create_mav`` to have run first.
    """
    for x in ['batch']:
      self.df[f'{x}_mav_slope'] = self.df[f'{x}_mav'].shift(-1) - self.df[f'{x}_mav']

  def create_rms(self):
    """Add ``batch_rms``: root mean square of the signal per batch."""
    for x in ['batch']:
      self.df[f'{x}_rms'] = self.df.groupby(x)['signal'].transform(lambda s:np.sqrt(np.mean(np.power(s,2))))

  def get_zero_crossing(self):
    """Add ``batch_zer_crossing``: number of zero crossings of the signal per batch."""
    # transform (not apply) broadcasts the per-batch count back to every row;
    # apply returns one value per batch keyed by batch id, which does not
    # align with the frame's row index.
    self.df[f'batch_zer_crossing'] = self.df.groupby('batch')['signal'].transform(lambda s:np.sum(librosa.zero_crossings(np.array(s),pad=False)))

  def get_shape_factor(self):
    """Add ``batch_shape_factor``: RMS divided by mean absolute value."""
    self.df[f'batch_shape_factor'] = self.df.groupby('batch')['signal'].transform(lambda s: np.divide(np.sqrt(np.mean(np.power(s,2))),np.mean(np.abs(s))))

  def get_crest_factor(self):
    """Add ``batch_crest_factor``: peak absolute value divided by RMS."""
    self.df[f'batch_crest_factor'] = self.df.groupby('batch')['signal'].transform(lambda s: np.divide(np.max(np.abs(s)),np.sqrt(np.mean(np.power(s,2)))))

  def get_margin_factor(self):
    """Add ``batch_margin_factor``: peak absolute value over squared mean of sqrt(|signal|)."""
    for x in ['batch']:
      self.df[f'{x}_margin_factor'] = self.df.groupby(x)['signal'].transform(lambda s:np.divide(np.max(np.abs(s)), np.power(np.mean(np.sqrt(np.abs(s))),2)))

  def get_impulse_factor(self):
    """Add ``batch_impulse_factor``: peak absolute value over mean absolute value."""
    self.df[f'batch_impulse_factor'] = self.df.groupby('batch')['signal'].transform(lambda s:np.divide(np.max(np.abs(s)), np.mean(np.abs(s))))

  def create_features(self):
    """Run every feature builder; ``create_mav`` precedes ``create_mav_slope``."""
    self.get_impulse_factor()
    self.get_margin_factor()
    self.get_shape_factor()
    self.get_crest_factor()
    self.get_zero_crossing()
    self.create_rms()
    self.create_mav()
    self.create_mav_slope()
    self.create_iav()

In [0]:
class IonDataset:
  """Empty placeholder class; it defines no attributes or methods."""

In [0]:
class DataPipeLine:
  """Feature-engineering helpers for frames with ``time``, ``signal`` and ``group`` columns."""

  # Statistics computed per group / batch; each name becomes a key suffix.
  # NOTE(review): 'cumsum' is not a reducing aggregation — confirm that passing
  # it to ``agg`` produces the mapping that is intended here.
  _AGG_NAMES = ['mean','median','std','var','min','max','sem','sum','cumsum','skew','mad']

  def __init__(self):
    pass

  @staticmethod
  def _mean_abs_dev(values):
    """Mean absolute deviation around the mean (stands in for pandas' removed ``mad``)."""
    return (values - values.mean()).abs().mean()

  @classmethod
  def _agg_spec(cls, name):
    """Return the object to pass to ``agg`` for the statistic called ``name``."""
    return cls._mean_abs_dev if name == 'mad' else name

  def get_batch(self,df):
    """Sort ``df`` by time, index it by sample number and add batch columns.

    The index becomes ``time * 10_000 - 1``; ``batch`` is the index floor-divided
    by 50_000 and ``batch_slices`` the position inside the batch floor-divided
    by 5000. ``df`` is modified in place and returned.
    """
    df.sort_values(by=['time'],inplace=True)
    # Round before subtracting: products such as 0.0003 * 10_000 are not exact
    # in floating point and would otherwise leave fractional sample numbers.
    df.index = (np.round(df.time * 10_000) - 1).values
    df['batch'] = df.index // 50_000
    df['batch_slices'] = (df.index - (df.batch * 50_000)) // 5000
    return df

  def group_data(self,df_train,df_test,group):
    """Return ``(train_rows, test_rows)`` restricted to rows whose ``group`` equals ``group``."""
    print(f"Grouping Data {group}")
    self.group = group
    data_groups = (df_train.query(f"group == {self.group}"),df_test.query(f"group == {self.group}"))
    return data_groups

  def get_signal_shifting(self,df,window_sizes):
    """Add a ``signal_shift_{i}`` column for every offset in ``window_sizes``.

    Offsets are cast to ``int`` first, so float inputs (e.g. from
    ``np.linspace``) are accepted. ``df`` is modified in place and returned.
    """
    self.window_shift = window_sizes
    self.df = df
    for i in self.window_shift:
      i = int(i)
      print(f"Shifting Signals {i} size")
      self.df[f'signal_shift_{i}'] = self.df.signal.shift(i)
    return self.df

  def get_features_per_group(self,df,feat_per_batch = False):
    """Return a dict of per-group signal statistics.

    With ``feat_per_batch`` False the keys are ``group_{stat}`` plus the four
    ``group_{max,min}_{neg,pos}_signal`` extremes, each mapping a group id to a
    value. With ``feat_per_batch`` True the keys are ``group_{batch|batch_slices}_{stat}``
    and the mappings are keyed by ``(group, batch)`` pairs.
    """
    self.data = df
    # The statistics do not depend on which group is being visited, so they are
    # computed once; this also guarantees ``d`` exists when there are no groups.
    d = {}
    if not feat_per_batch:
      for i in self._AGG_NAMES:
        d[f'group_{i}'] = self.data.groupby('group')['signal'].agg(self._agg_spec(i)).to_dict()
      negative = self.data[self.data['signal']<0].groupby('group')['signal']
      positive = self.data[self.data['signal']>0].groupby('group')['signal']
      # "max negative" is the most negative value, i.e. the minimum.
      d[f'group_max_neg_signal'] = negative.min().to_dict()
      d[f'group_min_neg_signal'] = negative.max().to_dict()
      d[f'group_min_pos_signal'] = positive.min().to_dict()
      d[f'group_max_pos_signal'] = positive.max().to_dict()
    else:
      for i in ['batch','batch_slices']:
        for j in self._AGG_NAMES:
          d[f'group_{i}_{j}'] = self.data.groupby(['group',i])['signal'].agg(self._agg_spec(j)).to_dict()
    return d

  def get_batch_feat(self,df):
    """Return ``{'{batch|batch_slices}_{stat}': mapping}`` of signal statistics per key value."""
    d = {}
    for x in tqdm_notebook(['batch','batch_slices']):
      for i in self._AGG_NAMES:
        d[f'{x}_{i}'] = df.groupby(x)['signal'].agg(self._agg_spec(i)).to_dict()
    return d

  def get_nearest_feat(self,df):
    """Add statistics of the three previous and three next signals within each group.

    Writes ``group_prev_3_{stat}`` and ``group_forw_3_{stat}`` for sum, mean,
    min, max, std, var and mad. The temporary ``c1``/``c2``/``c3`` columns are
    dropped before ``df`` is returned.
    """
    passes = (
      ('prev', "Creating Previous Features", 1),
      ('forw', "Creating Forward Features", -1),
    )
    for label, message, sign in passes:
      print(message)
      for step in (1, 2, 3):
        df[f'c{step}'] = df.groupby('group')['signal'].shift(sign * step)

      neighbours = df[['c1','c2','c3']]
      df[f'group_{label}_3_sum'] = neighbours.sum(axis=1)
      df[f'group_{label}_3_mean'] = neighbours.mean(axis=1)
      df[f'group_{label}_3_min'] = neighbours.min(axis=1)
      df[f'group_{label}_3_max'] = neighbours.max(axis=1)
      df[f'group_{label}_3_std'] = neighbours.std(axis=1)
      df[f'group_{label}_3_var'] = neighbours.var(axis=1)
      # Row-wise mean absolute deviation, written out because DataFrame.mad
      # is no longer available in pandas 2.x.
      df[f'group_{label}_3_mad'] = neighbours.sub(neighbours.mean(axis=1), axis=0).abs().mean(axis=1)

    df.drop(columns=['c1','c2','c3'],inplace=True)
    return df

pipe = DataPipeLine()

In [0]:
class LGB:
  """Small wrapper that prepares data, trains a LightGBM model and predicts.

  Typical call order: ``remove_cols`` -> ``split_data`` -> ``start_training``
  -> ``predict``.
  """

  def __init__(self,train_data,test_data,params,metric):
    self.train = train_data
    self.test = test_data
    # NOTE: the dict is stored by reference, so ``set_params`` also changes the
    # caller's dict (and any other instance sharing it).
    self.params = params
    self.metric = metric

  def remove_cols(self,cols):
    """Drop ``cols`` from train and test; ``used_cols`` keeps the remaining feature names.

    ``open_channels`` stays in ``self.train`` as the target but is removed from
    ``used_cols``; a ValueError is raised by ``list.remove`` if it is absent.
    """
    print("Removing the inappropriate columns")
    self.used_cols = [x for x in self.train.columns if x not in cols]
    self.train = self.train[self.used_cols]
    self.used_cols.remove('open_channels')
    self.test = self.test[self.used_cols]

  def split_data(self,split_val,for_train=False,for_test=True):
    """Split the training data into train and validation parts.

    Args:
      split_val: Fraction (or count) handed to ``train_test_split``.
      for_train: When True, ``split_val`` is the training size.
      for_test: When True, ``split_val`` is the validation size.

    Raises:
      ValueError: If ``for_train`` and ``for_test`` have the same value.
    """
    print("Splitting the data")
    if (for_train == for_test):
      # A bare string cannot be raised in Python 3; raise a real exception.
      raise ValueError("Data Cannot be splitted for both Train and Test Data")
    elif(for_test):
      self.x_train, self.x_val, self.y_train, self.y_val = train_test_split(self.train[self.used_cols],self.train['open_channels'],test_size=split_val)
    else:
      self.x_train, self.x_val, self.y_train, self.y_val = train_test_split(self.train[self.used_cols],self.train['open_channels'],train_size=split_val)

  def get_params(self):
    """Return the current parameter dict."""
    return self.params

  def set_params(self,params):
    """Merge ``params`` into the stored parameter dict (in place)."""
    print("Updated the params values")
    self.params.update(params)

  def eval_metric(self,metric):
    """Set the custom evaluation function used by ``start_training``."""
    # Store it where start_training reads it; assigning to ``self.eval_metric``
    # would replace this method on the instance and never reach training.
    self.metric = metric

  def start_training(self):
    """Train for up to 2000 rounds on the split data, validating on the held-out part."""
    print("Started Training")
    self.model = lgb.train(self.params,
                           lgb.Dataset(self.x_train,self.y_train),
                           2000,lgb.Dataset(self.x_val,self.y_val),
                           verbose_eval=100,
                           early_stopping_rounds=500,feval=self.metric)

  def predict(self):
    """Predict the test frame and return ``{int(index label): prediction}``.

    Keys are the distinct integer-cast index labels in first-seen order; the
    k-th key receives the k-th prediction.
    """
    print("Predictting the Unknown Data")
    keys = list({int(x): 0 for x in self.test.index})
    self.predictions = self.model.predict(self.test)
    return {key: self.predictions[pos] for pos, key in enumerate(keys)}
    

In [0]:
class OptimizedRounder(object):
  """Searches for the ten thresholds that turn continuous predictions into classes 0..10."""

  def __init__(self):
    # Replaced by the optimizer result once ``fit`` has run.
    self.coef_ = 0

  def _MacroF1_loss(self,coef,X,y):
    """Return the negative macro F1 obtained when ``X`` is binned at the sorted ``coef``."""
    X_p = pd.cut(X,[-np.inf] + list(np.sort(coef)) + [np.inf], labels=np.linspace(0,10,11).tolist())

    return -f1_score(y, X_p, average = 'macro')

  def fit(self, X, y):
    """Optimise the thresholds with Nelder-Mead; the optimizer result is stored in ``coef_``."""
    loss_partial = partial(self._MacroF1_loss,X=X, y=y)
    # Eleven classes need exactly ten cut points; start from the midpoints
    # 0.5, 1.5, ..., 9.5 so the bin count matches the eleven labels.
    initial_coef = np.linspace(0.5,9.5,10).tolist()
    self.coef_ = optimize.minimize(loss_partial,initial_coef,method='nelder-mead')

  def predict(self, X, coef):
    """Bin ``X`` at the sorted thresholds ``coef`` and return the labels 0.0..10.0."""
    return pd.cut(X, [-np.inf] + list(np.sort(coef)) + [np.inf], labels=np.linspace(0,10,11).tolist())

  def round_prediction(self,prediction,coefficients):
    """Map each prediction to the class whose threshold band contains it.

    A value ``v`` becomes 0 when ``v <= coefficients[0]``, ``k`` when
    ``coefficients[k-1] < v <= coefficients[k]`` and 10 when
    ``v > coefficients[9]``.

    Returns:
      A new array with the same dtype as ``np.array(prediction)``.
    """
    raw = np.array(prediction)
    rounded = raw.copy()
    # Every band is tested against the untouched ``raw`` values so that an
    # already assigned class can never be re-matched by a later band.
    rounded[raw <= coefficients[0]] = 0
    for level in range(1, 10):
      in_band = np.logical_and(raw > coefficients[level - 1], raw <= coefficients[level])
      rounded[in_band] = level
    rounded[raw > coefficients[9]] = 10

    return rounded

In [0]:
class macroF1(Callback):
    """Callback that prints the macro F1 score on a fixed input set after every epoch."""

    def __init__(self, model, inputs, targets):
        # Initialise the base class first so its own attributes exist before
        # the model reference is stored.
        super().__init__()
        self.model = model
        self.inputs = inputs
        # Targets are reduced over axis 2 to class ids and flattened.
        self.targets = np.argmax(targets, axis=2).reshape(-1)

    def on_epoch_end(self, epoch, logs=None):
        """Predict ``self.inputs`` and print the macro F1 against the stored targets."""
        pred = np.argmax(self.model.predict(self.inputs), axis=2).reshape(-1)
        f1_val = f1_score(self.targets, pred, average="macro")
        print("val_f1_macro_score: ", f1_val)
                
def model_fit(model, train_inputs, train_targets, val_inputs, val_targets, n_epoch, batch_size=32):
    """Train ``model`` with ``fit_generator`` and return the resulting history object.

    Batches come from ``Datagen``; the callbacks are the module-level
    ``lr_schedule`` and a ``macroF1`` evaluator on the validation data. Step
    counts are the input lengths floor-divided by ``batch_size``.
    """
    train_gen = Datagen(train_inputs, train_targets, batch_size, is_train=True)
    val_gen = Datagen(val_inputs, val_targets, batch_size)
    f1_callback = macroF1(model, val_inputs, val_targets)

    return model.fit_generator(
        train_gen,
        steps_per_epoch=len(train_inputs) // batch_size,
        epochs=n_epoch,
        validation_data=val_gen,
        validation_steps=len(val_inputs) // batch_size,
        callbacks=[lr_schedule, f1_callback],
        shuffle=False,
        verbose=1,
    )


def lrs(epoch):
    """Return the learning rate for ``epoch`` from the module-level ``learning_rate``.

    Epochs below 35 use the full rate, epochs below 50 a tenth of it and all
    later epochs a hundredth.
    """
    if epoch < 35:
        return learning_rate
    if epoch < 50:
        return learning_rate / 10
    return learning_rate / 100

# Loading the data

In [0]:
train = pd.read_csv('train_clean.csv')
test = pd.read_csv('test_clean.csv')
submission = pd.read_csv('sample_submission.csv.zip')

test.reset_index(drop=True,inplace=True)

#### Sampling

In [0]:
pipe = DataPipeLine()

In [0]:
train = pipe.get_batch(train)
test = pipe.get_batch(test)

In [0]:
# Label training rows with a group id by index range; rows outside every range keep -1.
train['group'] = -1
x = [(0,500000),(1000000,1500000),(1500000,2000000),(2500000,3000000),(2000000,2500000)]
# Ranges are half-open [start, stop). A ``.loc[start:stop]`` label slice would
# include ``stop`` as well, letting neighbouring ranges overwrite each other's
# first row.
for k, (start, stop) in enumerate(x):
    in_range = (train.index >= start) & (train.index < stop)
    train.loc[in_range,'group'] = k
#train = train[train['group'] != -1]

In [0]:
# Label test rows with a group id by row position; rows outside every range keep -1.
test['group'] = -1
x = [[(0,100000),(300000,400000),(800000,900000),(1000000,2000000)],[(400000,500000)], 
     [(100000,200000),(900000,1000000)],[(200000,300000),(600000,700000)],[(500000,600000),(700000,800000)]]
# Look the column position up by name instead of assuming it is column 4.
group_pos = test.columns.get_loc('group')
for k, segments in enumerate(x):
    for start, stop in segments:
        test.iloc[start:stop,group_pos] = k

In [0]:
train.shape
test.shape

In [0]:
gc.collect()

# Data Generation

#### OOP Way

In [0]:
# Build the training features with the DataPipeLine helpers.
# Shifted copies of the signal for offsets -3..3 (get_signal_shifting casts each offset to int).
train = pipe.get_signal_shifting(train,(np.linspace(-3,3,7)))

# Per-group statistics: each dict entry is mapped onto the rows through the 'group' column.
agg_group_feat = pipe.get_features_per_group(train)
for x in tqdm_notebook(agg_group_feat.keys()):
  train[x] = train['group'].map(agg_group_feat[x])
# Per-batch and per-slice statistics, mapped through the column named in the key.
batch_features = pipe.get_batch_feat(train)
for x in tqdm_notebook(batch_features.keys()):
  # 'batch_slices' must be tested first because 'batch' is a substring of it.
  if 'batch_slices' in x:
    train[x] = train['batch_slices'].map(batch_features[x])
  else:
    train[x] = train['batch'].map(batch_features[x])
# Statistics of the three previous / next signals within each group.
train = pipe.get_nearest_feat(train)

In [0]:
# Same feature pipeline as for train, applied to the test frame.
# Note that the group/batch statistics are recomputed from the test data itself.
test = pipe.get_signal_shifting(test,(np.linspace(-3,3,7)))
agg_group_feat = pipe.get_features_per_group(test)
for x in tqdm_notebook(agg_group_feat.keys()):
  test[x] = test['group'].map(agg_group_feat[x])
batch_features = pipe.get_batch_feat(test)
for x in tqdm_notebook(batch_features.keys()):
  # 'batch_slices' must be tested first because 'batch' is a substring of it.
  if 'batch_slices' in x:
    test[x] = test['batch_slices'].map(batch_features[x])
  else:
    test[x] = test['batch'].map(batch_features[x])
test = pipe.get_nearest_feat(test)

In [0]:
# Derive range, ratio and absolute-value features from the per-batch and
# per-slice min/max columns, for both train and test.
for df in tqdm_notebook([train,test]):
  for x in ['batch','batch_slices']:
    df[f'{x}_range'] = df[f'{x}_max'] - df[f'{x}_min']
    # NOTE(review): named "min_max_ratio" but computes max / min; the result is
    # inf or NaN wherever the minimum is 0.
    df[f'{x}_min_max_ratio'] = df[f'{x}_max']/df[f'{x}_min']
    df[f'{x}_abs_max'] = np.absolute(df[f'{x}_max'])
    df[f'{x}_abs_min'] = np.absolute(df[f'{x}_min'])
    # NOTE(review): `//` floor-divides, so this is a truncated average —
    # confirm that true division (`/`) was not intended.
    df[f'{x}_abs_avg'] = (df[f'{x}_abs_max'] + df[f'{x}_abs_min'])//2 

In [0]:
# For every train column from position 6 onward, add "<col>_msig" = column minus the raw signal.
# assumes the first six train columns are the non-feature columns — TODO confirm
# The names are taken from train only, so each of them must also exist in test.
columns = train.columns[6:]
for df in [train,test]:
  for x in tqdm_notebook(columns):
    df[str(x)+'_msig'] = df[str(x)] - df['signal']

In [0]:
train.fillna(0,inplace=True)
test.fillna(0,inplace=True)

In [0]:
# One (train_rows, test_rows) pair per group id 0..4.
data_groups = [pipe.group_data(train,test,group_id) for group_id in [0,1,2,3,4]]

In [0]:
gc.collect()
assert train.shape[1]-1 == test.shape[1],"Train and Test shape doesn't match"

In [0]:
[x for x in train.columns if x not in test.columns]

In [0]:
train.head()
test.head()

In [0]:
a = int(train.shape[0] - train.shape[0]*0.1)
a

In [0]:
train = reduce_mem_usage(train)
test = reduce_mem_usage(test)

In [0]:
# LightGBM parameters shared by the five per-group models.
# NOTE(review): 'logloss' and 'sample_fraction' do not look like standard LightGBM
# names (compare 'binary_logloss'/'multi_logloss' and 'bagging_fraction'/
# 'feature_fraction') — verify against the LightGBM parameter documentation.
params = {'learning_rate': 0.07, 
          'max_depth': -1, 
          'num_leaves': 200,
          'metric': 'logloss', 
          'random_state': 7, 
          'n_jobs':-1, 
          'sample_fraction':0.33}
gc.collect()

In [0]:
for i in range(0,5):
  print(data_groups[i][0].group.unique(),
        data_groups[i][1].group.unique())

In [0]:
cols = ['time', 'batch', 'batch_index', 'batch_slices', 'batch_slices2']
model_group_0 = LGB(data_groups[0][0],data_groups[0][1],params,MacroF1Metric)
model_group_1 = LGB(data_groups[1][0],data_groups[1][1],params,MacroF1Metric)
model_group_2 = LGB(data_groups[2][0],data_groups[2][1],params,MacroF1Metric)
model_group_3 = LGB(data_groups[3][0],data_groups[3][1],params,MacroF1Metric)
model_group_4 = LGB(data_groups[4][0],data_groups[4][1],params,MacroF1Metric)

In [0]:
model_group_0.remove_cols(cols)
model_group_1.remove_cols(cols)
model_group_2.remove_cols(cols)
model_group_3.remove_cols(cols)
model_group_4.remove_cols(cols)

In [0]:
SPLIT = 0.15
model_group_0.split_data(SPLIT)
model_group_1.split_data(SPLIT)
model_group_2.split_data(SPLIT)
model_group_3.split_data(SPLIT)
model_group_4.split_data(SPLIT)

In [0]:
predictions_dict = {x:0 for x in test.index}
len(predictions_dict.values())

In [0]:
model_group_0.start_training()
predictions_dict.update(model_group_0.predict())

In [0]:
model_group_1.start_training()
predictions_dict.update(model_group_1.predict())
model_group_2.start_training()
predictions_dict.update(model_group_2.predict())
model_group_3.start_training()
predictions_dict.update(model_group_3.predict())
model_group_4.start_training()
predictions_dict.update(model_group_4.predict())

In [0]:
preds = [x for (y,x) in predictions_dict.items()]

In [0]:
# Clip the first 2,000,000 predictions to [0, 10], round them to class ids and
# write them out. The frame loaded from sample_submission is named
# ``submission``; no ``sub`` frame exists at this point.
round_pred = np.round(np.clip(preds[:2000000], 0, 10)).astype(int)
submission['open_channels'] = round_pred
submission['open_channels'].value_counts()
submission.to_csv('submission.csv', index=False, float_format='%.4f')

#### Functional Way

In [0]:
train, test = get_group_channel_agg(train,test)

In [0]:
train.fillna(0,inplace=True)
test.fillna(0,inplace=True)

In [0]:
train = get_feat(train,replace=True,create_3_state_feat=True,rolling_feat=True,shift_feat=True)
gc.collect()

In [0]:
test = get_feat(test,replace=True,create_3_state_feat=True,rolling_feat=True,shift_feat=True)
gc.collect()

In [0]:
# Shifted signal columns for the offsets -10..10. The builtin ``int`` is used
# as dtype because the ``np.int`` alias no longer exists in current NumPy.
train = get_whole_shift_window_feat(train,np.linspace(-10,10,21,dtype=int))
test = get_whole_shift_window_feat(test,np.linspace(-10,10,21,dtype=int))
gc.collect()

In [0]:
train = create_whole_3_window_feat(train)
gc.collect()

In [0]:
test = create_whole_3_window_feat(test)
gc.collect()

In [0]:
"""train = get_whole_rolling_feat(train,[10,50,100,1000,10000])
gc.collect()
test = get_whole_rolling_feat(test,[10,50,100,1000,10000])
gc.collect()"""

In [0]:
train.fillna(0,inplace=True)
gc.collect()

In [0]:
test.fillna(0,inplace=True)
gc.collect()

In [0]:
# Every column whose name contains 'batch' (which already covers the
# 'batch_slices' names) gets a "<col>_msignal" companion = column minus signal.
# A list comprehension keeps the frame's column order, so the new columns are
# created in the same order on every run.
sub = [x for x in train.columns if 'batch' in x]
for x in tqdm_notebook(sub):
  train[f'{x}_msignal'] = train[x] - train['signal']

for x in tqdm_notebook(sub):
  test[f'{x}_msignal'] = test[x] - test['signal']

In [0]:
train = reduce_mem_usage(train)
test = reduce_mem_usage(test)

In [0]:
print(f'train shape => {train.shape}')
print(f'test shape => {test.shape}')

In [0]:
train.head()
test.head()

# Training

#### If save data

In [0]:
SAVE = False
if SAVE:
  train.to_csv("train_all_feat.csv",index=False)
  test.to_csv("test_all_feat.csv",index=False)

#### Splitting the data

In [0]:
# (unfinished cell: a bare "def" with no name or body was left here)

In [0]:
split_value = train.shape[0] - train.shape[0]*0.1
split_value

In [0]:
use_cols = [x for x in train.columns if x not in ['time', 'open_channels', 'batch', 'batch_slices', 'group']]

In [0]:
x_train,x_val,y_train,y_val = train_test_split(train[use_cols],train['open_channels'],test_size=0.15)
print(f'x_train shape => {x_train.shape[0]}, y_train shape => {y_train.shape}')

In [0]:
params = {'learning_rate': 0.08, 
          'max_depth': -1, 
          'num_leaves': 250,
          'metric': 'rmse', 
          'random_state': 7, 
          'n_jobs':-1, 
          'sample_fraction':0.43}

gc.collect()

# Model

## LightGBM Algorithm

In [0]:
model = lgb.train(params, lgb.Dataset(x_train, y_train), 2000,  lgb.Dataset(x_val, y_val), verbose_eval=50, early_stopping_rounds=500, feval=MacroF1Metric)

In [0]:
SAVE_MODEL = False
if SAVE_MODEL:
  model.save_model('lgb_cv_93844.txt')

In [0]:
preds = model.predict(test[use_cols], num_iteration=model.best_iteration)

## Unet Architecture

In [0]:
K.clear_session()
model = Unet()
print(model.summary())

learning_rate=0.005
n_epoch=60
batch_size=32

lr_schedule = LearningRateScheduler(lrs)

#classifier
model.compile(loss=categorical_crossentropy, 
              optimizer=Adam(lr=learning_rate), 
              metrics=["accuracy"])

hist = model_fit(model, train_input, train_target, val_input, val_target, n_epoch, batch_size)

# Submitting the Predictions

In [0]:
round_pred = np.round(np.clip(preds, 0, 10)).astype(int)
submission['open_channels'] = round_pred
submission['open_channels'].value_counts()

In [0]:
submission.to_csv('submission_2.csv', index=False, float_format='%.4f')

In [0]:
!kaggle competitions submit -c liverpool-ion-switching -f submission_2.csv -m "0.93844 "

# Feature Importance Scale

In [0]:
len(train.columns)

In [0]:
lgb.plot_importance(model,importance_type='split', max_num_features=30)

# ANALYSIS

#### For Train Data

In [0]:
train.head()
train2 = train.copy()

In [0]:
for x in train.group.unique():
  plt.plot(train[train['group'] == x]['signal'][::100],'.',label=f'signal group {x}')
plt.plot(train['group'],label='groups',color='black')
for i in range(10):
  plt.plot([i*5e5,i*5e5],[-4,12],'r')
for i in range(10):
  plt.text(i*5e5+2e5,10,str(i+1),size=20)
plt.yticks(np.linspace(-4,12,17))
plt.legend()
plt.grid(True)
plt.show()

In [0]:
%matplotlib inline
plt.figure(figsize=(19,6))
res = 1000
plt.plot(range(0,train.shape[0],res),train['open_channels'][0::res])
plt.yticks([0,1,2,3,4,5,6,7,8,9,10])
for i in range(10):
  plt.plot([i*5e5,i*5e5],[0,10],'r')
for i in range(10):
  plt.text(i*500000+200000,10,str(i+1),size=20)
plt.grid()
plt.show()

In [0]:
%matplotlib inline
plt.figure(figsize=(19,5)) 
res = 1000
plt.plot(range(0,train.shape[0],res),train['signal'][0::res])
for i in range(10):
  plt.plot([i*500000,i*500000],[-4,12],'r')
for j in range(10):
    plt.plot([j*100000,j*100000],[-4,12],'r:')
for i in range(10):
  plt.text(i*500000+200000,10,str(i+1),size=20)
plt.xticks(np.linspace(0,5e6,11))
plt.grid()
plt.show()

In [0]:
df1 = train[train['signal'] == -1.817]
df2 = train[train['signal'] == 3.186]
df3 = train[train['signal'] == 325]

plt.plot(train['signal'][::1000],alpha=1)
plt.plot(df1['signal'],alpha=1,label='df1')
plt.plot(df2['signal'],alpha=1,label='df2')
plt.plot(df3['signal'],alpha=1,label='df3')
plt.legend()

In [0]:
slope, intercept, rval, pval, stderr = stats.linregress(train['time'][5e5:6e5],np.array(pd.Series(train['signal'][5e5:6e5].rolling(1000).mean()).replace(np.nan,0)))
print(slope," ",intercept)

In [0]:
train2 = train.copy()
a = 5e5;b1=6e5;b2=1e6
train2.loc[a:b1,'signal'] = train2.loc[a:b1,'signal'] - ((np.ceil(slope*10)/10)*(train2.loc[a:b1,'time']) - 15)
plt.figure(figsize=(19,5))
plt.plot(train2.loc[a:b2,'signal'])

In [0]:
plt.figure(figsize=(19,5))
plt.title("Signals with Drift in Batch 2",color='white',size=20)
plt.plot(train['signal'][::1000])
for i in range(10):
  plt.plot([i*5e5,i*5e5],[-4,12],'r')

plt.figure(figsize=(19,5))
plt.title("Signals without Drift in Batch 2",color='white',size=20)
plt.plot(train2['signal'][::1000])
for i in range(10):
  plt.plot([i*5e5,i*5e5],[-4,12],'r')
plt.show()

In [0]:
plt.plot(train[train['batch'] == 6]['signal'][::100].rolling(1000).mean())

In [0]:
# Reload the cleaned training data and add the batch columns.
# get_batch is a DataPipeLine method, so it is called through ``pipe``.
train_c = pd.read_csv('train_clean.csv')
train_c = pipe.get_batch(train_c)
train_c.head()

In [0]:
sns.distplot(train[train['batch'] == 6]['signal'],label='not cleaned')
sns.distplot(train_c[train_c['batch'] == 6]['signal'],label='cleaned')
plt.legend()
plt.show()

In [0]:
plt.figure(figsize=(20,10))
for i in [0,3,4,5,6]:
  sns.distplot(train[train['batch'] == i]['signal'][::1000],label=f'batch {i}')
plt.legend()
plt.show()

In [0]:
sns.distplot(train_c['signal'],label='cleaned')
sns.distplot(train['signal'],label='not cleaned')
plt.legend()

In [0]:
#train['drift_dis'] = train['signal'] - train_c['signal']
plt.plot(train_c['signal'][::10000])

In [0]:
plt.figure(figsize=(50,5))
sns.scatterplot(train.index,train['open_channels'])

In [0]:
train = pd.read_csv('train_clean.csv')
test = pd.read_csv('test_clean.csv')

In [0]:
# get_batch is a DataPipeLine method, so it is called through ``pipe``.
train = pipe.get_batch(train)
test = pipe.get_batch(test)

In [0]:
vals_gauss = signal.gausspulse(np.array(train[train['batch'] == 3]['signal'][::100]))
vals_org = train[train['batch'] == 3]['signal'][::100].values
vals_chnls = train[train['batch'] == 3]['open_channels'][::100].values
plt.plot(vals_org,label='org data')
plt.plot(vals_gauss,label='gausspulse')
plt.plot(vals_chnls,label='channels')
plt.legend()

In [0]:
# Overlay the signal ("current"), a copy of it scaled by 0.3 ("voltage") and the
# open_channels labels. `vals` is defined here so the cell does not depend on a
# value left over from an earlier run of the notebook.
vals = np.array(train['signal']) * 0.3
plt.plot(train['signal'],'o',label='current')
plt.plot(vals,'^',label='voltage')
plt.plot(train['open_channels'],label='channels')
plt.legend()

In [0]:
# Line plot of the full signal together with the open_channels labels.
plt.plot(train['signal'],label='signal')
plt.plot(train['open_channels'],label='channels')
plt.legend()

In [0]:
# Same data drawn with markers ('+' for signal, '.' for channels) and 21 evenly
# spaced x ticks between 0 and 5e6.
plt.plot(train['signal'],'+',label='signal')
plt.plot(train['open_channels'],'.',label='channels')
plt.xticks(np.linspace(0,5e6,21))
plt.legend()
plt.show()

In [0]:
# Every 1000th sample of the signal.
plt.plot(train['signal'][::1000])

In [0]:
# Compute a constant `a` and store a / signal in a new `c` column of `train`.
# NOTE(review): the expression has the shape of a thermal-noise formula
# sqrt(4*k*T*R*df); if so, Boltzmann's constant is 1.380649e-23, whereas
# 1.308*10e-23 evaluates to 1.308e-22 — confirm the constants.
# NOTE(review): dividing by the signal yields inf wherever the signal is 0.
a = np.sqrt(((4*1.308*10e-23)*230*10*10e3/10e9))
train['c'] = a/train['signal']
plt.plot(train['c'],label='noise')

In [0]:
# Print the signal mean and standard deviation for batch ids 0 through 10.
for i in range(11):
  print(i," => ",train[train['batch'] == i]['signal'].mean(),train[train['batch'] == i]['signal'].std())

In [0]:
# Every 100th signal sample of batch 3.
plt.plot(train[train['batch'] == 3]['signal'][::100])

In [0]:
# Lag plot for batch i: x is the signal rolled by -1 (each sample's successor,
# with the first sample wrapped to the end), y is the signal itself.
i=7
plt.scatter(np.roll(train[train['batch'] == i]['signal'],-1),train[train['batch'] == i]['signal'],s=0.01,label=f'signal {i}')
plt.legend()
plt.grid()
plt.show()
# Relative frequency of each open_channels value within the same batch.
print(train[train['batch'] == i]['open_channels'].value_counts(normalize=True))

In [0]:
# Full-resolution signal of batch 7.
plt.plot(train[train['batch'] == 7]['signal'],label='signal')
plt.legend()
plt.show()

In [0]:
def smooth(x,window_len=11,window='hanning'):
    """Smooth a 1-D signal by convolving it with a normalised window.

    The signal is extended at both ends with mirrored copies of its edge
    samples (``window_len - 1`` per side) before a 'valid' convolution, so the
    result has ``len(x) + window_len - 1`` samples rather than ``len(x)``.

    Parameters
    ----------
    x : 1-D array-like supporting slice indexing
        Input signal.
    window_len : int
        Number of samples in the smoothing window.
    window : str
        ``'flat'`` for a moving average, otherwise the name of a NumPy window
        function taking the window length (e.g. ``'hanning'``, ``'hamming'``,
        ``'bartlett'``, ``'blackman'``).

    Returns
    -------
    numpy.ndarray
        The smoothed, edge-extended signal.

    Raises
    ------
    AttributeError
        If ``window`` is not the name of an attribute of ``numpy``.
    """
    # Mirror the edges so the window always overlaps real data.
    padded = np.r_[x[window_len-1:0:-1],x,x[-2:-window_len-1:-1]]

    if window == 'flat':  # moving average
        weights = np.ones(window_len,'d')
    else:
        # Look the window function up by name instead of eval()-ing a string,
        # so `window` can never execute arbitrary code.
        weights = getattr(np, window)(window_len)

    return np.convolve(weights/weights.sum(),padded,mode='valid')


In [0]:
# Forward difference: next sample minus current sample (the last row is NaN).
a = train.signal.shift(-1) - train.signal

In [0]:
# Scratch calculation; the value is only shown as cell output.
0.19*np.sqrt(2)

In [0]:
# Show the first ten signal values and the first ten entries of `a`.
train['signal'][:10]
a[:10]

In [0]:
# Add 0.19 to negative samples and subtract 0.19 from all others, then plot the
# original signal and, in a new figure, every 1000th adjusted sample.
a = np.where(train['signal']<0,train['signal'] + 0.19,train['signal'] - 0.19)
plt.plot(train.signal)
plt.figure(figsize=(19,5))
plt.plot(a[::1000])

In [0]:
# Rolling mean (window 50) of the first 500 signal samples.
plt.plot(train['signal'][:500].rolling(50).mean(),label='original')

#### For Test Data

In [0]:
# Work on a copy so `test` itself is left unchanged by the cells below.
test2 = test.copy()

In [0]:
# Overview of the test signal: rolling mean (window 30000) plotted against time.
plt.figure(figsize=(20,5))
let = ['A','B','C','D','E','F','G','H','I','J']
r = test.signal.rolling(30000).mean()
plt.plot(test.time.values,r)
# Dotted red lines every 10 time units and solid red lines every 50, from t=500.
for i in range(21): plt.plot([500+i*10,500+i*10],[-3,6],'r:')
for i in range(5): plt.plot([500+i*50,500+i*50],[-3,6],'r')
# Number the four 50-unit spans and letter the first ten 10-unit spans.
for k in range(4): plt.text(525+k*50,5.5,str(k+1),size=20)
for k in range(10): plt.text(505+k*10,4,let[k],size=16)
plt.show()

In [0]:
# Fit signal vs. time over the first 100000 rows (positions a..b-1); only the
# slope and intercept are kept.
a = 0;b=100000
slope, intercept, _, _, _ = stats.linregress(test.loc[test.index[a:b],'time'], test2.signal.values[a:b])

In [0]:
# Subtract (rounded slope * time) from the signal on labels a..b and add the
# fitted intercept.
# NOTE(review): .loc[a:b] includes label b, while the fit used positions a..b-1 —
# confirm the extra row is intended.
# NOTE(review): removing a fitted line would normally subtract the intercept as
# well; here it is added — confirm.
test2.loc[a:b,'signal'] = test2.loc[a:b,'signal'] - (np.round(slope,2)*test2.loc[a:b,'time']) + intercept
plt.plot(test2['signal'])

In [0]:
# Rolling mean (window 1000) of the unmodified test signal.
plt.plot(test['signal'].rolling(1000).mean())

# Garbage

In [0]:
%%capture
# Disabled code: the function below sits inside a string literal, so it is never
# defined or executed.
# NOTE(review): if revived, the `_var` column maps `group_mean` rather than
# `group_var`, and the lookup dicts are keyed by open_channels while being
# applied to the `group` column.
"""def get_group_channel_agg(train,test):
  groups = train.group.unique().tolist()
  groups.sort()
  group_max = {}
  group_min = {}
  group_mean = {}
  group_std = {}
  group_var = {}
  for i in tqdm_notebook(groups):
    group_mean[i] = {}
    group_mean[i].update(train[train['group'] == i].groupby('open_channels')['signal'].mean().to_dict())

    group_max[i] = {}
    group_max[i].update(train[train['group'] == i].groupby('open_channels')['signal'].max().to_dict())

    group_min[i] = {}
    group_min[i].update(train[train['group'] == i].groupby('open_channels')['signal'].min().to_dict())

    group_std[i] = {}
    group_std[i].update(train[train['group'] == i].groupby('open_channels')['signal'].std().to_dict())

    group_var[i] = {}
    group_var[i].update(train[train['group'] == i].groupby('open_channels')['signal'].var().to_dict())

  for df in tqdm_notebook([train,test]):
    for x in groups:
      df[f'group_{x}_channel_sig_mean'] = df['group'].map(group_mean[x])
      df[f'group_{x}_channel_sig_max'] = df['group'].map(group_max[x])
      df[f'group_{x}_channel_sig_min'] = df['group'].map(group_min[x])
      df[f'group_{x}_channel_sig_std'] = df['group'].map(group_std[x])
      df[f'group_{x}_channel_sig_var'] = df['group'].map(group_mean[x])

  return train, test"""

In [0]:
%%capture
# Disabled code: these loops sit inside a string literal and never run. They
# would add `<column>_msignal` = column - signal for column positions 6..107.
"""for x in tqdm_notebook(train.columns[6:57]):
  train[f'{x}_msignal'] = train[x] - train['signal']

for x in tqdm_notebook(train.columns[6:57]):
  test[f'{x}_msignal'] = test[x] - test['signal']

for x in tqdm_notebook(train.columns[57:108]):
  train[f'{x}_msignal'] = train[x] - train['signal']

for x in tqdm_notebook(train.columns[57:108]):
  test[f'{x}_msignal'] = test[x] - test['signal']"""

In [0]:
def _signal_group_stats(grouped):
    """Compute the SciPy-based statistics once for every group of ``grouped``.

    ``grouped`` is a ``SeriesGroupBy`` over the ``signal`` column. The result
    maps a statistic name to a ``{group key: value}`` dict usable with
    ``Series.map``.
    """
    names = (
        'sem', 'kurt_zscore', 'kurt_pvalue', 'skew_zscore', 'skew_pvalue',
        'zscore_mean', 'zscore_min', 'zscore_max', 'zscore_median',
        'zscore_std', 'zscore_var',
        'relfreq_freq_mean', 'relfreq_freq_min', 'relfreq_freq_max',
        'relfreq_freq_std', 'relfreq_freq_median', 'relfreq_freq_var',
        'relfreq_lower_limit', 'relfreq_binsize',
    )
    out = {name: {} for name in names}
    for key, values in grouped:
        arr = values.values
        kurt = stats.kurtosistest(arr)      # (statistic, pvalue)
        skew = stats.skewtest(arr)          # (statistic, pvalue)
        zsc = stats.zscore(arr)
        # relfreq returns (frequency, lowerlimit, binsize, extrapoints)
        freq, lower_limit, binsize = stats.relfreq(arr)[:3]

        out['sem'][key] = stats.sem(arr)
        out['kurt_zscore'][key] = kurt[0]
        out['kurt_pvalue'][key] = kurt[1]
        out['skew_zscore'][key] = skew[0]
        out['skew_pvalue'][key] = skew[1]
        out['zscore_mean'][key] = np.mean(zsc)
        out['zscore_min'][key] = np.min(zsc)
        out['zscore_max'][key] = np.max(zsc)
        out['zscore_median'][key] = np.median(zsc)
        out['zscore_std'][key] = np.std(zsc)
        out['zscore_var'][key] = np.var(zsc)
        out['relfreq_freq_mean'][key] = np.mean(freq)
        out['relfreq_freq_min'][key] = np.min(freq)
        out['relfreq_freq_max'][key] = np.max(freq)
        out['relfreq_freq_std'][key] = np.std(freq)
        out['relfreq_freq_median'][key] = np.median(freq)
        out['relfreq_freq_var'][key] = np.var(freq)
        out['relfreq_lower_limit'][key] = lower_limit
        out['relfreq_binsize'][key] = binsize
    return out


def get_features(df):
    """Add rolling, per-group and signal-difference features to ``df`` in place.

    For each of the grouping columns ``batch`` and ``batch_slices2`` the signal
    is aggregated per group (mean, median, std, extrema, diffs, skew, kurtosis,
    SciPy normality tests, z-score and relative-frequency summaries) and the
    aggregate is broadcast back to every row of the group. Within-group shifted
    signals and a few combinations of the aggregates are added as well. The
    ``batch_slices2`` level additionally gets the group sum, the mean absolute
    deviation (via ``mean_abs_dev``, defined elsewhere in this notebook) and
    four columns derived from it. Finally, every column except the identifier
    columns gets a ``<column>_msignal`` companion holding ``column - signal``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``signal``, ``batch`` and ``batch_slices2`` columns.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, with the new columns appended in a fixed order.
    """
    df['rolling_signal_mean'] = df['signal'].rolling(10,min_periods=1).mean()
    df['rolling_signal_median'] = df['signal'].rolling(10,min_periods=1).median()

    for x in tqdm_notebook(['batch','batch_slices2']):
        # `!=` rather than `is not`: identity of string literals is not guaranteed.
        is_slice_level = x != 'batch'
        grouped = df.groupby(x)['signal']

        # All aggregates are computed before any column is added, and each
        # SciPy statistic is evaluated exactly once per group.
        sig_mean = grouped.mean().to_dict()
        sig_median = grouped.median().to_dict()
        sig_std = grouped.std().to_dict()
        sig_max = grouped.max().to_dict()
        sig_min = grouped.min().to_dict()
        sig_mean_abs_diff = grouped.apply(lambda s: np.mean(np.absolute(np.diff(s)))).to_dict()
        sig_mean_non_abs_diff = grouped.apply(lambda s: np.mean(np.diff(s))).to_dict()
        sig_skew = grouped.skew().to_dict()
        sig_kurt = grouped.apply(pd.Series.kurt).to_dict()
        extra = _signal_group_stats(grouped)
        shift_pos = grouped.shift()
        shift_neg = grouped.shift(-1)

        if is_slice_level:
            sig_mad = grouped.apply(mean_abs_dev).to_dict()
            sig_sum = grouped.sum().to_dict()
            df[f'{x}_sig_mad'] = df[x].map(sig_mad)
            df[f'{x}_sig_sum'] = df[x].map(sig_sum)

        df[f'{x}_sig_mean'] = df[x].map(sig_mean)
        df[f'{x}_sig_median'] = df[x].map(sig_median)
        df[f'{x}_sig_std'] = df[x].map(sig_std)
        df[f'{x}_sig_max'] = df[x].map(sig_max)
        df[f'{x}_sig_min'] = df[x].map(sig_min)
        # NOTE(review): despite the names these are the negated min/max, not
        # absolute values — kept as-is so existing feature values do not move.
        df[f'{x}_sig_abs_max'] = 0 - df[f'{x}_sig_min']
        df[f'{x}_sig_abs_min'] = 0 - df[f'{x}_sig_max']
        df[f'{x}_sig_mean_abs_diff'] = df[x].map(sig_mean_abs_diff)
        df[f'{x}_sig_mean_non_abs_diff'] = df[x].map(sig_mean_non_abs_diff)
        df[f'{x}_range'] = df[f'{x}_sig_max'] - df[f'{x}_sig_min']
        df[f'{x}_max_by_min'] = df[f'{x}_sig_max'] / df[f'{x}_sig_min']
        df[f'{x}_abs_min_max_avg'] = (df[f'{x}_sig_abs_max'] + df[f'{x}_sig_abs_min'])/2.0
        df[f'{x}_min_max_avg'] = (df[f'{x}_sig_max'] + df[f'{x}_sig_min'])/2.0
        df[f'{x}_sig_shift_pos'] = shift_pos
        df[f'{x}_sig_shift_neg'] = shift_neg
        df[f'{x}_max_to_abs_min_diff'] = df[f'{x}_sig_max'] - np.absolute(df[f'{x}_sig_min'])

        # (column suffix, per-group lookup) in the original insertion order.
        mapped = [
            ('_sig_kurtosis', sig_kurt),
            ('_sig_skew', sig_skew),
            ('_sig_kurt_zscore', extra['kurt_zscore']),
            ('_sig_kurt_pvalue', extra['kurt_pvalue']),
            ('_sig_skew_zscore', extra['skew_zscore']),
            ('_sig_skew_pvalue', extra['skew_pvalue']),
            ('_sig_sem', extra['sem']),
            ('_sig_zscore_mean', extra['zscore_mean']),
            ('_sig_zscore_min', extra['zscore_min']),
            ('_sig_zscore_max', extra['zscore_max']),
            ('_sig_zscore_median', extra['zscore_median']),
            ('_sig_zscore_std', extra['zscore_std']),
            ('_sig_zscore_var', extra['zscore_var']),
            ('_sig_relfreq_freq_mean', extra['relfreq_freq_mean']),
            ('_sig_relfreq_freq_min', extra['relfreq_freq_min']),
            ('_sig_relfreq_freq_max', extra['relfreq_freq_max']),
            ('_sig_relfreq_freq_std', extra['relfreq_freq_std']),
            ('_sig_relfreq_freq_median', extra['relfreq_freq_median']),
            ('_sig_relfreq_freq_var', extra['relfreq_freq_var']),
            # The missing underscore is the historical column name; it is kept
            # so code that selects columns by name keeps working.
            ('sig_relfreq_lower_limit', extra['relfreq_lower_limit']),
            ('_sig_relfreq_binsize', extra['relfreq_binsize']),
        ]
        for suffix, lookup in mapped:
            df[f'{x}{suffix}'] = df[x].map(lookup)

        if is_slice_level:
            df[f'{x}_abs_min_range_from_mean'] = df[f'{x}_sig_abs_min'] - df[f'{x}_sig_mean']
            df[f'{x}_abs_max_range_from_mean'] = df[f'{x}_sig_abs_max'] - df[f'{x}_sig_mean']
            df[f'{x}_abs_min_range_from_mad'] = df[f'{x}_sig_abs_min'] - df[f'{x}_sig_mad']
            df[f'{x}_abs_max_range_from_mad'] = df[f'{x}_sig_abs_max'] - df[f'{x}_sig_mad']

    df['signal_rolling_10_sum'] = df['signal'].rolling(10,min_periods=1).sum()
    df['signal_rolling_10_mean'] = df['signal'].rolling(10,min_periods=1).mean()
    df['signal_shift_pos'] = df['signal'].shift()
    df['signal_shift_neg'] = df['signal'].shift(-1)

    # Snapshot the column list first: the loop adds columns while it runs.
    excluded = ['time','signal','open_channels','batch_index','batch','batch_slices2','batch_slices']
    for c in [c for c in df.columns if c not in excluded]:
        df[f'{c}_msignal'] = df[c] - df['signal']
    return df