In [1]:
from typing import List, NoReturn, Union, Tuple, Optional, Text, Generic, Callable, Dict

import numpy as np 
import pandas as pd
from sklearn import *
import lightgbm as lgb
import optuna.integration.lightgbm as lgbopt

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import StratifiedKFold, KFold
from sklearn.metrics import mean_absolute_error
from sklearn.linear_model import LinearRegression

import plotly.graph_objects as go
from plotly.subplots import make_subplots

import matplotlib.pyplot as plt
%matplotlib inline

import seaborn as sns
sns.set()

pd.set_option("display.precision", 8)

from functools import partial
import scipy as sp

import os
from sklearn.metrics import f1_score

import pywt 
from statsmodels.robust import mad

import scipy
from scipy import signal
from scipy.signal import butter, deconvolve, find_peaks, peak_widths, peak_prominences

from numpy.fft import *

import time
import math
from numba import jit
from math import log, floor
from sklearn.neighbors import KDTree

import itertools
import warnings
import time
import pywt
import os
import gc

train = pd.read_csv('/Users/siero5335/channel/train.csv')
test = pd.read_csv('/Users/siero5335/channel/test.csv')
train2 = pd.read_csv('/Users/siero5335/channel/train2.csv')
test2 = pd.read_csv('/Users/siero5335/channel/test2.csv')


sample_submission = pd.read_csv('/Users/siero5335/channel/sample_submission.csv')

train2 = train2.iloc[:,1]
test2 = test2.iloc[:,1]

train = pd.concat([train, train2], axis=1)
test = pd.concat([test, test2], axis=1)

In [2]:
train = train.drop('signal', axis = 1)
test = test.drop('signal', axis = 1)

train = train.rename(columns={'signal_chris': 'signal'})
test = test.rename(columns={'signal_chris': 'signal'})

del train2, test2
gc.collect()

37

In [3]:
def maddest(d, axis=None):
    return np.mean(np.absolute(d - np.mean(d, axis)), axis)

def high_pass_filter(x, low_cutoff=1000, sample_rate=10000):

    nyquist = 0.5 * sample_rate
    norm_low_cutoff = low_cutoff / nyquist
    print(norm_low_cutoff)
    sos = butter(10, Wn=[norm_low_cutoff], btype='highpass', output='sos')
    filtered_sig = signal.sosfilt(sos, x)

    return filtered_sig

def denoise_signal( x, wavelet='db4', level=1):
    
    coeff = pywt.wavedec( x, wavelet, mode="per" )
    sigma = (1/0.6745) * maddest( coeff[-level] )
    uthresh = sigma * np.sqrt( 2*np.log( len( x ) ) )
    coeff[1:] = ( pywt.threshold( i, value=uthresh, mode='hard' ) for i in coeff[1:] )
    return pywt.waverec( coeff, wavelet, mode='per' )

train['signal_wave'] = denoise_signal(train['signal'])
test['signal_wave'] = denoise_signal(test['signal'])

def filter_signal(signal, threshold=1e8):
    fourier = rfft(signal)
    frequencies = rfftfreq(signal.size, d=20e-3/signal.size)
    fourier[frequencies > threshold] = 0
    return irfft(fourier)

train['signal_FFT_1e5'] = filter_signal(train['signal'], threshold=5e3)
test['signal_FFT_1e5'] = filter_signal(test['signal'], threshold=5e3)

In [4]:
def _embed(x, order=3, delay=1):
    N = len(x)
    if order * delay > N:
        raise ValueError("Error: order * delay should be lower than x.size")
    if delay < 1:
        raise ValueError("Delay has to be at least 1.")
    if order < 2:
        raise ValueError("Order has to be at least 2.")
    Y = np.zeros((order, N - (order - 1) * delay))
    for i in range(order):
        Y[i] = x[i * delay:i * delay + Y.shape[1]]
    return Y.T

all = ['perm_entropy', 'spectral_entropy', 'svd_entropy', 'app_entropy',
       'sample_entropy']


def perm_entropy(x, order=3, delay=1, normalize=False):
    x = np.array(x)
    ran_order = range(order)
    hashmult = np.power(order, ran_order)
    # Embed x and sort the order of permutations
    sorted_idx = _embed(x, order=order, delay=delay).argsort(kind='quicksort')
    # Associate unique integer to each permutations
    hashval = (np.multiply(sorted_idx, hashmult)).sum(1)
    # Return the counts
    _, c = np.unique(hashval, return_counts=True)
    # Use np.true_divide for Python 2 compatibility
    p = np.true_divide(c, c.sum())
    pe = -np.multiply(p, np.log2(p)).sum()
    if normalize:
        pe /= np.log2(factorial(order))
    return pe

In [5]:
def _log_n(min_n, max_n, factor):
    max_i = int(floor(log(1.0 * max_n / min_n) / log(factor)))
    ns = [min_n]
    for i in range(max_i + 1):
        n = int(floor(min_n * (factor ** i)))
        if n > ns[-1]:
            ns.append(n)
    return np.array(ns, dtype=np.int64)

def _higuchi_fd(x, kmax):
    n_times = x.size
    lk = np.empty(kmax)
    x_reg = np.empty(kmax)
    y_reg = np.empty(kmax)
    for k in range(1, kmax + 1):
        lm = np.empty((k,))
        for m in range(k):
            ll = 0
            n_max = floor((n_times - m - 1) / k)
            n_max = int(n_max)
            for j in range(1, n_max):
                ll += abs(x[m + j * k] - x[m + (j - 1) * k])
            ll /= k
            ll *= (n_times - 1) / (k * n_max)
            lm[m] = ll
        # Mean of lm
        m_lm = 0
        for m in range(k):
            m_lm += lm[m]
        m_lm /= k
        lk[k - 1] = m_lm
        x_reg[k - 1] = log(1. / k)
        y_reg[k - 1] = log(m_lm)
    higuchi, _ = _linear_regression(x_reg, y_reg)
    return higuchi


def higuchi_fd(x, kmax=10):
    x = np.asarray(x, dtype=np.float64)
    kmax = int(kmax)
    return _higuchi_fd(x, kmax)

def _linear_regression(x, y):
    n_times = x.size
    sx2 = 0
    sx = 0
    sy = 0
    sxy = 0
    for j in range(n_times):
        sx2 += x[j] ** 2
        sx += x[j]
        sxy += x[j] * y[j]
        sy += y[j]
    den = n_times * sx2 - (sx ** 2)
    num = n_times * sxy - sx * sy
    slope = num / den
    intercept = np.mean(y) - slope * np.mean(x)
    return slope, intercept

def katz_fd(x):
    x = np.array(x)
    dists = np.abs(np.ediff1d(x))
    ll = dists.sum()
    ln = np.log10(np.divide(ll, dists.mean()))
    aux_d = x - x[0]
    d = np.max(np.abs(aux_d[1:]))
    return np.divide(ln, np.add(ln, np.log10(np.divide(d, ll))))

In [6]:
def reduce_mem_usage(df, verbose=True):
    numerics = ['int16', 'int32', 'int64', 'float16', 'float32', 'float64']
    start_mem = df.memory_usage().sum() / 1024**2    
    for col in df.columns:
        if col!='open_channels':
            col_type = df[col].dtypes
            if col_type in numerics:
                c_min = df[col].min()
                c_max = df[col].max()
                if str(col_type)[:3] == 'int':
                    if c_min > np.iinfo(np.int8).min and c_max < np.iinfo(np.int8).max:
                        df[col] = df[col].astype(np.int8)
                    elif c_min > np.iinfo(np.int16).min and c_max < np.iinfo(np.int16).max:
                        df[col] = df[col].astype(np.int16)
                    elif c_min > np.iinfo(np.int32).min and c_max < np.iinfo(np.int32).max:
                        df[col] = df[col].astype(np.int32)
                    elif c_min > np.iinfo(np.int64).min and c_max < np.iinfo(np.int64).max:
                        df[col] = df[col].astype(np.int64)  
                else:
                    if c_min > np.finfo(np.float16).min and c_max < np.finfo(np.float16).max:
                        df[col] = df[col].astype(np.float16)
                    elif c_min > np.finfo(np.float32).min and c_max < np.finfo(np.float32).max:
                        df[col] = df[col].astype(np.float32)
                    else:
                        df[col] = df[col].astype(np.float64)    
    end_mem = df.memory_usage().sum() / 1024**2
    if verbose: print('Mem. usage decreased to {:5.2f} Mb ({:.1f}% reduction)'.format(end_mem, 100 * (start_mem - end_mem) / start_mem))
    return df

In [7]:
def p1( x : pd.Series) -> pd.Series : return x.quantile(0.01)
def p5(x : pd.Series) -> pd.Series : return x.quantile(0.05)
def p95( x : pd.Series) -> pd.Series : return x.quantile(0.95)
def p99(x : pd.Series) -> pd.Series : return x.quantile(0.99)

In [8]:
zscore = lambda x: (x - x.mean()) / x.std()

window_sizes = [5, 10, 25, 50, 100, 500, 1000, 5000]

for window in window_sizes:
    train["rolling_mean_" + str(window)] = train['signal'].rolling(window=window).mean()
    train["rolling_std_" + str(window)] = train['signal'].rolling(window=window).std()
    train["rolling_min_" + str(window)] = train['signal'].rolling(window=window).min()
    train["rolling_max_" + str(window)] = train['signal'].rolling(window=window).max()
    
for window in window_sizes:
    test["rolling_mean_" + str(window)] = test['signal'].rolling(window=window).mean()
    test["rolling_std_" + str(window)] = test['signal'].rolling(window=window).std()
    test["rolling_min_" + str(window)] = test['signal'].rolling(window=window).min()
    test["rolling_max_" + str(window)] = test['signal'].rolling(window=window).max()
    
def features(df):
    df = df.sort_values(by=['time']).reset_index(drop=True)
    df.index = ((df.time * 10_000) - 1).values
    df['batch'] = df.index // 50_000
    df['batch_index'] = df.index  - (df.batch * 50_000)
    df['batch_slices'] = df['batch_index']  // 5_000
    df['batch_slices2'] = df.apply(lambda r: '_'.join([str(r['batch']).zfill(3), str(r['batch_slices']).zfill(3)]), axis=1)
    
    for c in ['batch','batch_slices2']:
        d = {}
                                            
        d['mean'+c] = df.groupby([c])['signal'].mean()
        d['median'+c] = df.groupby([c])['signal'].median()
        d['max'+c] = df.groupby([c])['signal'].max()
        d['min'+c] = df.groupby([c])['signal'].min()
        d['std'+c] = df.groupby([c])['signal'].std()
        d['skew'+c] = df.groupby([c])['signal'].skew()
        
        d['q1'+c] = df.groupby([c])['signal'].apply(lambda x:p1(x))
        d['q5'+c] = df.groupby([c])['signal'].apply(lambda x:p5(x))
        d['q95'+c] = df.groupby([c])['signal'].apply(lambda x:p95(x))       
        d['q99'+c] = df.groupby([c])['signal'].apply(lambda x:p99(x))                   
        
        d['signal_batch'+c] = df.groupby([c])['signal'].transform(zscore)
        d['perm'+c] = df.groupby([c])['signal'].apply(lambda x:perm_entropy(x))
        d['higuchi'+c] = df.groupby([c])['signal'].apply(lambda x:higuchi_fd(x))
        d['katz'+c] = df.groupby([c])['signal'].apply(lambda x:katz_fd(x))
                
        d['mean_abs_chg'+c] = df.groupby([c])['signal'].apply(lambda x: np.mean(np.abs(np.diff(x))))
        d['abs_max'+c] = df.groupby([c])['signal'].apply(lambda x: np.max(np.abs(x)))
        d['abs_min'+c] = df.groupby([c])['signal'].apply(lambda x: np.min(np.abs(x)))
        for v in d:
            df[v] = df[c].map(d[v].to_dict())
        df['range'+c] = df['max'+c] - df['min'+c]
        df['maxtomin'+c] = df['max'+c] / df['min'+c]
        df['abs_avg'+c] = (df['abs_min'+c] + df['abs_max'+c]) / 2
    
    #add shifts
    df['signal_shift_+1'] = [0,] + list(df['signal'].values[:-1])
    df['signal_shift_-1'] = list(df['signal'].values[1:]) + [0]
    for i in df[df['batch_index']==0].index:
        df['signal_shift_+1'][i] = np.nan
    for i in df[df['batch_index']==49999].index:
        df['signal_shift_-1'][i] = np.nan
        
    df['signal_shift_wave_+1'] = [0,] + list(df['signal_wave'].values[:-1])
    df['signal_shift_wave_-1'] = list(df['signal_wave'].values[1:]) + [0]
    for i in df[df['batch_index']==0].index:
        df['signal_shift_wave_+1'][i] = np.nan
    for i in df[df['batch_index']==49999].index:
        df['signal_shift_wave_-1'][i] = np.nan
        
        
    df['signal_shift_+1_FFT'] = [0,] + list(df['signal_FFT_1e5'].values[:-1])
    df['signal_shift_-1_FFT'] = list(df['signal_FFT_1e5'].values[1:]) + [0]
    for i in df[df['batch_index']==0].index:
        df['signal_shift_+1_FFT'][i] = np.nan
    for i in df[df['batch_index']==49999].index:
        df['signal_shift_-1_FFT'][i] = np.nan
        
    for c in [c1 for c1 in df.columns if c1 not in ['time', 'signal', 'open_channels', 'batch', 'batch_index', 'batch_slices', 'batch_slices2', 'signal_wave']]:
        df[c+'_msignal'] = df[c] - df['signal']
        
    return df

train = features(train)
test = features(test)

train['signal_shift+2'] = train.groupby(['batch']).shift(2)['signal']
test['signal_shift+2'] = test.groupby(['batch']).shift(2)['signal']
train['signal_shift-2'] = train.groupby(['batch']).shift(-2)['signal']
test['signal_shift-2'] = test.groupby(['batch']).shift(-2)['signal']


train['signal_shift+2_wave_-1'] = train.groupby(['batch']).shift(2)['signal_wave']
test['signal_shift+2_wave_-1'] = test.groupby(['batch']).shift(2)['signal_wave']
train['signal_shift-2_wave_-1'] = train.groupby(['batch']).shift(-2)['signal_wave']
test['signal_shift-2_wave_-1'] = test.groupby(['batch']).shift(-2)['signal_wave']


train.fillna(0, inplace=True)
test.fillna(0, inplace=True)

train_y = train['open_channels']



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexi

In [9]:
train.head()

Unnamed: 0,time,open_channels,signal,signal_wave,signal_FFT_1e5,rolling_mean_5,rolling_std_5,rolling_min_5,rolling_max_5,rolling_mean_10,...,signal_shift_+1_msignal,signal_shift_-1_msignal,signal_shift_wave_+1_msignal,signal_shift_wave_-1_msignal,signal_shift_+1_FFT_msignal,signal_shift_-1_FFT_msignal,signal_shift+2,signal_shift-2,signal_shift+2_wave_-1,signal_shift-2_wave_-1
0.0,0.0001,0,-2.76,-2.49931891,0.36733109,0.0,0.0,0.0,0.0,0.0,...,0.0,-0.0957,0.0,-0.7858058,0.0,3.12709392,0.0,-2.4074,0.0,-0.95419929
1.0,0.0002,0,-2.8557,-3.5458058,0.36709392,0.0,0.0,0.0,0.0,0.0,...,0.0957,0.4483,0.35638109,1.90150071,3.22303109,3.22255675,0.0,-3.1404,0.0,-1.32866244
2.0,0.0003,0,-2.4074,-0.95419929,0.36685675,0.0,0.0,0.0,0.0,0.0,...,-0.4483,-0.733,-1.1384058,1.07873756,2.77449392,2.77401957,-2.76,-3.1525,-2.49931891,-3.60514371
3.0,0.0004,0,-3.1404,-1.32866244,0.36661957,0.0,0.0,0.0,0.0,0.0,...,0.733,-0.0121,2.18620071,-0.46474371,3.50725675,3.5067824,-2.8557,-2.6418,-3.5458058,-4.06727689
4.0,0.0005,0,-3.1525,-3.60514371,0.3663824,-2.8632,0.30781263,-3.1525,-2.4074,0.0,...,0.0121,0.5107,1.82383756,-0.91477689,3.51911957,3.51864523,-2.4074,-2.6993,-0.95419929,-3.59025109


In [10]:
test.head()

Unnamed: 0,time,signal,signal_wave,signal_FFT_1e5,rolling_mean_5,rolling_std_5,rolling_min_5,rolling_max_5,rolling_mean_10,rolling_std_10,...,signal_shift_+1_msignal,signal_shift_-1_msignal,signal_shift_wave_+1_msignal,signal_shift_wave_-1_msignal,signal_shift_+1_FFT_msignal,signal_shift_-1_FFT_msignal,signal_shift+2,signal_shift-2,signal_shift+2_wave_-1,signal_shift-2_wave_-1
5000000.0,500.0001,-2.64983,-2.70763568,-2.68908707,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,-0.19963,0.0,-0.05779149,0.0,-0.03925862,0.0,-2.86009,0.0,-2.7076072
5000001.0,500.0002,-2.84946,-2.70762149,-2.68908862,0.0,0.0,0.0,0.0,0.0,0.0,...,0.19963,-0.01063,0.14182432,0.1418528,0.16037293,0.16036982,0.0,-2.43512,0.0,-2.70759352
5000002.0,500.0003,-2.86009,-2.7076072,-2.68909018,0.0,0.0,0.0,0.0,0.0,0.0,...,0.01063,0.42497,0.15246851,0.15249648,0.17100138,0.17099828,-2.64983,-2.61565,-2.70763568,-2.70758051
5000003.0,500.0004,-2.43512,-2.70759352,-2.68909172,0.0,0.0,0.0,0.0,0.0,0.0,...,-0.42497,-0.18053,-0.2724872,-0.27246051,-0.25397018,-0.25397327,-2.84946,-2.56608,-2.70762149,-2.70756785
5000004.0,500.0005,-2.61565,-2.70758051,-2.68909327,-2.68203,0.17758509,-2.86009,-2.43512,0.0,0.0,...,0.18053,0.04957,-0.09194352,-0.09191785,-0.07344172,-0.07344481,-2.86009,-2.73801,-2.7076072,-2.70755576


In [11]:
#train.to_csv('/Users/siero5335/channel/train_mod.csv')
#test.to_csv('/Users/siero5335/channel/test_mod.csv')

In [12]:
col = [c for c in train.columns if c not in ['time', 'open_channels', 'batch', 'batch_index', 'batch_slices', 'batch_slices2']]

train = train[col]
test = test[col]

n_fold = 5
folds = StratifiedKFold(n_splits=n_fold, shuffle=True, random_state=42)

In [16]:
from catboost import Pool,CatBoostRegressor

model_cat = CatBoostRegressor(task_type = "CPU",
                          iterations=10000,
                          learning_rate=0.2,
                          random_seed = 71,
                          min_data_in_leaf = 20,    
                          depth=8,
                          max_leaves = 256,
                          eval_metric='MAE',
                          early_stopping_rounds = 50,
                          verbose=100)

oof_cat = np.zeros(len(train))
prediction_cat = np.zeros(len(test))
scores = []

for i, (train_index, valid_index) in enumerate(folds.split(train, train_y)):
    
    # Create data for this fold
    y_train, y_valid = train_y.iloc[train_index], train_y.iloc[valid_index]
    X_train, X_valid = train.iloc[train_index], train.iloc[valid_index]
    print( "\nFold ", i)
    
    # Run model for this fold

    fit_model = model_cat.fit( X_train, y_train, 
                               eval_set=(X_valid, y_valid),
                               use_best_model=True
                             )

        
    # Generate validation predictions for this fold
    y_pred_valid_cat = fit_model.predict(X_valid)
    y_pred_cat = fit_model.predict(test)

    oof_cat[valid_index] = y_pred_valid_cat.reshape(-1,)
    scores.append(mean_absolute_error(y_valid, y_pred_valid_cat))

    prediction_cat += y_pred_cat

prediction_cat /= n_fold


Fold  0
0:	learn: 1.7578942	test: 1.7578686	best: 1.7578686 (0)	total: 4.53s	remaining: 12h 34m 39s
100:	learn: 0.0652654	test: 0.0653574	best: 0.0653574 (100)	total: 4m 39s	remaining: 7h 37m 13s
200:	learn: 0.0599919	test: 0.0602533	best: 0.0602533 (200)	total: 8m 31s	remaining: 6h 55m 53s
300:	learn: 0.0577835	test: 0.0582344	best: 0.0582344 (300)	total: 12m 42s	remaining: 6h 53m 50s
400:	learn: 0.0564593	test: 0.0571196	best: 0.0571196 (400)	total: 16m 6s	remaining: 6h 32m 35s
500:	learn: 0.0555456	test: 0.0564319	best: 0.0564319 (500)	total: 18m 59s	remaining: 6h 5m 8s
600:	learn: 0.0548789	test: 0.0559877	best: 0.0559868 (598)	total: 21m 37s	remaining: 5h 42m 16s
700:	learn: 0.0543327	test: 0.0556605	best: 0.0556605 (700)	total: 24m 18s	remaining: 5h 25m 46s
800:	learn: 0.0538154	test: 0.0553612	best: 0.0553610 (799)	total: 27m 31s	remaining: 5h 18m 49s
900:	learn: 0.0534430	test: 0.0552029	best: 0.0552009 (896)	total: 30m 37s	remaining: 5h 11m 42s
1000:	learn: 0.0531121	test: 0.

In [17]:
class OptimizedRounder(object):
    """
    An optimizer for rounding thresholds
    to maximize F1 (Macro) score
    # https://www.kaggle.com/naveenasaithambi/optimizedrounder-improved
    """
    def __init__(self):
        self.coef_ = 0

    def _f1_loss(self, coef, X, y):
        """
        Get loss according to
        using current coefficients
        
        :param coef: A list of coefficients that will be used for rounding
        :param X: The raw predictions
        :param y: The ground truth labels
        """
        X_p = pd.cut(X, [-np.inf] + list(np.sort(coef)) + [np.inf], labels = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10])

        return -f1_score(y, X_p, average = 'macro')

    def fit(self, X, y):
        """
        Optimize rounding thresholds
        
        :param X: The raw predictions
        :param y: The ground truth labels
        """
        loss_partial = partial(self._f1_loss, X=X, y=y)
        initial_coef = [0.5, 1.5, 2.5, 3.5, 4.5, 5.5, 6.5, 7.5, 8.5, 9.5]
        self.coef_ = sp.optimize.minimize(loss_partial, initial_coef, method='nelder-mead')

    def predict(self, X, coef):
        """
        Make predictions with specified thresholds
        
        :param X: The raw predictions
        :param coef: A list of coefficients that will be used for rounding
        """
        return pd.cut(X, [-np.inf] + list(np.sort(coef)) + [np.inf], labels = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10])


    def coefficients(self):
        """
        Return the optimized coefficients
        """
        return self.coef_['x']

In [18]:
optR = OptimizedRounder()
optR.fit(oof_cat.reshape(-1,), train_y)
coefficients = optR.coefficients()
print(coefficients)

[0.5111888  1.5230456  2.49941259 3.51441169 4.49577515 5.48484888
 6.51607377 7.5201313  8.47953926 9.47507369]


In [22]:
opt_preds = optR.predict(oof_cat.reshape(-1,), coefficients)
f1_score(train_y, opt_preds, average = 'macro')

0.9381343361035874

In [24]:
prediction_cat[prediction_cat <= coefficients[0]] = 0
prediction_cat[np.where(np.logical_and(prediction_cat > coefficients[0], prediction_cat <= coefficients[1]))] = 1
prediction_cat[np.where(np.logical_and(prediction_cat > coefficients[1], prediction_cat <= coefficients[2]))] = 2
prediction_cat[np.where(np.logical_and(prediction_cat > coefficients[2], prediction_cat <= coefficients[3]))] = 3
prediction_cat[np.where(np.logical_and(prediction_cat > coefficients[3], prediction_cat <= coefficients[4]))] = 4
prediction_cat[np.where(np.logical_and(prediction_cat > coefficients[4], prediction_cat <= coefficients[5]))] = 5
prediction_cat[np.where(np.logical_and(prediction_cat > coefficients[5], prediction_cat <= coefficients[6]))] = 6
prediction_cat[np.where(np.logical_and(prediction_cat > coefficients[6], prediction_cat <= coefficients[7]))] = 7
prediction_cat[np.where(np.logical_and(prediction_cat > coefficients[7], prediction_cat <= coefficients[8]))] = 8
prediction_cat[np.where(np.logical_and(prediction_cat > coefficients[8], prediction_cat <= coefficients[9]))] = 9
prediction_cat[prediction_cat > coefficients[9]] = 10

In [25]:
sample_submission['open_channels'] = prediction_cat.astype(np.int)
sample_submission.to_csv('maeopt10.csv', index=False, float_format='%.4f')