In [None]:
import sys
# Copy the RAPIDS archive from the ../input/rapids dataset directory into the
# conda envs directory, giving it a .tar.gz name (the source file name
# suggests RAPIDS 0.13.0).
!cp ../input/rapids/rapids.0.13.0 /opt/conda/envs/rapids.tar.gz
# Unpack the archive in place; tar's verbose file listing is discarded.
!cd /opt/conda/envs/ && tar -xzvf rapids.tar.gz > /dev/null
# Prepend the unpacked environment's directories to the import search path.
# Each statement prepends, so the final lookup order is:
#   rapids/lib, rapids/lib/python3.6, rapids/lib/python3.6/site-packages, <previous sys.path>
sys.path = ["/opt/conda/envs/rapids/lib/python3.6/site-packages"] + sys.path
sys.path = ["/opt/conda/envs/rapids/lib/python3.6"] + sys.path
sys.path = ["/opt/conda/envs/rapids/lib"] + sys.path 
# Copy the environment's libxgboost.so into the base conda lib directory
# (presumably so the shared library can be located at load time -- confirm).
!cp /opt/conda/envs/rapids/lib/libxgboost.so /opt/conda/lib/

In [None]:
import pandas as pd
import numpy as np

#from sklearn.ensemble import RandomForestClassifier
from cuml.ensemble import RandomForestRegressor
from sklearn.pipeline import make_pipeline
from sklearn.base import BaseEstimator, TransformerMixin

import cudf

In [None]:
class ShiftedFeatureMaker(BaseEstimator, TransformerMixin):
    """Stateless transformer that appends shifted copies of one column.

    For every shift ``p`` a new column named ``"<column>_shifted_<p>"`` is
    added, holding ``X[column].shift(p)``. The augmented frame is then
    converted with ``cudf.from_pandas`` and returned as a cudf DataFrame.

    Parameters
    ----------
    periods : iterable of int, default [1]
        Shifts to apply. The value is only read (converted with
        ``np.asarray``), never mutated, so the list default is not shared
        state that can go stale.
    column : str, default "signal"
        Name of the column to shift.
    add_minus : bool, default False
        When true, the negated value of every entry in ``periods`` is
        applied as well.
    fill_value : scalar or None, default None
        Forwarded to ``Series.shift`` for the positions vacated by the shift.
    copy : bool, default True
        When true the input frame is copied before columns are added;
        when false the new columns are written into the caller's frame.
    """

    def __init__(self, periods=[1], column="signal", add_minus=False, fill_value=None, copy=True):
        # Constructor arguments are stored verbatim, as BaseEstimator's
        # get_params/set_params expect.
        self.periods = periods
        self.column = column
        self.add_minus = add_minus
        self.fill_value = fill_value
        self.copy = copy

    def fit(self, X, y=None):
        """Do nothing and return ``self``; the transformer has no fitted state.

        ``y`` is optional so that ``fit(X)`` and ``fit_transform(X)`` work
        without a target, as they do for other scikit-learn transformers.
        """
        return self

    def transform(self, X: pd.DataFrame, y=None):
        """Return ``X`` plus the shifted columns, converted to a cudf DataFrame.

        Parameters
        ----------
        X : pandas.DataFrame
            Must contain ``self.column``.
        y : ignored

        Returns
        -------
        cudf.DataFrame
            All columns of ``X`` followed by one column per requested shift.
        """
        periods = np.asarray(self.periods, dtype=np.int32)

        if self.add_minus:
            periods = np.append(periods, -periods)

        X_transformed = X.copy() if self.copy else X

        # The source column is never overwritten by the loop (every new name
        # carries the "_shifted_" suffix), so it can be looked up once.
        source = X_transformed[self.column]

        # tolist() yields plain Python ints; they format exactly like the
        # NumPy scalars ("1", "-1", ...) so column names are unchanged.
        for p in periods.tolist():
            X_transformed[f"{self.column}_shifted_{p}"] = source.shift(
                periods=p, fill_value=self.fill_value
            )

        return cudf.from_pandas(X_transformed)


class ColumnDropper(BaseEstimator, TransformerMixin):
    """Stateless transformer that removes the named columns from a frame.

    Names listed in ``columns`` that are not present in the input are
    ignored, so the same instance can be used on frames with and without
    those columns.

    Parameters
    ----------
    columns : iterable of str, str, or None, default None
        Column names to remove. ``None`` removes nothing. A single string is
        treated as one column name rather than as a sequence of characters.
    """

    def __init__(self, columns=None):
        self.columns = columns

    def fit(self, X, y=None):
        """Do nothing and return ``self``; the transformer has no fitted state.

        ``y`` is optional so that ``fit(X)`` and ``fit_transform(X)`` work
        without a target.
        """
        return self

    def transform(self, X: pd.DataFrame, y=None):
        """Return ``X`` restricted to the columns that are not being dropped.

        ``X`` only needs to expose ``.columns`` and list-based column
        selection; within this file's pipeline it receives the cudf frame
        produced by ``ShiftedFeatureMaker.transform``.
        """
        if self.columns is None:
            # The constructor default: previously `c not in None` raised
            # TypeError here.
            to_drop = ()
        elif isinstance(self.columns, str):
            # Avoid substring matching such as `"t" in "time"`.
            to_drop = (self.columns,)
        else:
            to_drop = self.columns

        return X[[c for c in X.columns if c not in to_drop]]


def add_category(train, test):
    """Add a float32 ``category`` flag column to both frames, in place.

    Rows whose index label falls inside one of the hard-coded half-open
    spans below get ``1.0``; every other row gets ``0.0``. Label-based
    ``.loc`` slicing is used, so the spans refer to index labels (the frames
    are expected to carry a 0..n-1 index).

    Returns the same two (mutated) frame objects as ``(train, test)``.
    """
    flagged_spans = (
        # train segments with more than 9 open-channel classes
        (train, ((2_000_000, 2_500_000), (4_500_000, 5_000_000))),
        # test segments with more than 9 open-channel classes (potentially)
        (test, ((500_000, 600_000), (700_000, 800_000))),
    )

    # Stage 1: start every row at 0.
    for frame, _ in flagged_spans:
        frame["category"] = 0

    # Stage 2: flag the spans; .loc slices include their end label, hence the -1.
    for frame, spans in flagged_spans:
        for start, stop in spans:
            frame.loc[start:stop - 1, "category"] = 1

    # Stage 3: store the flag as float32.
    for frame, _ in flagged_spans:
        frame["category"] = frame["category"].astype(np.float32)

    return train, test

def read_input():
    """Load the drift-free train and test CSVs as time-ordered frames.

    Each file is read, sorted by its ``time`` column and given a fresh
    0..n-1 index. ``signal`` (both frames) and ``open_channels`` (train
    only) are cast to float32.

    Returns
    -------
    (train, test) : tuple of pandas.DataFrame
    """
    def _load_sorted(csv_path):
        # Read one CSV and put it in chronological order with a clean index.
        return pd.read_csv(csv_path).sort_values("time").reset_index(drop=True)

    # An alternative cleaned dataset ("remove-trends-giba-explained") was
    # used here previously; the "data-without-drift" files are the active input.
    train = _load_sorted("../input/data-without-drift/train_clean.csv")
    test = _load_sorted("../input/data-without-drift/test_clean.csv")

    for frame, float_columns in ((train, ("signal", "open_channels")), (test, ("signal",))):
        for name in float_columns:
            frame[name] = frame[name].astype('float32')

    return train, test


def save_submission(y_test):
    """Write ``submission.csv`` from the sample submission and ``y_test``.

    The sample submission file supplies the row layout; its
    ``open_channels`` column is replaced by ``y_test`` cast to int32.
    Floating-point columns are written with four decimal places and the
    index is omitted.
    """
    template = pd.read_csv("../input/liverpool-ion-switching/sample_submission.csv")
    predicted_channels = np.asarray(y_test, dtype=np.int32)
    filled = template.assign(open_channels=predicted_channels)
    filled.to_csv("submission.csv", index=False, float_format="%.4f")

In [None]:
%%time

# Three-stage pipeline: build shifted-signal features, drop the non-feature
# columns, then fit a cuML random-forest regressor on what remains.
shifted_rfc = make_pipeline(
    ShiftedFeatureMaker(
        # Shifts 1..19 and, because add_minus is set, -1..-19 as well: 38
        # shifted copies of the default "signal" column. Positions vacated
        # by each shift are filled with 0.
        periods=range(1, 20),
        add_minus=True,
        fill_value=0
    ),
    ColumnDropper(
        # Remove the target and the time column so neither reaches the model.
        columns=["open_channels", "time" ]
    ),
    RandomForestRegressor(
        # NOTE(review): the meaning of these hyperparameters (split_algo=0 in
        # particular) is defined by cuML and is not visible in this file --
        # confirm against the cuML docs for the installed version.
        # No seed argument is passed -- TODO confirm whether results are
        # reproducible between runs.
        n_estimators=150,
        max_depth=19,
        max_features=10,        
        split_algo=0,
        bootstrap=False
    )
)
train, test = read_input()
train, test = add_category(train, test)

# The full train frame (target column included) is passed as X; ColumnDropper
# removes "open_channels" before the regressor sees it. The target is
# converted to a cudf Series.
shifted_rfc.fit(train, cudf.from_pandas(train[['open_channels']]).open_channels )

# The regressor output is passed through np.round and cast to int32 so the
# submission holds whole channel counts (to_array() presumably returns a host
# NumPy array -- confirm for the installed cudf version).
open_channels = np.round( shifted_rfc.predict(test).to_array() ).astype( np.int32 )
save_submission(open_channels)