In [0]:
from pathlib import Path

import numpy as np
import pandas as pd

from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.ensemble import RandomForestClassifier
from sklearn.pipeline import make_pipeline



In [0]:

class ShiftedFeatureMaker(BaseEstimator, TransformerMixin):
    """Stateless transformer that appends shifted copies of one column.

    For every period ``p`` a column named ``f"{column}_shifted_{p}"`` is added,
    holding ``X[column].shift(periods=p, fill_value=fill_value)`` (see
    ``pandas.Series.shift`` for the direction and fill semantics).

    Parameters
    ----------
    periods : int or iterable of int, default=(1,)
        Shift distances. A single integer is treated as a one-element list.
    column : str, default="signal"
        Name of the column to shift.
    add_minus : bool, default=False
        If True, the negated periods are appended after the given ones, so
        each distance is applied in both directions.
    fill_value : scalar or None, default=None
        Forwarded unchanged to ``Series.shift``.
    copy : bool, default=True
        If True, ``transform`` works on a copy of ``X``; otherwise the new
        columns are written into ``X`` itself.
    """

    def __init__(self, periods=(1,), column="signal", add_minus=False, fill_value=None, copy=True):
        # Immutable default for ``periods`` so instances never share one list.
        # Parameters are stored untouched; all validation happens in transform.
        self.periods = periods
        self.column = column
        self.add_minus = add_minus
        self.fill_value = fill_value
        self.copy = copy

    def fit(self, X, y=None):
        """Nothing is learned; returns ``self``.

        ``y`` is optional so that ``fit(X)`` and the inherited
        ``fit_transform(X)`` work without a target.
        """
        return self

    def transform(self, X: pd.DataFrame, y=None):
        """Return ``X`` (or a copy of it) with the shifted columns appended.

        Raises whatever pandas raises (``KeyError``) if ``column`` is missing.
        """
        # atleast_1d lets a bare int (periods=3) behave like [3].
        periods = np.atleast_1d(np.asarray(self.periods, dtype=np.int32))

        if self.add_minus:
            periods = np.append(periods, -periods)

        X_transformed = X.copy() if self.copy else X
        source = X_transformed[self.column]

        # .tolist() yields plain Python ints, so column names and shift() do
        # not depend on how NumPy scalars are formatted or type-checked.
        for p in periods.tolist():
            X_transformed[f"{self.column}_shifted_{p}"] = source.shift(
                periods=p, fill_value=self.fill_value
            )

        return X_transformed


class ColumnDropper(BaseEstimator, TransformerMixin):
    """Stateless transformer that removes the named columns from a DataFrame.

    Parameters
    ----------
    columns : str, iterable of column labels, or None, default=None
        Columns to remove. ``None`` removes nothing. A single string is
        treated as one column name. Names that are absent from ``X`` are
        silently ignored.
    """

    def __init__(self, columns=None):
        self.columns = columns

    def fit(self, X, y=None):
        """Nothing is learned; returns ``self``.

        ``y`` is optional so that ``fit(X)`` and the inherited
        ``fit_transform(X)`` work without a target.
        """
        return self

    def transform(self, X: pd.DataFrame, y=None):
        """Return a new DataFrame holding every column of ``X`` not listed in ``columns``."""
        if self.columns is None:
            # The default used to crash on ``c not in None``; now it is a no-op drop.
            unwanted = []
        elif isinstance(self.columns, str):
            # Without this, ``in`` would do substring matching against the string.
            unwanted = [self.columns]
        else:
            unwanted = list(self.columns)

        return X[[c for c in X.columns if c not in unwanted]]


def add_category(train, test):
    """Add a 0/1 ``category`` column to both frames, in place.

    Every row starts at 0; rows whose index label lies inside one of the
    hard-coded segments below are set to 1. The same two objects that were
    passed in are returned.
    """
    # (first label, end label) pairs; the end label itself is NOT included.
    # train segments with more than 9 open channels classes
    train_segments = ((2_000_000, 2_500_000), (4_500_000, 5_000_000))
    # test segments with more than 9 open channels classes (potentially)
    test_segments = ((500_000, 600_000), (700_000, 800_000))

    for frame in (train, test):
        frame["category"] = 0

    for frame, segments in ((train, train_segments), (test, test_segments)):
        for first, end in segments:
            # .loc slices by label and includes both ends, hence ``end - 1``.
            frame.loc[first:end - 1, "category"] = 1

    return train, test


def read_input(data_dir="./drive/My Drive/Colab Notebooks/liverpool-ion-switching"):
    """Load the cleaned train and test CSV files.

    Parameters
    ----------
    data_dir : str or path-like
        Directory containing ``train_clean.csv`` and ``test_clean.csv``.
        Defaults to the Colab Drive folder the notebook was written against,
        so existing ``read_input()`` calls behave as before.

    Returns
    -------
    (train, test) : tuple of pandas.DataFrame
    """
    data_dir = Path(data_dir)
    train = pd.read_csv(data_dir / "train_clean.csv")
    test = pd.read_csv(data_dir / "test_clean.csv")
    return train, test


def save_submission(
    y_test,
    data_dir="./drive/My Drive/Colab Notebooks/liverpool-ion-switching",
    filename="submission_rfc.csv",
):
    """Write predictions into a copy of the sample submission file.

    Parameters
    ----------
    y_test : array-like
        Predicted values, one per row of ``sample_submission.csv``. They are
        cast to ``int32`` before being stored in the ``open_channels`` column.
    data_dir : str or path-like
        Directory holding ``sample_submission.csv``; the result is written to
        the same directory. Defaults to the original Colab Drive folder.
    filename : str
        Name of the CSV file to write. Defaults to the original
        ``submission_rfc.csv``.
    """
    data_dir = Path(data_dir)
    submission = pd.read_csv(data_dir / "sample_submission.csv")
    submission["open_channels"] = np.asarray(y_test, dtype=np.int32)
    # float_format only affects float columns: they are written with exactly
    # four decimals, while the int32 predictions are written as integers.
    submission.to_csv(data_dir / filename, index=False, float_format="%.4f")



In [0]:



# Pipeline: shifted-signal features -> drop non-feature columns -> random forest.
shifted_rfc = make_pipeline(
    # Signal shifted by 1..19 rows in both directions (add_minus appends the
    # negated periods); rows with no source value are filled with 0.
    ShiftedFeatureMaker(periods=range(1, 20), add_minus=True, fill_value=0),
    # Remove the target and the timestamp so neither is used as a feature.
    ColumnDropper(columns=["open_channels", "time"]),
    RandomForestClassifier(
        n_estimators=150, max_depth=19, max_features=10,
        n_jobs=10, random_state=42, verbose=2,
    ),
)


In [0]:
# Load the cleaned train/test CSVs, then add the `category` column to both frames.
train, test = add_category(*read_input())




In [5]:
# Display the training frame; the rendered output shows only its first and last rows.
train

Unnamed: 0,time,signal,open_channels,category
0,0.0001,-2.760000,0,0
1,0.0002,-2.855700,0,0
2,0.0003,-2.407400,0,0
3,0.0004,-3.140400,0,0
4,0.0005,-3.152500,0,0
...,...,...,...,...
4999995,499.9996,2.932577,7,1
4999996,499.9997,2.711208,7,1
4999997,499.9998,4.529640,8,1
4999998,499.9999,5.652972,9,1


In [0]:
# Fit the pipeline on the training frame (its ColumnDropper step removes the
# target column from the features), then predict labels for the test frame.
open_channels = shifted_rfc.fit(train, train["open_channels"]).predict(test)

# Write the predictions out in the sample-submission layout.
save_submission(open_channels)