## Imports and data loading

In [1]:
%matplotlib inline


import pandas as pd
import numpy as np
import problem

# Load the train and test splits through the project-local `problem` module.
# Each call is unpacked into two objects; the training pair is later passed
# as (X, y) to `evaluation` — presumably features and labels, verify in `problem`.
data_train, y_train = problem.get_train_data()
data_test,y_test = problem.get_test_data()

## Feature extractor

In [2]:
from sklearn.base import BaseEstimator
from sklearn.base import TransformerMixin


class FeatureExtractor(BaseEstimator, TransformerMixin):
    """Stateless transformer that appends a rolling-std feature of ``Beta``.

    ``transform`` returns a copy of the input frame with one extra column,
    ``Beta_2h_std``, computed by ``compute_rolling_std`` over a ``'2h'``
    window. Nothing is learned in ``fit``.
    """

    def __init__(self):
        pass

    def fit(self, X_df, y=None):
        """Do nothing and return ``self``.

        Parameters
        ----------
        X_df : dataframe
            ignored
        y : optional
            ignored; defaults to ``None`` so the transformer can be fitted
            without targets (e.g. ``fit_transform(X)``), as scikit-learn
            transformers conventionally allow.
        """
        return self

    def transform(self, X_df):
        """Return a copy of ``X_df`` with the ``Beta_2h_std`` column added.

        Parameters
        ----------
        X_df : dataframe
            must contain a ``Beta`` column
            # NOTE(review): the '2h' offset window presumably requires a
            # datetime-like index — confirm against the data loader.
        """
        # Work on a copy: compute_rolling_std adds its column in place and
        # the caller's frame must stay untouched.
        X_df_new = X_df.copy()
        X_df_new = compute_rolling_std(X_df_new, 'Beta', '2h')
        return X_df_new
    

def compute_rolling_std(data, feature, time_window, center=False):
    """
    For a given dataframe, compute the standard deviation over
    a defined period of time (time_window) of a defined feature
    and store it in a new column named ``<feature>_<time_window>_std``.

    Missing values produced by the rolling window (e.g. the first point,
    whose window holds a single observation) are filled forward, then
    backward.

    Parameters
    ----------
    data : dataframe
        modified in place: the new column is added to it
    feature : str
        feature in the dataframe we wish to compute the rolling std from
    time_window : str
        string that defines the length of the time window passed to `rolling`
    center : bool
        boolean to indicate if the point of the dataframe considered is
        center or end of the window

    Returns
    -------
    dataframe
        the same ``data`` object, with the new column added
    """
    name = '_'.join([feature, time_window, 'std'])
    rolling_std = data[feature].rolling(time_window, center=center).std()
    rolling_std = rolling_std.ffill().bfill()

    # Keep the new column in the same float precision as its source column.
    # The original code called ``astype`` without assigning the result, so
    # the cast was silently lost. Only float dtypes are cast: casting a
    # standard deviation to an integer dtype would truncate it.
    source_dtype = data[feature].dtype
    if source_dtype.kind == 'f':
        rolling_std = rolling_std.astype(source_dtype)

    data[name] = rolling_std
    return data



## Classifier

In [3]:
from sklearn.linear_model import LogisticRegression
from sklearn.neural_network import MLPClassifier
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.base import BaseEstimator

class Classifier(BaseEstimator):
    """Standard-scaled MLP classifier wrapped as a scikit-learn estimator.

    The wrapped model is a pipeline of ``StandardScaler`` followed by an
    ``MLPClassifier`` (adam solver, ``alpha=1e-5``, hidden layers ``(5, 2)``).
    """

    def __init__(self):
        # Printing the instance (rendered through __str__) on construction
        # leaves a version trace in the output each time an instance is built.
        print(self)
        final_estim = MLPClassifier(solver='adam', alpha=1e-5,  hidden_layer_sizes=(5, 2))
        self.model = make_pipeline(StandardScaler(), final_estim)

    def fit(self, X, y):
        """Fit the wrapped pipeline on ``(X, y)`` and return ``self``.

        Returning ``self`` follows the scikit-learn estimator contract and
        allows chaining such as ``clf.fit(X, y).predict_proba(X)``.
        """
        self.model.fit(X, y)
        return self

    def predict_proba(self, X):
        """Return the class probabilities predicted by the wrapped pipeline."""
        return self.model.predict_proba(X)
    
    def __str__(self):
        """Return the fixed version banner printed at construction time."""
        return "Classifier object, version 1.3"

In [4]:
# Instantiating prints the version banner from __init__; the bare expression
# then displays the estimator's repr as the cell output.
Classifier()

Classifier object, version 1.3


Classifier()

## Results

In [5]:
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import cross_validate
from problem import get_cv

def evaluation(X, y):
    """Cross-validate the feature-extractor + classifier pipeline.

    Builds a fresh ``FeatureExtractor`` -> ``Classifier`` pipeline, takes the
    cross-validation splits from ``get_cv(X, y)`` and scores the pipeline
    with negative log loss on both the train and test folds.

    Returns the result dictionary produced by ``cross_validate``.
    """
    estimator = make_pipeline(FeatureExtractor(), Classifier())
    folds = get_cv(X, y)
    return cross_validate(
        estimator,
        X,
        y,
        cv=folds,
        scoring=['neg_log_loss'],
        return_train_score=True,
        verbose=1,
        n_jobs=1,
    )

In [8]:
# Run the cross-validated evaluation on the training split; `results` holds
# the dictionary returned by cross_validate.
results = evaluation(data_train, y_train)

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


Classifier object, version 1.3
Classifier object, version 1.3


  return self.partial_fit(X, y)
  return self.fit(X, y, **fit_params).transform(X)
  Xt = transform.transform(Xt)
  Xt = transform.transform(Xt)


Classifier object, version 1.3


  return self.partial_fit(X, y)
  return self.fit(X, y, **fit_params).transform(X)
  Xt = transform.transform(Xt)
  Xt = transform.transform(Xt)


Classifier object, version 1.3


  return self.partial_fit(X, y)
  return self.fit(X, y, **fit_params).transform(X)
  Xt = transform.transform(Xt)
  Xt = transform.transform(Xt)


Classifier object, version 1.3


  return self.partial_fit(X, y)
  return self.fit(X, y, **fit_params).transform(X)
  Xt = transform.transform(Xt)
  Xt = transform.transform(Xt)


Classifier object, version 1.3


  return self.partial_fit(X, y)
  return self.fit(X, y, **fit_params).transform(X)
  Xt = transform.transform(Xt)
  Xt = transform.transform(Xt)
[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed:  3.5min finished


In [9]:
# The scorer is 'neg_log_loss', so the mean is negated to report a positive
# log loss; the standard deviation is unaffected by the sign flip.
print("Training score Log Loss: {:.3f} +- {:.3f}".format(-np.mean(results['train_neg_log_loss']),
                                                        np.std(results['train_neg_log_loss'])))
print("Testing score Log Loss: {:.3f} +- {:.3f} \n".format(-np.mean(results['test_neg_log_loss']),
                                                          np.std(results['test_neg_log_loss'])))

Training score Log Loss: 0.191 +- 0.026
Testing score Log Loss: 0.224 +- 0.037 

