In [1]:
from binarySegmentation import binarySegmentation

import numpy as np

import pandas as pd

from sklearn import linear_model

from sklearn import ensemble

import seaborn as sns

from joblib import dump

from joblib import load

In [2]:
# Base seeds for the 100 Monte Carlo replications: 0, 100, 200, ..., 9900.
seeds = list(range(0, 100 * 100, 100))

In [3]:
# Covariate dimensions to simulate: 100, 200, 300, 400, 500.
ps = list(range(100, 600, 100))

In [4]:
def experiment2segmentsLASSO(seed, covMatrixType, p):
    """Simulate a two-regime data set and segment it using LASSO fits.

    The function draws ``n = p // 2`` observations of ``p`` zero-mean Gaussian
    covariates ``X0 .. X{p-1}`` and builds a noise-free response with a single
    change point at row ``n // 2``:

    * rows ``[0, n // 2)``:  ``y = X0 + X1``
    * rows ``[n // 2, n)``:  ``y = X{p-2} + X{p-1}``

    The same simulation design is used by ``experiment2segmentsRFR``; only the
    model handed to the segmentation routine differs.

    Parameters
    ----------
    seed : int
        Value passed to ``np.random.seed``. Note that this reseeds NumPy's
        global legacy generator (also when ``covMatrixType`` is invalid).
    covMatrixType : str
        Covariance design of the covariates:
        ``'diagonal'`` (identity), ``'equi-correlation'`` (unit diagonal,
        0.2 elsewhere) or ``'toeplitz'`` (entry ``(i, j)`` is
        ``0.8 ** abs(i - j)``).
    p : int
        Number of covariates. The response uses the first two and the last two
        columns, so ``p`` must be at least 2.

    Returns
    -------
    object or str
        Whatever ``binarySegmentation.binarySegmentation`` returns for the
        simulated frame. For an unrecognised ``covMatrixType`` the message
        ``'Please, specify a covariance matrix.'`` is returned instead of
        raising (kept for backward compatibility).
    """
    np.random.seed(seed)
    dimCovariates = p

    # Build the covariance directly as a float ndarray instead of p*p nested
    # Python lists; the entries are identical to the list-based construction.
    if covMatrixType == 'diagonal':
        covMatrix = np.eye(dimCovariates)
    elif covMatrixType == 'equi-correlation':
        covMatrix = np.full((dimCovariates, dimCovariates), 0.2)
        np.fill_diagonal(covMatrix, 1.0)
    elif covMatrixType == 'toeplitz':
        # Only p distinct values exist (one per lag). They are computed with
        # Python's ** so each entry matches 0.8**abs(i-j) bit for bit, then
        # broadcast to the full matrix through an integer lag lookup.
        decay = np.array([0.8**lag for lag in range(dimCovariates)])
        positions = np.arange(dimCovariates)
        covMatrix = decay[np.abs(positions[:, None] - positions[None, :])]
    else:
        return 'Please, specify a covariance matrix.'

    n = dimCovariates // 2
    featureNames = ['X' + str(i) for i in range(dimCovariates)]
    samples = np.random.multivariate_normal(  mean=np.zeros(dimCovariates)
                                            , cov=covMatrix
                                            , size=n
                                           )
    data = pd.DataFrame(samples, columns=featureNames)

    # Vectorised response: first regime depends on the first two covariates,
    # second regime on the last two.
    changePoint = n // 2
    data['y'] = np.concatenate([
        samples[:changePoint, 0] + samples[:changePoint, 1],
        samples[changePoint:, dimCovariates - 2] + samples[changePoint:, dimCovariates - 1],
    ])

    # NOTE(review): alpha=None is presumably resolved inside binarySegmentation
    # (e.g. tuned per segment) - confirm against that module.
    return binarySegmentation.binarySegmentation(  data
                                                 , X = featureNames
                                                 , y = 'y'
                                                 , model = linear_model.Lasso
                                                 , params = {'alpha':None, 'fit_intercept':False}
                                                 , fraqMinObs = 0.25
                                                 , maxSegments = 4
                                                )

In [12]:
def experiment2segmentsRFR(seed, covMatrixType, p):
    """Simulate a two-regime data set and segment it using random forests.

    The function draws ``n = p // 2`` observations of ``p`` zero-mean Gaussian
    covariates ``X0 .. X{p-1}`` and builds a noise-free response with a single
    change point at row ``n // 2``:

    * rows ``[0, n // 2)``:  ``y = X0 + X1``
    * rows ``[n // 2, n)``:  ``y = X{p-2} + X{p-1}``

    The same simulation design is used by ``experiment2segmentsLASSO``; only
    the model and its settings differ.

    Parameters
    ----------
    seed : int
        Value passed to ``np.random.seed``. Note that this reseeds NumPy's
        global legacy generator (also when ``covMatrixType`` is invalid).
    covMatrixType : str
        Covariance design of the covariates:
        ``'diagonal'`` (identity), ``'equi-correlation'`` (unit diagonal,
        0.2 elsewhere) or ``'toeplitz'`` (entry ``(i, j)`` is
        ``0.8 ** abs(i - j)``).
    p : int
        Number of covariates. The response uses the first two and the last two
        columns, so ``p`` must be at least 2.

    Returns
    -------
    object or str
        Whatever ``binarySegmentation.binarySegmentation`` returns for the
        simulated frame. For an unrecognised ``covMatrixType`` the message
        ``'Please, specify a covariance matrix.'`` is returned instead of
        raising (kept for backward compatibility).
    """
    np.random.seed(seed)
    dimCovariates = p

    # Build the covariance directly as a float ndarray instead of p*p nested
    # Python lists; the entries are identical to the list-based construction.
    if covMatrixType == 'diagonal':
        covMatrix = np.eye(dimCovariates)
    elif covMatrixType == 'equi-correlation':
        covMatrix = np.full((dimCovariates, dimCovariates), 0.2)
        np.fill_diagonal(covMatrix, 1.0)
    elif covMatrixType == 'toeplitz':
        # Only p distinct values exist (one per lag). They are computed with
        # Python's ** so each entry matches 0.8**abs(i-j) bit for bit, then
        # broadcast to the full matrix through an integer lag lookup.
        decay = np.array([0.8**lag for lag in range(dimCovariates)])
        positions = np.arange(dimCovariates)
        covMatrix = decay[np.abs(positions[:, None] - positions[None, :])]
    else:
        return 'Please, specify a covariance matrix.'

    n = dimCovariates // 2
    featureNames = ['X' + str(i) for i in range(dimCovariates)]
    samples = np.random.multivariate_normal(  mean=np.zeros(dimCovariates)
                                            , cov=covMatrix
                                            , size=n
                                           )
    data = pd.DataFrame(samples, columns=featureNames)

    # Vectorised response: first regime depends on the first two covariates,
    # second regime on the last two.
    changePoint = n // 2
    data['y'] = np.concatenate([
        samples[:changePoint, 0] + samples[:changePoint, 1],
        samples[changePoint:, dimCovariates - 2] + samples[changePoint:, dimCovariates - 1],
    ])

    # Candidate leaf sizes are 5%, 10%, 15% and 20% of n. For small n the
    # truncation can yield 0, which is not a valid leaf size, so clamp to 1 and
    # drop the duplicates the clamp may create. For n >= 20 the list is
    # unchanged.
    minSamplesLeaf = sorted({max(1, int(i*0.05*n)) for i in range(1,5)})

    # NOTE(review): the params keys (param_grid, cv, n_jobs, scoring, refit)
    # look like grid-search settings consumed by binarySegmentation when
    # crossValidation=True - confirm against that module.
    return binarySegmentation.binarySegmentation(  data
                                                 , X = featureNames
                                                 , y = 'y'
                                                 , model = ensemble.RandomForestRegressor
                                                 , params = { 'param_grid': { 'bootstrap': [False],
                                                                              'min_samples_leaf': minSamplesLeaf,
                                                                              'n_estimators': range(1,5)
                                                                            },
                                                              'cv':4,
                                                              'n_jobs':16,
                                                              'scoring':'neg_mean_squared_error',
                                                              'refit':True
                                                            }
                                                 , fraqMinObs = 0.25
                                                 , maxSegments = 4
                                                 , crossValidation=True
                                                )

In [None]:
# LASSO experiments. For each covariance design, run every (p, seed) pair and
# store the results as [p, [result per seed]] entries. Each design is written
# to disk with joblib before the next one starts, so completed designs survive
# a later failure. The seed actually used for a run is seed * p.

# Identity covariance.
experimentsLASSOdiagonal = [
    [p, [experiment2segmentsLASSO(seed * p, 'diagonal', p) for seed in seeds]]
    for p in ps
]
dump(experimentsLASSOdiagonal, 'experimentsLASSOdiagonal2seg')

# Equi-correlated covariates.
experimentsLASSOequicorrelation = [
    [p, [experiment2segmentsLASSO(seed * p, 'equi-correlation', p) for seed in seeds]]
    for p in ps
]
dump(experimentsLASSOequicorrelation, 'experimentsLASSOequicorrelation2seg')

# Toeplitz covariance.
experimentsLASSOtoeplitz = [
    [p, [experiment2segmentsLASSO(seed * p, 'toeplitz', p) for seed in seeds]]
    for p in ps
]
dump(experimentsLASSOtoeplitz, 'experimentsLASSOtoeplitz2seg')

In [None]:
# Random-forest experiments. For each covariance design, run every (p, seed)
# pair and store the results as [p, [result per seed]] entries. Each design is
# written to disk with joblib before the next one starts, so completed designs
# survive a later failure. The seed actually used for a run is seed * p.

# Identity covariance.
experimentsRFRdiagonal = [
    [p, [experiment2segmentsRFR(seed * p, 'diagonal', p) for seed in seeds]]
    for p in ps
]
dump(experimentsRFRdiagonal, 'experimentsRFRdiagonal2seg')

# Equi-correlated covariates.
experimentsRFRequicorrelation = [
    [p, [experiment2segmentsRFR(seed * p, 'equi-correlation', p) for seed in seeds]]
    for p in ps
]
dump(experimentsRFRequicorrelation, 'experimentsRFRequicorrelation2seg')

# Toeplitz covariance.
experimentsRFRtoeplitz = [
    [p, [experiment2segmentsRFR(seed * p, 'toeplitz', p) for seed in seeds]]
    for p in ps
]
dump(experimentsRFRtoeplitz, 'experimentsRFRtoeplitz2seg')