In [2]:
%load_ext autoreload
%autoreload 2

In [3]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import setting
from sklearn import set_config
from sklearn.model_selection import check_cv
from sklearn.utils import indexable

from src import config
from src.common import util, viz
from src.features import dump_features, load_features

In [4]:
sns.set_theme(style="whitegrid")

In [5]:
features = [
    "cont1",
    "cont2",
    "cont3",
    "cont4",
    "cont5",
    "cont6",
    "cont7",
    "cont8",
    "cont9",
    "cont10",
    "cont11",
    "cont12",
    "cont13",
    "cont14",
]

In [6]:
df_train = load_features(
    features + ["target"],
    "train",
)

[DEBUG]2023-01-25 06:48:16,242:src.features:Load features.


In [11]:
from sklearn.ensemble import RandomForestRegressor, RandomTreesEmbedding
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import cross_val_predict
from umap import UMAP
import lightgbm as lgb

In [12]:
steps = ( 
    RandomTreesEmbedding(
        n_estimators=10,
        min_samples_leaf=10,
        random_state=0,
        verbose=1,
        n_jobs=-1,
    ),
    UMAP(
        n_neighbors=15,
        n_components=10,
        verbose=1,
        random_state=0,
    ),
    lgb.LGBMRegressor()
 )

estimator = make_pipeline(
    *steps
)

estimator

Pipeline(steps=[('randomtreesembedding',
                 RandomTreesEmbedding(min_samples_leaf=10, n_estimators=10,
                                      n_jobs=-1, random_state=0, verbose=1)),
                ('umap', UMAP(n_components=10, random_state=0, verbose=1)),
                ('lgbmregressor', LGBMRegressor())])

In [8]:
df_train_ = df_train.sample(100)
X = df_train_[features]
y = df_train_["target"]

In [9]:
y_pred = cross_val_predict(
    estimator,
    X,
    y,
    n_jobs=-1,
    verbose=1,
)

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done  10 out of  10 | elapsed:    0.0s finished
[Parallel(n_jobs=-1)]: Done  10 out of  10 | elapsed:    0.1s finished
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done  10 out of  10 | elapsed:    0.0s finished
[Parallel(n_jobs=-1)]: Done  10 out of  10 | elapsed:    0.0s finished


UMAP(n_components=10, random_state=0, verbose=1)UMAP(n_components=10, random_state=0, verbose=1)

Wed Jan 25 06:52:30 2023 Construct fuzzy simplicial set
Wed Jan 25 06:52:30 2023Wed Jan 25 06:52:30 2023 Finding Nearest Neighbors
 Construct fuzzy simplicial set
Wed Jan 25 06:52:30 2023 Finding Nearest Neighbors
Wed Jan 25 06:52:34 2023 Finished Nearest Neighbor Search
Wed Jan 25 06:52:34 2023 Finished Nearest Neighbor Search
Wed Jan 25 06:52:39 2023 Construct embedding
Wed Jan 25 06:52:39 2023 Construct embedding


Epochs completed: 100%| ██████████ 500/500 [00:01]
Epochs completed:  67%| ██████▋    336/500 [00:01][Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 2 concurrent workers.


Wed Jan 25 06:52:41 2023 Finished embedding
Wed Jan 25 06:52:41 2023 Finished embedding


Epochs completed:  94%| █████████▍ 470/500 [00:01][Parallel(n_jobs=-1)]: Done  46 tasks      | elapsed:    0.1s
Epochs completed: 100%| ██████████ 500/500 [00:01]
[Parallel(n_jobs=-1)]: Done 100 out of 100 | elapsed:    0.2s finished
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done  10 out of  10 | elapsed:    0.0s finished
[Parallel(n_jobs=-1)]: Done  46 tasks      | elapsed:    0.1s
[Parallel(n_jobs=-1)]: Done 100 out of 100 | elapsed:    0.2s finished
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done  10 out of  10 | elapsed:    0.1s finished
Epochs completed: 100%| ██████████ 100/100 [00:01]
[Parallel(n_jobs=2)]: Using backend ThreadingBackend with 2 concurrent workers.
[Parallel(n_jobs=2)]: Done  46 tasks      | elapsed:    0.0s
[Parallel(n_jobs=2)]: Done 100 out of 100 | elapsed:    

UMAP(n_components=10, random_state=0, verbose=1)
Wed Jan 25 06:52:51 2023 Construct fuzzy simplicial set
Wed Jan 25 06:52:51 2023 Finding Nearest Neighbors
Wed Jan 25 06:52:51 2023 Finished Nearest Neighbor Search
Wed Jan 25 06:52:51 2023 Construct embedding
UMAP(n_components=10, random_state=0, verbose=1)
Wed Jan 25 06:52:52 2023 Construct fuzzy simplicial set
Wed Jan 25 06:52:52 2023 Finding Nearest Neighbors
Wed Jan 25 06:52:52 2023 Finished Nearest Neighbor Search
Wed Jan 25 06:52:52 2023 Construct embedding


Epochs completed: 100%| ██████████ 500/500 [00:01]


Wed Jan 25 06:52:53 2023 Finished embedding
Wed Jan 25 06:52:53 2023 Finished embedding


Epochs completed: 100%| ██████████ 500/500 [00:01]
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done  46 tasks      | elapsed:    0.1s
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done 100 out of 100 | elapsed:    0.1s finished
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done  10 out of  10 | elapsed:    0.0s finished
[Parallel(n_jobs=-1)]: Done  46 tasks      | elapsed:    0.1s
[Parallel(n_jobs=-1)]: Done 100 out of 100 | elapsed:    0.2s finished
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done  10 out of  10 | elapsed:    0.0s finished
Epochs completed: 100%| ██████████ 100/100 [00:01]
[Parallel(n_jobs=2)]: Using backend ThreadingBackend with 2 concurrent workers.
[Parallel(n_jobs=2)]: Done  46 tasks      | elapsed:    0.1s
[Parallel(n_jobs=2)]: Don

UMAP(n_components=10, random_state=0, verbose=1)
Wed Jan 25 06:52:56 2023 Construct fuzzy simplicial set
Wed Jan 25 06:52:56 2023 Finding Nearest Neighbors
Wed Jan 25 06:52:56 2023 Finished Nearest Neighbor Search
Wed Jan 25 06:52:56 2023 Construct embedding


Epochs completed: 100%| ██████████ 500/500 [00:01]
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done  46 tasks      | elapsed:    0.0s
[Parallel(n_jobs=-1)]: Done 100 out of 100 | elapsed:    0.1s finished
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done  10 out of  10 | elapsed:    0.0s finished


Wed Jan 25 06:52:58 2023 Finished embedding


Epochs completed: 100%| ██████████ 100/100 [00:01]
[Parallel(n_jobs=2)]: Using backend ThreadingBackend with 2 concurrent workers.
[Parallel(n_jobs=2)]: Done  46 tasks      | elapsed:    0.1s
[Parallel(n_jobs=2)]: Done 100 out of 100 | elapsed:    0.1s finished
[Parallel(n_jobs=-1)]: Done   5 out of   5 | elapsed:   45.7s finished


In [10]:
np.square((y_pred - y)).mean()

0.46510792881457946