In [1]:
import os
import re
import sys
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import yaml
from datetime import datetime
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from scipy.stats import linregress
from pathlib import Path
from abc import ABCMeta, abstractmethod
from time import time
import scipy.sparse as sp
from sklearn.decomposition import TruncatedSVD
from sklearn.linear_model import LinearRegression

In [2]:
# Make the parent of the current working directory importable so the
# project packages `configs` and `src` can be resolved.
# NOTE(review): the recorded run of this cell failed with
# "No module named 'configs'" — presumably the kernel was not started from a
# direct subdirectory of the project root; confirm the expected launch directory.
_project_root = os.path.abspath('..')
if _project_root not in sys.path:
    # Guarded so that re-running the cell does not append duplicate entries.
    sys.path.append(_project_root)
from configs.config import *
from src.util import Logger, Util
from src.feature import *

ModuleNotFoundError: No module named 'configs'

In [None]:
# Reload the module
# `importlib.reload` re-executes src.feature in the running kernel; the star
# import afterwards rebinds the notebook's names to the reloaded definitions.
# Objects created before the reload keep referring to the old classes.
import importlib
import src.feature
importlib.reload(src.feature)
from src.feature import *

In [None]:
import warnings
# Install an "ignore" filter with no category/module restriction, so every
# warning raised from this point on is suppressed.
# NOTE(review): this also hides deprecation and numerical warnings from the
# feature code — consider narrowing the filter to the specific noisy category.
warnings.filterwarnings('ignore')

# 処理実行

In [None]:
def run_blocks(feature_blocks):
    """Call ``create_feature()`` on every feature block, timing each call.

    Args:
        feature_blocks: Iterable of objects exposing a ``create_feature()``
            method. Blocks are processed in iteration order.

    Returns:
        None. The value returned by each ``create_feature()`` call is
        discarded; this function only drives the blocks and reports timings
        through ``Timer``.
    """
    print('start run blocks...')
    with Timer(prefix='run test'):
        for block in feature_blocks:
            # Label the timing line with the class name: the default object
            # repr embeds a memory address that changes on every run and makes
            # the log hard to read or diff.
            with Timer(prefix='- {}'.format(type(block).__name__)):
                block.create_feature()

In [None]:
# Feature blocks to build, in execution order.
# Each pair is (block class, use_cache). Every block is constructed with
# save_cache=True and logger=None; only the use_cache flag differs per block.
feature_blocks = [
    block_cls(use_cache=use_cache, save_cache=True, logger=None)
    for block_cls, use_cache in [
        (Key, False),
        (Target, False),
        (CategoryFeature, False),
        (CareerFeature, False),
        (UdemyActivityFeature, False),
        (UdemyTimeseriesFeature, False),
        (UdemyTitleEmbedding, False),
        (UdemyIDEmbedding, False),
        (UdemyCategorySimilarityFeature, True),
        (UdemyTitleSimilarityFeature, True),
        (DxSimilarityFeature, False),
        (HrSimilarityFeature, False),
        (DxFeature, False),
        (HrCategoryEmbeddingFeature, False),
        (HrNameEmbeddingFeature, False),
        (DxCategoryEmbeddingFeature, False),
        (DxNameEmbeddingFeature, False),
        (HrFeature, False),
        (OvertimeWorkByMonthFeature, False),
        (OvertimeWorkByMonthTimeseriesFeature, True),
        (PositionHistoryFeature, False),
    ]
]

In [None]:
# Execute create_feature() for every configured block, in list order.
run_blocks(feature_blocks)

start run blocks...
- <src.feature.Key object at 0x000001D349726670> 0.012[s]
- <src.feature.Target object at 0x000001D349726640> 0.004[s]
- <src.feature.CategoryFeature object at 0x000001D349726610> 0.006[s]
- <src.feature.CareerFeature object at 0x000001D3497265E0> 0.043[s]
- <src.feature.UdemyActivityFeature object at 0x000001D3497265B0> 0.802[s]
- <src.feature.UdemyTimeseriesFeature object at 0x000001D349726580> 0.843[s]
- <src.feature.UdemyTitleEmbedding object at 0x000001D349726550> 0.929[s]
- <src.feature.UdemyIDEmbedding object at 0x000001D349726520> 0.812[s]
★ UdemyCategorySimilarityFeatureの特徴量をキャッシュから読み込みました。 ★
- <src.feature.UdemyCategorySimilarityFeature object at 0x000001D3497264F0> 0.002[s]
★ UdemyTitleSimilarityFeatureの特徴量をキャッシュから読み込みました。 ★
- <src.feature.UdemyTitleSimilarityFeature object at 0x000001D3497264C0> 0.005[s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

- <src.feature.DxSimilarityFeature object at 0x000001D349726490> 18.661[s]


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

- <src.feature.HrSimilarityFeature object at 0x000001D349726460> 14.413[s]
- <src.feature.DxFeature object at 0x000001D349726430> 0.029[s]
- <src.feature.HrCategoryEmbeddingFeature object at 0x000001D349726400> 0.549[s]
- <src.feature.HrNameEmbeddingFeature object at 0x000001D3497263D0> 0.172[s]
- <src.feature.DxCategoryEmbeddingFeature object at 0x000001D3497263A0> 0.142[s]
- <src.feature.DxNameEmbeddingFeature object at 0x000001D349726370> 0.130[s]
- <src.feature.HrFeature object at 0x000001D349726340> 0.017[s]
- <src.feature.OvertimeWorkByMonthFeature object at 0x000001D349726310> 0.028[s]
★ OvertimeWorkByMonthTimeseriesFeatureの特徴量をキャッシュから読み込みました。 ★
- <src.feature.OvertimeWorkByMonthTimeseriesFeature object at 0x000001D3497262E0> 0.002[s]
- <src.feature.PositionHistoryFeature object at 0x000001D3497262B0> 0.022[s]
run test 37.624[s]


In [None]:
# Read the pickled Udemy activity frame stored under DIR_INTERIM.
# NOTE(review): unpickling can execute arbitrary code — only load files that
# were produced by this project's own pipeline.
df_udemy = pd.read_pickle(Path(DIR_INTERIM) / "df_prep_udemy_activity.pkl")

In [None]:
# Display the loaded frame; pandas abbreviates long frames in the rendered output.
df_udemy

Unnamed: 0,社員番号,コースID,コースタイトル,レクチャーもしくはクイズ,レクチャー_クイズID,レクチャー_クイズの題名,開始日,終了日,推定完了率_,最終結果_クイズの場合_,マーク済み修了,コースカテゴリー
0,-1sqs0GXzpPJuAVKHUUFgg==,4615016,企業オリジナル講座,Quiz,5528090,企業オリジナル講座,2022-04-11 10:10:00,2022-04-11 10:10:00,100.0,0.0,1,企業オリジナル講座
1,-1sqs0GXzpPJuAVKHUUFgg==,4615016,企業オリジナル講座,Quiz,5528090,企業オリジナル講座,2022-04-11 10:11:00,2022-04-11 10:11:00,100.0,100.0,1,企業オリジナル講座
2,-1sqs0GXzpPJuAVKHUUFgg==,4615016,企業オリジナル講座,Quiz,5528100,企業オリジナル講座,2022-04-11 10:26:00,2022-04-11 10:26:00,100.0,0.0,1,企業オリジナル講座
3,-1sqs0GXzpPJuAVKHUUFgg==,4615016,企業オリジナル講座,Quiz,5528100,企業オリジナル講座,2022-04-11 10:27:00,2022-04-11 10:27:00,100.0,100.0,1,企業オリジナル講座
4,-1sqs0GXzpPJuAVKHUUFgg==,4615016,企業オリジナル講座,Quiz,5528102,企業オリジナル講座,2022-04-11 10:21:00,2022-04-11 10:21:00,100.0,100.0,1,企業オリジナル講座
...,...,...,...,...,...,...,...,...,...,...,...,...
539159,zxY0Eflwm1tYj1Wt6vo_1g==,5264112,企業オリジナル講座,Video_lecture,37311144,企業オリジナル講座,2023-06-14 09:56:00,2023-06-14 09:56:00,100.0,,1,企業オリジナル講座
539160,zxY0Eflwm1tYj1Wt6vo_1g==,5264112,企業オリジナル講座,Video_lecture,37311150,企業オリジナル講座,2023-06-14 09:57:00,2023-06-14 09:57:00,100.0,,1,企業オリジナル講座
539161,zxY0Eflwm1tYj1Wt6vo_1g==,6098315,企業オリジナル講座,Video_lecture,45012691,企業オリジナル講座,2024-09-27 15:39:00,2024-09-27 16:17:00,100.0,,1,企業オリジナル講座
539162,zxY0Eflwm1tYj1Wt6vo_1g==,6106205,企業オリジナル講座,Video_lecture,45088199,企業オリジナル講座,2024-12-16 16:21:00,2024-12-16 17:39:00,100.0,,1,企業オリジナル講座


In [None]:
# Names of the stored features whose shapes are summarised below.
list_ = [
    'Key',
    'Target',
    'CategoryFeature',
    'CareerFeature',
    'UdemyActivityFeature',
    'UdemyTimeseriesFeature',
    'UdemyTitleEmbedding',
    'UdemyIDEmbedding',
    'UdemyCategorySimilarityFeature',
    'UdemyTitleSimilarityFeature',
    'DxSimilarityFeature',
    'HrSimilarityFeature',
    'DxFeature',
    'DxCategoryEmbeddingFeature',
    'DxNameEmbeddingFeature',
    'HrFeature',
    'HrCategoryEmbeddingFeature',
    'HrNameEmbeddingFeature',
    'OvertimeWorkByMonthFeature',
    'OvertimeWorkByMonthTimeseriesFeature',
    'PositionHistoryFeature',
]
# Map each feature name to the `.shape` of the object returned by
# Util.load_feature (insertion order follows list_).
dict_shape = {name: Util.load_feature(name).shape for name in list_}
# One row per feature, with the two shape components as columns. Built
# row-wise directly instead of constructing column-wise and transposing.
pd.DataFrame.from_dict(dict_shape, orient='index', columns=['n_rows', 'n_cols'])

Unnamed: 0,n_rows,n_cols
Key,18360,2
Target,7338,3
CategoryFeature,6,2
CareerFeature,375,49
UdemyActivityFeature,2232,22
UdemyTimeseriesFeature,2232,18
UdemyTitleEmbedding,2232,17
UdemyIDEmbedding,2232,17
UdemyCategorySimilarityFeature,18360,5
UdemyTitleSimilarityFeature,18360,5
