In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from sklearn.model_selection import train_test_split
from deepctr.models import DeepFM
from deepctr.feature_column import SparseFeat, DenseFeat, get_feature_names
import pickle
import sys

pd.set_option("display.max_columns", 500)

In [11]:
data = pd.read_csv("../data/train/trimmed_data.csv", index_col=0)

In [12]:
train = data.loc[data.취급액.isnull()==False]
test = data.loc[data.취급액.isnull()]

# DeepFM

한글은 허용되지 않음

In [13]:
train.columns

Index(['방송일시', '노출(분)', '마더코드', '상품코드', '상품명', '상품군', '판매단가', '취급액',
       'hour_rank', '공휴일여부', '연휴', 'group', 'within_group', '브랜드', 'NS상품군_대',
       'NS상품군_중', 'NS상품군_소', 'prodnames', '평균기온_서울', '최저기온_서울', '최고기온_서울',
       '강수량_서울', '최소습도_서울', '평균습도_서울', '평균기온_전국', '최저기온_전국', '최고기온_전국',
       '강수량_전국', '최소습도_전국', '평균습도_전국', '성별', '결제방법', '세트여부', 'LG', 'TV',
       '울트라HD', '쿠쿠전기밥솥', '3종', '쿠첸', '압력밥솥', '침대', 'LED', '6인용',
       'bsi_original', 'bsi_seasonal', 'ccsi', 'PC0', 'PC1', 'PC2', 'PC3',
       'PC4', 'PC5', 'PC6', 'cluster', 'TV_상품군_분', 'TV_상품군_평균', 'TV_상품군_최고',
       'TV_상품군_총합', 'TV_요일시_분', 'TV_요일시_평균', 'TV_요일시_최고', 'TV_요일시_총합',
       'TV_만원대_분', 'TV_만원대_평균', 'TV_만원대_최고', 'TV_만원대_총합', 'TV_월_분', 'TV_월_평균',
       'TV_월_최고', 'TV_월_총합', '연', '월', '일', '시간', '분', '요일', '판매단가_100',
       '판매단가_만원대', '일별방송순서', '월별방송순서', '노출_5', '노출_10', '계절',
       '('유동인구수', '남성', 20)', '('유동인구수', '남성', 30)', '('유동인구수', '남성', 40)',
       '('유동인구수', '남성', 50)', '('유동인구수', '남성', 60)', '

In [14]:
col = {x:str(y) for x,y in zip(train.columns,(range(0,len(train.columns))))}
train = train.rename(columns=col)

In [24]:
sparse_features = ['1','2','3','4','5','9','10','11','12','13','14','15','16','17',
                   '18','30','31','32','33','34','35','36','37','38','39','40','41','42',
                   '53',
                   '71','72','73','74','75',
                   '79','80','82'
                  ]
target = ['7']
no_use_features = ['0']
no_dense_features = sparse_features + target + no_use_features
dense_features = [elem for elem in train.columns.tolist() if elem not in no_dense_features]

train[sparse_features] = train[sparse_features].fillna('-1', )
train[dense_features] = train[dense_features].fillna(0, )

In [25]:
train[sparse_features]

Unnamed: 0,1,2,3,4,5,9,10,11,12,13,14,15,16,17,18,30,31,32,33,34,35,36,37,38,39,40,41,42,53,71,72,73,74,75,79,80,82
0,20.0,100346,201072,테이트 남성 셀린니트3종,의류,1,0,0,0,테이트,패션의류,남성의류,니트/스웨터/가디건,테이트 셀린니트 종,-5.0,1,2,0,0,0,0,0,0,0,0,0,0,0,4,1,1,6,0,1,1.0,20,겨울
1,20.0,100346,201079,테이트 여성 셀린니트3종,의류,1,0,0,0,테이트,패션의류,여성의류,니트/스웨터/가디건,테이트 셀린니트 종,-5.0,0,2,0,0,0,0,0,0,0,0,0,0,0,4,1,1,6,0,1,1.0,20,겨울
2,20.0,100346,201072,테이트 남성 셀린니트3종,의류,1,0,0,1,테이트,패션의류,남성의류,니트/스웨터/가디건,테이트 셀린니트 종,-5.0,1,2,0,0,0,0,0,0,0,0,0,0,0,4,1,1,6,20,1,2.0,20,겨울
3,20.0,100346,201079,테이트 여성 셀린니트3종,의류,1,0,0,1,테이트,패션의류,여성의류,니트/스웨터/가디건,테이트 셀린니트 종,-5.0,0,2,0,0,0,0,0,0,0,0,0,0,0,4,1,1,6,20,1,2.0,20,겨울
4,20.0,100346,201072,테이트 남성 셀린니트3종,의류,1,0,0,2,테이트,패션의류,남성의류,니트/스웨터/가디건,테이트 셀린니트 종,-5.0,1,2,0,0,0,0,0,0,0,0,0,0,0,4,1,1,6,40,1,3.0,20,겨울
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
35373,20.0,100448,201384,무이자쿠첸압력밥솥 6인용,주방,0,0,10,1,쿠첸,주방/생활/건강,주방용품/식기,냄비/압력솥,쿠첸압력밥솥 인용,-7.9,2,0,0,0,0,0,0,0,0,0,0,0,1,2,12,31,23,40,1,26.0,20,겨울
35374,20.0,100448,201391,일시불쿠첸압력밥솥 6인용,주방,0,0,10,1,쿠첸,가전/디지털,주방가전,전기밥솥,쿠첸압력밥솥 인용,-7.9,2,1,0,0,0,0,0,0,0,0,0,0,1,2,12,31,23,40,1,26.0,20,겨울
35375,20.0,100448,201383,무이자쿠첸압력밥솥 10인용,주방,1,0,10,2,쿠첸,주방/생활/건강,주방용품/식기,냄비/압력솥,쿠첸압력밥솥 인용,-2.2,2,0,0,0,0,0,0,0,0,0,0,0,0,2,1,1,0,0,2,1.0,20,겨울
35377,20.0,100448,201384,무이자쿠첸압력밥솥 6인용,주방,1,0,10,2,쿠첸,주방/생활/건강,주방용품/식기,냄비/압력솥,쿠첸압력밥솥 인용,-2.2,2,0,0,0,0,0,0,0,0,0,0,0,1,2,1,1,0,0,2,1.0,20,겨울


In [26]:
train[dense_features]

Unnamed: 0,6,8,19,20,21,22,23,24,25,26,27,28,29,43,44,45,46,47,48,49,50,51,52,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,76,77,78,81,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97
0,39900,2,-8.2,-0.6,0.0,34.0,49.5,-1.856842,-5.662105,2.416842,0.072632,40.305263,58.012632,90.2,90.5,97,-0.69527,0.536955,0.075706,0.008203,-0.013670,-0.061421,-0.031838,19.827472,0.004357,0.017761,0.083738,19.550562,0.001708,0.009371,0.033938,18.788212,0.005091,0.017073,0.095699,20.478089,0.002667,0.012210,0.054928,2019,40000,30000.0,1.0,20.0,726160.0,824430.0,818520.0,745730.0,467370.0,312130.0,748720.0,808660.0,824250.0,807200.0,529980.0,436050.0,8049200.0,670766.666667,182482.161934
1,39900,2,-8.2,-0.6,0.0,34.0,49.5,-1.856842,-5.662105,2.416842,0.072632,40.305263,58.012632,90.2,90.5,97,-0.69527,0.536955,0.075706,0.008203,-0.013670,-0.061421,-0.031838,19.827472,0.004357,0.017761,0.083738,19.550562,0.001708,0.009371,0.033938,18.788212,0.005091,0.017073,0.095699,20.478089,0.002667,0.012210,0.054928,2019,40000,30000.0,1.0,20.0,726160.0,824430.0,818520.0,745730.0,467370.0,312130.0,748720.0,808660.0,824250.0,807200.0,529980.0,436050.0,8049200.0,670766.666667,182482.161934
2,39900,2,-8.2,-0.6,0.0,34.0,49.5,-1.856842,-5.662105,2.416842,0.072632,40.305263,58.012632,90.2,90.5,97,-0.69527,0.536955,0.075706,0.008203,-0.013670,-0.061421,-0.031838,19.827472,0.004357,0.017761,0.083738,19.550562,0.001708,0.009371,0.033938,18.788212,0.005091,0.017073,0.095699,20.478089,0.002667,0.012210,0.054928,2019,40000,30000.0,2.0,20.0,726160.0,824430.0,818520.0,745730.0,467370.0,312130.0,748720.0,808660.0,824250.0,807200.0,529980.0,436050.0,8049200.0,670766.666667,182482.161934
3,39900,2,-8.2,-0.6,0.0,34.0,49.5,-1.856842,-5.662105,2.416842,0.072632,40.305263,58.012632,90.2,90.5,97,-0.69527,0.536955,0.075706,0.008203,-0.013670,-0.061421,-0.031838,19.827472,0.004357,0.017761,0.083738,19.550562,0.001708,0.009371,0.033938,18.788212,0.005091,0.017073,0.095699,20.478089,0.002667,0.012210,0.054928,2019,40000,30000.0,2.0,20.0,726160.0,824430.0,818520.0,745730.0,467370.0,312130.0,748720.0,808660.0,824250.0,807200.0,529980.0,436050.0,8049200.0,670766.666667,182482.161934
4,39900,2,-8.2,-0.6,0.0,34.0,49.5,-1.856842,-5.662105,2.416842,0.072632,40.305263,58.012632,90.2,90.5,97,-0.69527,0.536955,0.075706,0.008203,-0.013670,-0.061421,-0.031838,19.827472,0.004357,0.017761,0.083738,19.550562,0.001708,0.009371,0.033938,18.788212,0.005091,0.017073,0.095699,20.478089,0.002667,0.012210,0.054928,2019,40000,30000.0,3.0,20.0,726160.0,824430.0,818520.0,745730.0,467370.0,312130.0,748720.0,808660.0,824250.0,807200.0,529980.0,436050.0,8049200.0,670766.666667,182482.161934
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
35373,158000,6,-10.9,-4.5,0.0,26.0,39.3,-3.544211,-6.306316,0.846316,0.160000,32.178947,47.684211,90.7,91.7,101,-0.01087,-0.472728,0.788651,0.033269,-0.049297,-0.101719,-0.047337,20.608735,0.004535,0.017691,0.093975,20.555556,0.004480,0.019184,0.094340,21.673770,0.004012,0.017208,0.094613,19.966879,0.006577,0.021492,0.131403,2019,158000,150000.0,2.0,20.0,742770.0,827780.0,813940.0,761130.0,482980.0,322080.0,717140.0,780250.0,809350.0,799320.0,530400.0,446410.0,8033550.0,669462.500000,174573.432537
35374,148000,6,-10.9,-4.5,0.0,26.0,39.3,-3.544211,-6.306316,0.846316,0.160000,32.178947,47.684211,90.7,91.7,101,-0.01087,-0.472728,0.788651,0.033269,-0.049297,-0.101719,-0.047337,20.608735,0.004535,0.017691,0.093975,20.555556,0.004480,0.019184,0.094340,19.989195,0.003769,0.016071,0.075929,19.966879,0.006577,0.021492,0.131403,2019,148000,140000.0,2.0,20.0,742770.0,827780.0,813940.0,761130.0,482980.0,322080.0,717140.0,780250.0,809350.0,799320.0,530400.0,446410.0,8033550.0,669462.500000,174573.432537
35375,178000,4,-6.5,0.3,0.1,37.0,64.4,-0.792632,-6.141053,4.009474,0.009474,43.094737,63.494737,90.1,89.6,101,-0.01087,-0.472728,0.788651,0.033269,-0.049297,-0.101719,-0.047337,20.608735,0.004535,0.017691,0.093975,21.552381,0.005623,0.021631,0.117341,20.167022,0.004601,0.017886,0.091598,20.478089,0.002667,0.012210,0.054928,2020,178000,170000.0,1.0,20.0,738430.0,829640.0,817980.0,751030.0,475910.0,317260.0,753490.0,810320.0,827430.0,811740.0,532310.0,440390.0,8105930.0,675494.166667,181804.026431
35377,158000,4,-6.5,0.3,0.1,37.0,64.4,-0.792632,-6.141053,4.009474,0.009474,43.094737,63.494737,90.1,89.6,101,-0.01087,-0.472728,0.788651,0.033269,-0.049297,-0.101719,-0.047337,20.608735,0.004535,0.017691,0.093975,21.552381,0.005623,0.021631,0.117341,21.673770,0.004012,0.017208,0.094613,20.478089,0.002667,0.012210,0.054928,2020,158000,150000.0,1.0,20.0,738430.0,829640.0,817980.0,751030.0,475910.0,317260.0,753490.0,810320.0,827430.0,811740.0,532310.0,440390.0,8105930.0,675494.166667,181804.026431


## Scaling

### Label Encoding

In [27]:
for feat in sparse_features:
    lbe=LabelEncoder()
    train[feat] = lbe.fit_transform(train[feat])

### MinMax Scaling

In [28]:
max_value = max(train.iloc[:,7]) 
min_value = min(train.iloc[:,7])

In [29]:
mms = MinMaxScaler(feature_range=(0,1))
train[dense_features] = mms.fit_transform(train[dense_features])

In [30]:
def min_max_inverse(max_value, min_value, value):
    original = value*(max_value - min_value) + min_value 
    return original

## Generate feature columns

In [31]:
fixlen_feature_columns = [SparseFeat(feat, vocabulary_size=train[feat].nunique(), embedding_dim=4)
                         for i,feat in enumerate(sparse_features)] + [DenseFeat(feat, 1, ) 
                                                                     for feat in dense_features]

dnn_feature_columns = fixlen_feature_columns
linear_feature_columns = fixlen_feature_columns

feature_names = get_feature_names(linear_feature_columns + dnn_feature_columns)

## Training

In [32]:
import tensorflow as tf
mape = tf.keras.losses.MeanAbsolutePercentageError()

In [33]:
x_train, test = train_test_split(train, test_size=0.2)

x_train_model_input = {name:x_train[name].values for name in x_train[dense_features].columns.tolist() + x_train[sparse_features].columns.tolist()}
test_model_input = {name:test[name].values for name in x_train[dense_features].columns.tolist() + x_train[sparse_features].columns.tolist()}

model = DeepFM(linear_feature_columns, dnn_feature_columns, task="regression")
model.compile(optimizer="adam", loss= mape,
             metrics=[mape], )

history = model.fit(x_train_model_input, x_train[target].values,
                   batch_size=32, epochs=100, verbose=2, validation_split=0.1)


Epoch 1/100


  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


717/717 - 5s - loss: 95.0447 - mean_absolute_percentage_error: 95.0322 - val_loss: 80.6289 - val_mean_absolute_percentage_error: 80.6516
Epoch 2/100
717/717 - 3s - loss: 75.9640 - mean_absolute_percentage_error: 75.9356 - val_loss: 73.4579 - val_mean_absolute_percentage_error: 73.4553
Epoch 3/100
717/717 - 3s - loss: 73.8673 - mean_absolute_percentage_error: 73.8261 - val_loss: 71.8875 - val_mean_absolute_percentage_error: 71.8704
Epoch 4/100
717/717 - 3s - loss: 71.8804 - mean_absolute_percentage_error: 71.8160 - val_loss: 69.6544 - val_mean_absolute_percentage_error: 69.6190
Epoch 5/100
717/717 - 3s - loss: 68.8962 - mean_absolute_percentage_error: 68.8147 - val_loss: 66.3344 - val_mean_absolute_percentage_error: 66.2751
Epoch 6/100
717/717 - 3s - loss: 64.2398 - mean_absolute_percentage_error: 64.1454 - val_loss: 61.2860 - val_mean_absolute_percentage_error: 61.2007
Epoch 7/100
717/717 - 3s - loss: 57.7240 - mean_absolute_percentage_error: 57.6076 - val_loss: 55.0648 - val_mean_abso

KeyboardInterrupt: 

# RandomForest

In [35]:
import warnings
import fire
import numpy as np
import pandas as pd
from tqdm import tqdm
from metrics import mape_transform, mape
from utils import *
from sklearn.ensemble import RandomForestRegressor as rf
from sklearn.model_selection import KFold, train_test_split
warnings.filterwarnings("ignore")



## Hyperparameter tuning

In [55]:
df = get_data('trimmed_data.csv')

In [56]:
df.columns


Index(['노출(분)', '마더코드', '상품코드', '상품명', '상품군', '판매단가', 'hour_rank', '공휴일여부',
       '연휴', 'group', 'within_group', '브랜드', 'NS상품군_대', 'NS상품군_중', 'NS상품군_소',
       'prodnames', '평균기온_서울', '최저기온_서울', '최고기온_서울', '강수량_서울', '최소습도_서울',
       '평균습도_서울', '평균기온_전국', '최저기온_전국', '최고기온_전국', '강수량_전국', '최소습도_전국',
       '평균습도_전국', '성별', '결제방법', '세트여부', 'LG', 'TV', '울트라HD', '쿠쿠전기밥솥', '3종',
       '쿠첸', '압력밥솥', '침대', 'LED', '6인용', 'bsi_original', 'bsi_seasonal',
       'ccsi', 'PC0', 'PC1', 'PC2', 'PC3', 'PC4', 'PC5', 'PC6', 'cluster',
       'TV_상품군_분', 'TV_상품군_평균', 'TV_상품군_최고', 'TV_상품군_총합', 'TV_요일시_분',
       'TV_요일시_평균', 'TV_요일시_최고', 'TV_요일시_총합', 'TV_만원대_분', 'TV_만원대_평균',
       'TV_만원대_최고', 'TV_만원대_총합', 'TV_월_분', 'TV_월_평균', 'TV_월_최고', 'TV_월_총합',
       '연', '월', '일', '시간', '분', '요일', '판매단가_100', '판매단가_만원대', '일별방송순서',
       '월별방송순서', '노출_5', '노출_10', '계절', '('유동인구수', '남성', 20)',
       '('유동인구수', '남성', 30)', '('유동인구수', '남성', 40)', '('유동인구수', '남성', 50)',
       '('유동인구수', '남성', 60)', '('유동인구수', '남성',

In [57]:
params = {"max_depth":6,
              "min_samples_leaf": 2,
         'n_estimators': 100}

In [110]:
def train(trainset, testset, verbose_eval=False):
    test_result = {}
    
    from sklearn.preprocessing import LabelEncoder, MinMaxScaler
    lbe = LabelEncoder()
    mms = MinMaxScaler()
    cat_vars = ["상품코드","상품명", "상품군", "hour_rank","공휴일여부","연휴","group","within_group","브랜드", "NS상품군_대",
                "NS상품군_중", "NS상품군_소", "prodnames", "성별","결제방법","세트여부","LG","TV","울트라HD","쿠쿠전기밥솥","3종",
                "쿠첸","압력밥솥","침대","LED","6인용","cluster","연","월","일","시간","분","요일","일별방송순서","월별방송순서","계절"]
    for var in cat_vars:
        trainset[var] = lbe.fit_transform(trainset[var].astype(str))
        testset[var] = lbe.fit_transform(testset[var].astype(str))
    
    target = ["로그_취급액"]
    no_cont_vars = cat_vars + target
    cont_vars = [x for x in trainset.columns if x not in no_cont_vars]
    for var in cont_vars:
        trainset[var] = mms.fit_transform(np.array(np.log(trainset[var]+np.abs(trainset[var].min())+1)).reshape(-1,1))
        testset[var] = mms.fit_transform(np.array(np.log(testset[var]+np.abs(testset[var].min())+1)).reshape(-1,1))
        
    
    model = rf(**params)
    x_train = trainset.drop(columns=["로그_취급액"])
    y_train = trainset.loc[:, "로그_취급액"]
    rf.fit(model, x_train, y_train)
    pred = rf.predict(model, testset.drop(columns=["로그_취급액"]))
    true = testset.loc[:, "로그_취급액"]
    print(pred, true)
    scores = np.round(mape(np.exp(true), np.exp(pred)), 4)
    return model, scores

In [111]:
class randomforest:
    def __init__(self):
        self.model = None
        
    def tsCV(self, data, interval, imputation=False, params=params, verbose=False):
        
        data = get_data(data)
        scores = []
        normal_scores = []
        null_scores = []
        rounds = []
        
        data = data.loc[data.로그_취급액.isnull() != True]
        
        if verbose:
            row_format = "{:^15}|{:^15}|{:^15}|{:^15}"
            print(row_format.format("target_month", "normal mape", "null mape", "mape"))
            print(row_format.format("=" * 15, "=" * 15, "=" * 15, "=" * 15))
        min_month = data.loc[:,"월"].astype(int).min()
        max_month = data.loc[:,"월"].astype(int).max()
        
        for target_month in range(min_month+1, max_month+1):
            if interval == 0:
                start_month = min_month
            else:
                start_month = target_month - interval
                if start_month < min_month:
                    continue
                    
            # split dataset
            trainset, testset = ts_split(data, start_month, target_month)
            testset = testset.assign(is_normal=testset.상품명.isin(trainset.상품명))
            
            # imputation
            if imputation:
                testset = impute(trainset, testset)
                
            # encode categorical features
            cat_vars = [x for x in data.columns if isinstance(data[x].dtype, pd.CategoricalDtype)]
            trainset, testset = encode_categorical_features(trainset, testset, cat_vars)
            
            from sklearn.preprocessing import LabelEncoder
            lbe = LabelEncoder()
            cat_vars = ["상품명", "상품군", "브랜드", "NS상품군_대",
                        "NS상품군_중", "NS상품군_소", "prodnames", "계절"]
            for var in cat_vars:
                trainset[var] = lbe.fit_transform(trainset[var].astype(str))
                testset[var] = lbe.fit_transform(testset[var].astype(str))
            
            # split normal/null data
            normal = testset.query("is_normal == True").drop("is_normal", axis=1)
            null = testset.query("is_normal == False").drop("is_normal", axis=1)
            
            n_normal = normal.shape[0]
            n_null = null.shape[0]
            # train & valid w/ normal dataset
            model, normal_score = train(trainset, normal, verbose_eval = False)
#             print(model.predict(normal.drop("로그_취급액",axis=1)))
            # valid w/ null dataset
            true = null.로그_취급액.values
            pred = model.predict(null.drop(["로그_취급액"], axis=1))
#             print(pred)
            null_score = round(mape(np.exp(true), np.exp(pred)), 4)
            
            score = round((normal_score * n_normal + null_score*n_null)/(n_normal + n_null), 4 )
            
            normal_scores.append(normal_score)
            null_scores.append(null_score)
            scores.append(score)
            
            
            if verbose:
                print(row_format.format(target_month, normal_score, null_score, score))
                
            mean_score = np.round(np.mean(scores), 4)
            mean_normal_score = np.round(np.mean(normal_scores), 4)
            mean_null_score = np.round(np.mean(null_scores), 4)
            
        if verbose:
            print(row_format.format("=" * 15, "=" * 15, "=" * 15, "=" * 15))
            print(row_format.format("mean", mean_normal_score, mean_null_score, mean_score))
        return mean_score, mean_normal_score, mean_null_score
    
    def fit(self, trainset, testset):
        model, score = train(trainset, testset, verbose_eval=100)
        self.model = model
        return self
    
    def predict(self, data):
        assert self.model is not None
        return self.model.predict(data)
    
if __name__ == "__main__":
    randomforest
            

In [112]:
randomforest.tsCV(rf, data = 'trimmed_data.csv', interval= 0, imputation=True, params=params, verbose=True)

 target_month  |  normal mape  |   null mape   |     mape      
[16.79918072 17.50822652 17.51742556 ... 16.34107673 16.81556346
 16.88827399] index
2517    17.003350
2518    17.276014
2519    17.675819
2520    15.284076
2521    14.915122
          ...    
4946    17.568889
4947    17.049748
4948    17.349343
4949    17.541083
4950    17.856736
Name: 로그_취급액, Length: 1706, dtype: float64
       2       |    1.2955     |    0.8277     |    1.1556     
[15.99204372 15.98012834 15.98012834 ... 16.2854025  16.3904455
 16.40169253] index
4951    16.284542
4952    15.904036
4953    15.762420
4954    16.506754
4955    16.856073
          ...    
7750    16.381767
7751    15.022379
7752    15.806804
7753    16.955343
7754    16.716877
Name: 로그_취급액, Length: 1662, dtype: float64
       3       |    0.9819     |    1.3469     |    1.1309     
[17.36744937 17.36744937 16.64640994 ... 16.23738219 16.28844148
 16.13918299] index
7759     15.921959
7760     17.397556
7761     15.841551
7762     15.903

KeyboardInterrupt: 

In [None]:
params_grid = {"max_depth":6,
              "min_samples_leaf": 2,
              "boostrap":"True"}

def objective(hyperparameters, iteration, n_splits=5):
    if 'n_estimators' in hyperparamters.keys():
        del hyperparameters['n_estimators']
        
    
    for i in range(2,13):
        train = data.loc[data.월.astype(int) < i]
        valid = data.loc[data.월.astype(int) == i]
        
        x_train = train.drop(columns=["취급액"])
        y_train = train.loc["취급액"]
        
        rf_model = rf(**hyperparameters)
        rf_model.fit(x_train, y_train)
        



def random_search(params_grid, max_evals = MAX_EVALS):
    results = pd.DataFrame(columns = ["score","params","iterations",
                                     index = list(range(MAX_EVALS))])
    
    hyperparameters = {k:random.sample(v, 1)[0] for k,v in params_grid.items()}
    

In [None]:
def train(trainset, testset, verbose_eval = False):
    test_result = {}
    model = rf(params = params,
                    train_set =  trainset,
                    valid_sets = test)

In [None]:
class RandomForestModel:
    
    def __init__(self):
        self.model = None
        
    def tsCV(self, data, inter)

In [None]:
def objective(hyperparameters, iteration):
    if 'n_estimators' in hyperparamters.keys():
        del hyperparameters['n_estimators']
        
    cv_results = rf.cv(hyperparameters, train_set, nfold= N_FOLDS, )



def random_search(params_grid, max_evals = MAX_EVALS):
    results = pd.DataFrame(columns = ["score","params","iterations",
                                     index = list(range(MAX_EVALS))])
    
    hyperparameters = {k:random.sample(v, 1)[0] for k,v in params_grid.items()}
    

In [None]:
def rf_run(data):
    model = RandomForestRegressor(criterion='mape')
    
    for i in range(2, 13):
        train = data.loc[lambda x:x.월 < i]
        test = data.loc[lambda x:x.월 == i]
    
        x_train = train.drop(columns=["취급액"])
        y_train = train.loc[:, "취급액"]
    
        params_grid = {"max_depth": [5,6,7],
                      "min_samples_split":[2,3,4]}
        
    model.fit()
    