# Library

In [3]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib 
%matplotlib inline
plt.style.use('seaborn')
#view all columns
pd.options.display.max_columns = None
#.2f
pd.options.display.float_format = '{:.4f}'.format

import warnings
warnings.filterwarnings('ignore')

#sklearn
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import KFold, train_test_split,StratifiedKFold,cross_val_score

#metrics
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

#sklearn
from sklearn.ensemble import RandomForestRegressor,ExtraTreesRegressor
from sklearn.svm import SVR
from sklearn.neighbors import KNeighborsRegressor
from lightgbm import LGBMRegressor
from sklearn.metrics import mean_squared_error
from xgboost import XGBRegressor
from sklearn.ensemble import VotingRegressor
from catboost import CatBoostRegressor

from feature_engine.outliers import OutlierTrimmer
from feature_engine.transformation import BoxCoxTransformer,YeoJohnsonTransformer
from feature_engine.selection import DropFeatures,DropCorrelatedFeatures,DropDuplicateFeatures
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV

#optuna
import optuna
from optuna import Trial, visualization
from optuna.samplers import TPESampler

# Model

In [4]:
train = pd.read_csv("./data/drive-download-20220502T013044Z-001/train.csv")
test = pd.read_csv("./data/drive-download-20220502T013044Z-001/test.csv")
sample_submission = pd.read_csv("./data/drive-download-20220502T013044Z-001/sample_submission.csv")


#drop id column
train.drop(['id'],axis=1,inplace=True)
test.drop(['id'],axis=1,inplace=True)

num_cols = train.select_dtypes(include=['float64','int64']).columns.tolist()
cat_cols = train.select_dtypes(exclude=['float64','int64']).columns.tolist()
num_cols.remove('target')


#Education
train['Education']=train['Education'].map({'Basic':1,'2n Cycle':2,'Graduation':3,
                       'PhD':4,'Master':5})
test['Education']=test['Education'].map({'Basic':1,'2n Cycle':2,'Graduation':3,
                       'PhD':4,'Master':5})
#Dt_Customer
train['Year_Customer']=train['Dt_Customer'].str.split('-',expand=True)[2].astype('int64')
test['Year_Customer']=test['Dt_Customer'].str.split('-',expand=True)[2].astype('int64')
train.drop('Dt_Customer',axis=1,inplace=True)
test.drop('Dt_Customer',axis=1,inplace=True)

#year_birth -> age : 상쇄

#total purhases
train['TotalPurchases']=train['NumCatalogPurchases']+train['NumStorePurchases']+train['NumWebPurchases']+train['NumDealsPurchases']
test['TotalPurchases']=test['NumCatalogPurchases']+test['NumStorePurchases']+test['NumWebPurchases']+test['NumDealsPurchases']

#kidhome+teenhome
train['Child']=train['Kidhome'] + train['Teenhome']
test['Child']=test['Kidhome'] + test['Teenhome']

#Martial_status
train['Marital_Status']=train['Marital_Status'].replace(['Divorced','Widow','YOLO','Alone','Absurd'],'Single')
test['Marital_Status']=train['Marital_Status'].replace(['Divorced','Widow','YOLO','Alone','Absurd'],'Single')


#drop outlier
outliers = OutlierTrimmer(variables=['Year_Birth','Income'],fold=3)
train = outliers.fit_transform(train)

#target log
train['target']= np.log1p(train['target'])

#yeo
#totalpurchases

#StandardScaler

ss=StandardScaler()
train[num_cols]=ss.fit_transform(train[num_cols])
test[num_cols]=ss.transform(test[num_cols])

#dummies
train = pd.get_dummies(train)
test = pd.get_dummies(test)

#model
X= train.drop(['target'],axis=1)
y= train['target']

X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=.3, random_state=42)


#과적합
# trans=YeoJohnsonTransformer(variables=['NumCatalogPurchases'
# ])

# X_train=trans.fit_transform(X_train)
# X_test = trans.transform(X_test)

# fold

In [9]:
def NMAE(true, pred):
    mae = np.mean(np.abs(true-pred))
    score = mae / np.mean(np.abs(true))
    return score

nmae=[]
models=[RandomForestRegressor,SVR,KNeighborsRegressor,LGBMRegressor,XGBRegressor,ExtraTreesRegressor,CatBoostRegressor]
for i in models:
    reg=i()
    reg.fit(X_train,y_train)
    pred=reg.predict(X_test)
    mse.append(NMAE(y_test,pred))
    
algorism=['rf_reg','KNN','svr','xgb_reg','lgbm_reg','extra_reg','cat_reg']


# lgbm_params= {'lambda_l1': 1.776746024131738e-08, 'lambda_l2': 1.6357395553052135e-05, 'num_leaves': 60, 'n_estimators': 909, 'feature_fraction': 0.7654450449982961, 'bagging_fraction': 0.4362714208794097, 'min_child_samples': 14}
# rf_params= {'n_estimators': 255, 'max_depth': 73, 'min_samples_split': 5, 'min_samples_leaf': 2}
# extra_params= {'n_estimators': 764, 'max_depth': 20, 'min_samples_split': 3, 'min_samples_leaf': 3}

for i in algorism:
    if i=='rf_reg':
        rf_reg=RandomForestRegressor(**rf_params,random_state=42)
    elif i=='KNN':
        knn_reg=KNeighborsRegressor()
    elif i=='svr':
        svr=SVR(kernel='linear')
    elif i=='xgb_reg':
        xgb_reg=XGBRegressor(random_state=42)
    elif i=='extra_reg':
        extra_reg=ExtraTreesRegressor(**extra_params,random_state=42)
    elif i=='cat_reg':
        cat_reg= CatBoostRegressor(random_state=42)
    else:
        lgbm_reg=LGBMRegressor(**lgbm_params,random_state=42)
        
vot_model=VotingRegressor(estimators=[('LGBoost',lgbm_reg),('Rf',rf_reg),('EXTRA RF',extra_reg),('cat_reg',cat_reg)])
vot_model.fit(X_train,y_train)
pred2=vot_model.predict(X_test)
nmae.append(NMAE(y_test,pred2))

nmae_data=pd.DataFrame(mse,index=['RandomForestRegressor','SVR','KNeighborsRegressor','LGBMRegreessor','XGBRegressor','EXTRARegressor','CatBoostRegressor','Voting'],columns=['mse'])
nmae_data

Learning rate set to 0.037282
0:	learn: 1.4226296	total: 992us	remaining: 991ms
1:	learn: 1.3824885	total: 2.06ms	remaining: 1.03s
2:	learn: 1.3427377	total: 3.06ms	remaining: 1.02s
3:	learn: 1.3020598	total: 4.21ms	remaining: 1.05s
4:	learn: 1.2629998	total: 5.21ms	remaining: 1.03s
5:	learn: 1.2275372	total: 6.21ms	remaining: 1.03s
6:	learn: 1.1905651	total: 7.2ms	remaining: 1.02s
7:	learn: 1.1598588	total: 8.12ms	remaining: 1.01s
8:	learn: 1.1255565	total: 9.95ms	remaining: 1.09s
9:	learn: 1.0911574	total: 10.9ms	remaining: 1.08s
10:	learn: 1.0630548	total: 11.3ms	remaining: 1.01s
11:	learn: 1.0343928	total: 12.5ms	remaining: 1.03s
12:	learn: 1.0060993	total: 13.6ms	remaining: 1.03s
13:	learn: 0.9776047	total: 14.5ms	remaining: 1.02s
14:	learn: 0.9509992	total: 15.1ms	remaining: 994ms
15:	learn: 0.9271806	total: 15.5ms	remaining: 952ms
16:	learn: 0.9016047	total: 16.5ms	remaining: 952ms
17:	learn: 0.8770633	total: 17.5ms	remaining: 955ms
18:	learn: 0.8563336	total: 18.5ms	remaining: 

195:	learn: 0.2233902	total: 191ms	remaining: 782ms
196:	learn: 0.2228694	total: 192ms	remaining: 781ms
197:	learn: 0.2224761	total: 193ms	remaining: 780ms
198:	learn: 0.2224492	total: 193ms	remaining: 777ms
199:	learn: 0.2222440	total: 194ms	remaining: 776ms
200:	learn: 0.2219958	total: 195ms	remaining: 775ms
201:	learn: 0.2216258	total: 196ms	remaining: 775ms
202:	learn: 0.2213764	total: 197ms	remaining: 774ms
203:	learn: 0.2208843	total: 198ms	remaining: 774ms
204:	learn: 0.2205817	total: 199ms	remaining: 774ms
205:	learn: 0.2202244	total: 201ms	remaining: 773ms
206:	learn: 0.2199074	total: 201ms	remaining: 772ms
207:	learn: 0.2196272	total: 202ms	remaining: 771ms
208:	learn: 0.2193010	total: 204ms	remaining: 771ms
209:	learn: 0.2189839	total: 205ms	remaining: 770ms
210:	learn: 0.2184758	total: 206ms	remaining: 769ms
211:	learn: 0.2178866	total: 207ms	remaining: 768ms
212:	learn: 0.2174778	total: 208ms	remaining: 768ms
213:	learn: 0.2168238	total: 209ms	remaining: 766ms
214:	learn: 

390:	learn: 0.1578458	total: 383ms	remaining: 596ms
391:	learn: 0.1576200	total: 384ms	remaining: 596ms
392:	learn: 0.1574038	total: 385ms	remaining: 595ms
393:	learn: 0.1572719	total: 386ms	remaining: 594ms
394:	learn: 0.1570398	total: 387ms	remaining: 593ms
395:	learn: 0.1567998	total: 388ms	remaining: 592ms
396:	learn: 0.1564933	total: 389ms	remaining: 591ms
397:	learn: 0.1563776	total: 390ms	remaining: 590ms
398:	learn: 0.1560262	total: 391ms	remaining: 589ms
399:	learn: 0.1558127	total: 392ms	remaining: 588ms
400:	learn: 0.1556895	total: 393ms	remaining: 587ms
401:	learn: 0.1554227	total: 394ms	remaining: 586ms
402:	learn: 0.1552832	total: 395ms	remaining: 585ms
403:	learn: 0.1551930	total: 396ms	remaining: 584ms
404:	learn: 0.1549854	total: 397ms	remaining: 583ms
405:	learn: 0.1547491	total: 398ms	remaining: 582ms
406:	learn: 0.1544431	total: 399ms	remaining: 581ms
407:	learn: 0.1541568	total: 400ms	remaining: 581ms
408:	learn: 0.1538647	total: 401ms	remaining: 580ms
409:	learn: 

583:	learn: 0.1205281	total: 575ms	remaining: 410ms
584:	learn: 0.1203815	total: 576ms	remaining: 409ms
585:	learn: 0.1202892	total: 577ms	remaining: 408ms
586:	learn: 0.1200549	total: 578ms	remaining: 407ms
587:	learn: 0.1199325	total: 579ms	remaining: 406ms
588:	learn: 0.1197873	total: 580ms	remaining: 405ms
589:	learn: 0.1196610	total: 581ms	remaining: 404ms
590:	learn: 0.1195014	total: 582ms	remaining: 403ms
591:	learn: 0.1192933	total: 583ms	remaining: 402ms
592:	learn: 0.1191418	total: 584ms	remaining: 401ms
593:	learn: 0.1189770	total: 585ms	remaining: 400ms
594:	learn: 0.1188404	total: 586ms	remaining: 399ms
595:	learn: 0.1188166	total: 587ms	remaining: 398ms
596:	learn: 0.1186275	total: 588ms	remaining: 397ms
597:	learn: 0.1185389	total: 589ms	remaining: 396ms
598:	learn: 0.1184117	total: 590ms	remaining: 395ms
599:	learn: 0.1183467	total: 591ms	remaining: 394ms
600:	learn: 0.1182749	total: 592ms	remaining: 393ms
601:	learn: 0.1181448	total: 593ms	remaining: 392ms
602:	learn: 

774:	learn: 0.0973278	total: 767ms	remaining: 223ms
775:	learn: 0.0972047	total: 768ms	remaining: 222ms
776:	learn: 0.0970659	total: 769ms	remaining: 221ms
777:	learn: 0.0969055	total: 770ms	remaining: 220ms
778:	learn: 0.0968327	total: 771ms	remaining: 219ms
779:	learn: 0.0967628	total: 772ms	remaining: 218ms
780:	learn: 0.0966976	total: 773ms	remaining: 217ms
781:	learn: 0.0965857	total: 774ms	remaining: 216ms
782:	learn: 0.0965130	total: 775ms	remaining: 215ms
783:	learn: 0.0963605	total: 776ms	remaining: 214ms
784:	learn: 0.0962828	total: 777ms	remaining: 213ms
785:	learn: 0.0961636	total: 778ms	remaining: 212ms
786:	learn: 0.0960748	total: 780ms	remaining: 211ms
787:	learn: 0.0960005	total: 781ms	remaining: 210ms
788:	learn: 0.0959487	total: 782ms	remaining: 209ms
789:	learn: 0.0958525	total: 783ms	remaining: 208ms
790:	learn: 0.0958151	total: 784ms	remaining: 207ms
791:	learn: 0.0957374	total: 785ms	remaining: 206ms
792:	learn: 0.0955757	total: 786ms	remaining: 205ms
793:	learn: 

962:	learn: 0.0807067	total: 960ms	remaining: 36.9ms
963:	learn: 0.0806581	total: 961ms	remaining: 35.9ms
964:	learn: 0.0805752	total: 962ms	remaining: 34.9ms
965:	learn: 0.0805114	total: 963ms	remaining: 33.9ms
966:	learn: 0.0804437	total: 964ms	remaining: 32.9ms
967:	learn: 0.0803745	total: 965ms	remaining: 31.9ms
968:	learn: 0.0803127	total: 966ms	remaining: 30.9ms
969:	learn: 0.0802485	total: 967ms	remaining: 29.9ms
970:	learn: 0.0801601	total: 967ms	remaining: 28.9ms
971:	learn: 0.0800832	total: 968ms	remaining: 27.9ms
972:	learn: 0.0800017	total: 969ms	remaining: 26.9ms
973:	learn: 0.0799190	total: 970ms	remaining: 25.9ms
974:	learn: 0.0797878	total: 972ms	remaining: 24.9ms
975:	learn: 0.0796934	total: 973ms	remaining: 23.9ms
976:	learn: 0.0795925	total: 974ms	remaining: 22.9ms
977:	learn: 0.0795449	total: 975ms	remaining: 21.9ms
978:	learn: 0.0794735	total: 976ms	remaining: 20.9ms
979:	learn: 0.0793799	total: 977ms	remaining: 19.9ms
980:	learn: 0.0793059	total: 978ms	remaining: 

187:	learn: 0.2189652	total: 192ms	remaining: 829ms
188:	learn: 0.2185154	total: 193ms	remaining: 828ms
189:	learn: 0.2182422	total: 194ms	remaining: 826ms
190:	learn: 0.2180367	total: 195ms	remaining: 825ms
191:	learn: 0.2178514	total: 196ms	remaining: 824ms
192:	learn: 0.2175266	total: 197ms	remaining: 823ms
193:	learn: 0.2172158	total: 198ms	remaining: 823ms
194:	learn: 0.2170342	total: 199ms	remaining: 822ms
195:	learn: 0.2170110	total: 200ms	remaining: 819ms
196:	learn: 0.2169600	total: 200ms	remaining: 816ms
197:	learn: 0.2164263	total: 201ms	remaining: 815ms
198:	learn: 0.2157090	total: 202ms	remaining: 814ms
199:	learn: 0.2152513	total: 203ms	remaining: 813ms
200:	learn: 0.2149815	total: 204ms	remaining: 812ms
201:	learn: 0.2143529	total: 205ms	remaining: 811ms
202:	learn: 0.2139600	total: 206ms	remaining: 810ms
203:	learn: 0.2133346	total: 207ms	remaining: 809ms
204:	learn: 0.2128017	total: 208ms	remaining: 808ms
205:	learn: 0.2127313	total: 210ms	remaining: 808ms
206:	learn: 

376:	learn: 0.1587963	total: 385ms	remaining: 636ms
377:	learn: 0.1585393	total: 386ms	remaining: 635ms
378:	learn: 0.1583352	total: 387ms	remaining: 634ms
379:	learn: 0.1582473	total: 388ms	remaining: 633ms
380:	learn: 0.1581016	total: 389ms	remaining: 632ms
381:	learn: 0.1578082	total: 391ms	remaining: 633ms
382:	learn: 0.1575570	total: 392ms	remaining: 632ms
383:	learn: 0.1572775	total: 393ms	remaining: 630ms
384:	learn: 0.1571553	total: 394ms	remaining: 629ms
385:	learn: 0.1568502	total: 395ms	remaining: 629ms
386:	learn: 0.1565620	total: 396ms	remaining: 627ms
387:	learn: 0.1563414	total: 397ms	remaining: 626ms
388:	learn: 0.1560462	total: 398ms	remaining: 625ms
389:	learn: 0.1557811	total: 399ms	remaining: 624ms
390:	learn: 0.1554522	total: 400ms	remaining: 624ms
391:	learn: 0.1552416	total: 402ms	remaining: 623ms
392:	learn: 0.1551279	total: 402ms	remaining: 622ms
393:	learn: 0.1547738	total: 403ms	remaining: 621ms
394:	learn: 0.1545855	total: 405ms	remaining: 620ms
395:	learn: 

568:	learn: 0.1240422	total: 577ms	remaining: 437ms
569:	learn: 0.1238067	total: 578ms	remaining: 436ms
570:	learn: 0.1237207	total: 579ms	remaining: 435ms
571:	learn: 0.1233953	total: 581ms	remaining: 434ms
572:	learn: 0.1231932	total: 581ms	remaining: 433ms
573:	learn: 0.1231608	total: 582ms	remaining: 432ms
574:	learn: 0.1230569	total: 583ms	remaining: 431ms
575:	learn: 0.1227992	total: 584ms	remaining: 430ms
576:	learn: 0.1226193	total: 585ms	remaining: 429ms
577:	learn: 0.1225073	total: 586ms	remaining: 428ms
578:	learn: 0.1224280	total: 587ms	remaining: 427ms
579:	learn: 0.1223097	total: 588ms	remaining: 426ms
580:	learn: 0.1221618	total: 589ms	remaining: 425ms
581:	learn: 0.1220971	total: 591ms	remaining: 424ms
582:	learn: 0.1218525	total: 591ms	remaining: 423ms
583:	learn: 0.1216825	total: 593ms	remaining: 422ms
584:	learn: 0.1214452	total: 594ms	remaining: 421ms
585:	learn: 0.1213671	total: 595ms	remaining: 420ms
586:	learn: 0.1211432	total: 596ms	remaining: 419ms
587:	learn: 

753:	learn: 0.1002555	total: 770ms	remaining: 251ms
754:	learn: 0.1001009	total: 771ms	remaining: 250ms
755:	learn: 0.1000000	total: 772ms	remaining: 249ms
756:	learn: 0.0998856	total: 773ms	remaining: 248ms
757:	learn: 0.0997651	total: 775ms	remaining: 247ms
758:	learn: 0.0997424	total: 776ms	remaining: 246ms
759:	learn: 0.0996200	total: 777ms	remaining: 245ms
760:	learn: 0.0994365	total: 778ms	remaining: 244ms
761:	learn: 0.0992682	total: 779ms	remaining: 243ms
762:	learn: 0.0991313	total: 780ms	remaining: 242ms
763:	learn: 0.0990572	total: 781ms	remaining: 241ms
764:	learn: 0.0990399	total: 782ms	remaining: 240ms
765:	learn: 0.0989685	total: 783ms	remaining: 239ms
766:	learn: 0.0987889	total: 784ms	remaining: 238ms
767:	learn: 0.0986209	total: 785ms	remaining: 237ms
768:	learn: 0.0985105	total: 786ms	remaining: 236ms
769:	learn: 0.0983734	total: 787ms	remaining: 235ms
770:	learn: 0.0982626	total: 788ms	remaining: 234ms
771:	learn: 0.0981704	total: 789ms	remaining: 233ms
772:	learn: 

940:	learn: 0.0820077	total: 962ms	remaining: 60.3ms
941:	learn: 0.0819662	total: 963ms	remaining: 59.3ms
942:	learn: 0.0818976	total: 964ms	remaining: 58.3ms
943:	learn: 0.0818045	total: 965ms	remaining: 57.3ms
944:	learn: 0.0817642	total: 966ms	remaining: 56.2ms
945:	learn: 0.0817103	total: 967ms	remaining: 55.2ms
946:	learn: 0.0815987	total: 968ms	remaining: 54.2ms
947:	learn: 0.0814624	total: 969ms	remaining: 53.2ms
948:	learn: 0.0813594	total: 970ms	remaining: 52.1ms
949:	learn: 0.0813003	total: 971ms	remaining: 51.1ms
950:	learn: 0.0812319	total: 972ms	remaining: 50.1ms
951:	learn: 0.0811630	total: 973ms	remaining: 49.1ms
952:	learn: 0.0811210	total: 974ms	remaining: 48ms
953:	learn: 0.0810448	total: 975ms	remaining: 47ms
954:	learn: 0.0808945	total: 976ms	remaining: 46ms
955:	learn: 0.0808515	total: 977ms	remaining: 45ms
956:	learn: 0.0808437	total: 978ms	remaining: 44ms
957:	learn: 0.0807466	total: 979ms	remaining: 42.9ms
958:	learn: 0.0806649	total: 980ms	remaining: 41.9ms
959

Unnamed: 0,mse
RandomForestRegressor,0.0369
SVR,0.2229
KNeighborsRegressor,0.0519
LGBMRegreessor,0.0373
XGBRegressor,0.0387
EXTRARegressor,0.0349
CatBoostRegressor,0.0362
Voting,0.0341


# parameters (optuna)

In [11]:
def objectiveLGBM(trial: Trial, X,y,test):
    param = {
        'objective':'regression',
        'metric':'root_mean_squared_error',
        'verbosity':-1,
        'boosting_type':'gbdt',
        'lambda_l1':trial.suggest_loguniform('lambda_l1',1e-8, 10.0),
        'lambda_l2':trial.suggest_loguniform('lambda_l2',1e-8, 10.0),
        'num_leaves':trial.suggest_int('num_leaves',2,512),
        'learning_rate':0.01,
        'n_estimators':trial.suggest_int('n_estimators',100,3000),
        'feature_fraction':trial.suggest_uniform('feature_fraction',0.4,1.0),
        'bagging_fraction':trial.suggest_uniform('bagging_fraction',0.4,1.0),
        'bagging_freq':trial.suggest_int('min_child_samples',5,100),
        'gpu_use_dp':True
    }
    X_train,X_test,y_train,y_test =train_test_split(X,y, test_size=.3)
    
    lgbm_regr= LGBMRegressor(**param)
    lgbm_regr=lgbm_regr.fit(X_train,y_train,verbose=False)
    
    score=NMAE(lgbm_regr.predict(X_test),y_test)
    return score

study= optuna.create_study(direction='minimize',sampler=TPESampler())
study.optimize(lambda trial: objectiveLGBM(trial,X,y,X_test), n_trials=20)
print("Best Trial : score {}, \nparams {}".format(study.best_trial.value, study.best_trial.params))



def objectiveExtra(trial : Trial, X,y,test):
    param = {
        'n_estimators':trial.suggest_int('n_estimators',100,1000),
        'criterion':'squared_error',
        'max_depth':trial.suggest_int('max_depth',10,100),
        'min_samples_split':trial.suggest_int('min_samples_split',2,10),
        'min_samples_leaf':trial.suggest_int('min_samples_leaf',1,5)
    }
    X_train,X_test,y_train,y_test =train_test_split(X,y, test_size=.3)
    
    extra_regr= ExtraTreesRegressor(**param)
    extra_regr=extra_reg.fit(X_train,y_train)
    
    score=NMAE(extra_regr.predict(X_test),y_test)
    return score

# study= optuna.create_study(direction='minimize',sampler=TPESampler())
# study.optimize(lambda trial: objectiveExtra(trial,X,y,X_test), n_trials=20)
# print("Best Trial : score {}, \nparams {}".format(study.best_trial.value, study.best_trial.params))

def objectiveRandom(trial : Trial, X,y,test):
    param = {
        'n_estimators':trial.suggest_int('n_estimators',100,1000),
        'criterion':'squared_error',
        'max_depth':trial.suggest_int('max_depth',10,100),
        'min_samples_split':trial.suggest_int('min_samples_split',2,10),
        'min_samples_leaf':trial.suggest_int('min_samples_leaf',1,5)
    }
    X_train,X_test,y_train,y_test =train_test_split(X,y, test_size=.3)
    
    random_regr= RandomForestRegressor(**param)
    random_regr=random_regr.fit(X_train,y_train)
    
    score=NMAE(random_regr.predict(X_test),y_test)
    return score

# study= optuna.create_study(direction='minimize',sampler=TPESampler())
# study.optimize(lambda trial: objectiveRandom(trial,X,y,X_test), n_trials=20)
# print("Best Trial : score {}, \nparams {}".format(study.best_trial.value, study.best_trial.params))

[32m[I 2022-05-11 11:06:42,979][0m A new study created in memory with name: no-name-c29b7afd-6e64-4135-a8a3-dc3bc95153a0[0m




[32m[I 2022-05-11 11:06:43,473][0m Trial 0 finished with value: 0.03655892815568999 and parameters: {'lambda_l1': 9.078853483895135e-05, 'lambda_l2': 0.03358585286789913, 'num_leaves': 167, 'n_estimators': 727, 'feature_fraction': 0.5307486082291663, 'bagging_fraction': 0.919087576569294, 'min_child_samples': 51}. Best is trial 0 with value: 0.03655892815568999.[0m




[32m[I 2022-05-11 11:06:44,017][0m Trial 1 finished with value: 0.03581852676828778 and parameters: {'lambda_l1': 0.017474274461751867, 'lambda_l2': 0.0005325899345952098, 'num_leaves': 455, 'n_estimators': 1257, 'feature_fraction': 0.802894547442065, 'bagging_fraction': 0.5317442899667877, 'min_child_samples': 40}. Best is trial 1 with value: 0.03581852676828778.[0m




[32m[I 2022-05-11 11:06:44,384][0m Trial 2 finished with value: 0.036246380296670704 and parameters: {'lambda_l1': 7.666239892887975e-08, 'lambda_l2': 3.953770822902107e-06, 'num_leaves': 142, 'n_estimators': 1210, 'feature_fraction': 0.7983629032369957, 'bagging_fraction': 0.4320439191475025, 'min_child_samples': 14}. Best is trial 1 with value: 0.03581852676828778.[0m




[32m[I 2022-05-11 11:06:45,488][0m Trial 3 finished with value: 0.03406065011058072 and parameters: {'lambda_l1': 0.027589808523426882, 'lambda_l2': 6.797327494135526, 'num_leaves': 231, 'n_estimators': 2910, 'feature_fraction': 0.7258213533850517, 'bagging_fraction': 0.5848942874968927, 'min_child_samples': 11}. Best is trial 3 with value: 0.03406065011058072.[0m




[32m[I 2022-05-11 11:06:46,113][0m Trial 4 finished with value: 0.033713085259515226 and parameters: {'lambda_l1': 0.5996751090592694, 'lambda_l2': 3.5555188614312024e-07, 'num_leaves': 50, 'n_estimators': 1336, 'feature_fraction': 0.6939515999735961, 'bagging_fraction': 0.8681412763827302, 'min_child_samples': 10}. Best is trial 4 with value: 0.033713085259515226.[0m
[32m[I 2022-05-11 11:06:46,265][0m Trial 5 finished with value: 0.048605127444450134 and parameters: {'lambda_l1': 8.667001601553485e-06, 'lambda_l2': 6.84989588726226e-08, 'num_leaves': 246, 'n_estimators': 210, 'feature_fraction': 0.6348776188154303, 'bagging_fraction': 0.6629094849534556, 'min_child_samples': 18}. Best is trial 4 with value: 0.033713085259515226.[0m




[32m[I 2022-05-11 11:06:46,492][0m Trial 6 finished with value: 0.03592738040536511 and parameters: {'lambda_l1': 0.7526117973246422, 'lambda_l2': 0.11436479208695519, 'num_leaves': 231, 'n_estimators': 483, 'feature_fraction': 0.878936320747285, 'bagging_fraction': 0.8307125507417548, 'min_child_samples': 7}. Best is trial 4 with value: 0.033713085259515226.[0m




[32m[I 2022-05-11 11:06:48,529][0m Trial 7 finished with value: 0.03374792843279671 and parameters: {'lambda_l1': 2.2868484842815288e-07, 'lambda_l2': 0.010938029141184656, 'num_leaves': 130, 'n_estimators': 2903, 'feature_fraction': 0.677713937877866, 'bagging_fraction': 0.8927509491276464, 'min_child_samples': 17}. Best is trial 4 with value: 0.033713085259515226.[0m




[32m[I 2022-05-11 11:06:49,361][0m Trial 8 finished with value: 0.03636650240254164 and parameters: {'lambda_l1': 7.27338032493407e-05, 'lambda_l2': 0.003025563241280729, 'num_leaves': 284, 'n_estimators': 1444, 'feature_fraction': 0.7145437669553465, 'bagging_fraction': 0.6586009713589203, 'min_child_samples': 94}. Best is trial 4 with value: 0.033713085259515226.[0m




[32m[I 2022-05-11 11:06:50,451][0m Trial 9 finished with value: 0.03672251329699235 and parameters: {'lambda_l1': 0.21629976878520732, 'lambda_l2': 1.2071297626076732e-07, 'num_leaves': 248, 'n_estimators': 2993, 'feature_fraction': 0.5890963975546482, 'bagging_fraction': 0.4191535785843903, 'min_child_samples': 75}. Best is trial 4 with value: 0.033713085259515226.[0m




[32m[I 2022-05-11 11:06:50,829][0m Trial 10 finished with value: 0.03530508062075707 and parameters: {'lambda_l1': 6.87063471591638, 'lambda_l2': 1.3579464589810306e-05, 'num_leaves': 12, 'n_estimators': 2164, 'feature_fraction': 0.43010676569184225, 'bagging_fraction': 0.998234488673088, 'min_child_samples': 36}. Best is trial 4 with value: 0.033713085259515226.[0m




[32m[I 2022-05-11 11:06:51,153][0m Trial 11 finished with value: 0.039984608603144005 and parameters: {'lambda_l1': 1.052609168519256e-08, 'lambda_l2': 1.3583615393376335e-05, 'num_leaves': 3, 'n_estimators': 2088, 'feature_fraction': 0.9977009840732596, 'bagging_fraction': 0.8087335087582791, 'min_child_samples': 25}. Best is trial 4 with value: 0.033713085259515226.[0m




[32m[I 2022-05-11 11:06:52,557][0m Trial 12 finished with value: 0.03747401772329735 and parameters: {'lambda_l1': 6.159398578263957e-07, 'lambda_l2': 1.544396159372907e-08, 'num_leaves': 102, 'n_estimators': 2054, 'feature_fraction': 0.6385391963382238, 'bagging_fraction': 0.807441282768546, 'min_child_samples': 31}. Best is trial 4 with value: 0.033713085259515226.[0m




[32m[I 2022-05-11 11:06:54,138][0m Trial 13 finished with value: 0.03202241431923558 and parameters: {'lambda_l1': 0.0031312079038249506, 'lambda_l2': 0.32676182748101673, 'num_leaves': 87, 'n_estimators': 2534, 'feature_fraction': 0.5103921841246037, 'bagging_fraction': 0.9014188231796354, 'min_child_samples': 63}. Best is trial 13 with value: 0.03202241431923558.[0m




[32m[I 2022-05-11 11:06:55,183][0m Trial 14 finished with value: 0.03648870743996521 and parameters: {'lambda_l1': 0.0032291976640620675, 'lambda_l2': 4.8009152612902355, 'num_leaves': 67, 'n_estimators': 1787, 'feature_fraction': 0.41118433482485706, 'bagging_fraction': 0.9845227149134205, 'min_child_samples': 69}. Best is trial 13 with value: 0.03202241431923558.[0m




[32m[I 2022-05-11 11:06:56,433][0m Trial 15 finished with value: 0.032661486832899315 and parameters: {'lambda_l1': 0.0026357325818284535, 'lambda_l2': 0.33710293260226765, 'num_leaves': 358, 'n_estimators': 2433, 'feature_fraction': 0.5272826465017068, 'bagging_fraction': 0.7261660708531414, 'min_child_samples': 62}. Best is trial 13 with value: 0.03202241431923558.[0m




[32m[I 2022-05-11 11:06:57,777][0m Trial 16 finished with value: 0.03500873559742222 and parameters: {'lambda_l1': 0.0019432581704715891, 'lambda_l2': 0.6102387777164053, 'num_leaves': 380, 'n_estimators': 2478, 'feature_fraction': 0.4958459793182427, 'bagging_fraction': 0.7263724899250356, 'min_child_samples': 70}. Best is trial 13 with value: 0.03202241431923558.[0m




[32m[I 2022-05-11 11:06:59,089][0m Trial 17 finished with value: 0.034412017143369895 and parameters: {'lambda_l1': 0.0009568466212558921, 'lambda_l2': 0.3920507559383343, 'num_leaves': 334, 'n_estimators': 2510, 'feature_fraction': 0.5058008266273011, 'bagging_fraction': 0.7471444204611851, 'min_child_samples': 57}. Best is trial 13 with value: 0.03202241431923558.[0m




[32m[I 2022-05-11 11:07:00,254][0m Trial 18 finished with value: 0.03515708784568802 and parameters: {'lambda_l1': 9.798635295051463e-06, 'lambda_l2': 0.0004672356258340117, 'num_leaves': 495, 'n_estimators': 2483, 'feature_fraction': 0.5660038564463179, 'bagging_fraction': 0.6046946980484333, 'min_child_samples': 87}. Best is trial 13 with value: 0.03202241431923558.[0m




[32m[I 2022-05-11 11:07:01,185][0m Trial 19 finished with value: 0.03698363959274002 and parameters: {'lambda_l1': 0.028905075129817836, 'lambda_l2': 0.005760925567388815, 'num_leaves': 402, 'n_estimators': 1717, 'feature_fraction': 0.4560609034063313, 'bagging_fraction': 0.7721119919622494, 'min_child_samples': 57}. Best is trial 13 with value: 0.03202241431923558.[0m


Best Trial : score 0.03202241431923558, 
params {'lambda_l1': 0.0031312079038249506, 'lambda_l2': 0.32676182748101673, 'num_leaves': 87, 'n_estimators': 2534, 'feature_fraction': 0.5103921841246037, 'bagging_fraction': 0.9014188231796354, 'min_child_samples': 63}


In [35]:
import plotly.offline as py
py.init_notebook_mode(connected=True)
from plotly.offline import init_notebook_mode, iplot
init_notebook_mode(connected=True)
import plotly.graph_objs as go
import plotly.offline as offline
x, y = (list(x) for x in zip(*sorted(zip(etr.feature_importances_, X_train.columns.values), 
                                                            reverse = False)))
trace2 = go.Bar(
    x=x ,
    y=y,
    marker=dict(
        color=x,
        colorscale = 'Viridis',
        reversescale = True
    ),
    name='Random Forest Feature importance',
    orientation='h',
)

layout = dict(
    title='Barplot of Feature importances',
     width = 1000, height = 1000,
    yaxis=dict(
        showgrid=False,
        showline=False,
        showticklabels=True,
#         domain=[0, 0.85],
    ),
    margin=dict(
    l=300,
),
)

fig1 = go.Figure(data=[trace2])
fig1['layout'].update(layout)
py.iplot(fig1, filename='plots')

# pred

In [58]:
final_pred = vot_model.predict(test)
sample_submission['target'] = np.expm1(final_pred)
sample_submission.to_csv("/Users/bnj/Desktop/백남진/project/data/drive-download-20220502T013044Z-001/submision.csv",index=False)