In [1]:
!pip3 install gensim polars annoy hyperopt


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.1.2[0m[39;49m -> [0m[32;49m23.2.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [2]:
import pandas as pd
import numpy as np
import glob
import itertools
from collections import Counter
import polars as pl
from gensim.models import Word2Vec
from annoy import AnnoyIndex
import random
from sklearn.model_selection import GroupKFold
from hyperopt import STATUS_OK, Trials, fmin, hp, tpe

In [3]:
DISK_PIECES = 4
data_dir = "../data/"
type_labels = {'clicks':0, 'carts':1, 'orders':2}
type_weight = {0:1, 1:6, 2:3}

In [4]:
def load_data(path):    
    dfs = []
    for e, chunk_file in enumerate(glob.glob(path)):
        chunk = pd.read_parquet(chunk_file)
        dfs.append(chunk)
    return pd.concat(dfs).reset_index(drop=True) #.astype({"ts": "datetime64[ms]"})


In [5]:
train_df = load_data(f'{data_dir}/train_pqt_chunks/*')
test_df = load_data(f'{data_dir}/test_pqt_chunks/*')

print('shape',train_df.shape, test_df.shape)

shape (216716096, 4) (6928123, 4)


In [6]:
sentences_df = pl.from_pandas(pd.concat([train_df,test_df])).groupby('session').agg(pl.col('aid').alias('sentence'))
sentences = sentences_df['sentence'].to_list()
w2vec = Word2Vec(sentences=sentences, vector_size=32, min_count=1, workers=6)
w2vec.save("../models/otto_word2vec.model")
del sentences_df, sentences, w2vec

In [6]:
w2vec = Word2Vec.load("../models/otto_word2vec.model")
embedding_weight = np.load('../models/otto_word2vec.model.wv.vectors.npy')
embedding_weight_neg = np.load('../models/otto_word2vec.model.syn1neg.npy')
embedding_weigh_dict_df = pl.from_pandas(pd.DataFrame(embedding_weight, columns = ['Embedding_' + str(x) for x in range(32)]).reset_index().rename(columns = {'index':'aid'})).with_columns(pl.col('aid').cast(pl.Int32))


aid2idx = {aid: i for i, aid in enumerate(w2vec.wv.index_to_key)}
index = AnnoyIndex(32, 'euclidean')

for aid, idx in aid2idx.items():
    index.add_item(idx, w2vec.wv.vectors[idx])
    
index.build(10)

def get_nn(aid,num_candidates):
    return [w2vec.wv.index_to_key[i] for i in index.get_nns_by_item(aid2idx[aid], num_candidates+1)[1:]]

In [7]:
def pqt_to_dict(df):
    return df.groupby('aid_x').aid_y.apply(list).to_dict()

# LOAD THREE CO-VISITATION MATRICES
top_40_clicks = pqt_to_dict( pd.read_parquet(f'{data_dir}top_40_clicks_0.pqt') )
for k in range(1,DISK_PIECES): 
    top_40_clicks.update( pqt_to_dict( pd.read_parquet(f'{data_dir}top_40_clicks_{k}.pqt') ) )

top_40_buys = pqt_to_dict( pd.read_parquet(f'{data_dir}top_40_carts_orders_0.pqt') )
for k in range(1,DISK_PIECES): 
    top_40_buys.update( pqt_to_dict( pd.read_parquet(f'{data_dir}top_40_carts_orders_{k}.pqt') ) )
top_40_buy2buy = pqt_to_dict( pd.read_parquet(f'{data_dir}top_40_buy2buy.pqt') )

# TOP CLICKS AND ORDERS IN TEST
top_clicks = test_df.loc[test_df['type']=='clicks','aid'].value_counts().index.values[:40]
top_orders = test_df.loc[test_df['type']=='orders','aid'].value_counts().index.values[:40]

print('Here are size of our 3 co-visitation matrices:')
print( len( top_40_clicks ), len( top_40_buy2buy ), len( top_40_buys ) )

Here are size of our 3 co-visitation matrices:
1837166 1168768 1837166


In [8]:
type_weight_multipliers = {0: 3, 1: 6, 2: 3}
def suggest_clicks(df,num_candidates):
    # USER HISTORY AIDS AND TYPES
    aids=df.aid.tolist()
    types = df.type.tolist()
    unique_aids = list(dict.fromkeys(aids[::-1] ))
    # RERANK CANDIDATES USING WEIGHTS
    if len(unique_aids)>=num_candidates:
        weights=np.logspace(0.1,1,len(aids),base=2, endpoint=True)-1
        aids_temp = Counter() 
        # RERANK BASED ON REPEAT ITEMS AND TYPE OF ITEMS
        for aid,w,t in zip(aids,weights,types): 
            aids_temp[aid] += w * type_weight_multipliers[t]
        sorted_aids = [k for k,v in aids_temp.most_common(num_candidates)]
        return sorted_aids
    # USE "CLICKS" CO-VISITATION MATRIX
    aids2 = list(itertools.chain(*[top_40_clicks[aid] for aid in unique_aids if aid in top_40_clicks]))
    #WORD2VEC CANDIDATES based on last 5 aids
    recent_aids=aids[::-1]
    word2vec_candidates = list(itertools.chain(*[get_nn(recent_aids[i],num_candidates) for i in range(min(5,len(recent_aids)))]))
    #Matrix Factorization based candidates
    #matrix_fact_candidates = list(itertools.chain(*[aid_matrix_fact[recent_aids[i]] for i in range(min(5,len(recent_aids)))]))
    # RERANK CANDIDATES
    #top_aids2 = [aid2 for aid2, cnt in Counter(aids2+word2vec_candidates+matrix_fact_candidates).most_common(num_candidates) if aid2 not in unique_aids]
    top_aids2 = [aid2 for aid2, cnt in Counter(aids2+word2vec_candidates).most_common(num_candidates) if aid2 not in unique_aids]
    
    result = unique_aids + top_aids2[:num_candidates - len(unique_aids)]
    # USE TOP20 TEST CLICKS
    return result + list(top_clicks)[:num_candidates-len(result)]

def suggest_buys(df,num_candidates):
    # USER HISTORY AIDS AND TYPES
    aids=df.aid.tolist()
    types = df.type.tolist()
    # UNIQUE AIDS AND UNIQUE BUYS
    unique_aids = list(dict.fromkeys(aids[::-1] ))
    df = df.loc[(df['type']==1)|(df['type']==2)]
    unique_buys = list(dict.fromkeys( df.aid.tolist()[::-1] ))
    # RERANK CANDIDATES USING WEIGHTS
    if len(unique_aids)>=num_candidates:
        weights=np.logspace(0.5,1,len(aids),base=2, endpoint=True)-1
        aids_temp = Counter() 
        # RERANK BASED ON REPEAT ITEMS AND TYPE OF ITEMS
        for aid,w,t in zip(aids,weights,types): 
            aids_temp[aid] += w * type_weight_multipliers[t]
        # RERANK CANDIDATES USING "BUY2BUY" CO-VISITATION MATRIX
        aids3 = list(itertools.chain(*[top_20_buy2buy[aid] for aid in unique_buys if aid in top_20_buy2buy]))
        for aid in aids3: aids_temp[aid] += 0.1
        sorted_aids = [k for k,v in aids_temp.most_common(num_candidates)]
        return sorted_aids
    # USE "CART ORDER" CO-VISITATION MATRIX
    aids2 = list(itertools.chain(*[top_20_buys[aid] for aid in unique_aids if aid in top_20_buys]))
    # USE "BUY2BUY" CO-VISITATION MATRIX
    aids3 = list(itertools.chain(*[top_20_buy2buy[aid] for aid in unique_buys if aid in top_20_buy2buy]))
    # USE "Word2vec" Matrix
    recent_aids=list(dict.fromkeys( df.loc[(df['type']==1)].aid.tolist()[::-1] ))
    word2vec_candidates = list(itertools.chain(*[get_nn(recent_aids[i],num_candidates) for i in range(min(3,len(recent_aids)))]))
    # RERANK CANDIDATES
    top_aids2 = [aid2 for aid2, cnt in Counter(aids2+aids3+word2vec_candidates).most_common(num_candidates) if aid2 not in unique_aids] 
    result = unique_aids + top_aids2[:num_candidates - len(unique_aids)]
    # USE TOP20 TEST ORDERS
    return result + list(top_orders)[:num_candidates-len(result)]


def click_recall_score(pred_df,test_labels):
    score = 0
    weights = {'clicks': 0.10, 'carts': 0.30, 'orders': 0.60}
    for t in ['clicks']:
        sub = pred_df.loc[pred_df.session_type.str.contains(t)].copy()
        sub['session'] = sub.session_type.apply(lambda x: int(x.split('_')[0]))
        sub.labels = sub.labels.apply(lambda x: [int(i) for i in x.split(' ')[:20]])
        test_labels = test_labels.loc[test_labels['type']==t]
        test_labels = test_labels.merge(sub, how='left', on=['session'])
        
        test_labels['hits'] = test_labels.apply(lambda df: len(set(df.ground_truth).intersection(set(df.labels))), axis=1)
        test_labels['gt_count'] = test_labels.ground_truth.str.len().clip(0,20)
        recall = test_labels['hits'].sum() / test_labels['gt_count'].sum()
        score += weights[t]*recall
        print(f'{t} recall =',recall)

In [9]:
train_df = pd.read_parquet(f"{data_dir}local_cv/3week_train_data.parquet")
train_df = train_df[train_df.type==0]


test_labels = pd.read_parquet("../data/local_cv/4week_train_labels.parquet")
test_labels.head()

Unnamed: 0,session,type,labels
0,0,0,"[10268, 219925, 1140855, 915702, 480578, 11458..."
1,0,1,"[974651, 974651, 974651, 280978, 1521766, 1760..."
2,0,2,"[1199474, 543308]"
3,1,0,"[1464360, 207905, 1628317, 376932, 497868]"
4,2,0,"[1593105, 427698, 414004, 465360, 526287, 5671..."


In [10]:
pred_df_clicks = train_df.sort_values(["session", "ts"]).groupby(["session"]).apply(
    lambda x: suggest_clicks(x,20)
)

del train_df, top_40_clicks, w2vec, embedding_weight, embedding_weight_neg, embedding_weigh_dict_df, index

In [11]:
train_df_click_recommended = pl.from_pandas(pd.DataFrame(pred_df_clicks, columns = ['aid']).reset_index()).explode("aid")
train_df_click_recommended = train_df_click_recommended.with_columns([
    pl.lit(0).alias('ts').cast(pl.Int32),    
    pl.lit(0).alias('type').cast(pl.Int8),
    pl.col('session').cast(pl.Int32),
    pl.col('aid').cast(pl.Int32),
])

n_col_after_join = train_df_click_recommended.groupby('session').agg([
    pl.col('aid').cumcount().alias('CG_ranking')]).select(
    pl.col('CG_ranking').explode().cast(pl.Int64))
train_df_click_recommended = pl.concat([train_df_click_recommended, n_col_after_join], how="horizontal")

train_df_click_recommended.head()


session,aid,ts,type,CG_ranking
i32,i32,i32,i8,i64
8643220,1189975,0,0,0
8643220,573273,0,0,1
8643220,1405280,0,0,2
8643220,1768884,0,0,3
8643220,226746,0,0,4


In [12]:
# calculate recall for ranking
pred_df = pd.DataFrame(pred_df_clicks.add_suffix("_clicks"), columns=["labels"]).reset_index()
clicks_pred_df = pd.DataFrame(pred_df_clicks.add_suffix("_clicks"), columns=["labels"]).reset_index()
pred_df.columns = ["session_type", "labels"]
pred_df["labels"] = pred_df["labels"].map(lambda x:x[0:20])
pred_df["labels"] = pred_df.labels.apply(lambda x: " ".join(map(str,x)))

del pred_df_clicks
pred_df.head()

Unnamed: 0,session_type,labels
0,8643220_clicks,1189975 573273 1405280 1768884 226746 406885 1...
1,8643221_clicks,111691 921137 501445 1189046 1355112 1097227 3...
2,8643222_clicks,1104129 1772730 1568399 1410436 756920 1029653...
3,8643223_clicks,1742315 1811963 206418 713187 1596300 1292042 ...
4,8643224_clicks,471589 778561 1106262 1706288 1502227 933835 2...


In [13]:
aids = test_labels.labels.explode().astype('int32').rename('item')
tar = test_labels[['session']].astype('int32').rename({'session':'user'},axis=1)
tar = tar.merge(aids, left_index=True, right_index=True, how='left')
tar['click'] = 1
tar.columns = ["session","aid","click"]
tar = pl.from_pandas(tar).with_columns([pl.col('session').cast(pl.Int32),pl.col('aid').cast(pl.Int32)])
train_df_click_recommended = train_df_click_recommended.join(tar,on=['session','aid'],how='left').with_columns(pl.col("click").fill_null(pl.lit(0),),)
del tar, aids

In [14]:
train_df_click_recommended = (train_df_click_recommended.filter(pl.col("click") == 0).sample(frac=0.5)
                                .vstack(train_df_click_recommended.filter(pl.col("click") == 1)))

In [15]:
aid_features = pl.read_parquet("../data/features/item_feat.pqt").with_columns(pl.col('aid').cast(pl.Int32))
train_df_click_recommended = train_df_click_recommended.join(aid_features, how='left',on="aid")
del aid_features

user_features = pl.read_parquet("../data/features/user_feat.pqt").with_columns(pl.col('session').cast(pl.Int32))
train_df_click_recommended = train_df_click_recommended.join(user_features,how='left', on='session')
del user_features

user_item_features = pl.read_parquet("../data/features/user_item.pqt").with_columns([pl.col('session').cast(pl.Int32),pl.col('aid').cast(pl.Int32)])
train_df_click_recommended = train_df_click_recommended.join(user_item_features,how='left', on=['session','aid'])
del user_item_features

train_df_click_recommended = train_df_click_recommended.fill_null(-1) 

In [16]:
train_df_click_recommended = train_df_click_recommended.select(pl.exclude(["__index_level_0__","__index_level_0___right","ts_right","type_right"]))

In [17]:
train_df_click_recommended.head()

session,aid,ts,type,CG_ranking,click,item_overall_count,item_unique_session,item_type_score,item_common_day,item_mean_hour,item_click_cnt,item_cart_cnt,item_order_cnt,item_click_rate,item_cart_rate,item_order_rate,user_session_length,unique_aid,user_type_score,user_hh_mean,unique_days,unique_sessions,first_day,last_day,avg_clk_hr,avg_cart_hr,avg_ord_hr,user_click_cnt,user_cart_cnt,user_order_cnt,user_click_rate,user_cart_rate,user_order_rate,user_click_rate_per_session,user_cart_rate_per_session,user_order_rate_per_session,count_prev_click_action,count_prev_cart_action,count_prev_order_action,hist_click,hist_cart,hist_order,action_num_reverse_chrono,log_recency_score,type_weighted_log_recency_score
i32,i32,i32,i8,i64,i64,i64,i64,f64,i8,f64,f64,f64,f64,f64,f64,f64,i64,i64,f64,f64,i64,f64,i8,i8,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,i64,f64,f64
8643220,1189975,0,0,0,0,8895,4953,0.071388,6,13.639236,8379.0,397.0,119.0,0.94199,0.044632,0.013378,2,2,0.0,22.0,1,1.0,6,6,22.0,-1.0,-1.0,2.0,0.0,0.0,1.0,0.0,0.0,2.0,0.0,0.0,0.0,-1.0,-1.0,-1.0,-1.0,-1.0,0,1.0,1.0
9903629,493104,0,0,14,0,37955,15528,0.16596,6,13.9599,32679.0,4253.0,1023.0,0.860993,0.112054,0.026953,14,2,0.785714,10.357143,2,2.0,3,6,10.333333,10.4,10.333333,6.0,5.0,3.0,0.428571,0.357143,0.214286,3.0,2.5,1.5,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1,-1.0,-1.0
10337730,131155,0,0,18,0,1991,1388,0.034656,6,14.048719,1929.0,55.0,7.0,0.96886,0.027624,0.003516,11,2,0.454545,19.636364,1,2.0,4,4,19.428571,20.0,20.0,7.0,3.0,1.0,0.636364,0.272727,0.090909,3.5,1.5,0.5,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1,-1.0,-1.0
8643220,1768884,0,0,3,0,13381,6534,0.105448,2,13.836559,12273.0,805.0,303.0,0.917196,0.06016,0.022644,2,2,0.0,22.0,1,1.0,6,6,22.0,-1.0,-1.0,2.0,0.0,0.0,1.0,0.0,0.0,2.0,0.0,0.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1,-1.0,-1.0
10207377,1183916,0,0,11,0,11285,6993,0.06708,5,13.245104,10637.0,539.0,109.0,0.942579,0.047763,0.009659,14,8,0.142857,7.571429,2,3.0,4,6,7.666667,7.0,-1.0,12.0,2.0,0.0,0.857143,0.142857,0.0,4.0,0.666667,0.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1,-1.0,-1.0


In [18]:
train_df = pd.read_parquet(f"{data_dir}local_cv/3week_train_data.parquet")
train_df = train_df[train_df.type==0]
real_action = pl.from_pandas(train_df[["session","aid"]]).with_columns([pl.lit(1).alias('real_action'),pl.col('session').cast(pl.Int32),pl.col('aid').cast(pl.Int32)])
del train_df
train_df_click_recommended = train_df_click_recommended.join(real_action,how='left', on=['session','aid']).with_columns(pl.col("real_action").fill_null(pl.lit(0),),)
del real_action

In [19]:
candidates = train_df_click_recommended.to_pandas()
del train_df_click_recommended
#candidates.loc[candidates.aid.isin(top_clicks_l3days),"is_top_clicks_3days"] = 1
#candidates.loc[candidates.aid.isin(top_clicks_l1days),"is_top_clicks_1days"] = 1
#candidates.loc[candidates.aid.isin(top_clicks),"is_top_clicks_7days"] = 1
#candidates.fillna(value = {"is_top_clicks_3days": 0, "is_top_clicks_1days": 0, "is_top_clicks_7days": 0},inplace=True)
candidates.head()

Unnamed: 0,session,aid,ts,type,CG_ranking,click,item_overall_count,item_unique_session,item_type_score,item_common_day,...,count_prev_click_action,count_prev_cart_action,count_prev_order_action,hist_click,hist_cart,hist_order,action_num_reverse_chrono,log_recency_score,type_weighted_log_recency_score,real_action
0,8643220,1189975,0,0,0,0,8895,4953,0.071388,6,...,0.0,-1.0,-1.0,-1.0,-1.0,-1.0,0,1.0,1.0,1
1,9903629,493104,0,0,14,0,37955,15528,0.16596,6,...,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1,-1.0,-1.0,0
2,10337730,131155,0,0,18,0,1991,1388,0.034656,6,...,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1,-1.0,-1.0,0
3,8643220,1768884,0,0,3,0,13381,6534,0.105448,2,...,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1,-1.0,-1.0,0
4,10207377,1183916,0,0,11,0,11285,6993,0.06708,5,...,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1,-1.0,-1.0,0


In [20]:
candidates.click = candidates.click.astype(int)
candidates["click"].value_counts()

0    37640200
1    29240130
Name: click, dtype: int64

In [21]:
test_sessions = random.choices(candidates.session.unique(), k=candidates.session.nunique()//2)
test_candidates = candidates[candidates.session.isin(test_sessions)]
train_candidates = candidates[~candidates.session.isin(test_sessions)]
print(train_candidates.shape,test_candidates.shape)

(41744249, 47) (25136081, 47)


In [22]:
del candidates

In [23]:
FEATURES = [col for col in train_candidates.columns.values.tolist() if col not in ["session","aid","click","ts","type"]]
FEATURES

['CG_ranking',
 'item_overall_count',
 'item_unique_session',
 'item_type_score',
 'item_common_day',
 'item_mean_hour',
 'item_click_cnt',
 'item_cart_cnt',
 'item_order_cnt',
 'item_click_rate',
 'item_cart_rate',
 'item_order_rate',
 'user_session_length',
 'unique_aid',
 'user_type_score',
 'user_hh_mean',
 'unique_days',
 'unique_sessions',
 'first_day',
 'last_day',
 'avg_clk_hr',
 'avg_cart_hr',
 'avg_ord_hr',
 'user_click_cnt',
 'user_cart_cnt',
 'user_order_cnt',
 'user_click_rate',
 'user_cart_rate',
 'user_order_rate',
 'user_click_rate_per_session',
 'user_cart_rate_per_session',
 'user_order_rate_per_session',
 'count_prev_click_action',
 'count_prev_cart_action',
 'count_prev_order_action',
 'hist_click',
 'hist_cart',
 'hist_order',
 'action_num_reverse_chrono',
 'log_recency_score',
 'type_weighted_log_recency_score',
 'real_action']

In [34]:
space={'max_depth': hp.quniform("max_depth", 3, 12, 1),
        'colsample_bytree' : hp.uniform('colsample_bytree', 0.5,1),
        'min_child_weight' : hp.quniform('min_child_weight', 1000, 100000, 1000),
        'num_boost_round': hp.quniform('num_boost_round', 50,1000,50),
        'subsample': hp.uniform('learning_rate', 0.4,1),
        'objective':hp.choice('objective',['rank:pairwise','rank:ndcg','rank:map']),
        'seed': 0}

In [None]:
X_train = train_candidates.loc[:, FEATURES]
y_train = train_candidates.loc[:, 'click']
X_valid = test_candidates.loc[:, FEATURES]
y_valid = test_candidates.loc[:, 'click']

test_labels = test_labels[test_labels['type']=="clicks"]
test_labels = test_labels[test_labels.session.isin(test_candidates["session"].unique())]


# IF YOU HAVE 50 CANDIDATE WE USE 50 BELOW
dtrain = xgb.DMatrix(X_train, y_train, group=train_candidates[["session"]].groupby("session")['session'].count().values) 
dvalid = xgb.DMatrix(X_valid, y_valid, group=test_candidates[["session"]].groupby("session")['session'].count().values) 
def objective(space):
    clf=xgb.train(params={'max_depth':int(space['max_depth']),
                    'min_child_weight':int(space['min_child_weight']), 
                   'colsample_bytree':space['colsample_bytree'], 
                   'subsample' : space['subsample'],
                    'tree_method': "auto",
                    'objective' : space['objective']},
                    num_boost_round = int(space['num_boost_round']),
                      dtrain=dtrain,verbose_eval=100
                        )
    
    preds = np.zeros(len(test_candidates))
    predictions = test_candidates[['session','aid']].copy()
    dtest = xgb.DMatrix(data=X_valid)
    predictions['pred'] = clf.predict(dvalid)

    predictions = predictions.sort_values(['session','pred'], ascending=[True,False]).reset_index(drop=True)
    predictions['n'] = predictions.groupby('aid').aid.cumcount().astype('int8')
    predictions = predictions.loc[predictions.n<20]
    sub = predictions.groupby('session').aid.apply(list)
    sub = sub.to_frame().reset_index()
    sub.aid = sub.aid.apply(lambda x: " ".join(map(str,x)))
    sub.columns = ['session_type','labels']
    
    sub.session_type = sub.session_type.astype('str')+ '_clicks'
    sub = sub.loc[sub.session_type.str.contains('clicks')].copy()
    sub['session'] = sub.session_type.apply(lambda x: int(x.split('_')[0]))
    sub.labels = sub.labels.apply(lambda x: [int(i) for i in x.split(' ')[:20]])
    verify = test_labels.merge(sub, how='left', on=['session'])
    verify.dropna(inplace=True)
    verify['hits'] = verify.apply(lambda df: len(set(df.ground_truth).intersection(set(df.labels))), axis=1)
    verify['gt_count'] = verify.ground_truth.str.len().clip(0,20)
    recall = verify['hits'].sum() / verify['gt_count'].sum()
    return {'loss': -recall, 'status': STATUS_OK }

trials = Trials()

best_hyperparams = fmin(fn = objective,
                        space = space,
                        algo = tpe.suggest,
                        max_evals = 1,
                        trials = trials)

print(best_hyperparams)