In [1]:
import re
from time import time
from collections import Counter

import tensorflow as tf
import pandas as pd
import numpy as np

from nltk.stem.porter import PorterStemmer
from fastcache import clru_cache as lru_cache

from sklearn.model_selection import ShuffleSplit
from sklearn import metrics

from tqdm import tqdm, tqdm_notebook
tqdm.pandas(tqdm_notebook())

  from ._conv import register_converters as _register_converters


A Jupyter Widget

In [2]:
t_start = time()

stemmer = PorterStemmer()

def rmse(y_true, y_pred):
    return np.sqrt(metrics.mean_squared_error(y_true, y_pred))

@lru_cache(1024)
def stem(s):
    return stemmer.stem(s)

whitespace = re.compile(r'\s+')
non_letter = re.compile(r'\W+')

def tokenize(text):
    text = text.lower()
    text = non_letter.sub(' ', text)

    tokens = []

    for t in text.split():
        #t = stem(t)
        tokens.append(t)

    return tokens

class Tokenizer:
    def __init__(self, min_df=10, tokenizer=str.split):
        self.min_df = min_df
        self.tokenizer = tokenizer
        self.doc_freq = None
        self.vocab = None
        self.vocab_idx = None
        self.max_len = None

    def fit_transform(self, texts):
        tokenized = []
        doc_freq = Counter()
        n = len(texts)

        for text in texts:
            sentence = self.tokenizer(text)
            tokenized.append(sentence)
            doc_freq.update(set(sentence))

        vocab = sorted([t for (t, c) in doc_freq.items() if c >= self.min_df])
        vocab_idx = {t: (i + 1) for (i, t) in enumerate(vocab)}
        doc_freq = [doc_freq[t] for t in vocab]

        self.doc_freq = doc_freq
        self.vocab = vocab
        self.vocab_idx = vocab_idx

        max_len = 0
        result_list = []
        for text in tokenized:
            text = self.text_to_idx(text)
            max_len = max(max_len, len(text))
            result_list.append(text)

        self.max_len = max_len
        result = np.zeros(shape=(n, max_len), dtype=np.int32)
        for i in range(n):
            text = result_list[i]
            result[i, :len(text)] = text

        return result    

    def text_to_idx(self, tokenized):
        return [self.vocab_idx[t] for t in tokenized if t in self.vocab_idx]

    def transform(self, texts):
        n = len(texts)
        result = np.zeros(shape=(n, self.max_len), dtype=np.int32)

        for i in range(n):
            text = self.tokenizer(texts[i])
            text = self.text_to_idx(text)[:self.max_len]
            result[i, :len(text)] = text

        return result
    
    def vocabulary_size(self):
        return len(self.vocab) + 1



In [3]:
%%time
print('reading train data...')
df_train = pd.read_csv('../input/train.tsv', sep='\t')
df_train = df_train[df_train.price != 0].reset_index(drop=True)

price = df_train.pop('price')
y = np.log1p(price.values).reshape(-1,1)
mean = y.mean()
std = y.std()
ynorm = (y - mean) / std
ynorm = ynorm.reshape(-1, 1)

reading train data...
CPU times: user 4.3 s, sys: 278 ms, total: 4.58 s
Wall time: 4.62 s


In [4]:
%%time
df_train.name.fillna('unkname', inplace=True)
df_train.category_name.fillna('unk_cat', inplace=True)
df_train.brand_name.fillna('unk_brand', inplace=True)
df_train.item_description.fillna('nodesc', inplace=True)

CPU times: user 300 ms, sys: 836 µs, total: 301 ms
Wall time: 299 ms


In [5]:
%%time
#impute brand names using flashtext
def get_brands(x, keyword_processor):
    matches = keyword_processor.extract_keywords(' '.join(x))
    if len(matches) > 0:
        return matches[0]
    else:
        return None
    
from flashtext import KeywordProcessor
all_brands = df_train.loc[df_train.brand_name.value_counts().values > 1, 'brand_name'].tolist()
#all_brands.remove('always')

keyword_processor = KeywordProcessor(case_sensitive=True)
keyword_processor.add_keywords_from_list(all_brands)
print(len(keyword_processor))

tmp = df_train[['name','item_description']].progress_apply(lambda x: get_brands(x, keyword_processor), axis=1).fillna("unk_brand")
missing_indices = df_train['brand_name'] == 'unk_brand'
df_train.loc[missing_indices, 'brand_name'] = tmp[missing_indices]
df_train.brand_name.fillna("unk_brand", inplace=True)

  0%|          | 597/1481661 [00:00<04:08, 5967.08it/s]

448


100%|██████████| 1481661/1481661 [01:11<00:00, 20753.17it/s]


CPU times: user 1min 11s, sys: 184 ms, total: 1min 12s
Wall time: 1min 11s


In [5]:
%%time
print('processing category...')

def paths(tokens):
    all_paths = ['/'.join(tokens[0:(i+1)]) for i in range(len(tokens))]
    return ' '.join(all_paths)

@lru_cache(1024)
def cat_process(cat):
    cat = cat.lower()
    cat = whitespace.sub('', cat)
    split = cat.split('/')
    return paths(split)

df_train.category_name = df_train.category_name.apply(cat_process)

cat_tok = Tokenizer(min_df=10)
X_cat = cat_tok.fit_transform(df_train.category_name)
cat_voc_size = cat_tok.vocabulary_size()
print(cat_voc_size)

processing category...
1129
CPU times: user 10.3 s, sys: 160 ms, total: 10.4 s
Wall time: 10.4 s


In [6]:
%%time
print('processing title...')
name_num_col = 7
name_tok = Tokenizer(min_df=5, tokenizer=tokenize)
X_name = name_tok.fit_transform(df_train.name)
X_name = X_name[:, :name_num_col]
name_voc_size = name_tok.vocabulary_size()
print(name_voc_size)

processing title...
25587
CPU times: user 15.4 s, sys: 216 ms, total: 15.6 s
Wall time: 15.6 s


In [7]:
%%time
print('processing description...')

desc_num_col = 40
desc_tok = Tokenizer(min_df=10, tokenizer=tokenize)
X_desc = desc_tok.fit_transform(df_train.item_description)
X_desc = X_desc[:, :desc_num_col]
desc_voc_size = desc_tok.vocabulary_size()
print(desc_voc_size)

processing description...
32139
CPU times: user 47.6 s, sys: 908 ms, total: 48.5 s
Wall time: 48.5 s


In [8]:
%%time
print('processing brand...')

df_train.brand_name = df_train.brand_name.str.lower()
df_train.brand_name = df_train.brand_name.str.replace(' ', '_')

brand_cnt = Counter(df_train.brand_name[df_train.brand_name != 'unk_brand'])
brands = sorted(b for (b, c) in brand_cnt.items() if c >= 2)
brands_idx = {b: (i + 1) for (i, b) in enumerate(brands)}

X_brand = df_train.brand_name.apply(lambda b: brands_idx.get(b, 0))
X_brand = X_brand.values.reshape(-1, 1) 
brand_voc_size = len(brands) + 1
print(brand_voc_size)

processing brand...
3567
CPU times: user 1.62 s, sys: 12 ms, total: 1.63 s
Wall time: 1.63 s


In [9]:
%%time
print('processing other features...')

X_item_cond = (df_train.item_condition_id - 1).astype('uint8').values.reshape(-1, 1)
X_shipping = df_train.shipping.astype('float32').values.reshape(-1, 1)

processing other features...
CPU times: user 17.2 ms, sys: 40.2 ms, total: 57.4 ms
Wall time: 91.5 ms


In [10]:
%%time
print("getting count features")

X_name_wcount = df_train.item_description.apply(lambda x: len(str(x).split()))
X_name_wcount = X_name_wcount.values.reshape(-1,1)/X_name_wcount.max()

getting count features
CPU times: user 2.45 s, sys: 40.1 ms, total: 2.49 s
Wall time: 2.49 s


In [31]:
print('defining the model...')

def prepare_batches(seq, step):
    n = len(seq)
    res = []
    for i in range(0, n, step):
        res.append(seq[i:i+step])
    return res

def conv1d(inputs, num_filters, filter_size, padding='same'):
    he_std = np.sqrt(2 / (filter_size * num_filters))
    out = tf.layers.conv1d(
        inputs=inputs, filters=num_filters, padding=padding,
        kernel_size=filter_size,
        activation=tf.nn.relu, 
        kernel_initializer=tf.random_normal_initializer(stddev=he_std))
    return out

def dense(X, size, reg=0.0, activation=None):
    he_std = np.sqrt(2 / int(X.shape[1]))
    out = tf.layers.dense(X, units=size, activation=activation, 
                     kernel_initializer=tf.random_normal_initializer(stddev=he_std),
                     #kernel_regularizer=tf.contrib.layers.l2_regularizer(reg)
                         )
    return out

def embed(inputs, size, dim):
    std = np.sqrt(2 / dim)
    emb = tf.Variable(tf.random_uniform([size, dim], -std, std))
    lookup = tf.nn.embedding_lookup(emb, inputs)
    return lookup

defining the model...


In [50]:
def train_tf(params):

    name_embed, desc_embed, brand_embed, cat_embed, dense1_dim, dense2_dim, lr1, lr2, lr3,\
            name_drop, desc_drop, dense_drop, name_filters3, name_filters2, desc_filter3, desc_filter2, batch_size = params
     
    ypreds = []
    ytrues = []
    for k, randint in enumerate(np.random.choice(1000, 4)):
        
        name_embeddings_dim = name_embed
        name_seq_len = X_name.shape[1]
        desc_embeddings_dim = desc_embed
        desc_seq_len = X_desc.shape[1]

        brand_embeddings_dim = brand_embed

        cat_embeddings_dim = cat_embed
        cat_seq_len = X_cat.shape[1]


        graph = tf.Graph()
        graph.seed = 1

        with graph.as_default():
            place_name = tf.placeholder(tf.int32, shape=(None, name_seq_len))
            place_desc = tf.placeholder(tf.int32, shape=(None, desc_seq_len))
            place_brand = tf.placeholder(tf.int32, shape=(None, 1))
            place_cat = tf.placeholder(tf.int32, shape=(None, cat_seq_len))
            place_ship = tf.placeholder(tf.float32, shape=(None, 1))
            place_cond = tf.placeholder(tf.uint8, shape=(None, 1))
            #place_wcount = tf.placeholder(tf.float32, shape=(None, 1))
            prob_name = tf.placeholder_with_default(0.0, shape=())
            prob_desc = tf.placeholder_with_default(0.0, shape=())
            prob_dense = tf.placeholder_with_default(0.0, shape=())

            place_y = tf.placeholder(dtype=tf.float32, shape=(None, 1))

            place_lr = tf.placeholder(tf.float32, shape=(), )

            name = embed(place_name, name_voc_size, name_embeddings_dim)
            desc = embed(place_desc, desc_voc_size, desc_embeddings_dim)
            brand = embed(place_brand, brand_voc_size, brand_embeddings_dim)
            cat = embed(place_cat, cat_voc_size, cat_embeddings_dim)


            name = tf.layers.average_pooling1d(name, pool_size=int(name_seq_len), strides=1, padding='valid')
            name = tf.layers.dropout(name, rate=prob_name, seed=1, training=True)
            name = tf.contrib.layers.flatten(name)
            #print(name.shape)

            desc = tf.layers.average_pooling1d(desc, pool_size=int(desc_seq_len), strides=1, padding='valid')
            desc = tf.layers.dropout(desc, rate=prob_desc, seed=1, training=True)
            desc = tf.contrib.layers.flatten(desc)
            #print(desc.shape)

            brand = tf.contrib.layers.flatten(brand)
            #print(brand.shape)

            cat = tf.layers.average_pooling1d(cat, pool_size=cat_seq_len, strides=1, padding='valid')
            cat = tf.contrib.layers.flatten(cat)
            #print(cat.shape)

            ship = place_ship
            #print(ship.shape)

            #name_wcount = place_wcount
            #print(name_wcount.shape)

            cond = tf.one_hot(place_cond, 5)
            cond = tf.contrib.layers.flatten(cond)
            #print(cond.shape)

            out = tf.concat([name, desc, brand, cat, ship, cond], axis=1)
            #print('concatenated dim:', out.shape)
            #out = tf.contrib.layers.batch_norm(out, decay=0.9)
            out = dense(out, dense1_dim, activation=tf.nn.relu)
            out = tf.contrib.nn.alpha_dropout(out, keep_prob=1 - prob_dense, seed=1)
            
            out = dense(out, dense2_dim, activation=tf.nn.relu)
            out = tf.contrib.nn.alpha_dropout(out, keep_prob=1 -prob_dense, seed=1)

            out = dense(out, 1)

            loss = tf.losses.mean_squared_error(place_y, out)
            rmse = tf.sqrt(loss)
            opt = tf.train.AdamOptimizer(learning_rate=place_lr)
            train_step = opt.minimize(loss)

            init = tf.global_variables_initializer()

        session = tf.Session(config=None, graph=graph)
        session.run(init)
        ##### print('training the model...')
        ypreds2 = []
        train_idx, val_idx= list(ShuffleSplit(1, test_size=0.05, random_state=randint).split(X_name))[0]
        lr_init=lr1
        lr_decay=0.0014
        lr = lr_init
        for i in range(3):
            t0 = time()
            np.random.seed(i)
            np.random.shuffle(train_idx)
            batches = prepare_batches(train_idx, batch_size)

            if i == 1:
                lr = lr2# 0.001
            elif i == 2:
                lr = lr3 # 0.001/0.0005
            elif i == 3:
                lr = 0.00001 # 1e-6
            #lr = lr_init - lr_decay*i
            print("current learning rate", lr)
            for j, idx in enumerate(batches):
                #if i == 0:
                #    lr = lr_init - 0.002*j/(len(batches)-1)
                feed_dict = {
                    place_name: X_name[idx],
                    place_desc: X_desc[idx],
                    place_brand: X_brand[idx],
                    place_cat: X_cat[idx],
                    place_cond: X_item_cond[idx],
                    place_ship: X_shipping[idx],
                    prob_name: name_drop,
                    prob_desc: desc_drop,
                    prob_dense: dense_drop,
                    #place_wcount: X_name_wcount[idx],
                    place_y: ynorm[idx],
                    place_lr: lr,
                }
                session.run(train_step, feed_dict=feed_dict)

            took = time() - t0
            print('Training epoch %d took %.3fs' % (i, took))
            val_batches = prepare_batches(val_idx, 5000)
            y_pred = np.zeros(len(X_name))
            for idx in val_batches:
                feed_dict = {
                    place_name: X_name[idx],
                    place_desc: X_desc[idx],
                    place_brand: X_brand[idx],
                    place_cat: X_cat[idx],
                    place_cond: X_item_cond[idx],
                    place_ship: X_shipping[idx],
                    prob_name: 0.0,
                    prob_desc: 0.0,
                    prob_dense: 0.0,
                    #place_wcount: X_name_wcount[idx],
                }
                batch_pred = session.run(out, feed_dict=feed_dict)
                y_pred[idx] = batch_pred[:, 0]
            y_pred_val = (y_pred[val_idx])*std + mean
            ypreds2.append(y_pred_val)
            y_true_val = (ynorm[val_idx][:,0])*std + mean
            score = np.sqrt(metrics.mean_squared_error(y_true_val, y_pred_val))
            print("Validation rmse for {}th set {} epoch is {}".format(k, i, score))
            
        y_pred_mean = ypreds2[0]*0.1 + ypreds2[1]*0.4 + ypreds2[2]*0.5
        score = np.sqrt(metrics.mean_squared_error(y_true_val, y_pred_mean))
        ypreds.extend(y_pred_mean)
        ytrues.extend(y_true_val)
        print("Validation rmse for {}th set is {}".format(k, score))
    overall_score = np.sqrt(metrics.mean_squared_error(ytrues, ypreds))
    print("Overall Validation rmse for {} is {}".format(params, overall_score))
    print(" ")
    print(" ")
    
    return overall_score

In [51]:
##### print('training the model...')
params = [100, 100, 40, 30, 300, 150, 0.003, 0.002, 0.001, 0.05, 0.1, 0.0, 20, 20, 20, 20, 2000]
train_tf(params)

current learning rate 0.003
Training epoch 0 took 7.201s
Validation rmse for 0th set 0 epoch is 0.43410001078931576
current learning rate 0.002
Training epoch 1 took 7.340s
Validation rmse for 0th set 1 epoch is 0.4222762194606952
current learning rate 0.001
Training epoch 2 took 7.256s
Validation rmse for 0th set 2 epoch is 0.41833763398772333
Validation rmse for 0th set is 0.41618030795377964
current learning rate 0.003
Training epoch 0 took 7.277s
Validation rmse for 1th set 0 epoch is 0.4376433553070137
current learning rate 0.002
Training epoch 1 took 7.245s
Validation rmse for 1th set 1 epoch is 0.423218415542139
current learning rate 0.001


KeyboardInterrupt: 

In [42]:
import skopt
from skopt import gp_minimize, gbrt_minimize
from skopt.space import Real, Integer, Categorical

In [53]:
space = [Integer(16, 512), #name='name_embed'),
         Integer(16, 512),# name='desc_embed'),
         Integer(16, 256),# name='brand_embed'),
         Integer(16, 256),# name='cat_embed'),
         Integer(64, 512),# name='dense1_dim'),
         Integer(32, 256),# name='dense2_dim'),
         Real(0.001, 0.01, "uniform"), #name='lr1'),
         Real(1e-5, 0.005, "uniform"), #name='lr2'),
         Real(1e-5, 1e-3, "log-uniform"), #name='lr3'),
         Real(0.001, 0.4, "uniform"), #name='name_drop'),
         Real(0.001, 0.5, "uniform"), #name='desc_drop'),
         Real(0.0, 0.05, "uniform"), #name='dense_drop'),
         Integer(10, 30), #name='name_filters3'),
         Integer(10, 30), #name='name_filters2'),
         Integer(10, 30), #name='desc_filters3'),
         Integer(10, 30), #name='desc_filters2'),
         Integer(256, 5000), #name='batch_size'),
        ]

res_gp = gbrt_minimize(train_tf, space, n_calls=100, random_state=0)

current learning rate 0.008625265649057131
Training epoch 0 took 6.590s
Validation rmse for 0th set 0 epoch is 0.4434127747140635
current learning rate 0.003121582846962003
Training epoch 1 took 6.555s
Validation rmse for 0th set 1 epoch is 0.43216031750481443
current learning rate 5.871694001325899e-05
Training epoch 2 took 6.503s
Validation rmse for 0th set 2 epoch is 0.4324635647097395
Validation rmse for 0th set is 0.4317298305791089
current learning rate 0.008625265649057131
Training epoch 0 took 6.513s
Validation rmse for 1th set 0 epoch is 0.4422179442534868
current learning rate 0.003121582846962003
Training epoch 1 took 6.627s
Validation rmse for 1th set 1 epoch is 0.43171878452698687
current learning rate 5.871694001325899e-05
Training epoch 2 took 6.513s
Validation rmse for 1th set 2 epoch is 0.43059525116511915
Validation rmse for 1th set is 0.4306897288078303
current learning rate 0.008625265649057131
Training epoch 0 took 6.589s
Validation rmse for 2th set 0 epoch is 0.44

Training epoch 1 took 10.410s
Validation rmse for 0th set 1 epoch is 0.4344109485555618
current learning rate 2.616697456085447e-05
Training epoch 2 took 10.300s
Validation rmse for 0th set 2 epoch is 0.4348493740869265
Validation rmse for 0th set is 0.4346179371480898
current learning rate 0.00989536454253304
Training epoch 0 took 10.351s
Validation rmse for 1th set 0 epoch is 0.44323249101111944
current learning rate 0.0005192036056326602
Training epoch 1 took 10.338s
Validation rmse for 1th set 1 epoch is 0.43421206111093097
current learning rate 2.616697456085447e-05
Training epoch 2 took 10.190s
Validation rmse for 1th set 2 epoch is 0.43437767809937416
Validation rmse for 1th set is 0.4343289848219536
current learning rate 0.00989536454253304
Training epoch 0 took 10.315s
Validation rmse for 2th set 0 epoch is 0.442952238128948
current learning rate 0.0005192036056326602
Training epoch 1 took 10.358s
Validation rmse for 2th set 1 epoch is 0.4345608091100434
current learning rate 

Training epoch 2 took 12.495s
Validation rmse for 0th set 2 epoch is 0.4303776423764698
Validation rmse for 0th set is 0.42838144628823477
current learning rate 0.006126565653309762
Training epoch 0 took 12.437s
Validation rmse for 1th set 0 epoch is 0.4406895568074298
current learning rate 0.003521649023656683
Training epoch 1 took 12.363s
Validation rmse for 1th set 1 epoch is 0.43386191172527044
current learning rate 3.775312222605937e-05
Training epoch 2 took 12.397s
Validation rmse for 1th set 2 epoch is 0.42982161233905014
Validation rmse for 1th set is 0.4304297895764913
current learning rate 0.006126565653309762
Training epoch 0 took 12.398s
Validation rmse for 2th set 0 epoch is 0.4442050448376003
current learning rate 0.003521649023656683
Training epoch 1 took 12.396s
Validation rmse for 2th set 1 epoch is 0.42957465836176684
current learning rate 3.775312222605937e-05
Training epoch 2 took 12.414s
Validation rmse for 2th set 2 epoch is 0.4308283078304698
Validation rmse for 

current learning rate 0.002582812330021152
Training epoch 0 took 16.846s
Validation rmse for 1th set 0 epoch is 0.43119187946914433
current learning rate 0.004586743163427907
Training epoch 1 took 16.830s
Validation rmse for 1th set 1 epoch is 0.4259714223256821
current learning rate 0.0005428040012841644
Training epoch 2 took 16.798s
Validation rmse for 1th set 2 epoch is 0.41359994122303545
Validation rmse for 1th set is 0.4146955306405214
current learning rate 0.002582812330021152
Training epoch 0 took 16.795s
Validation rmse for 2th set 0 epoch is 0.42904778369473373
current learning rate 0.004586743163427907
Training epoch 1 took 16.781s
Validation rmse for 2th set 1 epoch is 0.42051667319413844
current learning rate 0.0005428040012841644
Training epoch 2 took 16.856s
Validation rmse for 2th set 2 epoch is 0.4133855273316723
Validation rmse for 2th set is 0.4127614805689933
current learning rate 0.002582812330021152
Training epoch 0 took 16.919s
Validation rmse for 3th set 0 epoch

Training epoch 1 took 15.245s
Validation rmse for 1th set 1 epoch is 0.41960910381337874
current learning rate 0.00015210772295550437
Training epoch 2 took 15.354s
Validation rmse for 1th set 2 epoch is 0.41334481347126995
Validation rmse for 1th set is 0.41327647894107206
current learning rate 0.004423086914360238
Training epoch 0 took 15.178s
Validation rmse for 2th set 0 epoch is 0.42875607469838856
current learning rate 0.00488317667513295
Training epoch 1 took 15.125s
Validation rmse for 2th set 1 epoch is 0.4214048002008269
current learning rate 0.00015210772295550437
Training epoch 2 took 15.204s
Validation rmse for 2th set 2 epoch is 0.41343458711033154
Validation rmse for 2th set is 0.41319002022374507
current learning rate 0.004423086914360238
Training epoch 0 took 15.120s
Validation rmse for 3th set 0 epoch is 0.4313847132455034
current learning rate 0.00488317667513295
Training epoch 1 took 15.198s
Validation rmse for 3th set 1 epoch is 0.4199945276964344
current learning r

Training epoch 2 took 26.882s
Validation rmse for 1th set 2 epoch is 0.42833559476006067
Validation rmse for 1th set is 0.4277605524950522
current learning rate 0.009669660388285376
Training epoch 0 took 26.920s
Validation rmse for 2th set 0 epoch is 0.4755722034791313
current learning rate 0.0049301362623372265
Training epoch 1 took 26.861s
Validation rmse for 2th set 1 epoch is 0.4363233810090073
current learning rate 0.0009755665868588849
Training epoch 2 took 26.875s
Validation rmse for 2th set 2 epoch is 0.43874999382418856
Validation rmse for 2th set is 0.43340891120907976
current learning rate 0.009669660388285376
Training epoch 0 took 26.831s
Validation rmse for 3th set 0 epoch is 0.45850998752648003
current learning rate 0.0049301362623372265
Training epoch 1 took 26.818s
Validation rmse for 3th set 1 epoch is 0.43654168427419
current learning rate 0.0009755665868588849
Training epoch 2 took 26.872s
Validation rmse for 3th set 2 epoch is 0.45456979897398975
Validation rmse for

current learning rate 0.0073578527779913855
Training epoch 0 took 14.754s
Validation rmse for 2th set 0 epoch is 0.4314644732214662
current learning rate 0.003448860128842222
Training epoch 1 took 14.559s
Validation rmse for 2th set 1 epoch is 0.41802940918008746
current learning rate 0.0008629621115734557
Training epoch 2 took 14.621s
Validation rmse for 2th set 2 epoch is 0.4146253123260138
Validation rmse for 2th set is 0.41278400481071437
current learning rate 0.0073578527779913855
Training epoch 0 took 14.659s
Validation rmse for 3th set 0 epoch is 0.4353664286637776
current learning rate 0.003448860128842222
Training epoch 1 took 14.594s
Validation rmse for 3th set 1 epoch is 0.4214976436071822
current learning rate 0.0008629621115734557
Training epoch 2 took 14.575s
Validation rmse for 3th set 2 epoch is 0.4174780693143234
Validation rmse for 3th set is 0.41603186752962196
Overall Validation rmse for [424, 364, 142, 203, 468, 113, 0.0073578527779913855, 0.003448860128842222, 0.0

Training epoch 1 took 14.700s
Validation rmse for 2th set 1 epoch is 0.41488923674927847
current learning rate 0.0006309101781639358
Training epoch 2 took 14.650s
Validation rmse for 2th set 2 epoch is 0.4110028003582719
Validation rmse for 2th set is 0.40938310958119206
current learning rate 0.0029791668111021153
Training epoch 0 took 14.704s
Validation rmse for 3th set 0 epoch is 0.4281008703418083
current learning rate 0.0020955705392677877
Training epoch 1 took 14.611s
Validation rmse for 3th set 1 epoch is 0.4177831398073784
current learning rate 0.0006309101781639358
Training epoch 2 took 14.648s
Validation rmse for 3th set 2 epoch is 0.41398589135762226
Validation rmse for 3th set is 0.41200157114377217
Overall Validation rmse for [347, 291, 19, 141, 438, 185, 0.0029791668111021153, 0.0020955705392677877, 0.0006309101781639358, 0.08444600645130546, 0.0721869479739259, 0.0006316700578813118, 29, 10, 16, 12, 1285] is 0.41079356631080555
 
 
current learning rate 0.0043176026707169

Training epoch 2 took 15.677s
Validation rmse for 2th set 2 epoch is 0.41347354972712236
Validation rmse for 2th set is 0.41281006602267306
current learning rate 0.005009812875283353
Training epoch 0 took 15.829s
Validation rmse for 3th set 0 epoch is 0.4347231004741818
current learning rate 0.0036556031490228053
Training epoch 1 took 15.689s
Validation rmse for 3th set 1 epoch is 0.4194635717509161
current learning rate 0.00036004634585808386
Training epoch 2 took 15.704s
Validation rmse for 3th set 2 epoch is 0.4150493039155896
Validation rmse for 3th set is 0.4141633368385039
Overall Validation rmse for [189, 428, 159, 198, 480, 151, 0.005009812875283353, 0.0036556031490228053, 0.00036004634585808386, 0.14207482105785874, 0.030756369862898265, 0.00122380465195045, 30, 14, 14, 16, 1295] is 0.41393186377373764
 
 
current learning rate 0.003762050947690504
Training epoch 0 took 6.204s
Validation rmse for 0th set 0 epoch is 0.4346123314828307
current learning rate 0.0019695936328546857

current learning rate 0.006946427101413927
Training epoch 0 took 13.243s
Validation rmse for 3th set 0 epoch is 0.4319527898952067
current learning rate 0.0037925756104967958
Training epoch 1 took 13.163s
Validation rmse for 3th set 1 epoch is 0.4193853679822672
current learning rate 1.4619119710422602e-05
Training epoch 2 took 13.189s
Validation rmse for 3th set 2 epoch is 0.41787709372520004
Validation rmse for 3th set is 0.4172333616785616
Overall Validation rmse for [432, 345, 237, 78, 225, 56, 0.006946427101413927, 0.0037925756104967958, 1.4619119710422602e-05, 0.1737886484417505, 0.011477538734049691, 0.0007394566252548197, 29, 15, 15, 29, 1987] is 0.416271959788273
 
 
current learning rate 0.001034669088742206
Training epoch 0 took 14.615s
Validation rmse for 0th set 0 epoch is 0.45453964204131714
current learning rate 0.004872853789350501
Training epoch 1 took 14.537s
Validation rmse for 0th set 1 epoch is 0.4431310132176944
current learning rate 9.965768050977438e-05
Training

Training epoch 1 took 14.788s
Validation rmse for 3th set 1 epoch is 0.42559242641859696
current learning rate 0.0004908089208262501
Training epoch 2 took 14.800s
Validation rmse for 3th set 2 epoch is 0.4210535676308624
Validation rmse for 3th set is 0.422144755739978
Overall Validation rmse for [306, 443, 153, 86, 474, 176, 0.0029246020022204197, 9.332837424017402e-05, 0.0004908089208262501, 0.009138541767808449, 0.45760132044257706, 0.0012466247634850271, 21, 12, 17, 25, 1806] is 0.42098193266357137
 
 
current learning rate 0.005344167928150011
Training epoch 0 took 8.945s
Validation rmse for 0th set 0 epoch is 0.4381748688423946
current learning rate 0.0028481305336178213
Training epoch 1 took 8.876s
Validation rmse for 0th set 1 epoch is 0.4262035921840755
current learning rate 1.7784746067426728e-05
Training epoch 2 took 9.080s
Validation rmse for 0th set 2 epoch is 0.4252061121944524
Validation rmse for 0th set is 0.4255101586857251
current learning rate 0.005344167928150011
Tr

Training epoch 2 took 32.243s
Validation rmse for 3th set 2 epoch is 0.4154955765293451
Validation rmse for 3th set is 0.4136861771137146
Overall Validation rmse for [215, 268, 172, 17, 251, 246, 0.0034645996253242542, 0.002967759775499689, 0.0005524896190267775, 0.2209409030480764, 0.29145797592055195, 0.0011499201495264347, 21, 15, 14, 21, 281] is 0.41328831362169594
 
 
current learning rate 0.008428243177205532
Training epoch 0 took 6.052s
Validation rmse for 0th set 0 epoch is 0.4468752376028537
current learning rate 0.0007565392411993496
Training epoch 1 took 5.926s
Validation rmse for 0th set 1 epoch is 0.44987241606113937
current learning rate 0.0003428925087589326
Training epoch 2 took 6.155s
Validation rmse for 0th set 2 epoch is 0.4506730102181865
Validation rmse for 0th set is 0.44896439032945884
current learning rate 0.008428243177205532
Training epoch 0 took 6.035s
Validation rmse for 1th set 0 epoch is 0.451852075214679
current learning rate 0.0007565392411993496
Trainin

current learning rate 0.006123224425459065
Training epoch 0 took 8.140s
Validation rmse for 0th set 0 epoch is 0.44946888945125063
current learning rate 0.00028525893484901296
Training epoch 1 took 8.221s
Validation rmse for 0th set 1 epoch is 0.4523234492006765
current learning rate 0.00040103256681444125
Training epoch 2 took 8.120s
Validation rmse for 0th set 2 epoch is 0.44853432412507405
Validation rmse for 0th set is 0.44907834979244465
current learning rate 0.006123224425459065
Training epoch 0 took 8.085s
Validation rmse for 1th set 0 epoch is 0.44878285689329916
current learning rate 0.00028525893484901296
Training epoch 1 took 7.965s
Validation rmse for 1th set 1 epoch is 0.45257441537149656
current learning rate 0.00040103256681444125
Training epoch 2 took 8.165s
Validation rmse for 1th set 2 epoch is 0.44559175430802744
Validation rmse for 1th set is 0.44655501539681913
current learning rate 0.006123224425459065
Training epoch 0 took 7.987s
Validation rmse for 2th set 0 epo

Training epoch 1 took 12.129s
Validation rmse for 0th set 1 epoch is 0.45952021939842225
current learning rate 0.00012123831324688619
Training epoch 2 took 12.088s
Validation rmse for 0th set 2 epoch is 0.4588198824544967
Validation rmse for 0th set is 0.45917778336301
current learning rate 0.003221293429526397
Training epoch 0 took 12.127s
Validation rmse for 1th set 0 epoch is 0.47262611163679746
current learning rate 8.412011859055949e-05
Training epoch 1 took 12.099s
Validation rmse for 1th set 1 epoch is 0.46121527690162756
current learning rate 0.00012123831324688619
Training epoch 2 took 12.092s
Validation rmse for 1th set 2 epoch is 0.4601445141948787
Validation rmse for 1th set is 0.4613839246124962
current learning rate 0.003221293429526397
Training epoch 0 took 12.154s
Validation rmse for 2th set 0 epoch is 0.4590862833964948
current learning rate 8.412011859055949e-05
Training epoch 1 took 12.080s
Validation rmse for 2th set 1 epoch is 0.46047330737737285
current learning r

Training epoch 2 took 8.833s
Validation rmse for 0th set 2 epoch is 0.42436883399652814
Validation rmse for 0th set is 0.42521577658493953
current learning rate 0.002408066493231404
Training epoch 0 took 8.892s
Validation rmse for 1th set 0 epoch is 0.43574358268588786
current learning rate 0.00033561115255416167
Training epoch 1 took 8.857s
Validation rmse for 1th set 1 epoch is 0.4269974753240342
current learning rate 0.00029617271388229744
Training epoch 2 took 8.829s
Validation rmse for 1th set 2 epoch is 0.4242550408256224
Validation rmse for 1th set is 0.42533529538680986
current learning rate 0.002408066493231404
Training epoch 0 took 8.879s
Validation rmse for 2th set 0 epoch is 0.436357928713005
current learning rate 0.00033561115255416167
Training epoch 1 took 8.885s
Validation rmse for 2th set 1 epoch is 0.42822275095887685
current learning rate 0.00029617271388229744
Training epoch 2 took 8.867s
Validation rmse for 2th set 2 epoch is 0.4248474778404112
Validation rmse for 2

current learning rate 0.0034940806812251454
Training epoch 0 took 38.659s
Validation rmse for 1th set 0 epoch is 0.4683430953287976
current learning rate 0.00488628342997619
Training epoch 1 took 38.711s
Validation rmse for 1th set 1 epoch is 0.4294447423163703
current learning rate 0.00018175936518614953
Training epoch 2 took 38.578s
Validation rmse for 1th set 2 epoch is 0.43141181499935843
Validation rmse for 1th set is 0.4258467815131189
current learning rate 0.0034940806812251454
Training epoch 0 took 38.689s
Validation rmse for 2th set 0 epoch is 0.4532268484948114
current learning rate 0.00488628342997619
Training epoch 1 took 38.704s
Validation rmse for 2th set 1 epoch is 0.44206218431018157
current learning rate 0.00018175936518614953
Training epoch 2 took 38.674s
Validation rmse for 2th set 2 epoch is 0.43402772438115295
Validation rmse for 2th set is 0.431376118012089
current learning rate 0.0034940806812251454
Training epoch 0 took 38.684s
Validation rmse for 3th set 0 epoc

Training epoch 1 took 30.966s
Validation rmse for 1th set 1 epoch is 0.4148822064287017
current learning rate 4.778300593129668e-05
Training epoch 2 took 30.935s
Validation rmse for 1th set 2 epoch is 0.4120970360753745
Validation rmse for 1th set is 0.41235407294251053
current learning rate 0.0028823889977636873
Training epoch 0 took 30.945s
Validation rmse for 2th set 0 epoch is 0.4289286749651142
current learning rate 0.0007076168437656144
Training epoch 1 took 30.878s
Validation rmse for 2th set 1 epoch is 0.4137309098671364
current learning rate 4.778300593129668e-05
Training epoch 2 took 30.872s
Validation rmse for 2th set 2 epoch is 0.41219290899189864
Validation rmse for 2th set is 0.4121741150746766
current learning rate 0.0028823889977636873
Training epoch 0 took 30.960s
Validation rmse for 3th set 0 epoch is 0.43149004252980955
current learning rate 0.0007076168437656144
Training epoch 1 took 30.997s
Validation rmse for 3th set 1 epoch is 0.414744319287885
current learning r

Training epoch 2 took 21.567s
Validation rmse for 1th set 2 epoch is 0.4146811274415091
Validation rmse for 1th set is 0.41437852564437644
current learning rate 0.006102474847977808
Training epoch 0 took 21.544s
Validation rmse for 2th set 0 epoch is 0.4329155762273314
current learning rate 0.00402364083528095
Training epoch 1 took 21.505s
Validation rmse for 2th set 1 epoch is 0.4184184716337066
current learning rate 5.925304949108791e-05
Training epoch 2 took 21.546s
Validation rmse for 2th set 2 epoch is 0.4148648831406811
Validation rmse for 2th set is 0.41409879553641116
current learning rate 0.006102474847977808
Training epoch 0 took 21.551s
Validation rmse for 3th set 0 epoch is 0.4334536980601322
current learning rate 0.00402364083528095
Training epoch 1 took 21.533s
Validation rmse for 3th set 1 epoch is 0.41906846772862166
current learning rate 5.925304949108791e-05
Training epoch 2 took 21.665s
Validation rmse for 3th set 2 epoch is 0.4164523348493628
Validation rmse for 3th

current learning rate 0.0013371809146089553
Training epoch 0 took 12.964s
Validation rmse for 2th set 0 epoch is 0.46261459203453215
current learning rate 0.0003132219547326967
Training epoch 1 took 12.903s
Validation rmse for 2th set 1 epoch is 0.46680891696900567
current learning rate 1.2619612799946628e-05
Training epoch 2 took 12.783s
Validation rmse for 2th set 2 epoch is 0.4624468030122216
Validation rmse for 2th set is 0.46340417354538777
current learning rate 0.0013371809146089553
Training epoch 0 took 12.917s
Validation rmse for 3th set 0 epoch is 0.46517286448355716
current learning rate 0.0003132219547326967
Training epoch 1 took 12.894s
Validation rmse for 3th set 1 epoch is 0.46153054764904194
current learning rate 1.2619612799946628e-05
Training epoch 2 took 12.853s
Validation rmse for 3th set 2 epoch is 0.4620748415232332
Validation rmse for 3th set is 0.4615855837600605
Overall Validation rmse for [96, 497, 248, 242, 405, 54, 0.0013371809146089553, 0.0003132219547326967

Training epoch 1 took 11.725s
Validation rmse for 2th set 1 epoch is 0.4183516363430869
current learning rate 6.681621553406312e-05
Training epoch 2 took 11.909s
Validation rmse for 2th set 2 epoch is 0.41514244398749484
Validation rmse for 2th set is 0.4146605456034005
current learning rate 0.005545963190377054
Training epoch 0 took 11.909s
Validation rmse for 3th set 0 epoch is 0.43313363698134116
current learning rate 0.004085552379505424
Training epoch 1 took 11.934s
Validation rmse for 3th set 1 epoch is 0.4190312867384121
current learning rate 6.681621553406312e-05
Training epoch 2 took 11.807s
Validation rmse for 3th set 2 epoch is 0.4159607469220729
Validation rmse for 3th set is 0.41566646780248745
Overall Validation rmse for [311, 163, 27, 208, 352, 75, 0.005545963190377054, 0.004085552379505424, 6.681621553406312e-05, 0.15049153975265586, 0.3262197108981102, 0.0005400283687417074, 21, 18, 14, 18, 1232] is 0.41495978099673925
 
 
current learning rate 0.0016148451230602093
Tr

Training epoch 2 took 11.638s
Validation rmse for 2th set 2 epoch is 0.46886633660745447
Validation rmse for 2th set is 0.4695448352907005
current learning rate 0.0012085539630746678
Training epoch 0 took 11.679s
Validation rmse for 3th set 0 epoch is 0.4748444268230752
current learning rate 0.00012870269978429455
Training epoch 1 took 11.661s
Validation rmse for 3th set 1 epoch is 0.46757281072914647
current learning rate 2.6136885377824263e-05
Training epoch 2 took 11.587s
Validation rmse for 3th set 2 epoch is 0.46775919204262323
Validation rmse for 3th set is 0.46796343680635977
Overall Validation rmse for [416, 478, 39, 198, 175, 137, 0.0012085539630746678, 0.00012870269978429455, 2.6136885377824263e-05, 0.09461719637097857, 0.023334240915448005, 0.048224238700308876, 21, 27, 30, 19, 4240] is 0.4688246990826592
 
 
current learning rate 0.003074905326529948
Training epoch 0 took 8.034s
Validation rmse for 0th set 0 epoch is 0.4350497486101271
current learning rate 0.00079864026378

current learning rate 0.0050895838234160376
Training epoch 0 took 13.598s
Validation rmse for 3th set 0 epoch is 0.4299816890305834
current learning rate 0.0031092032400196687
Training epoch 1 took 13.553s
Validation rmse for 3th set 1 epoch is 0.41789669834574894
current learning rate 0.0003110240352690577
Training epoch 2 took 13.400s
Validation rmse for 3th set 2 epoch is 0.4150924872386526
Validation rmse for 3th set is 0.4135748770364383
Overall Validation rmse for [162, 343, 21, 37, 436, 118, 0.0050895838234160376, 0.0031092032400196687, 0.0003110240352690577, 0.025660420073919995, 0.13842030152329515, 0.0006953976408071051, 18, 24, 13, 28, 1312] is 0.4132895840295706
 
 
current learning rate 0.003583987178289598
Training epoch 0 took 7.941s
Validation rmse for 0th set 0 epoch is 0.43304311781660665
current learning rate 0.0040657622787365
Training epoch 1 took 7.752s
Validation rmse for 0th set 1 epoch is 0.4216092993584168
current learning rate 0.0001396418172553854
Training e

Training epoch 1 took 6.566s
Validation rmse for 3th set 1 epoch is 0.43805102531399887
current learning rate 7.385825868704096e-05
Training epoch 2 took 6.560s
Validation rmse for 3th set 2 epoch is 0.4359684938886514
Validation rmse for 3th set is 0.43704734194603895
Overall Validation rmse for [255, 70, 234, 170, 460, 234, 0.001059753590486383, 0.00025251901896782393, 7.385825868704096e-05, 0.2953066873983904, 0.0010503063647482136, 0.0005017440656326867, 26, 24, 20, 21, 4003] is 0.4370525428079088
 
 
current learning rate 0.00103842702786734
Training epoch 0 took 13.126s
Validation rmse for 0th set 0 epoch is 0.44357091324544806
current learning rate 0.0005683808869267143
Training epoch 1 took 13.216s
Validation rmse for 0th set 1 epoch is 0.43360161737581115
current learning rate 4.16843340301095e-05
Training epoch 2 took 13.096s
Validation rmse for 0th set 2 epoch is 0.432033252914606
Validation rmse for 0th set is 0.43246846130713995
current learning rate 0.00103842702786734
Tr

In [157]:
#0.413 -- [244, 245, 20, 30, 379, 111, 0.007, 0.004889124444379768, 0.0008893344484252365, 0.20170778812902923, 0.4322101218941912, 0.00040881437786467474, 22, 14, 29, 15, 700]
#0.4125 - [335, 368, 30, 207, 501, 129, 0.0028, 0.0007076168437656144, 4.778300593129668e-05, 0.32388425803846765, 0.09060816640189791, 0.0006894953854488185, 27, 26, 18, 27, 396]
#0.4128 - [230, 478, 19, 50, 350, 156, 0.0088, 0.00451485158454785, 0.0003698907526454667, 0.00413597314173117, 0.06832928809863183, 0.0006335617241631399, 10, 30, 15, 30, 797]
#0.4129 - [249, 357, 48, 209, 331, 252, 0.0029, 0.0015495505982415813, 0.00018267824518215857, 0.3003165566092163, 0.23796727279121543, 0.00048365710299387864, 28, 30, 11, 18, 962]
#0.4131 - [422, 322, 252, 115, 425, 190, 0.0037, 0.001963948456452917, 3.8822104969446264e-05, 0.009805955801694414, 0.03824395410569333, 0.0007812555233795872, 10, 14, 14, 27, 768]
#0.4131 - [312, 412, 195, 82, 299, 193, 0.0050, 0.003602751160806771, 8.203542652197838e-05, 0.0031397184953593266, 0.057066240447346026, 0.00012511895820388477, 22, 19, 15, 29, 569]
#0.4132 - [215, 268, 172, 17, 251, 246, 0.0034, 0.002967759775499689, 0.0005524896190267775, 0.2209409030480764, 0.29145797592055195, 0.0011499201495264347, 21, 15, 14, 21, 281]
#0.4124 - [133, 197, 16, 36, 452, 97, 0.0043, 0.0025318236697005393, 4.498909382519441e-05, 0.15558319249405014, 0.0544129578491453, 4.571395252885303e-05, 25, 15, 18, 26, 439]
#0.4108 - [347, 291, 19, 141, 438, 185, 0.0030, 0.0020955705392677877, 0.0006309101781639358, 0.08444600645130546, 0.0721869479739259, 0.0006316700578813118, 29, 10, 16, 12, 1285]
#0.4123 - [288, 290, 220, 182, 458, 143, 0.0016, 0.0004175825175008739, 3.430748111600067e-05, 0.0033717035103933055, 0.031262832742402324, 0.00026036277618005646, 19, 13, 10, 16, 329]
#0.4108 - [132, 407, 94, 249, 437, 156, 0.0040, 0.0013121365325581096, 0.0004805886736155402, 0.005072081019905154, 0.014233366406514063, 0.0001552363639330923, 29, 14, 18, 15, 579]
#0.4131 - [433, 380, 179, 73, 508, 193, 0.0043, 0.004799784588296129, 0.0008674740857502088, 0.1639507990732214, 0.05043616270685462, 0.0012438307389190054, 27, 21, 12, 30, 2071]


0.4071720483590182

In [204]:
np.sqrt(metrics.mean_squared_error(y_true_val[np.abs(y_true_val- y_pred_val) <= 1.5], y_pred_val[np.abs(y_true_val- y_pred_val) <= 1.5]))

0.39042055713946605

In [205]:
df_train.iloc[val_idx].loc[np.abs(y_true_val- y_pred_val) > 1.5].category_name.value_counts()

women women/jewelry women/jewelry/bracelets                                                                  6
other other/other other/other/other                                                                          5
women women/athleticapparel women/athleticapparel/pants,tights,leggings                                      5
women women/women'shandbags women/women'shandbags/shoulderbag                                                4
vintage&collectibles vintage&collectibles/tradingcards vintage&collectibles/tradingcards/animation           4
beauty beauty/makeup beauty/makeup/nails                                                                     3
women women/other women/other/other                                                                          3
vintage&collectibles vintage&collectibles/tradingcards vintage&collectibles/tradingcards/sports              3
kids kids/toys kids/toys/games                                                                               3
w

In [206]:
tmp = df_train.iloc[val_idx].loc[np.abs(y_true_val- y_pred_val) > 1.5]
tmp['price'] = np.expm1(y_true_val[np.abs(y_true_val- y_pred_val) > 1.5])
tmp['predicted'] = np.expm1(y_pred_val[np.abs(y_true_val- y_pred_val) > 1.5])

In [207]:
tmp.loc[tmp.category_name.str.contains('bracelets')]

Unnamed: 0,train_id,name,item_condition_id,category_name,brand_name,shipping,item_description,price,predicted
694691,695063,100% authentic bizentine design 14k gold,3,women women/jewelry women/jewelry/bracelets,unk_brand,1,No description yet,300.0,62.547579
996274,996837,Breast Cancer Awareness Charm,2,women women/jewelry women/jewelry/bracelets,unk_brand,0,No description yet,50.0,8.626883
326686,326840,"James Avery 925 18 8"" Charm bracelet",3,women women/jewelry women/jewelry/bracelets,unk_brand,0,"Guaranteed authentic, priced to sell. No offer...",506.0,95.802142
1458255,1459112,AUTHENTIC DY rope bracelet,3,women women/jewelry women/jewelry/bracelets,david_yurman,1,"Black rope bracelet for smaller wrist 6.5"".",40.0,273.346336
917666,918175,Bundle of 3 bracelets,2,women women/jewelry women/jewelry/bracelets,unk_brand,0,3 bracelets for Lorena,64.0,10.449836
1365943,1366738,Keep collective charms,2,women women/jewelry women/jewelry/bracelets,unk_brand,1,"Note: lot is as pictured, minus one purple/pin...",100.0,15.740973


In [None]:
#playstation vr -- as a single word
# category games - controller and adapters
# integers before 