In [1]:
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

In [2]:
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
%matplotlib inline 
from sklearn.ensemble import RandomForestRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.tree import export_graphviz
import matplotlib.pyplot as plt

import lightgbm as lgbm
import gc
import xgboost as xgb
import pickle as pickle


from catboost import CatBoostRegressor


In [24]:
items           = pd.read_csv('items.csv',usecols=["item_id", "item_category_id"])
item_categories = pd.read_csv('item_categories.csv')
shops           = pd.read_csv('shops.csv')
sales_train     = pd.read_csv('sales_train.csv.gz')
test            = pd.read_csv('test.csv.gz')

In [4]:
items['item_id'] = pd.to_numeric(items['item_id'],downcast='unsigned')
items['item_category_id'] = pd.to_numeric(items['item_category_id'],downcast='unsigned')

In [5]:
sales_train['date'] = sales_train['date'].astype('category')
sales_train['date_block_num'] = pd.to_numeric(sales_train['date_block_num'],downcast='unsigned')
sales_train['shop_id'] = pd.to_numeric(sales_train['shop_id'],downcast='unsigned')
sales_train['item_price'] = sales_train['item_price'].astype('int')
sales_train['item_price'] = pd.to_numeric(sales_train['item_price'],downcast='unsigned')
sales_train['item_cnt_day'] = pd.to_numeric(sales_train['item_cnt_day'],downcast='signed')


In [6]:
transactions = sales_train
transactions[['day','month', 'year']] = transactions['date'].str.split('.', expand=True).astype(int)
transactions['day'] = pd.to_numeric(transactions['day'],downcast='unsigned')
transactions['month'] = pd.to_numeric(transactions['month'],downcast='unsigned')
transactions['year'] = pd.to_numeric(transactions['year'],downcast='unsigned')


In [7]:
transactions = transactions.set_index('item_id').join(items.set_index('item_id'))
transactions.reset_index(inplace=True)
transactions = transactions[transactions['year'] != 2013]
transactions['y'] = pd.to_numeric(transactions.groupby(['date_block_num', 'item_id', 'shop_id'])['item_cnt_day'].transform('sum')\
                .clip(0,20), downcast='unsigned')

In [8]:
len(transactions)

1668287

In [9]:
transactions['turnover'] = transactions['item_price'] * transactions['item_cnt_day']
transactions['turnover'] = pd.to_numeric(transactions['turnover'], downcast='unsigned')

In [10]:
transactions['item_first_block'] = pd.to_numeric(transactions.groupby('item_id')['date_block_num'].transform(np.min), downcast='unsigned')
transactions['item_last_block'] = pd.to_numeric(transactions.groupby('item_id')['date_block_num'].transform(np.max), downcast='unsigned')

transactions['is_first_two_blocks'] = \
                    transactions['date_block_num'].isin([transactions['item_first_block']+1,transactions['item_first_block']+2])


transactions['is_last_two_blocks'] = \
                transactions['date_block_num'].isin([transactions['item_last_block']-1,transactions['item_last_block']])
                                

In [11]:
number_of_items = transactions['item_id'].nunique()
print("number_of_items:", number_of_items)
number_of_categories = transactions['item_category_id'].nunique()
print("number_of_categories:", number_of_categories)
number_of_shops = transactions['shop_id'].nunique()
print("number_of_shops:", number_of_shops)
number_of_days = 365 + 365 - 30 - 31
print("number_of_days:", number_of_days)
number_of_blocks = transactions['date_block_num'].nunique()
print("number_of_blocks:", number_of_blocks)
total_sales = transactions['item_cnt_day'].sum()
print("total_sales:", total_sales)
total_turnover = transactions['turnover'].sum()
print("total_turnover:", total_turnover)
average_price = transactions['item_price'].mean()
print("average_price:", average_price)

number_of_items: 17054
number_of_categories: 79
number_of_shops: 55
number_of_days: 669
number_of_blocks: 22
total_sales: 2085473
total_turnover: 2181307117
average_price: 1015.4701882829513


#ITEM

-UNITS
item_units
item_block_units
item_mean_units_block
item_day_units
item_mean_units_day
item_max_units_block
item_min_units_block
item_max_units_day
item_min_units_day

-TURNOVER
item_turnover
item_block_turnover
item_mean_turnover_block
item_day_turnover
item_mean_turnover_day
item_max_turnover_block
item_min_turnover_block
item_max_turnover_day
item_min_turnover_day


-TIME
item_days_of_activity
item_blocks_of_activity
item_mean_day_between_activity
item_longest_stretch_days_without_activity
item_longest_stretch_blocks_without_activity
item_longest_stretch_block_with_activity
item_number_of_consecutive_days_with_activity
item_days_between_start_and_first_activity
item_blocks_between_start_and_first_activity
item_first_block
item_last_block
item_first_day
item_last_day
item_activity_on_all_blocks


-PRICE
item_mean_price
item_mean_price_block
item_min_price
item_max_price
item_number_different_prices
item_price_amplitude (%age min/max)
item_deviation_mean_category_price


-TREND
is_first_two_full_blocks (actually second/third to make sure we have a "full" block if this was a new release !!!!
is_last_two_blocks
item_first_two_blocks_units
item_last_two_blocks_units
item_fluctuation_units_first_last_blocks
item_first_two_blocks_mean_price
item_last_two_blocks_mean_price
item_fluctuation_price_first_last_blocks

-ENCODINGS
item_share_of_total_units
item_share_of_total_gross
item_share_of_category_units
item_share_of_category_turnover

In [15]:
gc.collect()
transactions_items = transactions.copy()
transactions_items_blocks = transactions.copy()

In [16]:
transactions_items_blocks['item_block_units'] = pd.to_numeric(transactions_items_blocks.groupby(['item_id','date_block_num'])['item_cnt_day'].transform(np.sum), downcast='unsigned')    
transactions_items_blocks['item_block_turnover'] = pd.to_numeric(transactions_items_blocks.groupby(['item_id','date_block_num'])['turnover'].transform(np.sum), downcast='unsigned')    
transactions_items_blocks['item_mean_price_block'] = pd.to_numeric(transactions_items_blocks.groupby(['item_id', 'date_block_num'])['item_price'].transform(np.mean), downcast='float')    

In [17]:
transactions_items['item_units'] = pd.to_numeric(transactions_items.groupby(['item_id'])['item_cnt_day'].transform(np.sum), downcast='unsigned') 
transactions_items['item_mean_units_block'] = pd.to_numeric(transactions_items_blocks.groupby(['item_id'])['item_block_units'].transform(np.mean), downcast='float') 
transactions_items['item_day_units'] = pd.to_numeric(transactions_items.groupby(['item_id','date'])['item_cnt_day'].transform(np.sum), downcast='unsigned') 
transactions_items['item_mean_units_day'] = pd.to_numeric(transactions_items.groupby(['item_id'])['item_day_units'].transform(np.mean), downcast='float') 
transactions_items['item_max_units_block'] = pd.to_numeric(transactions_items_blocks.groupby(['item_id'])['item_block_units'].transform(np.max), downcast='unsigned') 
transactions_items['item_min_units_block'] = pd.to_numeric(transactions_items_blocks.groupby(['item_id'])['item_block_units'].transform(np.min), downcast='unsigned') 
transactions_items['item_max_units_day'] = pd.to_numeric(transactions_items.groupby(['item_id'])['item_day_units'].transform(np.max), downcast='unsigned') 
transactions_items['item_min_units_day'] = pd.to_numeric(transactions_items.groupby(['item_id'])['item_day_units'].transform(np.min), downcast='unsigned') 

In [18]:
transactions_items['item_turnover'] = pd.to_numeric(transactions_items.groupby(['item_id'])['turnover'].transform(np.sum), downcast='unsigned') 
transactions_items['item_mean_turnover_block'] = pd.to_numeric(transactions_items_blocks.groupby(['item_id'])['item_block_turnover'].transform(np.mean), downcast='float') 
transactions_items['item_day_turnover'] = pd.to_numeric(transactions_items.groupby(['item_id','date'])['turnover'].transform(np.sum), downcast='unsigned') 
transactions_items['item_mean_turnover_day'] = pd.to_numeric(transactions_items.groupby(['item_id'])['turnover'].transform(np.mean), downcast='float') 
transactions_items['item_max_turnover_block'] = pd.to_numeric(transactions_items_blocks.groupby(['item_id'])['item_block_turnover'].transform(np.max), downcast='unsigned') 
transactions_items['item_min_turnover_block'] = pd.to_numeric(transactions_items_blocks.groupby(['item_id'])['item_block_turnover'].transform(np.min), downcast='unsigned') 
transactions_items['item_max_turnover_day'] = pd.to_numeric(transactions_items.groupby(['item_id'])['item_day_turnover'].transform(np.max), downcast='unsigned') 
transactions_items['item_min_turnover_day'] = pd.to_numeric(transactions_items.groupby(['item_id'])['item_day_turnover'].transform(np.min), downcast='unsigned') 

In [19]:
transactions_items['item_days_of_activity'] = pd.to_numeric(transactions_items.groupby(['item_id'])['date'].transform("nunique"), downcast='unsigned') 
transactions_items['item_blocks_of_activity'] = pd.to_numeric(transactions_items.groupby(['item_id'])['date_block_num'].transform("nunique"), downcast='unsigned') 

def get_number_of_days_since_start(day,month, year):
    days = 0
    if year == 2015:
        days = 365
    def is_even(num):
        return num % 2 == 0
    half_of_month = int(month/2)
    even = (30*half_of_month) + (31*half_of_month)
    if is_even(month):
        days = days + even - 30 - day
    else:
        days = days + even + day
    return days

transactions_items['item_days_since_start'] = pd.to_numeric(transactions_items.apply(lambda row: get_number_of_days_since_start(row['day'],row['month'], row['year']),axis=1), downcast='unsigned') 

def get_average_days_between_sales(days):
    days = sorted(np.unique(days))
    if len(days) == 0:
        return 9999
    if len(days) == 1:
        return 999
    return np.mean(np.ediff1d(days)) / len(days)

average_days_between_sales = transactions_items.groupby(['item_id'])['item_days_since_start'].apply(list).apply(lambda x: get_average_days_between_sales(x))

transactions_items['item_mean_day_between_activity'] = pd.to_numeric(transactions_items['item_id'].map(average_days_between_sales), downcast='unsigned') 


def get_max_stretch_without_sales_days(days):
    days = np.unique(days)
    max_stretch = 0
    len_days = len(days)
    for index,day in enumerate(sorted(days)):
        if index == len_days - 1:
            return max_stretch
        next_day = days[index+1]
        stretch = next_day - day
        if stretch > max_stretch:
            max_stretch = stretch
            

        
max_stretch_without_sales_day = transactions_items.groupby(['item_id'])['item_days_since_start'].apply(list).apply(lambda x: get_max_stretch_without_sales_days(x))

transactions_items['item_longest_stretch_days_without_activity'] = pd.to_numeric(transactions_items['item_id'].map(max_stretch_without_sales_day), downcast='unsigned') 

In [20]:
gc.collect()

def get_max_stretch_without_sales_block(blocks):
    blocks = np.unique(blocks)
    max_stretch = 0
    len_blocks = len(blocks)
    for index,block in enumerate(sorted(blocks)):
        if index == len_blocks - 1:
            return max_stretch
        next_block = blocks[index+1]
        stretch = next_block - block
        if stretch > max_stretch:
            max_stretch = stretch
            

        
item_longest_stretch_blocks_without_activity = transactions_items.groupby(['item_id'])['date_block_num']\
                                    .apply(list).apply(lambda x: get_max_stretch_without_sales_block(x))

transactions_items['item_longest_stretch_blocks_without_activity'] = pd.to_numeric(transactions_items['item_id'].map(item_longest_stretch_blocks_without_activity), downcast='unsigned') 



def get_longest_stretch(following_pairs, n=1,new_n=1):
    #print("following_pairs", following_pairs, " n: ", n, " new_n: ", new_n)
    len_pairs = len(following_pairs)
    if len_pairs == 0:
        return 0
    if len_pairs == 1:
        if new_n > n:
            return new_n
        return n
    if following_pairs[1][0] == following_pairs[0][1]:
        new_n+=1
    else:
        if new_n > n:
            n=new_n
        new_n=1
    return get_longest_stretch(following_pairs[1:], n,new_n)


assert(get_longest_stretch([]) == 0)
assert(get_longest_stretch([[1, 2], [2,3], [3, 4], [4,5] ,[8,9], [11, 12]]) == 4)
assert(get_longest_stretch([[-1, 0],[1, 2], [2,3], [3, 4], [4,5] ,[8,9], [11, 12]]) == 4)
assert(get_longest_stretch([[1, 2], [4,5] ,[8,9], [9,10],[10, 11]]) == 3)
assert(get_longest_stretch([[1, 2], [4,5] ,[8,9], [9,10],[10, 11],[20, 21], [25,26]]) == 3)
assert(get_longest_stretch([[1, 2], [4,5] ,[8,9], [9,10],[10, 11],[14, 15], [15,16]]) == 3)
assert(get_longest_stretch([[1, 2], [4,5] ,[8,9], [9,10],[10, 11],[14, 15], [15,16],[18,19] ,[22,23], [23,24],[24, 25]]) == 3)
assert(get_longest_stretch([[1, 2], [4,5], [7, 8]]) == 1)
assert(get_longest_stretch([[1, 2], [5, 6], [6, 7], [7,8], [14, 15]]) == 3)
assert(get_longest_stretch([[1, 2], [5, 6], [6, 7], [7,8], [14, 15], [15, 16]]) == 3)
assert(get_longest_stretch([[1, 2], [5, 6], [6, 7], [7,8], [14, 15], [15, 16], [16, 17]]) == 3)
assert(get_longest_stretch([[1, 2], [5, 6], [6, 7], [14, 15], [15, 16], [16, 17]]) == 3)
assert(get_longest_stretch([[1, 2], [5, 6], [6, 7], [7,8], [14, 15], [20, 21], [21, 22], [22,23],[23,24]]) == 4)


def get_following_pairs(pairs):
    pairs = np.unique(pairs)
    len_pairs = len(pairs)
    following = []
    for index,pair in enumerate(sorted(pairs)):
        if index == len_pairs - 1:
            return following
        next_pair = pairs[index+1]
        if next_pair == pair + 1:
            following.append([pair, next_pair])
        
assert(get_following_pairs([1,2,5,6,7,8,9,11,12,15]) == [[1, 2], [5, 6], [6, 7], [7, 8], [8, 9], [11, 12]])
assert(get_following_pairs([1,2,5,6,7,10]) == [[1, 2], [5, 6], [6, 7]])
assert(get_following_pairs([1,2,4,5,7,9,10]) == [[1, 2], [4, 5], [9,10]])
assert(get_following_pairs([1,2,4,5,7,9,10,11,12,15]) == [[1, 2], [4, 5], [9,10],[10,11],[11,12]])


item_longest_stretch_block_with_activity = transactions_items.groupby(['item_id'])['date_block_num']\
                                    .apply(list).apply(lambda x: get_longest_stretch(get_following_pairs(x)))

transactions_items['item_longest_stretch_block_with_activity'] = pd.to_numeric(transactions_items['item_id'].map(item_longest_stretch_block_with_activity), downcast='unsigned') 


item_number_of_consecutive_days_with_activity = transactions_items.groupby(['item_id'])['item_days_since_start']\
                                    .apply(list).apply(lambda x: len(get_following_pairs(x)))
    
transactions_items['item_number_of_consecutive_days_with_activity'] = pd.to_numeric(transactions_items['item_id'].map(item_number_of_consecutive_days_with_activity), downcast='unsigned') 

In [21]:
def get_units_between_first_and_last(units):
    return np.max(units) - np.min(units)

item_days_between_start_and_first_activity = transactions_items.groupby(['item_id'])['item_days_since_start'].apply(list).apply(lambda x: get_units_between_first_and_last(x))
transactions_items['item_days_between_start_and_first_activity'] = pd.to_numeric(transactions_items['item_id'].map(item_days_between_start_and_first_activity), downcast='unsigned') 

item_blocks_between_start_and_first_activity = transactions_items.groupby(['item_id'])['date_block_num'].apply(list).apply(lambda x: get_units_between_first_and_last(x))
transactions_items['item_blocks_between_start_and_first_activity'] = pd.to_numeric(transactions_items['item_id'].map(item_blocks_between_start_and_first_activity), downcast='unsigned') 

In [22]:

transactions_items['item_first_day'] = pd.to_numeric(transactions_items.groupby('item_id')['item_days_since_start'].transform(np.min), downcast='unsigned') 
transactions_items['item_last_day'] = pd.to_numeric(transactions_items.groupby('item_id')['item_days_since_start'].transform(np.max), downcast='unsigned') 

item_activity_on_all_blocks = transactions_items.groupby('item_id')['date_block_num'].nunique().apply(lambda x: x==number_of_blocks)
transactions_items['item_activity_on_all_blocks'] = transactions_items['item_id'].map(item_activity_on_all_blocks)

In [23]:
transactions_items['item_mean_price'] = pd.to_numeric(transactions_items.groupby('item_id')['item_price'].transform(np.mean), downcast='float') 
transactions_items['item_min_price'] = pd.to_numeric(transactions_items.groupby('item_id')['item_price'].transform(np.min), downcast='unsigned') 
transactions_items['item_max_price'] = pd.to_numeric(transactions_items.groupby('item_id')['item_price'].transform(np.max), downcast='unsigned') 
transactions_items['item_number_different_prices'] = pd.to_numeric(transactions_items.groupby('item_id')['item_price'].transform('nunique'), downcast='unsigned') 
transactions_items['item_price_amplitude'] = pd.to_numeric(((transactions_items['item_max_price'] - transactions_items['item_min_price'] ) / transactions_items['item_min_price']) * 100, downcast='float') 
transactions_items['category_mean_price'] = pd.to_numeric(transactions_items.groupby('item_category_id')['item_price'].transform(np.mean), downcast='float') 
transactions_items['item_deviation_mean_category_price'] =  pd.to_numeric(((transactions_items['item_mean_price'] - transactions_items['category_mean_price'] ) / transactions_items['category_mean_price']) * 100, downcast='float') 

In [24]:
item_first_two_blocks_units = transactions_items.groupby(['item_id','is_first_two_blocks'], as_index=False)['item_cnt_day'].sum()
item_first_two_blocks_units = item_first_two_blocks_units[item_first_two_blocks_units['is_first_two_blocks'] == True].set_index('item_id').iloc[:,1]
transactions_items['item_first_two_blocks_units'] = pd.to_numeric(transactions_items['item_id'].map(item_first_two_blocks_units), downcast='unsigned') 

item_last_two_blocks_units = transactions_items.groupby(['item_id','is_last_two_blocks'], as_index=False)['item_cnt_day'].sum()
item_last_two_blocks_units = item_last_two_blocks_units[item_last_two_blocks_units['is_last_two_blocks'] == True].set_index('item_id').iloc[:,1]
transactions_items['item_last_two_blocks_units'] = pd.to_numeric(transactions_items['item_id'].map(item_last_two_blocks_units), downcast='unsigned') 

transactions_items['item_fluctuation_units_first_last_blocks'] =  pd.to_numeric(((transactions_items['item_first_two_blocks_units'] - transactions_items['item_last_two_blocks_units'] ) / \
                                                             transactions_items['item_first_two_blocks_units']) * 100 * -1, downcast='float') 


item_first_two_blocks_mean_price = transactions_items.groupby(['item_id','is_first_two_blocks'], as_index=False)['item_price'].mean()
item_first_two_blocks_mean_price = item_first_two_blocks_mean_price[item_first_two_blocks_mean_price['is_first_two_blocks'] == True].set_index('item_id').iloc[:,1]
transactions_items['item_first_two_blocks_mean_price'] = pd.to_numeric(transactions_items['item_id'].map(item_first_two_blocks_mean_price), downcast='unsigned') 

item_last_two_blocks_mean_price = transactions_items.groupby(['item_id','is_last_two_blocks'], as_index=False)['item_price'].mean()
item_last_two_blocks_mean_price = item_last_two_blocks_mean_price[item_last_two_blocks_mean_price['is_last_two_blocks'] == True].set_index('item_id').iloc[:,1]
transactions_items['item_last_two_blocks_mean_price'] = pd.to_numeric(transactions_items['item_id'].map(item_last_two_blocks_mean_price), downcast='unsigned') 

transactions_items['item_fluctuation_price_first_last_blocks'] =  pd.to_numeric(((transactions_items['item_first_two_blocks_mean_price'] - transactions_items['item_last_two_blocks_mean_price'] ) / \
                                                             transactions_items['item_first_two_blocks_mean_price'])  * 100 * -1, downcast='float') 

In [25]:
transactions_items['item_share_of_total_units'] = pd.to_numeric(transactions_items['item_units'] * 100 / total_sales , downcast='float') 

transactions_items['item_share_of_total_turnover'] = pd.to_numeric(transactions_items['item_turnover'] * 100 / total_turnover, downcast='float') 

transactions_items['category_units'] = pd.to_numeric(transactions_items.groupby('item_category_id')['item_cnt_day'].transform(np.sum), downcast='unsigned') 
transactions_items['item_share_of_category_units'] = pd.to_numeric(transactions_items['item_units'] * 100 / transactions_items['category_units'], downcast='float') 

transactions_items['category_turnover'] = pd.to_numeric(transactions_items.groupby('item_category_id')['turnover'].transform(np.sum), downcast='unsigned') 
transactions_items['item_share_of_category_turnover'] = pd.to_numeric(transactions_items['item_turnover'] * 100 / transactions_items['category_turnover'], downcast='float') 


In [27]:
transactions_items.to_pickle("pickled/transactions_items")
transactions_items_blocks.to_pickle("pickled/transactions_items_blocks")

del transactions_items
del transactions_items_blocks
gc.collect()

114

#CATEGORY

-UNITS
category_units
category_block_units
category_mean_units_block
category_day_units
category_mean_units_day
category_max_units_block
category_min_units_block
category_max_units_day
category_min_units_day

-TURNOVER
category_turnover
category_block_turnover
category_mean_turnover_block
category_day_turnover
category_mean_turnover_day
category_max_turnover_block
category_min_turnover_block
category_max_turnover_day
category_min_turnover_day


-PRICE
category_mean_price
category_mean_price_block
category_min_price
category_max_price


-TREND
category_first_two_blocks_units
category_last_two_blocks_units
category_fluctuation_units_first_last_blocks
category_first_two_blocks_mean_price
category_last_two_blocks_mean_price
category_fluctuation_price_first_last_blocks

-SUBCATEGORY
subcategory
subcategory 1hot

-UNITS
subcategory_units
subcategory_block_units
subcategory_mean_units_block
subcategory_day_units
subcategory_mean_units_day
subcategory_max_units_block
subcategory_min_units_block
subcategory_max_units_day
subcategory_min_units_day

-TURNOVER
subcategory_turnover
subcategory_block_turnover
subcategory_mean_turnover_block
subcategory_day_turnover
subcategory_mean_turnover_day
subcategory_max_turnover_block
subcategory_min_turnover_block
subcategory_max_turnover_day
subcategory_min_turnover_day

-ENCODINGS
category_share_of_total_units
category_share_of_total_gross
subcategory_share_of_total_units
subcategory_share_of_total_gross

-TREND
subcategory_first_two_blocks_units
subcategory_last_two_blocks_units
subcategory_fluctuation_units_first_last_blocks
subcategory_first_two_blocks_mean_price
subcategory_last_two_blocks_mean_price
subcategory_fluctuation_price_first_last_blocks

In [28]:
gc.collect()
transactions_categories = transactions.copy()
transactions_categories_blocks = transactions.copy()

In [29]:
sub_cats = {}
for i in range(1,8):
    sub_cats[i] = "Accessories"
sub_cats[8] = "Tickets"
sub_cats[9] = "Delivery of goods"
for i in range(10,18):
    sub_cats[i] = "Consoles"
for i in range(18,25):
    sub_cats[i] = "Game for Consoles"
sub_cats[25] = "Accessories for Games"
sub_cats[26] = "Android Games"
sub_cats[27] = "MAC Games"
for i in range(28,32):
    sub_cats[i] = "PC Games"
for i in range(32,37):
    sub_cats[i] = "Payment Cards"
for i in range(37,40):
    sub_cats[i] = "Cinema - Blu-ray"
sub_cats[40] = "Cinema - DVD"
sub_cats[41] = "Cinema - Collectible"
for i in range(42,46):
    sub_cats[i] = "Audiobooks"
for i in range(46,55):
    sub_cats[i] = "Books"
for i in range(55,57):
    sub_cats[i] = "Music - CD"
sub_cats[57] = "Music - MP3"
sub_cats[58] = "Music - Vinyl"
sub_cats[59] = "Music - Music Video"
sub_cats[60] = "Music - Gift Edition"
for i in range(61,74):
    sub_cats[i] = "Gifts"
for i in range(73,79):
    sub_cats[i] = "Software"
sub_cats[79] = "Utility"
for i in range(80,84):
    sub_cats[i] = "Misc"
    
    
transactions_categories['subcategory'] = transactions_categories['item_category_id'].apply(lambda x: sub_cats[x]).astype('category')
transactions_categories_blocks['subcategory'] = transactions_categories_blocks['item_category_id'].apply(lambda x: sub_cats[x]).astype('category')

In [30]:
transactions_categories_blocks['category_block_units'] = pd.to_numeric(transactions_categories_blocks.groupby(['item_category_id','date_block_num'])['item_cnt_day'].transform(np.sum), downcast='unsigned') 
transactions_categories_blocks['category_block_turnover'] = pd.to_numeric(transactions_categories_blocks.groupby(['item_category_id','date_block_num'])['turnover'].transform(np.sum), downcast='unsigned') 
transactions_categories_blocks['category_mean_price_block'] = pd.to_numeric(transactions_categories_blocks.groupby(['item_category_id', 'date_block_num'])['item_price'].transform(np.mean), downcast='float') 
transactions_categories_blocks['subcategory_block_units'] = pd.to_numeric(transactions_categories_blocks.groupby(['subcategory','date_block_num'])['item_cnt_day'].transform(np.sum), downcast='unsigned') 
transactions_categories_blocks['subcategory_block_turnover'] = pd.to_numeric(transactions_categories_blocks.groupby(['subcategory','date_block_num'])['turnover'].transform(np.sum), downcast='unsigned') 
transactions_categories_blocks['subcategory_mean_price_block'] = pd.to_numeric(transactions_categories_blocks.groupby(['subcategory', 'date_block_num'])['item_price'].transform(np.mean), downcast='float') 


In [31]:
transactions_categories['category_units'] = pd.to_numeric(transactions_categories.groupby(['item_category_id'])['item_cnt_day'].transform(np.sum), downcast='unsigned') 
transactions_categories['category_mean_units_block'] = pd.to_numeric(transactions_categories_blocks.groupby(['item_category_id'])['category_block_units'].transform(np.mean), downcast='float') 
transactions_categories['category_day_units'] = pd.to_numeric(transactions_categories.groupby(['item_category_id','date'])['item_cnt_day'].transform(np.sum), downcast='unsigned') 
transactions_categories['category_mean_units_day'] = pd.to_numeric(transactions_categories.groupby(['item_category_id'])['category_day_units'].transform(np.mean), downcast='float') 
transactions_categories['category_max_units_block'] = pd.to_numeric(transactions_categories_blocks.groupby(['item_category_id'])['category_block_units'].transform(np.max), downcast='unsigned') 
transactions_categories['category_min_units_block'] = pd.to_numeric(transactions_categories_blocks.groupby(['item_category_id'])['category_block_units'].transform(np.min), downcast='unsigned') 
transactions_categories['category_max_units_day'] = pd.to_numeric(transactions_categories.groupby(['item_category_id'])['category_day_units'].transform(np.max), downcast='unsigned') 
transactions_categories['category_min_units_day'] = pd.to_numeric(transactions_categories.groupby(['item_category_id'])['category_day_units'].transform(np.min), downcast='unsigned') 

In [32]:
transactions_categories['category_turnover'] = pd.to_numeric(transactions_categories.groupby(['item_category_id'])['turnover'].transform(np.sum), downcast='unsigned') 
transactions_categories['category_mean_turnover_block'] = pd.to_numeric(transactions_categories_blocks.groupby(['item_category_id'])['category_block_turnover'].transform(np.mean), downcast='float') 
transactions_categories['category_day_turnover'] = pd.to_numeric(transactions_categories.groupby(['item_category_id','date'])['turnover'].transform(np.sum), downcast='unsigned') 
transactions_categories['category_mean_turnover_day'] = pd.to_numeric(transactions_categories.groupby(['item_category_id'])['category_day_turnover'].transform(np.mean), downcast='float') 
transactions_categories['category_max_turnover_block'] = pd.to_numeric(transactions_categories_blocks.groupby(['item_category_id'])['category_block_turnover'].transform(np.max), downcast='unsigned') 
transactions_categories['category_min_turnover_block'] = pd.to_numeric(transactions_categories_blocks.groupby(['item_category_id'])['category_block_turnover'].transform(np.min), downcast='unsigned') 
transactions_categories['category_max_turnover_day'] = pd.to_numeric(transactions_categories.groupby(['item_category_id'])['category_day_turnover'].transform(np.max), downcast='unsigned') 
transactions_categories['category_min_turnover_day'] = pd.to_numeric(transactions_categories.groupby(['item_category_id'])['category_day_turnover'].transform(np.min), downcast='unsigned') 

In [33]:
transactions_categories['category_mean_price'] = pd.to_numeric(transactions_categories.groupby('item_category_id')['item_price'].transform(np.mean), downcast='float') 
transactions_categories['category_min_price'] = pd.to_numeric(transactions_categories.groupby('item_category_id')['item_price'].transform(np.min), downcast='unsigned')
transactions_categories['category_max_price'] = pd.to_numeric(transactions_categories.groupby('item_category_id')['item_price'].transform(np.max), downcast='unsigned')

In [34]:
category_first_two_blocks_units = transactions_categories.groupby(['item_category_id','is_first_two_blocks'], as_index=False)['item_cnt_day'].sum()
category_first_two_blocks_units = category_first_two_blocks_units[category_first_two_blocks_units['is_first_two_blocks'] == True].set_index('item_category_id').iloc[:,1]
transactions_categories['category_first_two_blocks_units'] = pd.to_numeric(transactions_categories['item_category_id'].map(category_first_two_blocks_units), downcast='unsigned')

category_last_two_blocks_units = transactions_categories.groupby(['item_category_id','is_last_two_blocks'], as_index=False)['item_cnt_day'].sum()
category_last_two_blocks_units = category_last_two_blocks_units[category_last_two_blocks_units['is_last_two_blocks'] == True].set_index('item_category_id').iloc[:,1]
transactions_categories['category_last_two_blocks_units'] = pd.to_numeric(transactions_categories['item_category_id'].map(category_last_two_blocks_units), downcast='unsigned')

transactions_categories['category_fluctuation_units_first_last_blocks'] =  pd.to_numeric(((transactions_categories['category_first_two_blocks_units'] - transactions_categories['category_last_two_blocks_units'] ) / \
                                                             transactions_categories['category_first_two_blocks_units']) * 100 * -1, downcast='float') 


category_first_two_blocks_mean_price = transactions_categories.groupby(['item_category_id','is_first_two_blocks'], as_index=False)['item_price'].mean()
category_first_two_blocks_mean_price = category_first_two_blocks_mean_price[category_first_two_blocks_mean_price['is_first_two_blocks'] == True].set_index('item_category_id').iloc[:,1]
transactions_categories['category_first_two_blocks_mean_price'] = pd.to_numeric(transactions_categories['item_category_id'].map(category_first_two_blocks_mean_price), downcast='unsigned')

category_last_two_blocks_mean_price = transactions_categories.groupby(['item_category_id','is_last_two_blocks'], as_index=False)['item_price'].mean()
category_last_two_blocks_mean_price = category_last_two_blocks_mean_price[category_last_two_blocks_mean_price['is_last_two_blocks'] == True].set_index('item_category_id').iloc[:,1]
transactions_categories['category_last_two_blocks_mean_price'] = pd.to_numeric(transactions_categories['item_category_id'].map(category_last_two_blocks_mean_price), downcast='unsigned')

transactions_categories['category_fluctuation_price_first_last_blocks'] =  pd.to_numeric(((transactions_categories['category_first_two_blocks_mean_price'] - transactions_categories['category_last_two_blocks_mean_price'] ) / \
                                                             transactions_categories['category_first_two_blocks_mean_price'])  * 100 * -1, downcast='float') 

In [35]:
transactions_categories['video_game'] = transactions_categories["item_category_id"].isin(list(range(18,32)))
transactions_categories['gaming_old_gen'] = transactions_categories["item_category_id"].isin([10,11,15,18,19,23])
transactions_categories['gaming_new_gen'] = transactions_categories["item_category_id"].isin([12,14,16,20,22,24])
transactions_categories['pc_games'] = transactions_categories["item_category_id"].isin(list(range(27,32)))
transactions_categories['payment_cards'] = transactions_categories["item_category_id"].isin(list(range(32,37)))
transactions_categories['movies'] = transactions_categories["item_category_id"].isin(list(range(37,42)))
transactions_categories['movies_niche'] = transactions_categories["item_category_id"].isin([38,39])
transactions_categories['books'] = transactions_categories["item_category_id"].isin([42,55])
transactions_categories['music'] = transactions_categories["item_category_id"].isin(list(range(55,61)))
transactions_categories['music_CD'] = transactions_categories["item_category_id"].isin([55,56])
transactions_categories['music_vinyl'] = transactions_categories["item_category_id"].isin([58])
transactions_categories['gifts'] = transactions_categories["item_category_id"].isin(list(range(61,72)))
transactions_categories['software'] = transactions_categories["item_category_id"].isin(list(range(73,79)))

In [36]:
transactions_categories['subcategory_units'] = pd.to_numeric(transactions_categories.groupby(['subcategory'])['item_cnt_day'].transform(np.sum), downcast='unsigned')
transactions_categories['subcategory_mean_units_block'] = pd.to_numeric(transactions_categories_blocks.groupby(['subcategory'])['subcategory_block_units'].transform(np.mean), downcast='float') 
transactions_categories['subcategory_day_units'] = pd.to_numeric(transactions_categories.groupby(['subcategory','date'])['item_cnt_day'].transform(np.sum), downcast='unsigned')
transactions_categories['subcategory_mean_units_day'] = pd.to_numeric(transactions_categories.groupby(['subcategory'])['subcategory_day_units'].transform(np.mean), downcast='float') 
transactions_categories['subcategory_max_units_block'] = pd.to_numeric(transactions_categories_blocks.groupby(['subcategory'])['subcategory_block_units'].transform(np.max), downcast='unsigned')
transactions_categories['subcategory_min_units_block'] = pd.to_numeric(transactions_categories_blocks.groupby(['subcategory'])['subcategory_block_units'].transform(np.min), downcast='unsigned')
transactions_categories['subcategory_max_units_day'] = pd.to_numeric(transactions_categories.groupby(['subcategory'])['subcategory_day_units'].transform(np.max), downcast='unsigned')
transactions_categories['subcategory_min_units_day'] = pd.to_numeric(transactions_categories.groupby(['subcategory'])['subcategory_day_units'].transform(np.min), downcast='unsigned')

In [37]:
transactions_categories['subcategory_turnover'] = pd.to_numeric(transactions_categories.groupby(['subcategory'])['turnover'].transform(np.sum), downcast='unsigned')
transactions_categories['subcategory_mean_turnover_block'] = pd.to_numeric(transactions_categories_blocks.groupby(['subcategory'])['subcategory_block_turnover'].transform(np.mean), downcast='float') 
transactions_categories['subcategory_day_turnover'] = pd.to_numeric(transactions_categories.groupby(['subcategory','date'])['turnover'].transform(np.sum), downcast='unsigned')
transactions_categories['subcategory_mean_turnover_day'] = pd.to_numeric(transactions_categories.groupby(['subcategory'])['subcategory_day_turnover'].transform(np.mean), downcast='float') 
transactions_categories['subcategory_max_turnover_block'] = pd.to_numeric(transactions_categories_blocks.groupby(['subcategory'])['subcategory_block_turnover'].transform(np.max), downcast='unsigned')
transactions_categories['subcategory_min_turnover_block'] = pd.to_numeric(transactions_categories_blocks.groupby(['subcategory'])['subcategory_block_turnover'].transform(np.min), downcast='unsigned')
transactions_categories['subcategory_max_turnover_day'] = pd.to_numeric(transactions_categories.groupby(['subcategory'])['subcategory_day_turnover'].transform(np.max), downcast='unsigned')
transactions_categories['subcategory_min_turnover_day'] = pd.to_numeric(transactions_categories.groupby(['subcategory'])['subcategory_day_turnover'].transform(np.min), downcast='unsigned')

In [38]:
transactions_categories['category_share_of_total_units'] = pd.to_numeric(transactions_categories['category_units'] * 100 / total_sales , downcast='float') 
transactions_categories['category_share_of_total_turnover'] = pd.to_numeric(transactions_categories['category_turnover']* 100 / total_turnover, downcast='float') 

transactions_categories['subcategory_units'] = pd.to_numeric(transactions_categories.groupby("subcategory")['item_cnt_day'].transform(np.sum), downcast='unsigned')
transactions_categories['subcategory_share_of_total_units'] = pd.to_numeric(transactions_categories['subcategory_units'] * 100 / total_sales, downcast='float') 
transactions_categories['subcategory_turnover'] = pd.to_numeric(transactions_categories.groupby("subcategory")['turnover'].transform(np.sum), downcast='unsigned')
transactions_categories['subcategory_share_of_total_turnover'] = pd.to_numeric(transactions_categories['subcategory_turnover']* 100 / total_turnover, downcast='float') 

In [39]:
subcategory_first_two_blocks_units = transactions_categories.groupby(['subcategory','is_first_two_blocks'], as_index=False)['item_cnt_day'].sum()
subcategory_first_two_blocks_units = subcategory_first_two_blocks_units[subcategory_first_two_blocks_units['is_first_two_blocks'] == True].set_index('subcategory').iloc[:,1]
transactions_categories['subcategory_first_two_blocks_units'] = pd.to_numeric(transactions_categories['subcategory'].map(subcategory_first_two_blocks_units), downcast='unsigned')

subcategory_last_two_blocks_units = transactions_categories.groupby(['subcategory','is_last_two_blocks'], as_index=False)['item_cnt_day'].sum()
subcategory_last_two_blocks_units = subcategory_last_two_blocks_units[subcategory_last_two_blocks_units['is_last_two_blocks'] == True].set_index('subcategory').iloc[:,1]
transactions_categories['subcategory_last_two_blocks_units'] = pd.to_numeric(transactions_categories['subcategory'].map(subcategory_last_two_blocks_units), downcast='unsigned')

transactions_categories['subcategory_fluctuation_units_first_last_blocks'] =  pd.to_numeric(((transactions_categories['subcategory_first_two_blocks_units'] - transactions_categories['subcategory_last_two_blocks_units'] ) / \
                                                             transactions_categories['subcategory_first_two_blocks_units']) * 100 * -1, downcast='float') 


subcategory_first_two_blocks_mean_price = transactions_categories.groupby(['subcategory','is_first_two_blocks'], as_index=False)['item_price'].mean()
subcategory_first_two_blocks_mean_price = subcategory_first_two_blocks_mean_price[subcategory_first_two_blocks_mean_price['is_first_two_blocks'] == True].set_index('subcategory').iloc[:,1]
transactions_categories['subcategory_first_two_blocks_mean_price'] = pd.to_numeric(transactions_categories['subcategory'].map(subcategory_first_two_blocks_mean_price), downcast='unsigned')

subcategory_last_two_blocks_mean_price = transactions_categories.groupby(['subcategory','is_last_two_blocks'], as_index=False)['item_price'].mean()
subcategory_last_two_blocks_mean_price = subcategory_last_two_blocks_mean_price[subcategory_last_two_blocks_mean_price['is_last_two_blocks'] == True].set_index('subcategory').iloc[:,1]
transactions_categories['subcategory_last_two_blocks_mean_price'] = pd.to_numeric(transactions_categories['subcategory'].map(subcategory_last_two_blocks_mean_price), downcast='unsigned')

transactions_categories['subcategory_fluctuation_price_first_last_blocks'] =  pd.to_numeric(((transactions_categories['subcategory_first_two_blocks_mean_price'] - transactions_categories['subcategory_last_two_blocks_mean_price'] ) / \
                                                             transactions_categories['subcategory_first_two_blocks_mean_price'])  * 100 * -1, downcast='float') 

In [40]:

transactions_categories.to_pickle("pickled/transactions_categories")
transactions_categories_blocks.to_pickle("pickled/transactions_categories_blocks")

del transactions_categories
del transactions_categories_blocks
gc.collect()

221

#SHOP

-UNITS
shop_units
shop_block_units
shop_mean_units_block
shop_day_units
shop_mean_units_day
shop_max_units_block
shop_min_units_block
shop_max_units_day
shop_min_units_day

-TURNOVER
shop_turnover
shop_block_turnover
shop_mean_turnover_block
shop_day_turnover
shop_mean_turnover_day
shop_max_turnover_block
shop_min_turnover_block
shop_max_turnover_day
shop_min_turnover_day

-PRICE
shop_mean_price
shop_mean_price_block


-TREND
shop_first_two_blocks_units
shop_last_two_blocks_units
shop_fluctuation_units_first_last_blocks
shop_first_two_blocks_mean_price
shop_last_two_blocks_mean_price
shop_fluctuation_price_first_last_blocks

-ENCODINGS
shop_share_of_total_units
shop_share_of_total_gross

-MISC
shop_ids_TC
shop_ids_TRK
shop_ids_SEC
shop_ids_shopping_center
shop_ids_moscow

-CATEGORY
shop_top_category_units
shop_top_category_turnover
shop_top_subcategory_units
shop_top_subcategory_turnover

In [41]:
gc.collect()
transactions_shops = transactions.copy()
transactions_shops_blocks = transactions.copy()

In [42]:
shop_areas = {}
for i in range(0,2):
    shop_areas[i] = "Yakutsk"
shop_areas[2] = "Adygea"
shop_areas[3] = "Balashikha"
shop_areas[4] = "Volga"
shop_areas[5] = "Vologda"
for i in range(6,9):
    shop_areas[i] = "Voronezh"
shop_areas[9] = "Outbound Trading"
for i in range(10,12):
    shop_areas[i] = "Zhukovsky"
shop_areas[12] = "Online store emergency"
for i in range(13,15):
    shop_areas[i] = "Kazan"
shop_areas[15] = "Kaluga"
shop_areas[16] = "Kolomna"
for i in range(17,19):
    shop_areas[i] = "Krasnoyarsk"
shop_areas[19] = "Kursk"
for i in range(20,33):
    shop_areas[i] = "Moscow"
shop_areas[33] = "Mytishchi"
for i in range(34,36):
    shop_areas[i] = "N.Novgorod"
for i in range(36,38):
    shop_areas[i] = "Novosibirsk"
shop_areas[38] = "Omsk"
for i in range(39,42):
    shop_areas[i] = "RostovNaDonu"
for i in range(42,44):
    shop_areas[i] = "St. Petersburg"
for i in range(44,46):
    shop_areas[i] = "Samara"
shop_areas[46] = "Sergiev Posad"
shop_areas[47] = "Surgut"
shop_areas[48] = "Tomsk"
for i in range(49,52):
    shop_areas[i] = "Tyumen TC"
for i in range(52,54):
    shop_areas[i] = "Ufa"
shop_areas[54] = "Khimki"
shop_areas[55] = "Digital warehouse"
shop_areas[56] = "Chekhov"
for i in range(57,59):
    shop_areas[i] = "Yakutsk"
shop_areas[59] = "Yaroslavl"

transactions_shops['area'] = transactions_shops['shop_id'].apply(lambda x: shop_areas[x]).astype('category')

transactions_shops_blocks['area'] = transactions_shops['shop_id'].apply(lambda x: shop_areas[x]).astype('category')



In [43]:
transactions_shops_blocks['shop_block_units'] = pd.to_numeric(transactions_shops_blocks.groupby(['shop_id','date_block_num'])['item_cnt_day'].transform(np.sum), downcast='unsigned')
transactions_shops_blocks['shop_block_turnover'] = pd.to_numeric(transactions_shops_blocks.groupby(['shop_id','date_block_num'])['turnover'].transform(np.sum), downcast='unsigned')
transactions_shops_blocks['shop_mean_price_block'] = pd.to_numeric(transactions_shops_blocks.groupby(['shop_id', 'date_block_num'])['item_price'].transform(np.mean), downcast='float') 
transactions_shops_blocks['area_block_units'] = pd.to_numeric(transactions_shops_blocks.groupby(['area','date_block_num'])['item_cnt_day'].transform(np.sum), downcast='unsigned')
transactions_shops_blocks['area_block_turnover'] = pd.to_numeric(transactions_shops_blocks.groupby(['area','date_block_num'])['turnover'].transform(np.sum), downcast='unsigned')
transactions_shops_blocks['area_mean_price_block'] = pd.to_numeric(transactions_shops_blocks.groupby(['area', 'date_block_num'])['item_price'].transform(np.mean), downcast='float') 


In [44]:
transactions_shops['shop_units'] = pd.to_numeric(transactions_shops.groupby(['shop_id'])['item_cnt_day'].transform(np.sum), downcast='unsigned')
transactions_shops['shop_mean_units_block'] = pd.to_numeric(transactions_shops_blocks.groupby(['shop_id'])['shop_block_units'].transform(np.mean), downcast='float') 
transactions_shops['shop_day_units'] = pd.to_numeric(transactions_shops.groupby(['shop_id','date'])['item_cnt_day'].transform(np.sum), downcast='unsigned')
transactions_shops['shop_mean_units_day'] = pd.to_numeric(transactions_shops.groupby(['shop_id'])['shop_day_units'].transform(np.mean), downcast='float') 
transactions_shops['shop_max_units_block'] = pd.to_numeric(transactions_shops_blocks.groupby(['shop_id'])['shop_block_units'].transform(np.max), downcast='unsigned')
transactions_shops['shop_min_units_block'] = pd.to_numeric(transactions_shops_blocks.groupby(['shop_id'])['shop_block_units'].transform(np.min), downcast='unsigned')
transactions_shops['shop_max_units_day'] = pd.to_numeric(transactions_shops.groupby(['shop_id'])['shop_day_units'].transform(np.max), downcast='unsigned')
transactions_shops['shop_min_units_day'] = pd.to_numeric(transactions_shops.groupby(['shop_id'])['shop_day_units'].transform(np.min), downcast='unsigned')

In [45]:
transactions_shops['shop_turnover'] = pd.to_numeric(transactions_shops.groupby(['shop_id'])['turnover'].transform(np.sum), downcast='unsigned')
transactions_shops['shop_mean_turnover_block'] = pd.to_numeric(transactions_shops_blocks.groupby(['shop_id'])['shop_block_turnover'].transform(np.mean), downcast='float') 
transactions_shops['shop_day_turnover'] = pd.to_numeric(transactions_shops.groupby(['shop_id','date'])['turnover'].transform(np.sum), downcast='unsigned')
transactions_shops['shop_mean_turnover_day'] = pd.to_numeric(transactions_shops.groupby(['shop_id'])['shop_day_turnover'].transform(np.mean), downcast='float') 
transactions_shops['shop_max_turnover_block'] = pd.to_numeric(transactions_shops_blocks.groupby(['shop_id'])['shop_block_turnover'].transform(np.max), downcast='unsigned')
transactions_shops['shop_min_turnover_block'] = pd.to_numeric(transactions_shops_blocks.groupby(['shop_id'])['shop_block_turnover'].transform(np.min), downcast='unsigned')
transactions_shops['shop_max_turnover_day'] = pd.to_numeric(transactions_shops.groupby(['shop_id'])['shop_day_turnover'].transform(np.max), downcast='unsigned')
transactions_shops['shop_min_turnover_day'] = pd.to_numeric(transactions_shops.groupby(['shop_id'])['shop_day_turnover'].transform(np.min), downcast='unsigned')

In [46]:
transactions_shops['shop_mean_price'] = pd.to_numeric(transactions_shops.groupby('shop_id')['item_price'].transform(np.mean), downcast='float') 


In [47]:
                                                   


shop_first_two_blocks_mean_price = transactions_shops.groupby(['shop_id','is_first_two_blocks'], as_index=False)['item_price'].mean()
shop_first_two_blocks_mean_price = shop_first_two_blocks_mean_price[shop_first_two_blocks_mean_price['is_first_two_blocks'] == True].set_index('shop_id').iloc[:,1]
transactions_shops['shop_first_two_blocks_mean_price'] = pd.to_numeric(transactions_shops['shop_id'].map(shop_first_two_blocks_mean_price), downcast='unsigned')

shop_last_two_blocks_mean_price = transactions_shops.groupby(['shop_id','is_last_two_blocks'], as_index=False)['item_price'].mean()
shop_last_two_blocks_mean_price = shop_last_two_blocks_mean_price[shop_last_two_blocks_mean_price['is_last_two_blocks'] == True].set_index('shop_id').iloc[:,1]
transactions_shops['shop_last_two_blocks_mean_price'] = pd.to_numeric(transactions_shops['shop_id'].map(shop_last_two_blocks_mean_price), downcast='unsigned')

transactions_shops['shop_fluctuation_price_first_last_blocks'] =  pd.to_numeric(((transactions_shops['shop_first_two_blocks_mean_price'] - transactions_shops['shop_last_two_blocks_mean_price'] ) / \
                                                             transactions_shops['shop_first_two_blocks_mean_price'])  * 100 * -1, downcast='float') 

In [48]:
transactions_shops['shop_share_of_units'] = pd.to_numeric(transactions_shops['shop_units'] * 100 / total_sales, downcast='float') 
transactions_shops['shop_share_of_turnover'] = pd.to_numeric(transactions_shops['shop_turnover'] * 100 / total_turnover, downcast='float') 

In [49]:
shop_ids_TC = [1,2,13,14,16,23,24,26,28,31,37,38,42,43,44,46,50,54,58]
shop_ids_TRK = [3,33,39,40]
shop_ids_SEC = [7,34,36,47,48,49,56]
shop_ids_shopping_center = [4,5,8,15,17,18,19,27,29,30,32,41,45,51,53,59]
shop_ids_moscow = list(range(20,33))


transactions_shops['shop_TC'] = transactions_shops['shop_id'].isin(shop_ids_TC)
transactions_shops['shop_TRK'] = transactions_shops['shop_id'].isin(shop_ids_TRK)
transactions_shops['shop_SEC'] = transactions_shops['shop_id'].isin(shop_ids_SEC)
transactions_shops['shop_shopping_center'] = transactions_shops['shop_id'].isin(shop_ids_shopping_center)
transactions_shops['shop_moscow'] = transactions_shops['shop_id'].isin(shop_ids_moscow)

In [50]:
a = transactions_shops.groupby(['shop_id', 'item_category_id'],as_index=False)['item_cnt_day'].sum()

b = pd.DataFrame(transactions_shops.groupby(['shop_id', 'item_category_id'],as_index=False)['item_cnt_day'].sum()\
                  .groupby(['shop_id'])['item_cnt_day'].max()).reset_index()

c = b.merge(a, on=['shop_id', 'item_cnt_day'],how='left')[['shop_id', 'item_category_id']].rename(columns={'item_category_id':'max_category_units'})

transactions_shops = transactions_shops.merge(c, on=['shop_id'], how='left')

transactions_shops['max_category_units'] = pd.to_numeric(transactions_shops['max_category_units'], downcast='unsigned')

In [51]:
a = transactions_shops.groupby(['shop_id', 'item_category_id'],as_index=False)['turnover'].sum()

b = pd.DataFrame(transactions_shops.groupby(['shop_id', 'item_category_id'],as_index=False)['turnover'].sum()\
                  .groupby(['shop_id'])['turnover'].max()).reset_index()

c = b.merge(a, on=['shop_id', 'turnover'],how='left')[['shop_id', 'item_category_id']].rename(columns={'item_category_id':'max_category_turnover'})

transactions_shops = transactions_shops.merge(c, on=['shop_id'], how='left')
transactions_shops['max_category_turnover'] = pd.to_numeric(transactions_shops['max_category_turnover'], downcast='unsigned')

-AREA
area



-UNITS
area_units
area_block_units
area_mean_units_block
area_day_units
area_mean_units_day
area_max_units_block
area_min_units_block
area_max_units_day
area_min_units_day

-TURNOVER
area_turnover
area_block_turnover
area_mean_turnover_block
area_day_turnover
area_mean_turnover_day
area_max_turnover_block
area_min_turnover_block
area_max_turnover_day
area_min_turnover_day

-PRICE
area_mean_price
area_mean_price_block


-TREND
area_first_two_blocks_units
area_last_two_blocks_units
area_fluctuation_units_first_last_blocks
area_first_two_blocks_mean_price
area_last_two_blocks_mean_price
area_fluctuation_price_first_last_blocks

-ENCODINGS
area_share_of_total_units
area_share_of_total_gross

In [52]:
transactions_shops['area_units'] = pd.to_numeric(transactions_shops.groupby(['area'])['item_cnt_day'].transform(np.sum), downcast='unsigned')
transactions_shops['area_mean_units_block'] = pd.to_numeric(transactions_shops_blocks.groupby(['area'])['area_block_units'].transform(np.mean), downcast='float') 
transactions_shops['area_day_units'] = pd.to_numeric(transactions_shops.groupby(['area','date'])['item_cnt_day'].transform(np.sum), downcast='unsigned')
transactions_shops['area_mean_units_day'] = pd.to_numeric(transactions_shops.groupby(['area'])['area_day_units'].transform(np.mean), downcast='float') 
transactions_shops['area_max_units_block'] = pd.to_numeric(transactions_shops_blocks.groupby(['area'])['area_block_units'].transform(np.max), downcast='unsigned')
transactions_shops['area_min_units_block'] = pd.to_numeric(transactions_shops_blocks.groupby(['area'])['area_block_units'].transform(np.min), downcast='unsigned')
transactions_shops['area_max_units_day'] = pd.to_numeric(transactions_shops.groupby(['area'])['area_day_units'].transform(np.max), downcast='unsigned')
transactions_shops['area_min_units_day'] = pd.to_numeric(transactions_shops.groupby(['area'])['area_day_units'].transform(np.min), downcast='unsigned')

In [53]:
transactions_shops['area_turnover'] = pd.to_numeric(transactions_shops.groupby(['area'])['turnover'].transform(np.sum), downcast='unsigned')
transactions_shops['area_mean_turnover_block'] = pd.to_numeric(transactions_shops_blocks.groupby(['area'])['area_block_turnover'].transform(np.mean), downcast='float') 
transactions_shops['area_day_turnover'] = pd.to_numeric(transactions_shops.groupby(['area','date'])['turnover'].transform(np.sum), downcast='unsigned')
transactions_shops['area_mean_turnover_day'] = pd.to_numeric(transactions_shops.groupby(['area'])['area_day_turnover'].transform(np.mean), downcast='float') 
transactions_shops['area_max_turnover_block'] = pd.to_numeric(transactions_shops_blocks.groupby(['area'])['area_block_turnover'].transform(np.max), downcast='unsigned')
transactions_shops['area_min_turnover_block'] = pd.to_numeric(transactions_shops_blocks.groupby(['area'])['area_block_turnover'].transform(np.min), downcast='unsigned')
transactions_shops['area_max_turnover_day'] = pd.to_numeric(transactions_shops.groupby(['area'])['area_day_turnover'].transform(np.max), downcast='unsigned')
transactions_shops['area_min_turnover_day'] = pd.to_numeric(transactions_shops.groupby(['area'])['area_day_turnover'].transform(np.min), downcast='unsigned')

In [54]:
transactions_shops['area_mean_price'] = pd.to_numeric(transactions_shops.groupby('area')['item_price'].transform(np.mean), downcast='float') 


In [55]:
area_first_two_blocks_units = transactions_shops.groupby(['area','is_first_two_blocks'], as_index=False)['item_cnt_day'].sum()
area_first_two_blocks_units = area_first_two_blocks_units[area_first_two_blocks_units['is_first_two_blocks'] == True].set_index('area').iloc[:,1]
transactions_shops['area_first_two_blocks_units'] = pd.to_numeric(transactions_shops['area'].map(area_first_two_blocks_units), downcast='unsigned')

area_last_two_blocks_units = transactions_shops.groupby(['area','is_last_two_blocks'], as_index=False)['item_cnt_day'].sum()
area_last_two_blocks_units = area_last_two_blocks_units[area_last_two_blocks_units['is_last_two_blocks'] == True].set_index('area').iloc[:,1]
transactions_shops['area_last_two_blocks_units'] = pd.to_numeric(transactions_shops['area'].map(area_last_two_blocks_units), downcast='unsigned')

transactions_shops['area_fluctuation_units_first_last_blocks'] =  pd.to_numeric(((transactions_shops['area_first_two_blocks_units'] - transactions_shops['area_last_two_blocks_units'] ) / \
                                                             transactions_shops['area_first_two_blocks_units']) * 100 * -1, downcast='float') 


area_first_two_blocks_mean_price = transactions_shops.groupby(['area','is_first_two_blocks'], as_index=False)['item_price'].mean()
area_first_two_blocks_mean_price = area_first_two_blocks_mean_price[area_first_two_blocks_mean_price['is_first_two_blocks'] == True].set_index('area').iloc[:,1]
transactions_shops['area_first_two_blocks_mean_price'] = pd.to_numeric(transactions_shops['area'].map(area_first_two_blocks_mean_price), downcast='unsigned')

area_last_two_blocks_mean_price = transactions_shops.groupby(['area','is_last_two_blocks'], as_index=False)['item_price'].mean()
area_last_two_blocks_mean_price = area_last_two_blocks_mean_price[area_last_two_blocks_mean_price['is_last_two_blocks'] == True].set_index('area').iloc[:,1]
transactions_shops['area_last_two_blocks_mean_price'] = pd.to_numeric(transactions_shops['area'].map(area_last_two_blocks_mean_price), downcast='unsigned')

transactions_shops['area_fluctuation_price_first_last_blocks'] =  pd.to_numeric(((transactions_shops['area_first_two_blocks_mean_price'] - transactions_shops['area_last_two_blocks_mean_price'] ) / \
                                                             transactions_shops['area_first_two_blocks_mean_price'])  * 100 * -1, downcast='float') 

In [56]:

transactions_shops.to_pickle("pickled/transactions_shops")
transactions_shops_blocks.to_pickle("pickled/transactions_shops_blocks")


del transactions_shops
del transactions_shops_blocks
gc.collect()

182

shop_category


-UNITS
shop_category_units
shop_category_block_units
shop_category_mean_units_block
shop_category_day_units
shop_category_mean_units_day
shop_category_max_units_block
shop_category_min_units_block
shop_category_max_units_day
shop_category_min_units_day

-TURNOVER
shop_category_turnover
shop_category_block_turnover
shop_category_mean_turnover_block
shop_category_day_turnover
shop_category_mean_turnover_day
shop_category_max_turnover_block
shop_category_min_turnover_block
shop_category_max_turnover_day
shop_category_min_turnover_day

-PRICE
shop_category_mean_price
shop_category_mean_price_block


-TREND
shop_category_first_two_blocks_units
shop_category_last_two_blocks_units
shop_category_fluctuation_units_first_last_blocks
shop_category_first_two_blocks_mean_price
shop_category_last_two_blocks_mean_price
shop_category_fluctuation_price_first_last_blocks

-ENCODINGS
shop_category_share_of_total_units
shop_category_share_of_total_gross

In [57]:
gc.collect()
transactions_shops_categories = transactions.copy()
transactions_shops_categories_blocks = transactions.copy()

In [58]:
transactions_shops_categories_blocks['shop_category_block_units'] = pd.to_numeric(transactions_shops_categories_blocks.groupby(['shop_id','item_category_id','date_block_num'])['item_cnt_day'].transform(np.sum), downcast='unsigned')
transactions_shops_categories_blocks['shop_category_block_turnover'] = pd.to_numeric(transactions_shops_categories_blocks.groupby(['shop_id','item_category_id','date_block_num'])['turnover'].transform(np.sum), downcast='unsigned')
transactions_shops_categories_blocks['shop_category_mean_price_block'] = pd.to_numeric(transactions_shops_categories_blocks.groupby(['shop_id','item_category_id', 'date_block_num'])['item_price'].transform(np.mean), downcast='float') 

In [59]:
transactions_shops_categories['shop_category_units'] = pd.to_numeric(transactions_shops_categories.groupby(['shop_id','item_category_id'])['item_cnt_day'].transform(np.sum), downcast='unsigned')
transactions_shops_categories['shop_category_mean_units_block'] = pd.to_numeric(transactions_shops_categories_blocks.groupby(['shop_id','item_category_id'])['shop_category_block_units'].transform(np.mean), downcast='float') 
transactions_shops_categories['shop_category_day_units'] = pd.to_numeric(transactions_shops_categories.groupby(['shop_id','item_category_id','date'])['item_cnt_day'].transform(np.sum), downcast='unsigned')
transactions_shops_categories['shop_category_mean_units_day'] = pd.to_numeric(transactions_shops_categories.groupby(['shop_id','item_category_id'])['shop_category_day_units'].transform(np.mean), downcast='float') 
transactions_shops_categories['shop_category_max_units_block'] = pd.to_numeric(transactions_shops_categories_blocks.groupby(['shop_id','item_category_id'])['shop_category_block_units'].transform(np.max), downcast='unsigned')
transactions_shops_categories['shop_category_min_units_block'] = pd.to_numeric(transactions_shops_categories_blocks.groupby(['shop_id','item_category_id'])['shop_category_block_units'].transform(np.min), downcast='unsigned')
transactions_shops_categories['shop_category_max_units_day'] = pd.to_numeric(transactions_shops_categories.groupby(['shop_id','item_category_id'])['shop_category_day_units'].transform(np.max), downcast='unsigned')
transactions_shops_categories['shop_category_min_units_day'] = pd.to_numeric(transactions_shops_categories.groupby(['shop_id','item_category_id'])['shop_category_day_units'].transform(np.min), downcast='unsigned')


In [60]:
transactions_shops_categories['shop_category_turnover'] = pd.to_numeric(transactions_shops_categories.groupby(['shop_id','item_category_id'])['turnover'].transform(np.sum), downcast='unsigned')
transactions_shops_categories['shop_category_mean_turnover_block'] = pd.to_numeric(transactions_shops_categories_blocks.groupby(['shop_id','item_category_id'])['shop_category_block_turnover'].transform(np.mean), downcast='float') 
transactions_shops_categories['shop_category_day_turnover'] = pd.to_numeric(transactions_shops_categories.groupby(['shop_id','item_category_id','date'])['turnover'].transform(np.sum), downcast='unsigned')
transactions_shops_categories['shop_category_mean_turnover_day'] = pd.to_numeric(transactions_shops_categories.groupby(['shop_id','item_category_id'])['shop_category_day_turnover'].transform(np.mean), downcast='float') 
transactions_shops_categories['shop_category_max_turnover_block'] = pd.to_numeric(transactions_shops_categories_blocks.groupby(['shop_id','item_category_id'])['shop_category_block_turnover'].transform(np.max), downcast='unsigned')
transactions_shops_categories['shop_category_min_turnover_block'] = pd.to_numeric(transactions_shops_categories_blocks.groupby(['shop_id','item_category_id'])['shop_category_block_turnover'].transform(np.min), downcast='unsigned')
transactions_shops_categories['shop_category_max_turnover_day'] = pd.to_numeric(transactions_shops_categories.groupby(['shop_id','item_category_id'])['shop_category_day_turnover'].transform(np.max), downcast='unsigned')
transactions_shops_categories['shop_category_min_turnover_day'] = pd.to_numeric(transactions_shops_categories.groupby(['shop_id','item_category_id'])['shop_category_day_turnover'].transform(np.min), downcast='unsigned')

In [61]:
transactions_shops_categories['shop_category_mean_price'] = pd.to_numeric(transactions_shops_categories.groupby(['shop_id','item_category_id'])['item_price'].transform(np.mean), downcast='float') 


In [62]:

transactions_shops_categories.to_pickle("pickled/transactions_shops_categories")
transactions_shops_categories_blocks.to_pickle("pickled/transactions_shops_categories_blocks")


del transactions_shops_categories
del transactions_shops_categories_blocks
gc.collect()

84

In [63]:
###
#DEBUG
###


pd.set_option('display.max_columns', None)  
pd.set_option('display.expand_frame_repr', False)
pd.set_option('max_colwidth', -1)
#transactions.sample(10).sort_values(by=['item_units'], ascending=False)
#transactions[transactions['item_category_id'] == 58].sample(10).sort_values(by=['total_sales_units'], ascending=False)

In [12]:
train_item_ids = transactions['item_id'].unique()
train_shop_ids = transactions['shop_id'].unique()
test_item_ids = test['item_id'].unique()
test_shop_ids = test['shop_id'].unique()
train_blocks = transactions['date_block_num'].unique()

all_item_ids = np.unique(np.append(test_item_ids,train_item_ids))
all_shop_ids = np.unique(np.append(train_shop_ids,test_shop_ids))

In [13]:
len(test_item_ids)

5100

In [14]:
combinations = []
for shop in all_shop_ids:
    #get all article ids ever associated to this shop
    train_ids = transactions[transactions['shop_id'] == shop]['item_id'].unique()
    test_ids = test[test['shop_id'] == shop]['item_id'].unique()
    all_shop = np.unique(np.append(train_ids, test_ids))
    all_shop_combo = [[item, shop, block] for item in all_shop for block in train_blocks]
    for combo in all_shop_combo:
        combinations.append(combo)

In [15]:
len(combinations)

8333930

In [16]:
all_combos = pd.DataFrame(np.unique(np.vstack([combinations]), axis=0), columns=['item_id', 'shop_id', 'date_block_num'])

In [44]:
#all_combos = all_combos.sample(500000)

In [22]:
all_combos.head()

Unnamed: 0,item_id,shop_id,date_block_num
0,0.0,54.0,12.0
1,0.0,54.0,13.0
2,0.0,54.0,14.0
3,0.0,54.0,15.0
4,0.0,54.0,16.0


In [17]:
all_combos['item_id'] = pd.to_numeric(all_combos['item_id'], downcast='unsigned')
all_combos['shop_id'] = pd.to_numeric(all_combos['shop_id'], downcast='unsigned')
all_combos['date_block_num'] = pd.to_numeric(all_combos['date_block_num'], downcast='unsigned')

In [18]:
len(all_combos)

8333930

In [19]:
all_combos = pd.merge(all_combos, items[['item_id', 'item_category_id']], on='item_id', how='left')

In [20]:
dates = transactions[['date_block_num', 'month', 'year']].drop_duplicates(['date_block_num', 'month', 'year'])

dates_dict = {}

for index,row in dates.iterrows():
    dates_dict[row['date_block_num']] = {"month": row['month'], "year": row['year']}
    
dates_dict

{20: {'month': 9, 'year': 2014},
 15: {'month': 4, 'year': 2014},
 18: {'month': 7, 'year': 2014},
 19: {'month': 8, 'year': 2014},
 21: {'month': 10, 'year': 2014},
 22: {'month': 11, 'year': 2014},
 23: {'month': 12, 'year': 2014},
 24: {'month': 1, 'year': 2015},
 27: {'month': 4, 'year': 2015},
 25: {'month': 2, 'year': 2015},
 12: {'month': 1, 'year': 2014},
 14: {'month': 3, 'year': 2014},
 16: {'month': 5, 'year': 2014},
 17: {'month': 6, 'year': 2014},
 13: {'month': 2, 'year': 2014},
 26: {'month': 3, 'year': 2015},
 28: {'month': 5, 'year': 2015},
 29: {'month': 6, 'year': 2015},
 30: {'month': 7, 'year': 2015},
 31: {'month': 8, 'year': 2015},
 32: {'month': 9, 'year': 2015},
 33: {'month': 10, 'year': 2015}}

In [21]:
all_combos['month'] = pd.to_numeric(all_combos['date_block_num'].apply(lambda block: dates_dict[block]['month']), downcast='unsigned')
all_combos['year'] = pd.to_numeric(all_combos['date_block_num'].apply(lambda block: dates_dict[block]['year']), downcast='unsigned')

In [33]:
def downcast(df, columns, dtypes):
    for column in columns:
        dtype = dtypes[column].kind
        if dtype in ['O', 'b']:
            continue  
        if dtype == 'u':
            df[column] = pd.to_numeric(df[column].astype(int), downcast='unsigned')
        elif dtype == 'i':
            df[column] = pd.to_numeric(df[column].astype(int), downcast='signed')
        else:
            df[column] = pd.to_numeric(df[column], downcast='float')

In [34]:
def fillnas(df, columns, dtypes):
    for column in columns:
        dtype = dtypes[column].kind
        if dtype in ['O', 'b']:
            continue  
        df[column].fillna(0, inplace=True)

In [24]:
#del training

In [25]:
transactions_items_columns = ['item_id', 'item_first_block',
       'item_last_block', 'is_first_two_blocks', 'is_last_two_blocks',
       'item_units', 'item_mean_units_block', 'item_day_units',
       'item_mean_units_day', 'item_max_units_block',
       'item_min_units_block', 'item_max_units_day', 'item_min_units_day',
       'item_turnover', 'item_mean_turnover_block', 'item_day_turnover',
       'item_mean_turnover_day', 'item_max_turnover_block',
       'item_min_turnover_block', 'item_max_turnover_day',
       'item_min_turnover_day', 'item_days_of_activity',
       'item_blocks_of_activity', 'item_days_since_start',
       'item_mean_day_between_activity',
       'item_longest_stretch_days_without_activity',
       'item_longest_stretch_blocks_without_activity',
       'item_longest_stretch_block_with_activity',
       'item_number_of_consecutive_days_with_activity',
       'item_days_between_start_and_first_activity',
       'item_blocks_between_start_and_first_activity', 'item_first_day',
       'item_last_day', 'item_activity_on_all_blocks', 'item_mean_price',
       'item_min_price', 'item_max_price', 'item_number_different_prices',
       'item_price_amplitude', 
       'item_deviation_mean_category_price',
       'item_first_two_blocks_units', 'item_last_two_blocks_units',
       'item_fluctuation_units_first_last_blocks',
       'item_first_two_blocks_mean_price',
       'item_last_two_blocks_mean_price',
       'item_fluctuation_price_first_last_blocks',
       'item_share_of_total_units', 'item_share_of_total_turnover']

In [26]:
transactions_items = pd.read_pickle("pickled/transactions_items")
transactions_items_dtypes = transactions_items.dtypes
training = pd.merge(all_combos, transactions_items[transactions_items_columns].drop_duplicates('item_id'), on=['item_id'], how='left', copy=False)

del transactions_items
fillnas(training, transactions_items_columns, transactions_items_dtypes)
downcast(training, transactions_items_columns, transactions_items_dtypes)
gc.collect()

21

In [26]:
transactions_items_blocks_columns =  [ 'date_block_num', 'item_id', 'item_block_units',
 'item_block_turnover',
 'item_mean_price_block']

In [28]:
transactions_items_blocks = pd.read_pickle("pickled/transactions_items_blocks")
transactions_items_blocks_dtypes = transactions_items_blocks.dtypes

training = pd.merge(training, transactions_items_blocks[transactions_items_blocks_columns]\
                    .drop_duplicates(['item_id', 'date_block_num']), on=['item_id','date_block_num'], how='left', copy=False)

del transactions_items_blocks
training.fillna(0, inplace=True)
downcast(training, transactions_items_blocks_columns, transactions_items_blocks_dtypes)
gc.collect()

14

In [27]:
transactions_categories_columns = [
       'item_category_id',
       'category_units', 'category_mean_units_block',
       'category_day_units', 'category_mean_units_day',
       'category_max_units_block', 'category_min_units_block',
       'category_max_units_day', 'category_min_units_day',
       'category_turnover', 'category_mean_turnover_block',
       'category_day_turnover', 'category_mean_turnover_day',
       'category_max_turnover_block', 'category_min_turnover_block',
       'category_max_turnover_day', 'category_min_turnover_day',
       'category_mean_price', 'category_min_price', 'category_max_price',
       'category_first_two_blocks_units',
       'category_last_two_blocks_units',
       'category_fluctuation_units_first_last_blocks',
       'category_first_two_blocks_mean_price',
       'category_last_two_blocks_mean_price',
       'category_fluctuation_price_first_last_blocks', 'subcategory',
       'video_game', 'gaming_old_gen', 'gaming_new_gen', 'pc_games',
       'payment_cards', 'movies', 'movies_niche', 'books', 'music',
       'music_CD', 'music_vinyl', 'gifts', 'software',
       'subcategory_units',
       'subcategory_mean_units_block', 'subcategory_day_units',
       'subcategory_mean_units_day', 'subcategory_max_units_block',
       'subcategory_min_units_block', 'subcategory_max_units_day',
       'subcategory_min_units_day', 'subcategory_turnover', 'subcategory_mean_turnover_block',
       'subcategory_day_turnover', 'subcategory_mean_turnover_day',
       'subcategory_max_turnover_block', 'subcategory_min_turnover_block',
       'subcategory_max_turnover_day', 'subcategory_min_turnover_day',
       'category_share_of_total_units',
       'category_share_of_total_turnover',
       'subcategory_share_of_total_units',
       'subcategory_share_of_total_turnover',
       'subcategory_first_two_blocks_units',
       'subcategory_last_two_blocks_units',
       'subcategory_fluctuation_units_first_last_blocks',
       'subcategory_first_two_blocks_mean_price',
       'subcategory_last_two_blocks_mean_price',
       'subcategory_fluctuation_price_first_last_blocks']


In [31]:
transactions_categories = pd.read_pickle("pickled/transactions_categories")
transactions_categories_dtypes = transactions_categories.dtypes
training = pd.merge(training, transactions_categories[transactions_categories_columns]\
                    .drop_duplicates('item_category_id'), on=['item_category_id'], how='left', copy=False)

del transactions_categories
fillnas(training, transactions_categories_columns, transactions_categories_dtypes)
downcast(training, transactions_categories_columns, transactions_categories_dtypes)
gc.collect()

14

In [28]:
transactions_categories_blocks_columns = ['item_category_id', 'date_block_num', 'category_block_units',
 'category_block_turnover',
 'category_mean_price_block', 'subcategory_block_units',
 'subcategory_block_turnover',
 'subcategory_mean_price_block']

In [33]:
transactions_categories_blocks = pd.read_pickle("pickled/transactions_categories_blocks")
transactions_categories_blocks_dtypes = transactions_categories_blocks.dtypes
training = pd.merge(training, transactions_categories_blocks[transactions_categories_blocks_columns]\
                    .drop_duplicates(['item_category_id', 'date_block_num']), on=['item_category_id', 'date_block_num'], how='left', copy=False)

del transactions_categories_blocks
fillnas(training, transactions_categories_blocks_columns, transactions_categories_blocks_dtypes)
downcast(training, transactions_categories_blocks_columns, transactions_categories_blocks_dtypes)
gc.collect()

14

In [29]:
transactions_shops_columns = ['shop_id', 
       'shop_units', 'shop_mean_units_block', 'shop_day_units',
       'shop_mean_units_day', 'shop_max_units_block',
       'shop_min_units_block', 'shop_max_units_day', 'shop_min_units_day',
       'shop_turnover', 'shop_mean_turnover_block', 'shop_day_turnover',
       'shop_mean_turnover_day', 'shop_max_turnover_block',
       'shop_min_turnover_block', 'shop_max_turnover_day',
       'shop_min_turnover_day', 'shop_mean_price',
       'shop_first_two_blocks_units', 'shop_last_two_blocks_units',
       'shop_fluctuation_units_first_last_blocks',
       'shop_first_two_blocks_mean_price',
       'shop_last_two_blocks_mean_price',
       'shop_fluctuation_price_first_last_blocks', 'shop_share_of_units',
       'shop_share_of_turnover', 'shop_TC', 'shop_TRK', 'shop_SEC',
       'shop_shopping_center', 'shop_moscow', 'max_category_units',
       'max_category_turnover', 'area', 'area_units',
       'area_mean_units_block', 'area_day_units', 'area_mean_units_day',
       'area_max_units_block', 'area_min_units_block',
       'area_max_units_day', 'area_min_units_day', 'area_turnover',
     'area_mean_turnover_block',
       'area_day_turnover', 'area_mean_turnover_day',
       'area_max_turnover_block', 'area_min_turnover_block',
       'area_max_turnover_day', 'area_min_turnover_day',
       'area_mean_price',
       'area_first_two_blocks_units', 'area_last_two_blocks_units',
       'area_fluctuation_units_first_last_blocks',
       'area_first_two_blocks_mean_price',
       'area_last_two_blocks_mean_price',
       'area_fluctuation_price_first_last_blocks']

In [35]:
transactions_shops = pd.read_pickle("pickled/transactions_shops")
transactions_shops_dtypes = transactions_shops.dtypes
training = pd.merge(training, transactions_shops[transactions_shops_columns]\
                    .drop_duplicates('shop_id'), on=['shop_id'], how='left', copy=False)

del transactions_shops
fillnas(training, transactions_shops_columns, transactions_shops_dtypes)
downcast(training, transactions_shops_columns, transactions_shops_dtypes)
gc.collect()

14

In [30]:
transactions_shops_blocks_columns = ['shop_id', 'date_block_num',  'shop_block_units',
 'shop_block_turnover',
 'shop_mean_price_block', 'area_block_units',
 'area_block_turnover',
 'area_mean_price_block']

In [37]:
transactions_shops_blocks = pd.read_pickle("pickled/transactions_shops_blocks")
transactions_shops_blocks_dtypes = transactions_shops_blocks.dtypes
training = pd.merge(training, transactions_shops_blocks[transactions_shops_blocks_columns]\
                    .drop_duplicates(['shop_id', 'date_block_num']), on=['shop_id', 'date_block_num'], how='left', copy=False)

del transactions_shops_blocks
fillnas(training, transactions_shops_blocks_columns, transactions_shops_blocks_dtypes)
downcast(training, transactions_shops_blocks_columns, transactions_shops_blocks_dtypes)
gc.collect()

14

In [31]:
transactions_shops_categories_columns = [ 'shop_id', 
       'item_category_id',
       'shop_category_units', 'shop_category_mean_units_block',
       'shop_category_day_units', 'shop_category_mean_units_day',
       'shop_category_max_units_block', 'shop_category_min_units_block',
       'shop_category_max_units_day', 'shop_category_min_units_day',
       'shop_category_turnover', 'shop_category_mean_turnover_block',
       'shop_category_day_turnover', 'shop_category_mean_turnover_day',
       'shop_category_max_turnover_block',
       'shop_category_min_turnover_block',
       'shop_category_max_turnover_day', 'shop_category_min_turnover_day',
       'shop_category_mean_price']

In [39]:
transactions_shops_categories = pd.read_pickle("pickled/transactions_shops_categories")
transactions_shops_categories_dtypes = transactions_shops_categories.dtypes
training = pd.merge(training, transactions_shops_categories[transactions_shops_categories_columns]\
                    .drop_duplicates(['shop_id','item_category_id']), on=['shop_id','item_category_id'], how='left', copy=False)

del transactions_shops_categories
fillnas(training, transactions_shops_categories_columns, transactions_shops_categories_dtypes)
downcast(training, transactions_shops_categories_columns, transactions_shops_categories_dtypes)
gc.collect()

14

In [32]:
transactions_shops_categories_blocks_columns = ['shop_id', 'item_category_id', 'date_block_num',   'shop_category_block_units',
 'shop_category_block_turnover',
 'shop_category_mean_price_block']

In [41]:
transactions_shops_categories_blocks = pd.read_pickle("pickled/transactions_shops_categories_blocks")
transactions_shops_categories_blocks_dtypes = transactions_shops_categories_blocks.dtypes
training = pd.merge(training, transactions_shops_categories_blocks[transactions_shops_categories_blocks_columns]\
                    .drop_duplicates(['shop_id','item_category_id','date_block_num']),\
                    on=['shop_id','item_category_id','date_block_num'], how='left', copy=False)

del transactions_shops_categories_blocks
fillnas(training, transactions_shops_categories_blocks_columns, transactions_shops_categories_blocks_dtypes)
downcast(training, transactions_shops_categories_blocks_columns, transactions_shops_categories_blocks_dtypes)
gc.collect()

14

In [47]:
len(training.drop_duplicates(['shop_id','item_id','date_block_num']))

8333930

In [46]:
len(training)

8333930

In [43]:
training.dtypes

item_id                                       int16
shop_id                                       uint8
date_block_num                                uint8
item_category_id                              uint8
month                                         uint8
year                                         uint16
item_first_block                              uint8
item_last_block                               uint8
is_first_two_blocks                          object
is_last_two_blocks                           object
item_units                                  float32
item_mean_units_block                       float32
item_day_units                                int16
item_mean_units_day                         float32
item_max_units_block                          int16
item_min_units_block                          int16
item_max_units_day                            int16
item_min_units_day                             int8
item_turnover                                 int32
item_mean_tu

In [9]:
#del training
gc.collect()
#training.to_pickle("pickled/training_pre_lags")
#training = pd.read_pickle("pickled/training_pre_lags")

In [4]:
lag_columns = [
 'item_block_units',
 'item_block_turnover',
 'item_mean_price_block',
 'category_block_units',
 'category_block_turnover',
 'category_mean_price_block',
 'subcategory_block_units',
 'subcategory_block_turnover',
 'subcategory_mean_price_block',
 'shop_block_units',
 'shop_block_turnover',
 'shop_mean_price_block',
 'area_block_units',
 'area_block_turnover',
 'area_mean_price_block',
 'shop_category_block_units',
 'shop_category_turnover',
 'shop_category_mean_price_block'
 ]

In [47]:
def downcast_lags(df, lagged_names):
    for lagged_name in lagged_names:
        df[lagged_name].fillna(0,inplace=True)    
    for column in lagged_names:
        if "mean" in column:
            df[column] = pd.to_numeric(df[column], downcast='float')
        else:
            df[column] = pd.to_numeric(df[column].astype(int), downcast='unsigned')
    return df

In [48]:
lags = [1,2,3]
#lags = [1]


def add_lag_features(df, lag_columns, idx_columns):

    gc.collect()
    def lagged_name(lag_column, lag):
        return "%s_lag_%d" % (lag_column, lag)

    merge_columns = ['lagged_block'] + idx_columns

    for lag in lags:
        print(lag)
        lagged = df[['date_block_num'] + idx_columns + lag_columns].copy()
        lagged.rename(columns={'date_block_num':'lagged_block'},inplace=True)
        df['lagged_block'] = df['date_block_num'] - lag
        lagged_names = [lagged_name(c,lag) for c in lag_columns]
        lag_mapping = dict(zip(lag_columns, lagged_names))
        lagged.rename(columns=lag_mapping,inplace=True)
        
        df.set_index(merge_columns, inplace=True)
        #lagged.drop(columns=lag_columns, inplace=True)
        lagged.drop_duplicates(lagged_names+merge_columns, inplace=True)
        lagged.set_index(merge_columns, inplace=True)
        
        df = pd.merge(df, lagged,on=merge_columns,how='left',copy=False)
        gc.collect()
        df.reset_index(inplace=True)
    
        df = downcast_lags(df, lagged_names)
        del lagged
        gc.collect()
        
    return df

In [11]:
gc.collect()
def lagged_name(lag_column, lag):
    return "%s_lag_%d" % (lag_column, lag)

lag_columns = [
 'item_block_units',
 'item_block_turnover',
 'item_mean_price_block',
 ]

idx_columns = ['item_id']

training = add_lag_features(training,lag_columns,idx_columns)

1
2
3


In [13]:

pd.set_option('display.max_columns', None)  
pd.set_option('display.expand_frame_repr', False)
pd.set_option('max_colwidth', -1)
training[(training['item_id'] == 30) & (training['shop_id'] == 30)]\
                .drop_duplicates(['item_id', 'date_block_num'])[['item_id','shop_id','date_block_num','item_block_units','item_block_turnover',\
                'item_block_units_lag_1',
 'item_block_turnover_lag_1', 'item_mean_price_block_lag_1',
 'item_block_units_lag_2', 'item_block_turnover_lag_2',
 'item_mean_price_block_lag_2', 'item_block_units_lag_3',
 'item_block_turnover_lag_3' ,'item_mean_price_block_lag_3'
                                                                ]]

Unnamed: 0,item_id,shop_id,date_block_num,item_block_units,item_block_turnover,item_block_units_lag_1,item_block_turnover_lag_1,item_mean_price_block_lag_1,item_block_units_lag_2,item_block_turnover_lag_2,item_mean_price_block_lag_2,item_block_units_lag_3,item_block_turnover_lag_3,item_mean_price_block_lag_3
1210,30,30,12,58,9802,0,0,0.0,0,0,0.0,0,0,0.0
1211,30,30,13,24,3986,58,9802,169.0,0,0,0.0,0,0,0.0
1212,30,30,14,31,5239,24,3986,166.083328,58,9802,169.0,0,0,0.0
1213,30,30,15,21,3479,31,5239,169.0,24,3986,166.083328,58,9802,169.0
1214,30,30,16,16,2634,21,3479,165.666672,31,5239,169.0,24,3986,166.083328
1215,30,30,17,13,2197,16,2634,164.625,21,3479,165.666672,31,5239,169.0
1216,30,30,18,13,2127,13,2197,169.0,16,2634,164.625,21,3479,165.666672
1217,30,30,19,12,2028,13,2127,163.615387,13,2197,169.0,16,2634,164.625
1218,30,30,20,11,1859,12,2028,169.0,13,2127,163.615387,13,2197,169.0
1219,30,30,21,13,2197,11,1859,169.0,12,2028,169.0,13,2127,163.615387


In [15]:
gc.collect()
def lagged_name(lag_column, lag):
    return "%s_lag_%d" % (lag_column, lag)

lag_columns = [
 'category_block_units',
 'category_block_turnover',
 'category_mean_price_block',
 'subcategory_block_units',
 'subcategory_block_turnover',
 'subcategory_mean_price_block',
 ]

idx_columns = ['item_category_id']

training = add_lag_features(training,lag_columns,idx_columns)

1
2
3


In [17]:
gc.collect()
def lagged_name(lag_column, lag):
    return "%s_lag_%d" % (lag_column, lag)

lag_columns = [
 'shop_block_units',
 'shop_block_turnover',
 'shop_mean_price_block',
 'area_block_units',
 'area_block_turnover',
 'area_mean_price_block'
 ]

idx_columns = ['shop_id']


training = add_lag_features(training,lag_columns,idx_columns)

1
2
3


In [3]:
#gc.collect()
#training.to_pickle("pickled/training_mid_lags")
#training = pd.read_pickle("pickled/training_mid_lags")

In [7]:
lag_columns = [
  'shop_category_block_units',
 'shop_category_turnover',
 'shop_category_mean_price_block'
 ]

idx_columns = ['shop_id','item_category_id']


training = add_lag_features(training,lag_columns,idx_columns)

1
2
3


In [8]:
training.drop(columns=['lagged_block'],inplace=True)

In [9]:
training.columns.values

array(['shop_id', 'item_category_id', 'item_id', 'date_block_num',
       'month', 'year', 'item_first_block', 'item_last_block',
       'is_first_two_blocks', 'is_last_two_blocks', 'item_units',
       'item_mean_units_block', 'item_day_units', 'item_mean_units_day',
       'item_max_units_block', 'item_min_units_block',
       'item_max_units_day', 'item_min_units_day', 'item_turnover',
       'item_mean_turnover_block', 'item_day_turnover',
       'item_mean_turnover_day', 'item_max_turnover_block',
       'item_min_turnover_block', 'item_max_turnover_day',
       'item_min_turnover_day', 'item_days_of_activity',
       'item_blocks_of_activity', 'item_days_since_start',
       'item_mean_day_between_activity',
       'item_longest_stretch_days_without_activity',
       'item_longest_stretch_blocks_without_activity',
       'item_longest_stretch_block_with_activity',
       'item_number_of_consecutive_days_with_activity',
       'item_days_between_start_and_first_activity',
       '

In [3]:
gc.collect()
#training.to_pickle("pickled/training_post_lags")
training = pd.read_pickle("pickled/training_post_lags")

In [4]:
gc.collect()
training.info(memory_usage='deep')

<class 'pandas.core.frame.DataFrame'>
Int64Index: 8333930 entries, 0 to 8333929
Columns: 209 entries, item_id to shop_category_mean_price_block
dtypes: bool(5), category(2), float32(79), int16(23), int32(29), int8(2), object(16), uint16(24), uint32(17), uint8(12)
memory usage: 8.9 GB


In [18]:
training.dtypes

shop_id                                  uint64
item_category_id                         uint64
item_id                                   int64
date_block_num                            uint8
month                                     uint8
year                                     uint16
item_first_block                          uint8
item_last_block                           uint8
is_first_two_blocks                      object
is_last_two_blocks                       object
item_units                              float32
item_mean_units_block                   float32
item_day_units                            int16
item_mean_units_day                     float32
item_max_units_block                      int16
item_min_units_block                      int16
item_max_units_day                        int16
item_min_units_day                         int8
item_turnover                             int32
item_mean_turnover_block                float32
item_day_turnover                       

In [11]:
training.reset_index(inplace=True)

In [12]:
cols = ['shop_id','item_id', 'date_block_num']
training.set_index(cols, inplace=True)
transactions.drop_duplicates(cols, inplace=True)
transactions.set_index(cols, inplace=True)

training = pd.merge(training, transactions['y'], on=cols, how='left', copy=False)

training.reset_index(inplace=True)

In [13]:
training['y'] = training['y'].fillna(0)

In [39]:
gc.collect()
#training.to_pickle("pickled/training_pre_catboost")
training = pd.read_pickle("pickled/training_pre_catboost")

FileNotFoundError: [Errno 2] No such file or directory: 'pickled/training_pre_catboost'

In [9]:
gc.collect()

0

In [10]:
pd.set_option('display.max_rows', 300)
for col in training.columns:
    if training.dtypes[col].kind == 'b':
        print(col, training.dtypes[col])

shop_TC bool
shop_TRK bool
shop_SEC bool
shop_shopping_center bool
shop_moscow bool


In [11]:
for m in range(1,13):
    training[str(m)] = training['month'] == m

In [12]:
x_train = training[training['date_block_num'] < 33]
y_train = x_train['y']
#x_train = x_train.drop(columns=['y'])

x_val = training[training['date_block_num'] == 33]
y_val = x_val['y']
#x_val = x_val.drop(columns=['y'])


In [13]:
del training
gc.collect()

84

In [16]:
#x_train.to_pickle("pickled/x_train")
x_train = pd.read_pickle("pickled/x_train")
#y_train.to_pickle("pickled/y_train")
y_train = pd.read_pickle("pickled/y_train")
#x_val.to_pickle("pickled/x_val")
x_val = pd.read_pickle("pickled/x_val")
#y_val.to_pickle("pickled/y_val")
y_val = pd.read_pickle("pickled/y_val")

In [19]:
x_train.info(memory_usage='deep')

<class 'pandas.core.frame.DataFrame'>
Int64Index: 2000000 entries, 5883565 to 167468
Columns: 276 entries, shop_id to 12
dtypes: bool(17), category(2), float32(97), float64(1), int16(22), int32(29), int64(25), int8(2), object(16), uint16(30), uint32(23), uint64(3), uint8(9)
memory usage: 2.8 GB


In [10]:
pos_train_len = len(y_train[y_train != 0])
pos_train_len

887869

In [16]:
zeros_keep_indices_train = y_train[y_train == 0].sample(int(pos_train_len/4)).index
non_zeros_train_indices = y_train[y_train != 0].index
train_indices = np.append(np.array(zeros_keep_indices_train), np.array(non_zeros_train_indices))

x_train = x_train.loc[train_indices]
#x_train.drop_duplicates(['shop_id','item_id', 'date_block_num'], inplace=True)
y_train = y_train.loc[x_train.index]

In [15]:
len(non_zeros_train_indices)

887869

In [34]:
len(x_train)

7955115

In [17]:
pos_val_len = len(y_val[y_val != 0])
pos_val_len

31471

In [18]:
pos_val_len = len(y_val[y_val != 0])
zeros_keep_indices_val = y_val[y_val == 0].sample(int(pos_val_len/4)).index
non_zeros_val_indices = (y_val != 0).index
val_indices = np.append(np.array(zeros_keep_indices_val), np.array(non_zeros_val_indices))


y_val = y_val.loc[val_indices]
x_val = x_val.loc[val_indices]

In [33]:
len(x_val)

386682

In [36]:
training.columns.values

array(['shop_id', 'item_id', 'date_block_num', 'item_category_id',
       'month', 'year', 'item_first_block', 'item_last_block',
       'is_first_two_blocks', 'is_last_two_blocks', 'item_units',
       'item_mean_units_block', 'item_day_units', 'item_mean_units_day',
       'item_max_units_block', 'item_min_units_block',
       'item_max_units_day', 'item_min_units_day', 'item_turnover',
       'item_mean_turnover_block', 'item_day_turnover',
       'item_mean_turnover_day', 'item_max_turnover_block',
       'item_min_turnover_block', 'item_max_turnover_day',
       'item_min_turnover_day', 'item_days_of_activity',
       'item_blocks_of_activity', 'item_days_since_start',
       'item_mean_day_between_activity',
       'item_longest_stretch_days_without_activity',
       'item_longest_stretch_blocks_without_activity',
       'item_longest_stretch_block_with_activity',
       'item_number_of_consecutive_days_with_activity',
       'item_days_between_start_and_first_activity',
       '

In [5]:
cb_features = [
       'item_first_block', 'item_last_block',
       'is_first_two_blocks', 'is_last_two_blocks', 'item_units',
       'item_mean_units_block', 'item_mean_units_day',
       'item_max_units_block', 'item_min_units_block',
       'item_max_units_day', 'item_min_units_day', 'item_turnover',
       'item_mean_turnover_block',
       'item_mean_turnover_day', 'item_max_turnover_block',
       'item_min_turnover_block', 'item_max_turnover_day',
       'item_min_turnover_day', 'item_days_of_activity',
       'item_blocks_of_activity', 'item_days_since_start',
       'item_mean_day_between_activity',
       'item_longest_stretch_days_without_activity',
       'item_longest_stretch_blocks_without_activity',
       'item_longest_stretch_block_with_activity',
       'item_number_of_consecutive_days_with_activity',
       'item_days_between_start_and_first_activity',
       'item_blocks_between_start_and_first_activity', 'item_first_day',
       'item_last_day', 'item_activity_on_all_blocks', 'item_mean_price',
       'item_min_price', 'item_max_price', 'item_number_different_prices',
       'item_price_amplitude', 'item_deviation_mean_category_price',
       'item_first_two_blocks_units', 'item_last_two_blocks_units',
       'item_fluctuation_units_first_last_blocks',
       'item_first_two_blocks_mean_price',
       'item_last_two_blocks_mean_price',
       'item_fluctuation_price_first_last_blocks',
       'item_share_of_total_units', 'item_share_of_total_turnover',
       'category_units', 'category_mean_units_block',
       'category_mean_units_day',
       'category_max_units_block', 'category_min_units_block',
       'category_max_units_day', 'category_min_units_day',
       'category_turnover', 'category_mean_turnover_block',
       'category_mean_turnover_day',
       'category_max_turnover_block', 'category_min_turnover_block',
       'category_max_turnover_day', 'category_min_turnover_day',
       'category_mean_price', 'category_min_price', 'category_max_price',
       'category_first_two_blocks_units',
       'category_last_two_blocks_units',
       'category_fluctuation_units_first_last_blocks',
       'category_first_two_blocks_mean_price',
       'category_last_two_blocks_mean_price',
       'category_fluctuation_price_first_last_blocks',
       'video_game', 'gaming_old_gen', 'gaming_new_gen', 'pc_games',
       'payment_cards', 'movies', 'movies_niche', 'books', 'music',
       'music_CD', 'music_vinyl', 'gifts', 'software',
       'subcategory_units', 'subcategory_mean_units_block',
       'subcategory_mean_units_day',
       'subcategory_max_units_block', 'subcategory_min_units_block',
       'subcategory_max_units_day', 'subcategory_min_units_day',
       'subcategory_turnover', 'subcategory_mean_turnover_block',
       'subcategory_mean_turnover_day',
       'subcategory_max_turnover_block', 'subcategory_min_turnover_block',
       'subcategory_max_turnover_day', 'subcategory_min_turnover_day',
       'category_share_of_total_units',
       'category_share_of_total_turnover',
       'subcategory_share_of_total_units',
       'subcategory_share_of_total_turnover',
       'subcategory_first_two_blocks_units',
       'subcategory_last_two_blocks_units',
       'subcategory_fluctuation_units_first_last_blocks',
       'subcategory_first_two_blocks_mean_price',
       'subcategory_last_two_blocks_mean_price',
       'subcategory_fluctuation_price_first_last_blocks',
       'shop_units', 'shop_mean_units_block', 
       'shop_mean_units_day', 'shop_max_units_block',
       'shop_min_units_block', 'shop_max_units_day', 'shop_min_units_day',
       'shop_turnover', 'shop_mean_turnover_block', 
       'shop_mean_turnover_day', 'shop_max_turnover_block',
       'shop_min_turnover_block', 'shop_max_turnover_day',
       'shop_min_turnover_day', 'shop_mean_price',
       'shop_first_two_blocks_units', 'shop_last_two_blocks_units',
       'shop_fluctuation_units_first_last_blocks',
       'shop_first_two_blocks_mean_price',
       'shop_last_two_blocks_mean_price',
       'shop_fluctuation_price_first_last_blocks', 'shop_share_of_units',
       'shop_share_of_turnover', 'shop_TC', 'shop_TRK', 'shop_SEC',
       'shop_shopping_center', 'shop_moscow', 'max_category_units',
       'max_category_turnover', 'area_units',
       'area_mean_units_block', 'area_mean_units_day',
       'area_max_units_block', 'area_min_units_block',
       'area_max_units_day', 'area_min_units_day', 'area_turnover',
       'area_mean_turnover_block',
       'area_mean_turnover_day', 'area_max_turnover_block',
       'area_min_turnover_block', 'area_max_turnover_day',
       'area_min_turnover_day', 'area_mean_price',
       'area_first_two_blocks_units', 'area_last_two_blocks_units',
       'area_fluctuation_units_first_last_blocks',
       'area_first_two_blocks_mean_price',
       'area_last_two_blocks_mean_price',
       'area_fluctuation_price_first_last_blocks',
       'shop_category_units', 'shop_category_mean_units_block',
       'shop_category_mean_units_day',
       'shop_category_max_units_block', 'shop_category_min_units_block',
       'shop_category_max_units_day', 'shop_category_min_units_day',
       'shop_category_turnover', 'shop_category_mean_turnover_block',
       'shop_category_mean_turnover_day',
       'shop_category_max_turnover_block',
       'shop_category_min_turnover_block',
       'shop_category_max_turnover_day', 'shop_category_min_turnover_day',
       'shop_category_mean_price',
       'item_block_units_lag_1', 'item_block_turnover_lag_1',
       'item_mean_price_block_lag_1', 'item_block_units_lag_2',
       'item_block_turnover_lag_2', 'item_mean_price_block_lag_2',
       'item_block_units_lag_3', 'item_block_turnover_lag_3',
       'item_mean_price_block_lag_3', 'category_block_units_lag_1',
       'category_block_turnover_lag_1', 'category_mean_price_block_lag_1',
       'subcategory_block_units_lag_1',
       'subcategory_block_turnover_lag_1',
       'subcategory_mean_price_block_lag_1', 'category_block_units_lag_2',
       'category_block_turnover_lag_2', 'category_mean_price_block_lag_2',
       'subcategory_block_units_lag_2',
       'subcategory_block_turnover_lag_2',
       'subcategory_mean_price_block_lag_2', 'category_block_units_lag_3',
       'category_block_turnover_lag_3', 'category_mean_price_block_lag_3',
       'subcategory_block_units_lag_3',
       'subcategory_block_turnover_lag_3',
       'subcategory_mean_price_block_lag_3', 'shop_block_units_lag_1',
       'shop_block_turnover_lag_1', 'shop_mean_price_block_lag_1',
       'area_block_units_lag_1', 'area_block_turnover_lag_1',
       'area_mean_price_block_lag_1', 'shop_block_units_lag_2',
       'shop_block_turnover_lag_2', 'shop_mean_price_block_lag_2',
       'area_block_units_lag_2', 'area_block_turnover_lag_2',
       'area_mean_price_block_lag_2', 'shop_block_units_lag_3',
       'shop_block_turnover_lag_3', 'shop_mean_price_block_lag_3',
       'area_block_units_lag_3', 'area_block_turnover_lag_3',
       'area_mean_price_block_lag_3',
       '1', '2', '3', '4', '5', '6','7', '8', '9', '10', '11', '12']


In [36]:
cb_model = CatBoostRegressor(iterations=70000,
                             learning_rate=0.001,
                             eval_metric='RMSE',
                             #thread_count=16,
                             task_type = "GPU",
                             use_best_model=True,
                             #l2_leaf_reg = 1000,
                             od_type = "Iter",
                             od_wait = 30,
                             #random_strength = 10,
                             #bagging_temperature = 1,
                             #one_hot_max_size = 2,
                             random_seed = 42)

#drops = ['subcategory','area']
#x_train = x_train.drop(columns=drops)
#x_val = x_val.drop(columns=drops)


cb_model.fit(x_train[cb_features], y_train, #cat_features=categorical_features_indices,
             eval_set=(x_val[cb_features],y_val),
             #cat_features=categorical_features_pos,         
             verbose=True)

0:	learn: 1.0781136	test: 0.9142845	best: 0.9142845 (0)	total: 108ms	remaining: 2h 5m 54s
1:	learn: 1.0777921	test: 0.9140996	best: 0.9140996 (1)	total: 202ms	remaining: 1h 57m 36s
2:	learn: 1.0774676	test: 0.9139167	best: 0.9139167 (2)	total: 295ms	remaining: 1h 54m 46s
3:	learn: 1.0771447	test: 0.9137301	best: 0.9137301 (3)	total: 385ms	remaining: 1h 52m 15s
4:	learn: 1.0768218	test: 0.9135452	best: 0.9135452 (4)	total: 475ms	remaining: 1h 50m 42s
5:	learn: 1.0764999	test: 0.9133587	best: 0.9133587 (5)	total: 563ms	remaining: 1h 49m 31s
6:	learn: 1.0761791	test: 0.9131781	best: 0.9131781 (6)	total: 654ms	remaining: 1h 48m 54s
7:	learn: 1.0758570	test: 0.9129922	best: 0.9129922 (7)	total: 740ms	remaining: 1h 47m 52s
8:	learn: 1.0755377	test: 0.9128068	best: 0.9128068 (8)	total: 829ms	remaining: 1h 47m 30s
9:	learn: 1.0752194	test: 0.9126220	best: 0.9126220 (9)	total: 918ms	remaining: 1h 47m 6s
10:	learn: 1.0749000	test: 0.9124485	best: 0.9124485 (10)	total: 1.01s	remaining: 1h 47m 14s

90:	learn: 1.0508532	test: 0.8989970	best: 0.8989970 (90)	total: 8.37s	remaining: 1h 47m 8s
91:	learn: 1.0505649	test: 0.8988018	best: 0.8988018 (91)	total: 8.46s	remaining: 1h 47m 5s
92:	learn: 1.0502832	test: 0.8986667	best: 0.8986667 (92)	total: 8.54s	remaining: 1h 47m 3s
93:	learn: 1.0500031	test: 0.8985121	best: 0.8985121 (93)	total: 8.64s	remaining: 1h 47m 4s
94:	learn: 1.0497204	test: 0.8983555	best: 0.8983555 (94)	total: 8.73s	remaining: 1h 47m 3s
95:	learn: 1.0494385	test: 0.8982115	best: 0.8982115 (95)	total: 8.82s	remaining: 1h 47m 6s
96:	learn: 1.0491600	test: 0.8980586	best: 0.8980586 (96)	total: 8.92s	remaining: 1h 47m 7s
97:	learn: 1.0488807	test: 0.8979110	best: 0.8979110 (97)	total: 9.01s	remaining: 1h 47m 8s
98:	learn: 1.0486025	test: 0.8977558	best: 0.8977558 (98)	total: 9.11s	remaining: 1h 47m 10s
99:	learn: 1.0483195	test: 0.8976096	best: 0.8976096 (99)	total: 9.2s	remaining: 1h 47m 10s
100:	learn: 1.0480384	test: 0.8974666	best: 0.8974666 (100)	total: 9.29s	remain

177:	learn: 1.0276120	test: 0.8860299	best: 0.8860299 (177)	total: 16.3s	remaining: 1h 46m 36s
178:	learn: 1.0273571	test: 0.8858596	best: 0.8858596 (178)	total: 16.4s	remaining: 1h 46m 37s
179:	learn: 1.0271042	test: 0.8857313	best: 0.8857313 (179)	total: 16.5s	remaining: 1h 46m 38s
180:	learn: 1.0268530	test: 0.8855922	best: 0.8855922 (180)	total: 16.6s	remaining: 1h 46m 35s
181:	learn: 1.0266043	test: 0.8854697	best: 0.8854697 (181)	total: 16.7s	remaining: 1h 46m 34s
182:	learn: 1.0263529	test: 0.8853420	best: 0.8853420 (182)	total: 16.8s	remaining: 1h 46m 34s
183:	learn: 1.0261008	test: 0.8852051	best: 0.8852051 (183)	total: 16.8s	remaining: 1h 46m 32s
184:	learn: 1.0258532	test: 0.8850829	best: 0.8850829 (184)	total: 16.9s	remaining: 1h 46m 32s
185:	learn: 1.0256048	test: 0.8849473	best: 0.8849473 (185)	total: 17s	remaining: 1h 46m 37s
186:	learn: 1.0253607	test: 0.8848319	best: 0.8848319 (186)	total: 17.1s	remaining: 1h 46m 39s
187:	learn: 1.0251147	test: 0.8847181	best: 0.884718

264:	learn: 1.0070584	test: 0.8748710	best: 0.8748710 (264)	total: 24.3s	remaining: 1h 46m 35s
265:	learn: 1.0068355	test: 0.8747514	best: 0.8747514 (265)	total: 24.4s	remaining: 1h 46m 33s
266:	learn: 1.0066078	test: 0.8746032	best: 0.8746032 (266)	total: 24.5s	remaining: 1h 46m 33s
267:	learn: 1.0063911	test: 0.8744995	best: 0.8744995 (267)	total: 24.6s	remaining: 1h 46m 34s
268:	learn: 1.0061723	test: 0.8743986	best: 0.8743986 (268)	total: 24.7s	remaining: 1h 46m 34s
269:	learn: 1.0059555	test: 0.8742859	best: 0.8742859 (269)	total: 24.8s	remaining: 1h 46m 35s
270:	learn: 1.0057372	test: 0.8741560	best: 0.8741560 (270)	total: 24.9s	remaining: 1h 46m 39s
271:	learn: 1.0055159	test: 0.8740474	best: 0.8740474 (271)	total: 25s	remaining: 1h 46m 40s
272:	learn: 1.0052956	test: 0.8739301	best: 0.8739301 (272)	total: 25.1s	remaining: 1h 46m 41s
273:	learn: 1.0050735	test: 0.8738047	best: 0.8738047 (273)	total: 25.2s	remaining: 1h 46m 45s
274:	learn: 1.0048500	test: 0.8736565	best: 0.873656

351:	learn: 0.9888730	test: 0.8652040	best: 0.8652040 (351)	total: 32.8s	remaining: 1h 48m 16s
352:	learn: 0.9886807	test: 0.8651174	best: 0.8651174 (352)	total: 32.9s	remaining: 1h 48m 18s
353:	learn: 0.9884901	test: 0.8650120	best: 0.8650120 (353)	total: 33s	remaining: 1h 48m 19s
354:	learn: 0.9882960	test: 0.8649268	best: 0.8649268 (354)	total: 33.1s	remaining: 1h 48m 19s
355:	learn: 0.9881010	test: 0.8648180	best: 0.8648180 (355)	total: 33.2s	remaining: 1h 48m 22s
356:	learn: 0.9879048	test: 0.8647087	best: 0.8647087 (356)	total: 33.3s	remaining: 1h 48m 22s
357:	learn: 0.9877116	test: 0.8646018	best: 0.8646018 (357)	total: 33.4s	remaining: 1h 48m 23s
358:	learn: 0.9875151	test: 0.8645008	best: 0.8645008 (358)	total: 33.5s	remaining: 1h 48m 23s
359:	learn: 0.9873217	test: 0.8644029	best: 0.8644029 (359)	total: 33.6s	remaining: 1h 48m 26s
360:	learn: 0.9871298	test: 0.8643165	best: 0.8643165 (360)	total: 33.7s	remaining: 1h 48m 27s
361:	learn: 0.9869350	test: 0.8642123	best: 0.864212

438:	learn: 0.9727306	test: 0.8570725	best: 0.8570725 (438)	total: 41.7s	remaining: 1h 50m 14s
439:	learn: 0.9725576	test: 0.8569948	best: 0.8569948 (439)	total: 41.9s	remaining: 1h 50m 17s
440:	learn: 0.9723873	test: 0.8569220	best: 0.8569220 (440)	total: 42s	remaining: 1h 50m 17s
441:	learn: 0.9722154	test: 0.8568481	best: 0.8568481 (441)	total: 42.1s	remaining: 1h 50m 18s
442:	learn: 0.9720448	test: 0.8567735	best: 0.8567735 (442)	total: 42.1s	remaining: 1h 50m 17s
443:	learn: 0.9718751	test: 0.8566855	best: 0.8566855 (443)	total: 42.2s	remaining: 1h 50m 18s
444:	learn: 0.9716990	test: 0.8566044	best: 0.8566044 (444)	total: 42.4s	remaining: 1h 50m 20s
445:	learn: 0.9715254	test: 0.8565170	best: 0.8565170 (445)	total: 42.5s	remaining: 1h 50m 22s
446:	learn: 0.9713548	test: 0.8564443	best: 0.8564443 (446)	total: 42.6s	remaining: 1h 50m 22s
447:	learn: 0.9711812	test: 0.8563531	best: 0.8563531 (447)	total: 42.7s	remaining: 1h 50m 24s
448:	learn: 0.9710103	test: 0.8562586	best: 0.856258

526:	learn: 0.9582814	test: 0.8498501	best: 0.8498501 (526)	total: 50.9s	remaining: 1h 51m 49s
527:	learn: 0.9581220	test: 0.8497722	best: 0.8497722 (527)	total: 51s	remaining: 1h 51m 51s
528:	learn: 0.9579686	test: 0.8497067	best: 0.8497067 (528)	total: 51.1s	remaining: 1h 51m 53s
529:	learn: 0.9578104	test: 0.8496326	best: 0.8496326 (529)	total: 51.2s	remaining: 1h 51m 56s
530:	learn: 0.9576569	test: 0.8495600	best: 0.8495600 (530)	total: 51.3s	remaining: 1h 51m 57s
531:	learn: 0.9575017	test: 0.8494764	best: 0.8494764 (531)	total: 51.4s	remaining: 1h 51m 57s
532:	learn: 0.9573474	test: 0.8493843	best: 0.8493843 (532)	total: 51.5s	remaining: 1h 51m 57s
533:	learn: 0.9571907	test: 0.8493144	best: 0.8493144 (533)	total: 51.7s	remaining: 1h 51m 59s
534:	learn: 0.9570366	test: 0.8492397	best: 0.8492397 (534)	total: 51.8s	remaining: 1h 52m
535:	learn: 0.9568792	test: 0.8491630	best: 0.8491630 (535)	total: 51.9s	remaining: 1h 52m 3s
536:	learn: 0.9567224	test: 0.8490896	best: 0.8490896 (53

613:	learn: 0.9456363	test: 0.8436142	best: 0.8436142 (613)	total: 59.9s	remaining: 1h 52m 43s
614:	learn: 0.9455015	test: 0.8435524	best: 0.8435524 (614)	total: 59.9s	remaining: 1h 52m 43s
615:	learn: 0.9453621	test: 0.8434839	best: 0.8434839 (615)	total: 1m	remaining: 1h 52m 43s
616:	learn: 0.9452274	test: 0.8434181	best: 0.8434181 (616)	total: 1m	remaining: 1h 52m 42s
617:	learn: 0.9450905	test: 0.8433516	best: 0.8433516 (617)	total: 1m	remaining: 1h 52m 43s
618:	learn: 0.9449497	test: 0.8432895	best: 0.8432895 (618)	total: 1m	remaining: 1h 52m 45s
619:	learn: 0.9448129	test: 0.8432156	best: 0.8432156 (619)	total: 1m	remaining: 1h 52m 45s
620:	learn: 0.9446785	test: 0.8431553	best: 0.8431553 (620)	total: 1m	remaining: 1h 52m 46s
621:	learn: 0.9445461	test: 0.8430778	best: 0.8430778 (621)	total: 1m	remaining: 1h 52m 46s
622:	learn: 0.9444109	test: 0.8430171	best: 0.8430171 (622)	total: 1m	remaining: 1h 52m 46s
623:	learn: 0.9442795	test: 0.8429655	best: 0.8429655 (623)	total: 1m	rema

700:	learn: 0.9343384	test: 0.8379658	best: 0.8379658 (700)	total: 1m 8s	remaining: 1h 53m 10s
701:	learn: 0.9342172	test: 0.8379091	best: 0.8379091 (701)	total: 1m 8s	remaining: 1h 53m 10s
702:	learn: 0.9340936	test: 0.8378208	best: 0.8378208 (702)	total: 1m 8s	remaining: 1h 53m 10s
703:	learn: 0.9339717	test: 0.8377679	best: 0.8377679 (703)	total: 1m 8s	remaining: 1h 53m 11s
704:	learn: 0.9338480	test: 0.8377015	best: 0.8377015 (704)	total: 1m 9s	remaining: 1h 53m 11s
705:	learn: 0.9337259	test: 0.8376382	best: 0.8376382 (705)	total: 1m 9s	remaining: 1h 53m 10s
706:	learn: 0.9335976	test: 0.8375628	best: 0.8375628 (706)	total: 1m 9s	remaining: 1h 53m 11s
707:	learn: 0.9334727	test: 0.8375118	best: 0.8375118 (707)	total: 1m 9s	remaining: 1h 53m 12s
708:	learn: 0.9333559	test: 0.8374696	best: 0.8374696 (708)	total: 1m 9s	remaining: 1h 53m 12s
709:	learn: 0.9332333	test: 0.8374085	best: 0.8374085 (709)	total: 1m 9s	remaining: 1h 53m 12s
710:	learn: 0.9331156	test: 0.8373497	best: 0.8373

787:	learn: 0.9242282	test: 0.8325693	best: 0.8325693 (787)	total: 1m 17s	remaining: 1h 53m 34s
788:	learn: 0.9241126	test: 0.8325015	best: 0.8325015 (788)	total: 1m 17s	remaining: 1h 53m 35s
789:	learn: 0.9240022	test: 0.8324213	best: 0.8324213 (789)	total: 1m 17s	remaining: 1h 53m 35s
790:	learn: 0.9238955	test: 0.8323677	best: 0.8323677 (790)	total: 1m 17s	remaining: 1h 53m 34s
791:	learn: 0.9237903	test: 0.8323187	best: 0.8323187 (791)	total: 1m 17s	remaining: 1h 53m 33s
792:	learn: 0.9236782	test: 0.8322736	best: 0.8322736 (792)	total: 1m 18s	remaining: 1h 53m 33s
793:	learn: 0.9235691	test: 0.8322155	best: 0.8322155 (793)	total: 1m 18s	remaining: 1h 53m 33s
794:	learn: 0.9234555	test: 0.8321723	best: 0.8321723 (794)	total: 1m 18s	remaining: 1h 53m 33s
795:	learn: 0.9233510	test: 0.8321245	best: 0.8321245 (795)	total: 1m 18s	remaining: 1h 53m 32s
796:	learn: 0.9232425	test: 0.8320805	best: 0.8320805 (796)	total: 1m 18s	remaining: 1h 53m 33s
797:	learn: 0.9231325	test: 0.8320236	be

875:	learn: 0.9149914	test: 0.8275996	best: 0.8275996 (875)	total: 1m 26s	remaining: 1h 53m 49s
876:	learn: 0.9148898	test: 0.8275501	best: 0.8275501 (876)	total: 1m 26s	remaining: 1h 53m 49s
877:	learn: 0.9147873	test: 0.8274962	best: 0.8274962 (877)	total: 1m 26s	remaining: 1h 53m 50s
878:	learn: 0.9146842	test: 0.8274435	best: 0.8274435 (878)	total: 1m 26s	remaining: 1h 53m 50s
879:	learn: 0.9145906	test: 0.8274003	best: 0.8274003 (879)	total: 1m 26s	remaining: 1h 53m 51s
880:	learn: 0.9144930	test: 0.8273498	best: 0.8273498 (880)	total: 1m 27s	remaining: 1h 53m 51s
881:	learn: 0.9143949	test: 0.8273062	best: 0.8273062 (881)	total: 1m 27s	remaining: 1h 53m 52s
882:	learn: 0.9142930	test: 0.8272465	best: 0.8272465 (882)	total: 1m 27s	remaining: 1h 53m 53s
883:	learn: 0.9141900	test: 0.8271940	best: 0.8271940 (883)	total: 1m 27s	remaining: 1h 53m 53s
884:	learn: 0.9140928	test: 0.8271444	best: 0.8271444 (884)	total: 1m 27s	remaining: 1h 53m 53s
885:	learn: 0.9139965	test: 0.8270957	be

961:	learn: 0.9069072	test: 0.8232370	best: 0.8232370 (961)	total: 1m 35s	remaining: 1h 53m 48s
962:	learn: 0.9068074	test: 0.8231888	best: 0.8231888 (962)	total: 1m 35s	remaining: 1h 53m 48s
963:	learn: 0.9067151	test: 0.8231354	best: 0.8231354 (963)	total: 1m 35s	remaining: 1h 53m 48s
964:	learn: 0.9066293	test: 0.8230932	best: 0.8230932 (964)	total: 1m 35s	remaining: 1h 53m 49s
965:	learn: 0.9065374	test: 0.8230496	best: 0.8230496 (965)	total: 1m 35s	remaining: 1h 53m 49s
966:	learn: 0.9064449	test: 0.8229904	best: 0.8229904 (966)	total: 1m 35s	remaining: 1h 53m 49s
967:	learn: 0.9063548	test: 0.8229433	best: 0.8229433 (967)	total: 1m 35s	remaining: 1h 53m 50s
968:	learn: 0.9062606	test: 0.8228927	best: 0.8228927 (968)	total: 1m 35s	remaining: 1h 53m 50s
969:	learn: 0.9061730	test: 0.8228496	best: 0.8228496 (969)	total: 1m 35s	remaining: 1h 53m 49s
970:	learn: 0.9060847	test: 0.8227735	best: 0.8227735 (970)	total: 1m 36s	remaining: 1h 53m 49s
971:	learn: 0.9060019	test: 0.8227419	be

1046:	learn: 0.8995913	test: 0.8193564	best: 0.8193564 (1046)	total: 1m 43s	remaining: 1h 53m 53s
1047:	learn: 0.8995066	test: 0.8193056	best: 0.8193056 (1047)	total: 1m 43s	remaining: 1h 53m 54s
1048:	learn: 0.8994215	test: 0.8192508	best: 0.8192508 (1048)	total: 1m 43s	remaining: 1h 53m 53s
1049:	learn: 0.8993425	test: 0.8192056	best: 0.8192056 (1049)	total: 1m 44s	remaining: 1h 53m 54s
1050:	learn: 0.8992652	test: 0.8191544	best: 0.8191544 (1050)	total: 1m 44s	remaining: 1h 53m 53s
1051:	learn: 0.8991837	test: 0.8191125	best: 0.8191125 (1051)	total: 1m 44s	remaining: 1h 53m 53s
1052:	learn: 0.8991049	test: 0.8190753	best: 0.8190753 (1052)	total: 1m 44s	remaining: 1h 53m 52s
1053:	learn: 0.8990249	test: 0.8190068	best: 0.8190068 (1053)	total: 1m 44s	remaining: 1h 53m 53s
1054:	learn: 0.8989400	test: 0.8189601	best: 0.8189601 (1054)	total: 1m 44s	remaining: 1h 53m 53s
1055:	learn: 0.8988619	test: 0.8189145	best: 0.8189145 (1055)	total: 1m 44s	remaining: 1h 53m 53s
1056:	learn: 0.89878

1130:	learn: 0.8931020	test: 0.8158729	best: 0.8158729 (1130)	total: 1m 52s	remaining: 1h 53m 54s
1131:	learn: 0.8930245	test: 0.8158328	best: 0.8158328 (1131)	total: 1m 52s	remaining: 1h 53m 54s
1132:	learn: 0.8929533	test: 0.8157839	best: 0.8157839 (1132)	total: 1m 52s	remaining: 1h 53m 54s
1133:	learn: 0.8928809	test: 0.8157448	best: 0.8157448 (1133)	total: 1m 52s	remaining: 1h 53m 54s
1134:	learn: 0.8928038	test: 0.8157043	best: 0.8157043 (1134)	total: 1m 52s	remaining: 1h 53m 54s
1135:	learn: 0.8927315	test: 0.8156365	best: 0.8156365 (1135)	total: 1m 52s	remaining: 1h 53m 55s
1136:	learn: 0.8926637	test: 0.8156099	best: 0.8156099 (1136)	total: 1m 52s	remaining: 1h 53m 54s
1137:	learn: 0.8925901	test: 0.8155799	best: 0.8155799 (1137)	total: 1m 52s	remaining: 1h 53m 54s
1138:	learn: 0.8925216	test: 0.8155439	best: 0.8155439 (1138)	total: 1m 53s	remaining: 1h 53m 54s
1139:	learn: 0.8924490	test: 0.8155062	best: 0.8155062 (1139)	total: 1m 53s	remaining: 1h 53m 53s
1140:	learn: 0.89236

1215:	learn: 0.8871410	test: 0.8125510	best: 0.8125510 (1215)	total: 2m	remaining: 1h 53m 57s
1216:	learn: 0.8870678	test: 0.8125073	best: 0.8125073 (1216)	total: 2m	remaining: 1h 53m 57s
1217:	learn: 0.8869999	test: 0.8124745	best: 0.8124745 (1217)	total: 2m 1s	remaining: 1h 53m 57s
1218:	learn: 0.8869357	test: 0.8124503	best: 0.8124503 (1218)	total: 2m 1s	remaining: 1h 53m 58s
1219:	learn: 0.8868707	test: 0.8124159	best: 0.8124159 (1219)	total: 2m 1s	remaining: 1h 53m 58s
1220:	learn: 0.8868042	test: 0.8123543	best: 0.8123543 (1220)	total: 2m 1s	remaining: 1h 53m 57s
1221:	learn: 0.8867400	test: 0.8123199	best: 0.8123199 (1221)	total: 2m 1s	remaining: 1h 53m 57s
1222:	learn: 0.8866742	test: 0.8122961	best: 0.8122961 (1222)	total: 2m 1s	remaining: 1h 53m 57s
1223:	learn: 0.8866087	test: 0.8122380	best: 0.8122380 (1223)	total: 2m 1s	remaining: 1h 53m 57s
1224:	learn: 0.8865427	test: 0.8122070	best: 0.8122070 (1224)	total: 2m 1s	remaining: 1h 53m 57s
1225:	learn: 0.8864799	test: 0.81218

1302:	learn: 0.8815832	test: 0.8097246	best: 0.8097246 (1302)	total: 2m 9s	remaining: 1h 54m 2s
1303:	learn: 0.8815231	test: 0.8096932	best: 0.8096932 (1303)	total: 2m 9s	remaining: 1h 54m 1s
1304:	learn: 0.8814671	test: 0.8096699	best: 0.8096699 (1304)	total: 2m 9s	remaining: 1h 54m 1s
1305:	learn: 0.8814056	test: 0.8096399	best: 0.8096399 (1305)	total: 2m 10s	remaining: 1h 54m 1s
1306:	learn: 0.8813438	test: 0.8096180	best: 0.8096180 (1306)	total: 2m 10s	remaining: 1h 54m 1s
1307:	learn: 0.8812845	test: 0.8095910	best: 0.8095910 (1307)	total: 2m 10s	remaining: 1h 54m 1s
1308:	learn: 0.8812267	test: 0.8095634	best: 0.8095634 (1308)	total: 2m 10s	remaining: 1h 54m 1s
1309:	learn: 0.8811683	test: 0.8095329	best: 0.8095329 (1309)	total: 2m 10s	remaining: 1h 54m
1310:	learn: 0.8811117	test: 0.8095212	best: 0.8095212 (1310)	total: 2m 10s	remaining: 1h 54m 1s
1311:	learn: 0.8810518	test: 0.8094908	best: 0.8094908 (1311)	total: 2m 10s	remaining: 1h 54m 1s
1312:	learn: 0.8809965	test: 0.80946

1389:	learn: 0.8765442	test: 0.8069983	best: 0.8069983 (1389)	total: 2m 18s	remaining: 1h 54m 16s
1390:	learn: 0.8764916	test: 0.8069758	best: 0.8069758 (1390)	total: 2m 19s	remaining: 1h 54m 15s
1391:	learn: 0.8764362	test: 0.8069494	best: 0.8069494 (1391)	total: 2m 19s	remaining: 1h 54m 16s
1392:	learn: 0.8763850	test: 0.8069224	best: 0.8069224 (1392)	total: 2m 19s	remaining: 1h 54m 16s
1393:	learn: 0.8763331	test: 0.8068962	best: 0.8068962 (1393)	total: 2m 19s	remaining: 1h 54m 15s
1394:	learn: 0.8762816	test: 0.8068861	best: 0.8068861 (1394)	total: 2m 19s	remaining: 1h 54m 16s
1395:	learn: 0.8762269	test: 0.8068601	best: 0.8068601 (1395)	total: 2m 19s	remaining: 1h 54m 15s
1396:	learn: 0.8761766	test: 0.8068222	best: 0.8068222 (1396)	total: 2m 19s	remaining: 1h 54m 15s
1397:	learn: 0.8761248	test: 0.8067927	best: 0.8067927 (1397)	total: 2m 19s	remaining: 1h 54m 14s
1398:	learn: 0.8760732	test: 0.8067660	best: 0.8067660 (1398)	total: 2m 19s	remaining: 1h 54m 13s
1399:	learn: 0.87601

1474:	learn: 0.8720624	test: 0.8048339	best: 0.8048339 (1474)	total: 2m 27s	remaining: 1h 54m 7s
1475:	learn: 0.8720157	test: 0.8048177	best: 0.8048177 (1475)	total: 2m 27s	remaining: 1h 54m 7s
1476:	learn: 0.8719683	test: 0.8047840	best: 0.8047840 (1476)	total: 2m 27s	remaining: 1h 54m 8s
1477:	learn: 0.8719208	test: 0.8047579	best: 0.8047579 (1477)	total: 2m 27s	remaining: 1h 54m 7s
1478:	learn: 0.8718672	test: 0.8047268	best: 0.8047268 (1478)	total: 2m 27s	remaining: 1h 54m 7s
1479:	learn: 0.8718157	test: 0.8047020	best: 0.8047020 (1479)	total: 2m 27s	remaining: 1h 54m 7s
1480:	learn: 0.8717669	test: 0.8046884	best: 0.8046884 (1480)	total: 2m 27s	remaining: 1h 54m 7s
1481:	learn: 0.8717157	test: 0.8046706	best: 0.8046706 (1481)	total: 2m 28s	remaining: 1h 54m 7s
1482:	learn: 0.8716683	test: 0.8046485	best: 0.8046485 (1482)	total: 2m 28s	remaining: 1h 54m 6s
1483:	learn: 0.8716173	test: 0.8046313	best: 0.8046313 (1483)	total: 2m 28s	remaining: 1h 54m 7s
1484:	learn: 0.8715619	test: 0

1560:	learn: 0.8678965	test: 0.8027480	best: 0.8027480 (1560)	total: 2m 35s	remaining: 1h 53m 58s
1561:	learn: 0.8678492	test: 0.8027236	best: 0.8027236 (1561)	total: 2m 36s	remaining: 1h 53m 58s
1562:	learn: 0.8678074	test: 0.8027142	best: 0.8027142 (1562)	total: 2m 36s	remaining: 1h 53m 58s
1563:	learn: 0.8677593	test: 0.8026793	best: 0.8026793 (1563)	total: 2m 36s	remaining: 1h 53m 57s
1564:	learn: 0.8677164	test: 0.8026650	best: 0.8026650 (1564)	total: 2m 36s	remaining: 1h 53m 57s
1565:	learn: 0.8676702	test: 0.8026421	best: 0.8026421 (1565)	total: 2m 36s	remaining: 1h 53m 57s
1566:	learn: 0.8676267	test: 0.8026242	best: 0.8026242 (1566)	total: 2m 36s	remaining: 1h 53m 56s
1567:	learn: 0.8675791	test: 0.8026053	best: 0.8026053 (1567)	total: 2m 36s	remaining: 1h 53m 56s
1568:	learn: 0.8675324	test: 0.8025832	best: 0.8025832 (1568)	total: 2m 36s	remaining: 1h 53m 56s
1569:	learn: 0.8674793	test: 0.8025618	best: 0.8025618 (1569)	total: 2m 36s	remaining: 1h 53m 56s
1570:	learn: 0.86743

1646:	learn: 0.8640847	test: 0.8010000	best: 0.8010000 (1646)	total: 2m 44s	remaining: 1h 53m 46s
1647:	learn: 0.8640407	test: 0.8009832	best: 0.8009832 (1647)	total: 2m 44s	remaining: 1h 53m 47s
1648:	learn: 0.8640016	test: 0.8009724	best: 0.8009724 (1648)	total: 2m 44s	remaining: 1h 53m 47s
1649:	learn: 0.8639585	test: 0.8009533	best: 0.8009533 (1649)	total: 2m 44s	remaining: 1h 53m 47s
1650:	learn: 0.8639176	test: 0.8009406	best: 0.8009406 (1650)	total: 2m 44s	remaining: 1h 53m 47s
1651:	learn: 0.8638765	test: 0.8009145	best: 0.8009145 (1651)	total: 2m 45s	remaining: 1h 53m 47s
1652:	learn: 0.8638342	test: 0.8008967	best: 0.8008967 (1652)	total: 2m 45s	remaining: 1h 53m 47s
1653:	learn: 0.8637908	test: 0.8008783	best: 0.8008783 (1653)	total: 2m 45s	remaining: 1h 53m 47s
1654:	learn: 0.8637507	test: 0.8008659	best: 0.8008659 (1654)	total: 2m 45s	remaining: 1h 53m 47s
1655:	learn: 0.8637049	test: 0.8008379	best: 0.8008379 (1655)	total: 2m 45s	remaining: 1h 53m 47s
1656:	learn: 0.86366

1731:	learn: 0.8606101	test: 0.7995558	best: 0.7995558 (1731)	total: 2m 53s	remaining: 1h 53m 43s
1732:	learn: 0.8605728	test: 0.7995472	best: 0.7995472 (1732)	total: 2m 53s	remaining: 1h 53m 43s
1733:	learn: 0.8605365	test: 0.7995342	best: 0.7995342 (1733)	total: 2m 53s	remaining: 1h 53m 43s
1734:	learn: 0.8604957	test: 0.7995073	best: 0.7995073 (1734)	total: 2m 53s	remaining: 1h 53m 43s
1735:	learn: 0.8604544	test: 0.7994928	best: 0.7994928 (1735)	total: 2m 53s	remaining: 1h 53m 43s
1736:	learn: 0.8604188	test: 0.7994632	best: 0.7994632 (1736)	total: 2m 53s	remaining: 1h 53m 43s
1737:	learn: 0.8603778	test: 0.7994431	best: 0.7994431 (1737)	total: 2m 53s	remaining: 1h 53m 44s
1738:	learn: 0.8603327	test: 0.7994211	best: 0.7994211 (1738)	total: 2m 53s	remaining: 1h 53m 45s
1739:	learn: 0.8602960	test: 0.7994177	best: 0.7994177 (1739)	total: 2m 53s	remaining: 1h 53m 45s
1740:	learn: 0.8602565	test: 0.7994005	best: 0.7994005 (1740)	total: 2m 54s	remaining: 1h 53m 45s
1741:	learn: 0.86021

1816:	learn: 0.8574075	test: 0.7981340	best: 0.7981340 (1816)	total: 3m 2s	remaining: 1h 53m 49s
1817:	learn: 0.8573735	test: 0.7981183	best: 0.7981183 (1817)	total: 3m 2s	remaining: 1h 53m 49s
1818:	learn: 0.8573394	test: 0.7981050	best: 0.7981050 (1818)	total: 3m 2s	remaining: 1h 53m 48s
1819:	learn: 0.8573076	test: 0.7980878	best: 0.7980878 (1819)	total: 3m 2s	remaining: 1h 53m 48s
1820:	learn: 0.8572743	test: 0.7980720	best: 0.7980720 (1820)	total: 3m 2s	remaining: 1h 53m 48s
1821:	learn: 0.8572411	test: 0.7980657	best: 0.7980657 (1821)	total: 3m 2s	remaining: 1h 53m 48s
1822:	learn: 0.8572090	test: 0.7980610	best: 0.7980610 (1822)	total: 3m 2s	remaining: 1h 53m 48s
1823:	learn: 0.8571771	test: 0.7980567	best: 0.7980567 (1823)	total: 3m 2s	remaining: 1h 53m 47s
1824:	learn: 0.8571453	test: 0.7980514	best: 0.7980514 (1824)	total: 3m 2s	remaining: 1h 53m 47s
1825:	learn: 0.8571127	test: 0.7980396	best: 0.7980396 (1825)	total: 3m 2s	remaining: 1h 53m 46s
1826:	learn: 0.8570795	test: 0

1901:	learn: 0.8544551	test: 0.7970093	best: 0.7970093 (1901)	total: 3m 10s	remaining: 1h 53m 53s
1902:	learn: 0.8544235	test: 0.7969966	best: 0.7969966 (1902)	total: 3m 10s	remaining: 1h 53m 53s
1903:	learn: 0.8543921	test: 0.7969838	best: 0.7969838 (1903)	total: 3m 11s	remaining: 1h 53m 52s
1904:	learn: 0.8543546	test: 0.7969633	best: 0.7969633 (1904)	total: 3m 11s	remaining: 1h 53m 53s
1905:	learn: 0.8543241	test: 0.7969622	best: 0.7969622 (1905)	total: 3m 11s	remaining: 1h 53m 52s
1906:	learn: 0.8542946	test: 0.7969576	best: 0.7969576 (1906)	total: 3m 11s	remaining: 1h 53m 52s
1907:	learn: 0.8542622	test: 0.7969331	best: 0.7969331 (1907)	total: 3m 11s	remaining: 1h 53m 52s
1908:	learn: 0.8542210	test: 0.7969167	best: 0.7969167 (1908)	total: 3m 11s	remaining: 1h 53m 52s
1909:	learn: 0.8541852	test: 0.7969005	best: 0.7969005 (1909)	total: 3m 11s	remaining: 1h 53m 52s
1910:	learn: 0.8541562	test: 0.7968950	best: 0.7968950 (1910)	total: 3m 11s	remaining: 1h 53m 52s
1911:	learn: 0.85412

1985:	learn: 0.8517413	test: 0.7959101	best: 0.7959101 (1985)	total: 3m 19s	remaining: 1h 53m 52s
1986:	learn: 0.8517114	test: 0.7959034	best: 0.7959034 (1986)	total: 3m 19s	remaining: 1h 53m 53s
1987:	learn: 0.8516775	test: 0.7958856	best: 0.7958856 (1987)	total: 3m 19s	remaining: 1h 53m 53s
1988:	learn: 0.8516447	test: 0.7958723	best: 0.7958723 (1988)	total: 3m 19s	remaining: 1h 53m 54s
1989:	learn: 0.8516145	test: 0.7958551	best: 0.7958551 (1989)	total: 3m 19s	remaining: 1h 53m 53s
1990:	learn: 0.8515756	test: 0.7958459	best: 0.7958459 (1990)	total: 3m 20s	remaining: 1h 53m 54s
1991:	learn: 0.8515489	test: 0.7958418	best: 0.7958418 (1991)	total: 3m 20s	remaining: 1h 53m 53s
1992:	learn: 0.8515196	test: 0.7957772	best: 0.7957772 (1992)	total: 3m 20s	remaining: 1h 53m 53s
1993:	learn: 0.8514849	test: 0.7957599	best: 0.7957599 (1993)	total: 3m 20s	remaining: 1h 53m 53s
1994:	learn: 0.8514562	test: 0.7957528	best: 0.7957528 (1994)	total: 3m 20s	remaining: 1h 53m 53s
1995:	learn: 0.85142

2069:	learn: 0.8492315	test: 0.7949585	best: 0.7949585 (2069)	total: 3m 28s	remaining: 1h 53m 56s
2070:	learn: 0.8492022	test: 0.7949503	best: 0.7949503 (2070)	total: 3m 28s	remaining: 1h 53m 57s
2071:	learn: 0.8491692	test: 0.7949382	best: 0.7949382 (2071)	total: 3m 28s	remaining: 1h 53m 57s
2072:	learn: 0.8491272	test: 0.7949175	best: 0.7949175 (2072)	total: 3m 28s	remaining: 1h 53m 58s
2073:	learn: 0.8490985	test: 0.7949108	best: 0.7949108 (2073)	total: 3m 28s	remaining: 1h 53m 57s
2074:	learn: 0.8490730	test: 0.7949036	best: 0.7949036 (2074)	total: 3m 28s	remaining: 1h 53m 57s
2075:	learn: 0.8490486	test: 0.7949009	best: 0.7949009 (2075)	total: 3m 28s	remaining: 1h 53m 57s
2076:	learn: 0.8490212	test: 0.7948870	best: 0.7948870 (2076)	total: 3m 29s	remaining: 1h 53m 57s
2077:	learn: 0.8489939	test: 0.7948711	best: 0.7948711 (2077)	total: 3m 29s	remaining: 1h 53m 57s
2078:	learn: 0.8489645	test: 0.7948494	best: 0.7948494 (2078)	total: 3m 29s	remaining: 1h 53m 57s
2079:	learn: 0.84893

2155:	learn: 0.8467278	test: 0.7939810	best: 0.7939810 (2155)	total: 3m 37s	remaining: 1h 54m 7s
2156:	learn: 0.8467009	test: 0.7939640	best: 0.7939640 (2156)	total: 3m 37s	remaining: 1h 54m 7s
2157:	learn: 0.8466757	test: 0.7939616	best: 0.7939616 (2157)	total: 3m 37s	remaining: 1h 54m 7s
2158:	learn: 0.8466486	test: 0.7939597	best: 0.7939597 (2158)	total: 3m 37s	remaining: 1h 54m 7s
2159:	learn: 0.8466174	test: 0.7939467	best: 0.7939467 (2159)	total: 3m 38s	remaining: 1h 54m 7s
2160:	learn: 0.8465855	test: 0.7939332	best: 0.7939332 (2160)	total: 3m 38s	remaining: 1h 54m 6s
2161:	learn: 0.8465595	test: 0.7939217	best: 0.7939217 (2161)	total: 3m 38s	remaining: 1h 54m 6s
2162:	learn: 0.8465348	test: 0.7939116	best: 0.7939116 (2162)	total: 3m 38s	remaining: 1h 54m 6s
2163:	learn: 0.8464982	test: 0.7939027	best: 0.7939027 (2163)	total: 3m 38s	remaining: 1h 54m 6s
2164:	learn: 0.8464692	test: 0.7938931	best: 0.7938931 (2164)	total: 3m 38s	remaining: 1h 54m 7s
2165:	learn: 0.8464403	test: 0

2240:	learn: 0.8444043	test: 0.7930305	best: 0.7930305 (2240)	total: 3m 46s	remaining: 1h 54m 17s
2241:	learn: 0.8443797	test: 0.7930199	best: 0.7930199 (2241)	total: 3m 46s	remaining: 1h 54m 17s
2242:	learn: 0.8443551	test: 0.7930105	best: 0.7930105 (2242)	total: 3m 47s	remaining: 1h 54m 17s
2243:	learn: 0.8443314	test: 0.7930065	best: 0.7930065 (2243)	total: 3m 47s	remaining: 1h 54m 17s
2244:	learn: 0.8443020	test: 0.7929939	best: 0.7929939 (2244)	total: 3m 47s	remaining: 1h 54m 17s
2245:	learn: 0.8442698	test: 0.7929730	best: 0.7929730 (2245)	total: 3m 47s	remaining: 1h 54m 17s
2246:	learn: 0.8442437	test: 0.7929656	best: 0.7929656 (2246)	total: 3m 47s	remaining: 1h 54m 17s
2247:	learn: 0.8442158	test: 0.7929500	best: 0.7929500 (2247)	total: 3m 47s	remaining: 1h 54m 17s
2248:	learn: 0.8441916	test: 0.7929456	best: 0.7929456 (2248)	total: 3m 47s	remaining: 1h 54m 17s
2249:	learn: 0.8441561	test: 0.7929373	best: 0.7929373 (2249)	total: 3m 47s	remaining: 1h 54m 18s
2250:	learn: 0.84413

2326:	learn: 0.8420784	test: 0.7919179	best: 0.7919179 (2326)	total: 3m 56s	remaining: 1h 54m 30s
2327:	learn: 0.8420482	test: 0.7919066	best: 0.7919066 (2327)	total: 3m 56s	remaining: 1h 54m 30s
2328:	learn: 0.8420284	test: 0.7919056	best: 0.7919056 (2328)	total: 3m 56s	remaining: 1h 54m 30s
2329:	learn: 0.8420076	test: 0.7919040	best: 0.7919040 (2329)	total: 3m 56s	remaining: 1h 54m 30s
2330:	learn: 0.8419865	test: 0.7918993	best: 0.7918993 (2330)	total: 3m 56s	remaining: 1h 54m 30s
2331:	learn: 0.8419649	test: 0.7918960	best: 0.7918960 (2331)	total: 3m 56s	remaining: 1h 54m 30s
2332:	learn: 0.8419433	test: 0.7918294	best: 0.7918294 (2332)	total: 3m 56s	remaining: 1h 54m 29s
2333:	learn: 0.8419196	test: 0.7918199	best: 0.7918199 (2333)	total: 3m 56s	remaining: 1h 54m 29s
2334:	learn: 0.8418869	test: 0.7918123	best: 0.7918123 (2334)	total: 3m 57s	remaining: 1h 54m 30s
2335:	learn: 0.8418558	test: 0.7917955	best: 0.7917955 (2335)	total: 3m 57s	remaining: 1h 54m 30s
2336:	learn: 0.84183

2412:	learn: 0.8399601	test: 0.7910327	best: 0.7910317 (2411)	total: 4m 5s	remaining: 1h 54m 30s
2413:	learn: 0.8399317	test: 0.7910225	best: 0.7910225 (2413)	total: 4m 5s	remaining: 1h 54m 31s
2414:	learn: 0.8399062	test: 0.7910142	best: 0.7910142 (2414)	total: 4m 5s	remaining: 1h 54m 31s
2415:	learn: 0.8398800	test: 0.7910034	best: 0.7910034 (2415)	total: 4m 5s	remaining: 1h 54m 30s
2416:	learn: 0.8398592	test: 0.7909924	best: 0.7909924 (2416)	total: 4m 5s	remaining: 1h 54m 30s
2417:	learn: 0.8398399	test: 0.7909439	best: 0.7909439 (2417)	total: 4m 5s	remaining: 1h 54m 30s
2418:	learn: 0.8398122	test: 0.7909361	best: 0.7909361 (2418)	total: 4m 5s	remaining: 1h 54m 30s
2419:	learn: 0.8397856	test: 0.7909178	best: 0.7909178 (2419)	total: 4m 6s	remaining: 1h 54m 30s
2420:	learn: 0.8397630	test: 0.7909160	best: 0.7909160 (2420)	total: 4m 6s	remaining: 1h 54m 30s
2421:	learn: 0.8397406	test: 0.7909033	best: 0.7909033 (2421)	total: 4m 6s	remaining: 1h 54m 30s
2422:	learn: 0.8397114	test: 0

2497:	learn: 0.8379643	test: 0.7901542	best: 0.7901542 (2497)	total: 4m 14s	remaining: 1h 54m 37s
2498:	learn: 0.8379363	test: 0.7901377	best: 0.7901377 (2498)	total: 4m 14s	remaining: 1h 54m 38s
2499:	learn: 0.8379178	test: 0.7901365	best: 0.7901365 (2499)	total: 4m 14s	remaining: 1h 54m 37s
2500:	learn: 0.8378934	test: 0.7901313	best: 0.7901313 (2500)	total: 4m 14s	remaining: 1h 54m 37s
2501:	learn: 0.8378641	test: 0.7901216	best: 0.7901216 (2501)	total: 4m 14s	remaining: 1h 54m 38s
2502:	learn: 0.8378460	test: 0.7901209	best: 0.7901209 (2502)	total: 4m 15s	remaining: 1h 54m 38s
2503:	learn: 0.8378159	test: 0.7901016	best: 0.7901016 (2503)	total: 4m 15s	remaining: 1h 54m 39s
2504:	learn: 0.8377908	test: 0.7900902	best: 0.7900902 (2504)	total: 4m 15s	remaining: 1h 54m 39s
2505:	learn: 0.8377577	test: 0.7900757	best: 0.7900757 (2505)	total: 4m 15s	remaining: 1h 54m 40s
2506:	learn: 0.8377369	test: 0.7900739	best: 0.7900739 (2506)	total: 4m 15s	remaining: 1h 54m 40s
2507:	learn: 0.83770

2581:	learn: 0.8360050	test: 0.7893299	best: 0.7893299 (2581)	total: 4m 23s	remaining: 1h 54m 44s
2582:	learn: 0.8359779	test: 0.7893147	best: 0.7893147 (2582)	total: 4m 23s	remaining: 1h 54m 45s
2583:	learn: 0.8359616	test: 0.7893076	best: 0.7893076 (2583)	total: 4m 23s	remaining: 1h 54m 45s
2584:	learn: 0.8359422	test: 0.7892788	best: 0.7892788 (2584)	total: 4m 24s	remaining: 1h 54m 45s
2585:	learn: 0.8359248	test: 0.7892748	best: 0.7892748 (2585)	total: 4m 24s	remaining: 1h 54m 45s
2586:	learn: 0.8359036	test: 0.7892626	best: 0.7892626 (2586)	total: 4m 24s	remaining: 1h 54m 45s
2587:	learn: 0.8358859	test: 0.7892588	best: 0.7892588 (2587)	total: 4m 24s	remaining: 1h 54m 45s
2588:	learn: 0.8358670	test: 0.7892513	best: 0.7892513 (2588)	total: 4m 24s	remaining: 1h 54m 45s
2589:	learn: 0.8358492	test: 0.7892450	best: 0.7892450 (2589)	total: 4m 24s	remaining: 1h 54m 45s
2590:	learn: 0.8358311	test: 0.7892334	best: 0.7892334 (2590)	total: 4m 24s	remaining: 1h 54m 45s
2591:	learn: 0.83581

2665:	learn: 0.8342441	test: 0.7886347	best: 0.7886347 (2665)	total: 4m 32s	remaining: 1h 54m 54s
2666:	learn: 0.8342148	test: 0.7886144	best: 0.7886144 (2666)	total: 4m 33s	remaining: 1h 54m 55s
2667:	learn: 0.8341919	test: 0.7886084	best: 0.7886084 (2667)	total: 4m 33s	remaining: 1h 54m 55s
2668:	learn: 0.8341659	test: 0.7885991	best: 0.7885991 (2668)	total: 4m 33s	remaining: 1h 54m 56s
2669:	learn: 0.8341440	test: 0.7885902	best: 0.7885902 (2669)	total: 4m 33s	remaining: 1h 54m 56s
2670:	learn: 0.8341175	test: 0.7885819	best: 0.7885819 (2670)	total: 4m 33s	remaining: 1h 54m 56s
2671:	learn: 0.8340954	test: 0.7885768	best: 0.7885768 (2671)	total: 4m 33s	remaining: 1h 54m 57s
2672:	learn: 0.8340790	test: 0.7885744	best: 0.7885744 (2672)	total: 4m 33s	remaining: 1h 54m 56s
2673:	learn: 0.8340642	test: 0.7885750	best: 0.7885744 (2672)	total: 4m 33s	remaining: 1h 54m 56s
2674:	learn: 0.8340451	test: 0.7885270	best: 0.7885270 (2674)	total: 4m 34s	remaining: 1h 54m 56s
2675:	learn: 0.83402

2751:	learn: 0.8324627	test: 0.7879045	best: 0.7879045 (2751)	total: 4m 42s	remaining: 1h 55m 6s
2752:	learn: 0.8324435	test: 0.7879016	best: 0.7879016 (2752)	total: 4m 42s	remaining: 1h 55m 6s
2753:	learn: 0.8324258	test: 0.7879020	best: 0.7879016 (2752)	total: 4m 42s	remaining: 1h 55m 6s
2754:	learn: 0.8324071	test: 0.7879006	best: 0.7879006 (2754)	total: 4m 42s	remaining: 1h 55m 6s
2755:	learn: 0.8323901	test: 0.7878977	best: 0.7878977 (2755)	total: 4m 43s	remaining: 1h 55m 6s
2756:	learn: 0.8323690	test: 0.7878950	best: 0.7878950 (2756)	total: 4m 43s	remaining: 1h 55m 6s
2757:	learn: 0.8323480	test: 0.7878774	best: 0.7878774 (2757)	total: 4m 43s	remaining: 1h 55m 6s
2758:	learn: 0.8323256	test: 0.7878669	best: 0.7878669 (2758)	total: 4m 43s	remaining: 1h 55m 7s
2759:	learn: 0.8323043	test: 0.7878642	best: 0.7878642 (2759)	total: 4m 43s	remaining: 1h 55m 7s
2760:	learn: 0.8322834	test: 0.7878472	best: 0.7878472 (2760)	total: 4m 43s	remaining: 1h 55m 7s
2761:	learn: 0.8322630	test: 0

2836:	learn: 0.8307825	test: 0.7872130	best: 0.7872130 (2836)	total: 4m 52s	remaining: 1h 55m 15s
2837:	learn: 0.8307685	test: 0.7872102	best: 0.7872102 (2837)	total: 4m 52s	remaining: 1h 55m 15s
2838:	learn: 0.8307457	test: 0.7871987	best: 0.7871987 (2838)	total: 4m 52s	remaining: 1h 55m 15s
2839:	learn: 0.8307254	test: 0.7871745	best: 0.7871745 (2839)	total: 4m 52s	remaining: 1h 55m 15s
2840:	learn: 0.8307018	test: 0.7871647	best: 0.7871647 (2840)	total: 4m 52s	remaining: 1h 55m 15s
2841:	learn: 0.8306821	test: 0.7871569	best: 0.7871569 (2841)	total: 4m 52s	remaining: 1h 55m 15s
2842:	learn: 0.8306625	test: 0.7871463	best: 0.7871463 (2842)	total: 4m 52s	remaining: 1h 55m 15s
2843:	learn: 0.8306393	test: 0.7871445	best: 0.7871445 (2843)	total: 4m 52s	remaining: 1h 55m 16s
2844:	learn: 0.8306246	test: 0.7871417	best: 0.7871417 (2844)	total: 4m 53s	remaining: 1h 55m 16s
2845:	learn: 0.8306048	test: 0.7871354	best: 0.7871354 (2845)	total: 4m 53s	remaining: 1h 55m 16s
2846:	learn: 0.83058

2921:	learn: 0.8291443	test: 0.7863497	best: 0.7863497 (2921)	total: 5m 1s	remaining: 1h 55m 23s
2922:	learn: 0.8291291	test: 0.7863247	best: 0.7863247 (2922)	total: 5m 1s	remaining: 1h 55m 22s
2923:	learn: 0.8291073	test: 0.7863154	best: 0.7863154 (2923)	total: 5m 1s	remaining: 1h 55m 23s
2924:	learn: 0.8290896	test: 0.7863151	best: 0.7863151 (2924)	total: 5m 1s	remaining: 1h 55m 23s
2925:	learn: 0.8290714	test: 0.7863148	best: 0.7863148 (2925)	total: 5m 2s	remaining: 1h 55m 23s
2926:	learn: 0.8290519	test: 0.7863106	best: 0.7863106 (2926)	total: 5m 2s	remaining: 1h 55m 23s
2927:	learn: 0.8290307	test: 0.7863013	best: 0.7863013 (2927)	total: 5m 2s	remaining: 1h 55m 24s
2928:	learn: 0.8290123	test: 0.7862949	best: 0.7862949 (2928)	total: 5m 2s	remaining: 1h 55m 24s
2929:	learn: 0.8289904	test: 0.7862876	best: 0.7862876 (2929)	total: 5m 2s	remaining: 1h 55m 24s
2930:	learn: 0.8289714	test: 0.7862662	best: 0.7862662 (2930)	total: 5m 2s	remaining: 1h 55m 24s
2931:	learn: 0.8289529	test: 0

3006:	learn: 0.8275708	test: 0.7857235	best: 0.7857215 (3004)	total: 5m 11s	remaining: 1h 55m 33s
3007:	learn: 0.8275554	test: 0.7857176	best: 0.7857176 (3007)	total: 5m 11s	remaining: 1h 55m 33s
3008:	learn: 0.8275383	test: 0.7857172	best: 0.7857172 (3008)	total: 5m 11s	remaining: 1h 55m 33s
3009:	learn: 0.8275196	test: 0.7857093	best: 0.7857093 (3009)	total: 5m 11s	remaining: 1h 55m 33s
3010:	learn: 0.8275016	test: 0.7857019	best: 0.7857019 (3010)	total: 5m 11s	remaining: 1h 55m 33s
3011:	learn: 0.8274754	test: 0.7856928	best: 0.7856928 (3011)	total: 5m 11s	remaining: 1h 55m 33s
3012:	learn: 0.8274566	test: 0.7856933	best: 0.7856928 (3011)	total: 5m 11s	remaining: 1h 55m 33s
3013:	learn: 0.8274392	test: 0.7856942	best: 0.7856928 (3011)	total: 5m 11s	remaining: 1h 55m 34s
3014:	learn: 0.8274233	test: 0.7856839	best: 0.7856839 (3014)	total: 5m 12s	remaining: 1h 55m 34s
3015:	learn: 0.8274077	test: 0.7856295	best: 0.7856295 (3015)	total: 5m 12s	remaining: 1h 55m 34s
3016:	learn: 0.82739

3091:	learn: 0.8260673	test: 0.7852109	best: 0.7852109 (3091)	total: 5m 20s	remaining: 1h 55m 38s
3092:	learn: 0.8260494	test: 0.7852006	best: 0.7852006 (3092)	total: 5m 20s	remaining: 1h 55m 38s
3093:	learn: 0.8260378	test: 0.7852015	best: 0.7852006 (3092)	total: 5m 20s	remaining: 1h 55m 38s
3094:	learn: 0.8260242	test: 0.7851969	best: 0.7851969 (3094)	total: 5m 20s	remaining: 1h 55m 38s
3095:	learn: 0.8260055	test: 0.7851901	best: 0.7851901 (3095)	total: 5m 21s	remaining: 1h 55m 38s
3096:	learn: 0.8259844	test: 0.7851835	best: 0.7851835 (3096)	total: 5m 21s	remaining: 1h 55m 38s
3097:	learn: 0.8259712	test: 0.7851812	best: 0.7851812 (3097)	total: 5m 21s	remaining: 1h 55m 38s
3098:	learn: 0.8259459	test: 0.7851730	best: 0.7851730 (3098)	total: 5m 21s	remaining: 1h 55m 38s
3099:	learn: 0.8259270	test: 0.7851642	best: 0.7851642 (3099)	total: 5m 21s	remaining: 1h 55m 38s
3100:	learn: 0.8259139	test: 0.7851596	best: 0.7851596 (3100)	total: 5m 21s	remaining: 1h 55m 38s
3101:	learn: 0.82589

3175:	learn: 0.8246307	test: 0.7845716	best: 0.7845716 (3175)	total: 5m 30s	remaining: 1h 55m 45s
3176:	learn: 0.8246124	test: 0.7845689	best: 0.7845689 (3176)	total: 5m 30s	remaining: 1h 55m 45s
3177:	learn: 0.8245941	test: 0.7845653	best: 0.7845653 (3177)	total: 5m 30s	remaining: 1h 55m 45s
3178:	learn: 0.8245789	test: 0.7845648	best: 0.7845648 (3178)	total: 5m 30s	remaining: 1h 55m 45s
3179:	learn: 0.8245590	test: 0.7845582	best: 0.7845582 (3179)	total: 5m 30s	remaining: 1h 55m 46s
3180:	learn: 0.8245429	test: 0.7844679	best: 0.7844679 (3180)	total: 5m 30s	remaining: 1h 55m 46s
3181:	learn: 0.8245249	test: 0.7844659	best: 0.7844659 (3181)	total: 5m 30s	remaining: 1h 55m 45s
3182:	learn: 0.8245027	test: 0.7844590	best: 0.7844590 (3182)	total: 5m 30s	remaining: 1h 55m 46s
3183:	learn: 0.8244910	test: 0.7844588	best: 0.7844588 (3183)	total: 5m 31s	remaining: 1h 55m 46s
3184:	learn: 0.8244751	test: 0.7844427	best: 0.7844427 (3184)	total: 5m 31s	remaining: 1h 55m 46s
3185:	learn: 0.82445

3259:	learn: 0.8232185	test: 0.7838973	best: 0.7838973 (3259)	total: 5m 39s	remaining: 1h 55m 47s
3260:	learn: 0.8232011	test: 0.7838958	best: 0.7838958 (3260)	total: 5m 39s	remaining: 1h 55m 47s
3261:	learn: 0.8231870	test: 0.7838300	best: 0.7838300 (3261)	total: 5m 39s	remaining: 1h 55m 47s
3262:	learn: 0.8231710	test: 0.7838289	best: 0.7838289 (3262)	total: 5m 39s	remaining: 1h 55m 47s
3263:	learn: 0.8231584	test: 0.7838235	best: 0.7838235 (3263)	total: 5m 39s	remaining: 1h 55m 47s
3264:	learn: 0.8231427	test: 0.7838151	best: 0.7838151 (3264)	total: 5m 39s	remaining: 1h 55m 47s
3265:	learn: 0.8231290	test: 0.7837949	best: 0.7837949 (3265)	total: 5m 40s	remaining: 1h 55m 47s
3266:	learn: 0.8231065	test: 0.7837888	best: 0.7837888 (3266)	total: 5m 40s	remaining: 1h 55m 47s
3267:	learn: 0.8230840	test: 0.7837704	best: 0.7837704 (3267)	total: 5m 40s	remaining: 1h 55m 47s
3268:	learn: 0.8230687	test: 0.7836931	best: 0.7836931 (3268)	total: 5m 40s	remaining: 1h 55m 47s
3269:	learn: 0.82305

3343:	learn: 0.8217977	test: 0.7832030	best: 0.7832030 (3343)	total: 5m 48s	remaining: 1h 55m 48s
3344:	learn: 0.8217790	test: 0.7831922	best: 0.7831922 (3344)	total: 5m 48s	remaining: 1h 55m 49s
3345:	learn: 0.8217619	test: 0.7831831	best: 0.7831831 (3345)	total: 5m 48s	remaining: 1h 55m 49s
3346:	learn: 0.8217424	test: 0.7831770	best: 0.7831770 (3346)	total: 5m 48s	remaining: 1h 55m 49s
3347:	learn: 0.8217309	test: 0.7831709	best: 0.7831709 (3347)	total: 5m 49s	remaining: 1h 55m 49s
3348:	learn: 0.8217116	test: 0.7831654	best: 0.7831654 (3348)	total: 5m 49s	remaining: 1h 55m 49s
3349:	learn: 0.8216943	test: 0.7831603	best: 0.7831603 (3349)	total: 5m 49s	remaining: 1h 55m 49s
3350:	learn: 0.8216803	test: 0.7831593	best: 0.7831593 (3350)	total: 5m 49s	remaining: 1h 55m 49s
3351:	learn: 0.8216710	test: 0.7831559	best: 0.7831559 (3351)	total: 5m 49s	remaining: 1h 55m 49s
3352:	learn: 0.8216531	test: 0.7831563	best: 0.7831559 (3351)	total: 5m 49s	remaining: 1h 55m 49s
3353:	learn: 0.82163

3428:	learn: 0.8204404	test: 0.7826317	best: 0.7826317 (3428)	total: 5m 58s	remaining: 1h 55m 51s
3429:	learn: 0.8204265	test: 0.7826228	best: 0.7826228 (3429)	total: 5m 58s	remaining: 1h 55m 51s
3430:	learn: 0.8204098	test: 0.7826239	best: 0.7826228 (3429)	total: 5m 58s	remaining: 1h 55m 51s
3431:	learn: 0.8203936	test: 0.7826146	best: 0.7826146 (3431)	total: 5m 58s	remaining: 1h 55m 51s
3432:	learn: 0.8203747	test: 0.7826068	best: 0.7826068 (3432)	total: 5m 58s	remaining: 1h 55m 51s
3433:	learn: 0.8203652	test: 0.7826313	best: 0.7826068 (3432)	total: 5m 58s	remaining: 1h 55m 51s
3434:	learn: 0.8203480	test: 0.7826265	best: 0.7826068 (3432)	total: 5m 58s	remaining: 1h 55m 51s
3435:	learn: 0.8203278	test: 0.7826195	best: 0.7826068 (3432)	total: 5m 58s	remaining: 1h 55m 51s
3436:	learn: 0.8203159	test: 0.7826152	best: 0.7826068 (3432)	total: 5m 58s	remaining: 1h 55m 51s
3437:	learn: 0.8203039	test: 0.7826141	best: 0.7826068 (3432)	total: 5m 59s	remaining: 1h 55m 51s
3438:	learn: 0.82028

3514:	learn: 0.8191243	test: 0.7820653	best: 0.7820653 (3514)	total: 6m 7s	remaining: 1h 55m 53s
3515:	learn: 0.8191129	test: 0.7820610	best: 0.7820610 (3515)	total: 6m 7s	remaining: 1h 55m 53s
3516:	learn: 0.8190947	test: 0.7820608	best: 0.7820608 (3516)	total: 6m 7s	remaining: 1h 55m 53s
3517:	learn: 0.8190783	test: 0.7820538	best: 0.7820538 (3517)	total: 6m 7s	remaining: 1h 55m 53s
3518:	learn: 0.8190638	test: 0.7820540	best: 0.7820538 (3517)	total: 6m 8s	remaining: 1h 55m 53s
3519:	learn: 0.8190473	test: 0.7820409	best: 0.7820409 (3519)	total: 6m 8s	remaining: 1h 55m 53s
3520:	learn: 0.8190376	test: 0.7820349	best: 0.7820349 (3520)	total: 6m 8s	remaining: 1h 55m 53s
3521:	learn: 0.8190202	test: 0.7820272	best: 0.7820272 (3521)	total: 6m 8s	remaining: 1h 55m 53s
3522:	learn: 0.8190024	test: 0.7820234	best: 0.7820234 (3522)	total: 6m 8s	remaining: 1h 55m 53s
3523:	learn: 0.8189862	test: 0.7820125	best: 0.7820125 (3523)	total: 6m 8s	remaining: 1h 55m 53s
3524:	learn: 0.8189768	test: 0

3599:	learn: 0.8178639	test: 0.7816496	best: 0.7816496 (3599)	total: 6m 16s	remaining: 1h 55m 53s
3600:	learn: 0.8178505	test: 0.7816426	best: 0.7816426 (3600)	total: 6m 17s	remaining: 1h 55m 52s
3601:	learn: 0.8178361	test: 0.7816370	best: 0.7816370 (3601)	total: 6m 17s	remaining: 1h 55m 52s
3602:	learn: 0.8178228	test: 0.7816319	best: 0.7816319 (3602)	total: 6m 17s	remaining: 1h 55m 52s
3603:	learn: 0.8178095	test: 0.7816277	best: 0.7816277 (3603)	total: 6m 17s	remaining: 1h 55m 52s
3604:	learn: 0.8177953	test: 0.7816262	best: 0.7816262 (3604)	total: 6m 17s	remaining: 1h 55m 52s
3605:	learn: 0.8177802	test: 0.7816197	best: 0.7816197 (3605)	total: 6m 17s	remaining: 1h 55m 52s
3606:	learn: 0.8177702	test: 0.7816153	best: 0.7816153 (3606)	total: 6m 17s	remaining: 1h 55m 52s
3607:	learn: 0.8177611	test: 0.7816157	best: 0.7816153 (3606)	total: 6m 17s	remaining: 1h 55m 52s
3608:	learn: 0.8177503	test: 0.7816093	best: 0.7816093 (3608)	total: 6m 17s	remaining: 1h 55m 52s
3609:	learn: 0.81773

3684:	learn: 0.8166901	test: 0.7812096	best: 0.7811958 (3683)	total: 6m 26s	remaining: 1h 55m 51s
3685:	learn: 0.8166819	test: 0.7812071	best: 0.7811958 (3683)	total: 6m 26s	remaining: 1h 55m 51s
3686:	learn: 0.8166691	test: 0.7811994	best: 0.7811958 (3683)	total: 6m 26s	remaining: 1h 55m 51s
3687:	learn: 0.8166553	test: 0.7811986	best: 0.7811958 (3683)	total: 6m 26s	remaining: 1h 55m 51s
3688:	learn: 0.8166421	test: 0.7811960	best: 0.7811958 (3683)	total: 6m 26s	remaining: 1h 55m 51s
3689:	learn: 0.8166278	test: 0.7811877	best: 0.7811877 (3689)	total: 6m 26s	remaining: 1h 55m 51s
3690:	learn: 0.8166113	test: 0.7811883	best: 0.7811877 (3689)	total: 6m 26s	remaining: 1h 55m 52s
3691:	learn: 0.8165980	test: 0.7811819	best: 0.7811819 (3691)	total: 6m 27s	remaining: 1h 55m 52s
3692:	learn: 0.8165876	test: 0.7811793	best: 0.7811793 (3692)	total: 6m 27s	remaining: 1h 55m 52s
3693:	learn: 0.8165744	test: 0.7811781	best: 0.7811781 (3693)	total: 6m 27s	remaining: 1h 55m 52s
3694:	learn: 0.81656

3769:	learn: 0.8155150	test: 0.7807045	best: 0.7807045 (3769)	total: 6m 35s	remaining: 1h 55m 55s
3770:	learn: 0.8154966	test: 0.7807012	best: 0.7807012 (3770)	total: 6m 36s	remaining: 1h 55m 55s
3771:	learn: 0.8154814	test: 0.7806911	best: 0.7806911 (3771)	total: 6m 36s	remaining: 1h 55m 55s
3772:	learn: 0.8154609	test: 0.7806813	best: 0.7806813 (3772)	total: 6m 36s	remaining: 1h 55m 55s
3773:	learn: 0.8154432	test: 0.7806827	best: 0.7806813 (3772)	total: 6m 36s	remaining: 1h 55m 55s
3774:	learn: 0.8154336	test: 0.7806781	best: 0.7806781 (3774)	total: 6m 36s	remaining: 1h 55m 55s
3775:	learn: 0.8154148	test: 0.7806725	best: 0.7806725 (3775)	total: 6m 36s	remaining: 1h 55m 56s
3776:	learn: 0.8153990	test: 0.7806673	best: 0.7806673 (3776)	total: 6m 36s	remaining: 1h 55m 56s
3777:	learn: 0.8153846	test: 0.7806583	best: 0.7806583 (3777)	total: 6m 36s	remaining: 1h 55m 56s
3778:	learn: 0.8153710	test: 0.7806499	best: 0.7806499 (3778)	total: 6m 36s	remaining: 1h 55m 56s
3779:	learn: 0.81535

3854:	learn: 0.8144040	test: 0.7803378	best: 0.7803108 (3853)	total: 6m 45s	remaining: 1h 55m 56s
3855:	learn: 0.8143922	test: 0.7803200	best: 0.7803108 (3853)	total: 6m 45s	remaining: 1h 55m 56s
3856:	learn: 0.8143767	test: 0.7803120	best: 0.7803108 (3853)	total: 6m 45s	remaining: 1h 55m 56s
3857:	learn: 0.8143688	test: 0.7803084	best: 0.7803084 (3857)	total: 6m 45s	remaining: 1h 55m 56s
3858:	learn: 0.8143566	test: 0.7803035	best: 0.7803035 (3858)	total: 6m 45s	remaining: 1h 55m 56s
3859:	learn: 0.8143399	test: 0.7802949	best: 0.7802949 (3859)	total: 6m 45s	remaining: 1h 55m 56s
3860:	learn: 0.8143246	test: 0.7802945	best: 0.7802945 (3860)	total: 6m 46s	remaining: 1h 55m 56s
3861:	learn: 0.8143150	test: 0.7802964	best: 0.7802945 (3860)	total: 6m 46s	remaining: 1h 55m 55s
3862:	learn: 0.8143084	test: 0.7802925	best: 0.7802925 (3862)	total: 6m 46s	remaining: 1h 55m 55s
3863:	learn: 0.8143003	test: 0.7802901	best: 0.7802901 (3863)	total: 6m 46s	remaining: 1h 55m 55s
3864:	learn: 0.81428

3938:	learn: 0.8133558	test: 0.7798514	best: 0.7798514 (3938)	total: 6m 54s	remaining: 1h 55m 54s
3939:	learn: 0.8133403	test: 0.7798521	best: 0.7798514 (3938)	total: 6m 54s	remaining: 1h 55m 54s
3940:	learn: 0.8133316	test: 0.7798491	best: 0.7798491 (3940)	total: 6m 54s	remaining: 1h 55m 54s
3941:	learn: 0.8133250	test: 0.7798437	best: 0.7798437 (3941)	total: 6m 55s	remaining: 1h 55m 54s
3942:	learn: 0.8133093	test: 0.7798359	best: 0.7798359 (3942)	total: 6m 55s	remaining: 1h 55m 54s
3943:	learn: 0.8132949	test: 0.7798344	best: 0.7798344 (3943)	total: 6m 55s	remaining: 1h 55m 54s
3944:	learn: 0.8132858	test: 0.7798277	best: 0.7798277 (3944)	total: 6m 55s	remaining: 1h 55m 54s
3945:	learn: 0.8132785	test: 0.7798236	best: 0.7798236 (3945)	total: 6m 55s	remaining: 1h 55m 54s
3946:	learn: 0.8132654	test: 0.7798165	best: 0.7798165 (3946)	total: 6m 55s	remaining: 1h 55m 54s
3947:	learn: 0.8132533	test: 0.7798089	best: 0.7798089 (3947)	total: 6m 55s	remaining: 1h 55m 54s
3948:	learn: 0.81324

4023:	learn: 0.8123775	test: 0.7796298	best: 0.7796268 (4021)	total: 7m 3s	remaining: 1h 55m 51s
4024:	learn: 0.8123633	test: 0.7796283	best: 0.7796268 (4021)	total: 7m 4s	remaining: 1h 55m 51s
4025:	learn: 0.8123492	test: 0.7796241	best: 0.7796241 (4025)	total: 7m 4s	remaining: 1h 55m 51s
4026:	learn: 0.8123400	test: 0.7796246	best: 0.7796241 (4025)	total: 7m 4s	remaining: 1h 55m 51s
4027:	learn: 0.8123287	test: 0.7796222	best: 0.7796222 (4027)	total: 7m 4s	remaining: 1h 55m 51s
4028:	learn: 0.8123143	test: 0.7796179	best: 0.7796179 (4028)	total: 7m 4s	remaining: 1h 55m 51s
4029:	learn: 0.8123053	test: 0.7796127	best: 0.7796127 (4029)	total: 7m 4s	remaining: 1h 55m 51s
4030:	learn: 0.8122961	test: 0.7796122	best: 0.7796122 (4030)	total: 7m 4s	remaining: 1h 55m 51s
4031:	learn: 0.8122836	test: 0.7796063	best: 0.7796063 (4031)	total: 7m 4s	remaining: 1h 55m 51s
4032:	learn: 0.8122673	test: 0.7796077	best: 0.7796063 (4031)	total: 7m 4s	remaining: 1h 55m 51s
4033:	learn: 0.8122577	test: 0

4109:	learn: 0.8114118	test: 0.7793297	best: 0.7793297 (4109)	total: 7m 13s	remaining: 1h 55m 45s
4110:	learn: 0.8113980	test: 0.7793220	best: 0.7793220 (4110)	total: 7m 13s	remaining: 1h 55m 45s
4111:	learn: 0.8113851	test: 0.7793111	best: 0.7793111 (4111)	total: 7m 13s	remaining: 1h 55m 46s
4112:	learn: 0.8113783	test: 0.7793090	best: 0.7793090 (4112)	total: 7m 13s	remaining: 1h 55m 45s
4113:	learn: 0.8113653	test: 0.7793096	best: 0.7793090 (4112)	total: 7m 13s	remaining: 1h 55m 45s
4114:	learn: 0.8113538	test: 0.7793072	best: 0.7793072 (4114)	total: 7m 13s	remaining: 1h 55m 45s
4115:	learn: 0.8113475	test: 0.7793063	best: 0.7793063 (4115)	total: 7m 13s	remaining: 1h 55m 45s
4116:	learn: 0.8113430	test: 0.7793046	best: 0.7793046 (4116)	total: 7m 13s	remaining: 1h 55m 44s
4117:	learn: 0.8113298	test: 0.7793003	best: 0.7793003 (4117)	total: 7m 14s	remaining: 1h 55m 44s
4118:	learn: 0.8113221	test: 0.7792957	best: 0.7792957 (4118)	total: 7m 14s	remaining: 1h 55m 44s
4119:	learn: 0.81130

4195:	learn: 0.8104784	test: 0.7790176	best: 0.7790176 (4195)	total: 7m 22s	remaining: 1h 55m 38s
4196:	learn: 0.8104633	test: 0.7790193	best: 0.7790176 (4195)	total: 7m 22s	remaining: 1h 55m 38s
4197:	learn: 0.8104501	test: 0.7790189	best: 0.7790176 (4195)	total: 7m 22s	remaining: 1h 55m 38s
4198:	learn: 0.8104409	test: 0.7790113	best: 0.7790113 (4198)	total: 7m 22s	remaining: 1h 55m 38s
4199:	learn: 0.8104292	test: 0.7790040	best: 0.7790040 (4199)	total: 7m 22s	remaining: 1h 55m 38s
4200:	learn: 0.8104163	test: 0.7790031	best: 0.7790031 (4200)	total: 7m 22s	remaining: 1h 55m 38s
4201:	learn: 0.8104063	test: 0.7790040	best: 0.7790031 (4200)	total: 7m 23s	remaining: 1h 55m 38s
4202:	learn: 0.8103965	test: 0.7789992	best: 0.7789992 (4202)	total: 7m 23s	remaining: 1h 55m 38s
4203:	learn: 0.8103876	test: 0.7789992	best: 0.7789992 (4202)	total: 7m 23s	remaining: 1h 55m 38s
4204:	learn: 0.8103785	test: 0.7789955	best: 0.7789955 (4204)	total: 7m 23s	remaining: 1h 55m 38s
4205:	learn: 0.81035

4280:	learn: 0.8095312	test: 0.7786855	best: 0.7786855 (4280)	total: 7m 31s	remaining: 1h 55m 38s
4281:	learn: 0.8095225	test: 0.7786819	best: 0.7786819 (4281)	total: 7m 32s	remaining: 1h 55m 38s
4282:	learn: 0.8095091	test: 0.7786814	best: 0.7786814 (4282)	total: 7m 32s	remaining: 1h 55m 38s
4283:	learn: 0.8094990	test: 0.7786757	best: 0.7786757 (4283)	total: 7m 32s	remaining: 1h 55m 38s
4284:	learn: 0.8094880	test: 0.7786748	best: 0.7786748 (4284)	total: 7m 32s	remaining: 1h 55m 38s
4285:	learn: 0.8094789	test: 0.7786702	best: 0.7786702 (4285)	total: 7m 32s	remaining: 1h 55m 38s
4286:	learn: 0.8094667	test: 0.7786693	best: 0.7786693 (4286)	total: 7m 32s	remaining: 1h 55m 38s
4287:	learn: 0.8094603	test: 0.7786650	best: 0.7786650 (4287)	total: 7m 32s	remaining: 1h 55m 38s
4288:	learn: 0.8094560	test: 0.7786632	best: 0.7786632 (4288)	total: 7m 32s	remaining: 1h 55m 38s
4289:	learn: 0.8094493	test: 0.7786602	best: 0.7786602 (4289)	total: 7m 32s	remaining: 1h 55m 37s
4290:	learn: 0.80943

4365:	learn: 0.8086152	test: 0.7784386	best: 0.7784352 (4362)	total: 7m 41s	remaining: 1h 55m 38s
4366:	learn: 0.8086008	test: 0.7784410	best: 0.7784352 (4362)	total: 7m 41s	remaining: 1h 55m 38s
4367:	learn: 0.8085873	test: 0.7784425	best: 0.7784352 (4362)	total: 7m 41s	remaining: 1h 55m 38s
4368:	learn: 0.8085729	test: 0.7784391	best: 0.7784352 (4362)	total: 7m 41s	remaining: 1h 55m 38s
4369:	learn: 0.8085611	test: 0.7784391	best: 0.7784352 (4362)	total: 7m 42s	remaining: 1h 55m 38s
4370:	learn: 0.8085494	test: 0.7783676	best: 0.7783676 (4370)	total: 7m 42s	remaining: 1h 55m 38s
4371:	learn: 0.8085452	test: 0.7783644	best: 0.7783644 (4371)	total: 7m 42s	remaining: 1h 55m 38s
4372:	learn: 0.8085313	test: 0.7783577	best: 0.7783577 (4372)	total: 7m 42s	remaining: 1h 55m 38s
4373:	learn: 0.8085224	test: 0.7783652	best: 0.7783577 (4372)	total: 7m 42s	remaining: 1h 55m 38s
4374:	learn: 0.8085100	test: 0.7783586	best: 0.7783577 (4372)	total: 7m 42s	remaining: 1h 55m 38s
4375:	learn: 0.80850

4449:	learn: 0.8077353	test: 0.7781782	best: 0.7781758 (4448)	total: 7m 50s	remaining: 1h 55m 37s
4450:	learn: 0.8077209	test: 0.7781724	best: 0.7781724 (4450)	total: 7m 51s	remaining: 1h 55m 37s
4451:	learn: 0.8077149	test: 0.7781727	best: 0.7781724 (4450)	total: 7m 51s	remaining: 1h 55m 37s
4452:	learn: 0.8077002	test: 0.7781741	best: 0.7781724 (4450)	total: 7m 51s	remaining: 1h 55m 37s
4453:	learn: 0.8076869	test: 0.7781718	best: 0.7781718 (4453)	total: 7m 51s	remaining: 1h 55m 37s
4454:	learn: 0.8076742	test: 0.7781672	best: 0.7781672 (4454)	total: 7m 51s	remaining: 1h 55m 37s
4455:	learn: 0.8076659	test: 0.7781603	best: 0.7781603 (4455)	total: 7m 51s	remaining: 1h 55m 38s
4456:	learn: 0.8076530	test: 0.7781609	best: 0.7781603 (4455)	total: 7m 51s	remaining: 1h 55m 38s
4457:	learn: 0.8076421	test: 0.7781561	best: 0.7781561 (4457)	total: 7m 51s	remaining: 1h 55m 38s
4458:	learn: 0.8076293	test: 0.7781596	best: 0.7781561 (4457)	total: 7m 52s	remaining: 1h 55m 38s
4459:	learn: 0.80761

4534:	learn: 0.8068342	test: 0.7779503	best: 0.7779431 (4527)	total: 8m	remaining: 1h 55m 38s
4535:	learn: 0.8068236	test: 0.7779457	best: 0.7779431 (4527)	total: 8m	remaining: 1h 55m 38s
4536:	learn: 0.8068103	test: 0.7779491	best: 0.7779431 (4527)	total: 8m	remaining: 1h 55m 38s
4537:	learn: 0.8067977	test: 0.7779438	best: 0.7779431 (4527)	total: 8m 1s	remaining: 1h 55m 38s
4538:	learn: 0.8067881	test: 0.7779448	best: 0.7779431 (4527)	total: 8m 1s	remaining: 1h 55m 38s
4539:	learn: 0.8067795	test: 0.7779348	best: 0.7779348 (4539)	total: 8m 1s	remaining: 1h 55m 38s
4540:	learn: 0.8067733	test: 0.7779337	best: 0.7779337 (4540)	total: 8m 1s	remaining: 1h 55m 38s
4541:	learn: 0.8067652	test: 0.7779256	best: 0.7779256 (4541)	total: 8m 1s	remaining: 1h 55m 38s
4542:	learn: 0.8067554	test: 0.7779209	best: 0.7779209 (4542)	total: 8m 1s	remaining: 1h 55m 38s
4543:	learn: 0.8067443	test: 0.7779228	best: 0.7779209 (4542)	total: 8m 1s	remaining: 1h 55m 38s
4544:	learn: 0.8067337	test: 0.7779207	

<catboost.core.CatBoostRegressor at 0x7f5ecce89588>

In [3]:
#training.to_pickle("pickled/training")
#training = pd.read_pickle("pickled/training")

#pickle.dump(cb_model, open( "pickled/cb_model", "wb"), protocol=4)

cb_model = pickle.load( open( "pickled/cb_model", "rb" ) )

In [6]:
scores = {}
for i,score in enumerate(cb_model.get_feature_importance()):
    scores[cb_features[i]] = score

In [15]:
best_cb_features = [item[0] for item in scores.items() if item[1] > 0.1]

In [10]:
sorted(scores.items(), key=lambda x: x[1])[::-1]

[('item_block_units_lag_1', 32.740321314769616),
 ('shop_category_units', 7.3037773859846205),
 ('item_share_of_total_units', 4.906273669850913),
 ('shop_first_two_blocks_units', 4.547441700039596),
 ('item_units', 4.53217847313849),
 ('item_mean_price_block_lag_2', 3.5639239412024306),
 ('item_number_of_consecutive_days_with_activity', 3.514854655108098),
 ('item_first_two_blocks_units', 3.287811617438042),
 ('shop_max_turnover_block', 3.215991046364685),
 ('shop_category_turnover', 2.1883809310526185),
 ('shop_max_units_block', 2.1503020232523826),
 ('shop_share_of_units', 1.6670817918624485),
 ('shop_units', 1.4990774111978884),
 ('shop_block_units_lag_1', 1.4677487535371527),
 ('item_last_two_blocks_units', 1.4319509614392278),
 ('item_mean_units_block', 1.4086443066998504),
 ('category_mean_turnover_day', 1.304919157401533),
 ('shop_block_turnover_lag_1', 1.1531907893360216),
 ('12', 1.0944565263354495),
 ('shop_min_turnover_block', 1.0516317566694033),
 ('item_first_day', 1.01175

In [27]:
cb_features = xg_features

In [19]:
xg_features = best_cb_features

gc.collect()
params =   {
    'objective' : 'gpu:reg:linear',
    'tree_method':'gpu_hist',
    #'gpu_id': 0,
    'learning_rate': 0.001, 
    #'gamma' : 0.3, 
    #'min_child_weight' : 3,
    #'nthread' : 16,
    #'max_depth' : 30,
    #'subsample' : 0.9, 
    #'colsample_bytree' : 0.5, 
    'seed':42, 
    'eval_metric' : "rmse",
    'num_boost_round' : 70000,
    #'n_estimators':999,
    #'max_leaves': 300
}


tr_data = xgb.DMatrix(x_train[xg_features], y_train)
va_data = xgb.DMatrix(x_val[xg_features], y_val)


watchlist = [(tr_data, 'train'), (va_data, 'valid')]

xg_model = xgb.train(params, tr_data, 70000, watchlist, maximize=False, early_stopping_rounds = 30, verbose_eval=True)

[13:12:28] /workspace/src/objective/regression_obj.cu:153: gpu:reg:linear is now deprecated, use reg:linear instead.
[0]	train-rmse:1.0897	valid-rmse:0.962499
Multiple eval metrics have been passed: 'valid-rmse' will be used for early stopping.

Will train until valid-rmse hasn't improved in 30 rounds.
[1]	train-rmse:1.08928	valid-rmse:0.962189
[2]	train-rmse:1.08886	valid-rmse:0.961879
[3]	train-rmse:1.08845	valid-rmse:0.961571
[4]	train-rmse:1.08803	valid-rmse:0.961262
[5]	train-rmse:1.08761	valid-rmse:0.960954
[6]	train-rmse:1.0872	valid-rmse:0.960647
[7]	train-rmse:1.08679	valid-rmse:0.960341
[8]	train-rmse:1.08637	valid-rmse:0.960035
[9]	train-rmse:1.08596	valid-rmse:0.95973
[10]	train-rmse:1.08555	valid-rmse:0.959424
[11]	train-rmse:1.08514	valid-rmse:0.95912
[12]	train-rmse:1.08473	valid-rmse:0.958816
[13]	train-rmse:1.08432	valid-rmse:0.958513
[14]	train-rmse:1.08391	valid-rmse:0.958209
[15]	train-rmse:1.0835	valid-rmse:0.957907
[16]	train-rmse:1.08309	valid-rmse:0.957604
[17]	

[180]	train-rmse:1.02339	valid-rmse:0.913933
[181]	train-rmse:1.02307	valid-rmse:0.913714
[182]	train-rmse:1.02275	valid-rmse:0.913469
[183]	train-rmse:1.02243	valid-rmse:0.91325
[184]	train-rmse:1.02211	valid-rmse:0.913016
[185]	train-rmse:1.02179	valid-rmse:0.912796
[186]	train-rmse:1.02147	valid-rmse:0.912553
[187]	train-rmse:1.02116	valid-rmse:0.912336
[188]	train-rmse:1.02084	valid-rmse:0.912101
[189]	train-rmse:1.02053	valid-rmse:0.911883
[190]	train-rmse:1.02021	valid-rmse:0.911652
[191]	train-rmse:1.0199	valid-rmse:0.911438
[192]	train-rmse:1.01957	valid-rmse:0.911204
[193]	train-rmse:1.01926	valid-rmse:0.910989
[194]	train-rmse:1.01895	valid-rmse:0.910751
[195]	train-rmse:1.01863	valid-rmse:0.910539
[196]	train-rmse:1.01832	valid-rmse:0.910325
[197]	train-rmse:1.01801	valid-rmse:0.910094
[198]	train-rmse:1.0177	valid-rmse:0.90988
[199]	train-rmse:1.01738	valid-rmse:0.909644
[200]	train-rmse:1.01707	valid-rmse:0.90943
[201]	train-rmse:1.01676	valid-rmse:0.909197
[202]	train-rms

[361]	train-rmse:0.97249	valid-rmse:0.878608
[362]	train-rmse:0.972242	valid-rmse:0.878428
[363]	train-rmse:0.971997	valid-rmse:0.878267
[364]	train-rmse:0.971747	valid-rmse:0.878087
[365]	train-rmse:0.971502	valid-rmse:0.877918
[366]	train-rmse:0.971253	valid-rmse:0.877759
[367]	train-rmse:0.971004	valid-rmse:0.87758
[368]	train-rmse:0.97076	valid-rmse:0.87741
[369]	train-rmse:0.970513	valid-rmse:0.877228
[370]	train-rmse:0.97027	valid-rmse:0.877061
[371]	train-rmse:0.970028	valid-rmse:0.876902
[372]	train-rmse:0.969781	valid-rmse:0.876725
[373]	train-rmse:0.969539	valid-rmse:0.876556
[374]	train-rmse:0.969294	valid-rmse:0.876381
[375]	train-rmse:0.969052	valid-rmse:0.876214
[376]	train-rmse:0.968806	valid-rmse:0.876059
[377]	train-rmse:0.968562	valid-rmse:0.87588
[378]	train-rmse:0.968321	valid-rmse:0.87572
[379]	train-rmse:0.968078	valid-rmse:0.875546
[380]	train-rmse:0.967839	valid-rmse:0.875388
[381]	train-rmse:0.9676	valid-rmse:0.875223
[382]	train-rmse:0.96736	valid-rmse:0.87504

[541]	train-rmse:0.933296	valid-rmse:0.852148
[542]	train-rmse:0.933111	valid-rmse:0.852025
[543]	train-rmse:0.932926	valid-rmse:0.851901
[544]	train-rmse:0.932739	valid-rmse:0.851777
[545]	train-rmse:0.932549	valid-rmse:0.85165
[546]	train-rmse:0.932361	valid-rmse:0.851526
[547]	train-rmse:0.932177	valid-rmse:0.851403
[548]	train-rmse:0.931995	valid-rmse:0.851288
[549]	train-rmse:0.931807	valid-rmse:0.851169
[550]	train-rmse:0.931623	valid-rmse:0.851054
[551]	train-rmse:0.93144	valid-rmse:0.850935
[552]	train-rmse:0.931258	valid-rmse:0.850813
[553]	train-rmse:0.931073	valid-rmse:0.850693
[554]	train-rmse:0.930885	valid-rmse:0.850563
[555]	train-rmse:0.930705	valid-rmse:0.85045
[556]	train-rmse:0.930519	valid-rmse:0.850331
[557]	train-rmse:0.930337	valid-rmse:0.850211
[558]	train-rmse:0.930153	valid-rmse:0.850092
[559]	train-rmse:0.929972	valid-rmse:0.84998
[560]	train-rmse:0.929789	valid-rmse:0.849854
[561]	train-rmse:0.929605	valid-rmse:0.849744
[562]	train-rmse:0.929419	valid-rmse:0

[720]	train-rmse:0.903477	valid-rmse:0.833034
[721]	train-rmse:0.903331	valid-rmse:0.832936
[722]	train-rmse:0.903187	valid-rmse:0.832843
[723]	train-rmse:0.903042	valid-rmse:0.832743
[724]	train-rmse:0.902898	valid-rmse:0.832644
[725]	train-rmse:0.902756	valid-rmse:0.832554
[726]	train-rmse:0.902613	valid-rmse:0.832466
[727]	train-rmse:0.902469	valid-rmse:0.832364
[728]	train-rmse:0.902326	valid-rmse:0.832268
[729]	train-rmse:0.902183	valid-rmse:0.832175
[730]	train-rmse:0.902036	valid-rmse:0.832086
[731]	train-rmse:0.901893	valid-rmse:0.831992
[732]	train-rmse:0.901751	valid-rmse:0.831905
[733]	train-rmse:0.90161	valid-rmse:0.831807
[734]	train-rmse:0.901468	valid-rmse:0.831711
[735]	train-rmse:0.901322	valid-rmse:0.831623
[736]	train-rmse:0.901179	valid-rmse:0.831528
[737]	train-rmse:0.901038	valid-rmse:0.831433
[738]	train-rmse:0.900894	valid-rmse:0.831344
[739]	train-rmse:0.900754	valid-rmse:0.831257
[740]	train-rmse:0.900614	valid-rmse:0.831167
[741]	train-rmse:0.900475	valid-rms

[899]	train-rmse:0.879908	valid-rmse:0.818768
[900]	train-rmse:0.879792	valid-rmse:0.818688
[901]	train-rmse:0.879679	valid-rmse:0.818622
[902]	train-rmse:0.879566	valid-rmse:0.818554
[903]	train-rmse:0.879454	valid-rmse:0.818487
[904]	train-rmse:0.879338	valid-rmse:0.818416
[905]	train-rmse:0.879214	valid-rmse:0.818342
[906]	train-rmse:0.879099	valid-rmse:0.818262
[907]	train-rmse:0.878987	valid-rmse:0.818199
[908]	train-rmse:0.878882	valid-rmse:0.81814
[909]	train-rmse:0.878771	valid-rmse:0.818079
[910]	train-rmse:0.878657	valid-rmse:0.818005
[911]	train-rmse:0.878546	valid-rmse:0.817944
[912]	train-rmse:0.87843	valid-rmse:0.817881
[913]	train-rmse:0.878316	valid-rmse:0.817804
[914]	train-rmse:0.878205	valid-rmse:0.81774
[915]	train-rmse:0.878085	valid-rmse:0.817675
[916]	train-rmse:0.877975	valid-rmse:0.817608
[917]	train-rmse:0.877865	valid-rmse:0.817544
[918]	train-rmse:0.877752	valid-rmse:0.817465
[919]	train-rmse:0.877642	valid-rmse:0.817404
[920]	train-rmse:0.877532	valid-rmse:

[1077]	train-rmse:0.861411	valid-rmse:0.807993
[1078]	train-rmse:0.861311	valid-rmse:0.807945
[1079]	train-rmse:0.861215	valid-rmse:0.807886
[1080]	train-rmse:0.861121	valid-rmse:0.807833
[1081]	train-rmse:0.86103	valid-rmse:0.80779
[1082]	train-rmse:0.860933	valid-rmse:0.807746
[1083]	train-rmse:0.860838	valid-rmse:0.807699
[1084]	train-rmse:0.860752	valid-rmse:0.807649
[1085]	train-rmse:0.860655	valid-rmse:0.807586
[1086]	train-rmse:0.860556	valid-rmse:0.807539
[1087]	train-rmse:0.860469	valid-rmse:0.807484
[1088]	train-rmse:0.860372	valid-rmse:0.807435
[1089]	train-rmse:0.86028	valid-rmse:0.80738
[1090]	train-rmse:0.860193	valid-rmse:0.807329
[1091]	train-rmse:0.860103	valid-rmse:0.807283
[1092]	train-rmse:0.860013	valid-rmse:0.807228
[1093]	train-rmse:0.859915	valid-rmse:0.807174
[1094]	train-rmse:0.859824	valid-rmse:0.807128
[1095]	train-rmse:0.859731	valid-rmse:0.807067
[1096]	train-rmse:0.85964	valid-rmse:0.807012
[1097]	train-rmse:0.859556	valid-rmse:0.806961
[1098]	train-rmse:

[1252]	train-rmse:0.846922	valid-rmse:0.800039
[1253]	train-rmse:0.846848	valid-rmse:0.79999
[1254]	train-rmse:0.846775	valid-rmse:0.799953
[1255]	train-rmse:0.846699	valid-rmse:0.79991
[1256]	train-rmse:0.846626	valid-rmse:0.799866
[1257]	train-rmse:0.846553	valid-rmse:0.799826
[1258]	train-rmse:0.846482	valid-rmse:0.799793
[1259]	train-rmse:0.846408	valid-rmse:0.79976
[1260]	train-rmse:0.846333	valid-rmse:0.799721
[1261]	train-rmse:0.846264	valid-rmse:0.799682
[1262]	train-rmse:0.846188	valid-rmse:0.799639
[1263]	train-rmse:0.846114	valid-rmse:0.799601
[1264]	train-rmse:0.846044	valid-rmse:0.799565
[1265]	train-rmse:0.845972	valid-rmse:0.799528
[1266]	train-rmse:0.845896	valid-rmse:0.799489
[1267]	train-rmse:0.845822	valid-rmse:0.79945
[1268]	train-rmse:0.845751	valid-rmse:0.799407
[1269]	train-rmse:0.845679	valid-rmse:0.799366
[1270]	train-rmse:0.845606	valid-rmse:0.799332
[1271]	train-rmse:0.845533	valid-rmse:0.799295
[1272]	train-rmse:0.845457	valid-rmse:0.799257
[1273]	train-rmse

[1428]	train-rmse:0.834888	valid-rmse:0.793962
[1429]	train-rmse:0.834821	valid-rmse:0.793933
[1430]	train-rmse:0.834761	valid-rmse:0.793908
[1431]	train-rmse:0.834693	valid-rmse:0.793872
[1432]	train-rmse:0.834623	valid-rmse:0.793841
[1433]	train-rmse:0.834562	valid-rmse:0.793804
[1434]	train-rmse:0.834495	valid-rmse:0.793768
[1435]	train-rmse:0.834437	valid-rmse:0.793736
[1436]	train-rmse:0.83437	valid-rmse:0.7937
[1437]	train-rmse:0.8343	valid-rmse:0.79367
[1438]	train-rmse:0.834233	valid-rmse:0.793635
[1439]	train-rmse:0.834174	valid-rmse:0.793611
[1440]	train-rmse:0.834108	valid-rmse:0.793583
[1441]	train-rmse:0.834042	valid-rmse:0.793545
[1442]	train-rmse:0.833984	valid-rmse:0.793524
[1443]	train-rmse:0.833917	valid-rmse:0.79349
[1444]	train-rmse:0.833847	valid-rmse:0.793461
[1445]	train-rmse:0.833783	valid-rmse:0.793429
[1446]	train-rmse:0.833717	valid-rmse:0.793393
[1447]	train-rmse:0.83365	valid-rmse:0.793359
[1448]	train-rmse:0.833591	valid-rmse:0.793324
[1449]	train-rmse:0.8

[1604]	train-rmse:0.82451	valid-rmse:0.789459
[1605]	train-rmse:0.82446	valid-rmse:0.789439
[1606]	train-rmse:0.824406	valid-rmse:0.789417
[1607]	train-rmse:0.824353	valid-rmse:0.789395
[1608]	train-rmse:0.824301	valid-rmse:0.789378
[1609]	train-rmse:0.824248	valid-rmse:0.789364
[1610]	train-rmse:0.824196	valid-rmse:0.789338
[1611]	train-rmse:0.824147	valid-rmse:0.78932
[1612]	train-rmse:0.824093	valid-rmse:0.789298
[1613]	train-rmse:0.824044	valid-rmse:0.78928
[1614]	train-rmse:0.823994	valid-rmse:0.789255
[1615]	train-rmse:0.823942	valid-rmse:0.789237
[1616]	train-rmse:0.823891	valid-rmse:0.789208
[1617]	train-rmse:0.823838	valid-rmse:0.789188
[1618]	train-rmse:0.823791	valid-rmse:0.789171
[1619]	train-rmse:0.823738	valid-rmse:0.789155
[1620]	train-rmse:0.82369	valid-rmse:0.78914
[1621]	train-rmse:0.823637	valid-rmse:0.789118
[1622]	train-rmse:0.823586	valid-rmse:0.78909
[1623]	train-rmse:0.82353	valid-rmse:0.789073
[1624]	train-rmse:0.823479	valid-rmse:0.789052
[1625]	train-rmse:0.8

[1780]	train-rmse:0.81618	valid-rmse:0.786345
[1781]	train-rmse:0.816139	valid-rmse:0.78634
[1782]	train-rmse:0.816096	valid-rmse:0.786324
[1783]	train-rmse:0.816057	valid-rmse:0.786308
[1784]	train-rmse:0.816014	valid-rmse:0.786293
[1785]	train-rmse:0.815964	valid-rmse:0.786275
[1786]	train-rmse:0.815924	valid-rmse:0.786257
[1787]	train-rmse:0.815882	valid-rmse:0.786249
[1788]	train-rmse:0.815839	valid-rmse:0.786228
[1789]	train-rmse:0.815792	valid-rmse:0.786211
[1790]	train-rmse:0.815754	valid-rmse:0.786195
[1791]	train-rmse:0.815709	valid-rmse:0.78619
[1792]	train-rmse:0.815667	valid-rmse:0.786191
[1793]	train-rmse:0.815624	valid-rmse:0.786176
[1794]	train-rmse:0.815578	valid-rmse:0.786154
[1795]	train-rmse:0.815539	valid-rmse:0.786137
[1796]	train-rmse:0.815493	valid-rmse:0.78613
[1797]	train-rmse:0.815454	valid-rmse:0.786113
[1798]	train-rmse:0.815411	valid-rmse:0.7861
[1799]	train-rmse:0.815364	valid-rmse:0.786083
[1800]	train-rmse:0.815325	valid-rmse:0.786064
[1801]	train-rmse:0

[1956]	train-rmse:0.809169	valid-rmse:0.78419
[1957]	train-rmse:0.809129	valid-rmse:0.784177
[1958]	train-rmse:0.809096	valid-rmse:0.78418
[1959]	train-rmse:0.809052	valid-rmse:0.784178
[1960]	train-rmse:0.809015	valid-rmse:0.784162
[1961]	train-rmse:0.808984	valid-rmse:0.784149
[1962]	train-rmse:0.80894	valid-rmse:0.784147
[1963]	train-rmse:0.808908	valid-rmse:0.784133
[1964]	train-rmse:0.808868	valid-rmse:0.78414
[1965]	train-rmse:0.808829	valid-rmse:0.784128
[1966]	train-rmse:0.808796	valid-rmse:0.784114
[1967]	train-rmse:0.808752	valid-rmse:0.784112
[1968]	train-rmse:0.808719	valid-rmse:0.7841
[1969]	train-rmse:0.808676	valid-rmse:0.784098
[1970]	train-rmse:0.808644	valid-rmse:0.784084
[1971]	train-rmse:0.808601	valid-rmse:0.784088
[1972]	train-rmse:0.808562	valid-rmse:0.784076
[1973]	train-rmse:0.808531	valid-rmse:0.784063
[1974]	train-rmse:0.808488	valid-rmse:0.784063
[1975]	train-rmse:0.808455	valid-rmse:0.784066
[1976]	train-rmse:0.808417	valid-rmse:0.78406
[1977]	train-rmse:0.

[2132]	train-rmse:0.802999	valid-rmse:0.78273
[2133]	train-rmse:0.802968	valid-rmse:0.782715
[2134]	train-rmse:0.802928	valid-rmse:0.782714
[2135]	train-rmse:0.8029	valid-rmse:0.782692
[2136]	train-rmse:0.802864	valid-rmse:0.782688
[2137]	train-rmse:0.802827	valid-rmse:0.782675
[2138]	train-rmse:0.802799	valid-rmse:0.782667
[2139]	train-rmse:0.802761	valid-rmse:0.782663
[2140]	train-rmse:0.802733	valid-rmse:0.782651
[2141]	train-rmse:0.802696	valid-rmse:0.782647
[2142]	train-rmse:0.802666	valid-rmse:0.782626
[2143]	train-rmse:0.802629	valid-rmse:0.782623
[2144]	train-rmse:0.802599	valid-rmse:0.7826
[2145]	train-rmse:0.802559	valid-rmse:0.782599
[2146]	train-rmse:0.802533	valid-rmse:0.782592
[2147]	train-rmse:0.802497	valid-rmse:0.782583
[2148]	train-rmse:0.802461	valid-rmse:0.782581
[2149]	train-rmse:0.802431	valid-rmse:0.78256
[2150]	train-rmse:0.802393	valid-rmse:0.782555
[2151]	train-rmse:0.802365	valid-rmse:0.782535
[2152]	train-rmse:0.80233	valid-rmse:0.782531
[2153]	train-rmse:0.

[2308]	train-rmse:0.79743	valid-rmse:0.78126
[2309]	train-rmse:0.797406	valid-rmse:0.781253
[2310]	train-rmse:0.797369	valid-rmse:0.781243
[2311]	train-rmse:0.797342	valid-rmse:0.781232
[2312]	train-rmse:0.797317	valid-rmse:0.781219
[2313]	train-rmse:0.79728	valid-rmse:0.781218
[2314]	train-rmse:0.797255	valid-rmse:0.781212
[2315]	train-rmse:0.797221	valid-rmse:0.781199
[2316]	train-rmse:0.797196	valid-rmse:0.781184
[2317]	train-rmse:0.797166	valid-rmse:0.78122
[2318]	train-rmse:0.797131	valid-rmse:0.781208
[2319]	train-rmse:0.797104	valid-rmse:0.7812
[2320]	train-rmse:0.797067	valid-rmse:0.781189
[2321]	train-rmse:0.797042	valid-rmse:0.781181
[2322]	train-rmse:0.797018	valid-rmse:0.781173
[2323]	train-rmse:0.796993	valid-rmse:0.781165
[2324]	train-rmse:0.796959	valid-rmse:0.781164
[2325]	train-rmse:0.796933	valid-rmse:0.78115
[2326]	train-rmse:0.796899	valid-rmse:0.781137
[2327]	train-rmse:0.796875	valid-rmse:0.781123
[2328]	train-rmse:0.796851	valid-rmse:0.781115
[2329]	train-rmse:0.

[2484]	train-rmse:0.792371	valid-rmse:0.779795
[2485]	train-rmse:0.792351	valid-rmse:0.779797
[2486]	train-rmse:0.79233	valid-rmse:0.779789
[2487]	train-rmse:0.792304	valid-rmse:0.77977
[2488]	train-rmse:0.792275	valid-rmse:0.779765
[2489]	train-rmse:0.792248	valid-rmse:0.779758
[2490]	train-rmse:0.792227	valid-rmse:0.779752
[2491]	train-rmse:0.792201	valid-rmse:0.779746
[2492]	train-rmse:0.79218	valid-rmse:0.779738
[2493]	train-rmse:0.792153	valid-rmse:0.779729
[2494]	train-rmse:0.792132	valid-rmse:0.779721
[2495]	train-rmse:0.792106	valid-rmse:0.779716
[2496]	train-rmse:0.792075	valid-rmse:0.779706
[2497]	train-rmse:0.792053	valid-rmse:0.779699
[2498]	train-rmse:0.792023	valid-rmse:0.779698
[2499]	train-rmse:0.791997	valid-rmse:0.77968
[2500]	train-rmse:0.791976	valid-rmse:0.779672
[2501]	train-rmse:0.791946	valid-rmse:0.77967
[2502]	train-rmse:0.791925	valid-rmse:0.779663
[2503]	train-rmse:0.791891	valid-rmse:0.779667
[2504]	train-rmse:0.791858	valid-rmse:0.779663
[2505]	train-rmse:

[2660]	train-rmse:0.787973	valid-rmse:0.778613
[2661]	train-rmse:0.787948	valid-rmse:0.778611
[2662]	train-rmse:0.787918	valid-rmse:0.778604
[2663]	train-rmse:0.787894	valid-rmse:0.778592
[2664]	train-rmse:0.787869	valid-rmse:0.778587
[2665]	train-rmse:0.787846	valid-rmse:0.778572
[2666]	train-rmse:0.787829	valid-rmse:0.778565
[2667]	train-rmse:0.787807	valid-rmse:0.77856
[2668]	train-rmse:0.787784	valid-rmse:0.778548
[2669]	train-rmse:0.787763	valid-rmse:0.77854
[2670]	train-rmse:0.787733	valid-rmse:0.778492
[2671]	train-rmse:0.787708	valid-rmse:0.778487
[2672]	train-rmse:0.787684	valid-rmse:0.778472
[2673]	train-rmse:0.787661	valid-rmse:0.778452
[2674]	train-rmse:0.787631	valid-rmse:0.778404
[2675]	train-rmse:0.787615	valid-rmse:0.778397
[2676]	train-rmse:0.787594	valid-rmse:0.778393
[2677]	train-rmse:0.787576	valid-rmse:0.778385
[2678]	train-rmse:0.787552	valid-rmse:0.7784
[2679]	train-rmse:0.787531	valid-rmse:0.778387
[2680]	train-rmse:0.787501	valid-rmse:0.778386
[2681]	train-rmse

[2836]	train-rmse:0.78429	valid-rmse:0.777737
[2837]	train-rmse:0.784271	valid-rmse:0.777723
[2838]	train-rmse:0.784253	valid-rmse:0.777717
[2839]	train-rmse:0.784236	valid-rmse:0.777709
[2840]	train-rmse:0.784219	valid-rmse:0.777702
[2841]	train-rmse:0.784199	valid-rmse:0.777706
[2842]	train-rmse:0.784177	valid-rmse:0.777693
[2843]	train-rmse:0.78416	valid-rmse:0.777685
[2844]	train-rmse:0.784141	valid-rmse:0.777672
[2845]	train-rmse:0.784122	valid-rmse:0.77772
[2846]	train-rmse:0.784102	valid-rmse:0.777723
[2847]	train-rmse:0.784084	valid-rmse:0.777711
[2848]	train-rmse:0.784067	valid-rmse:0.777707
[2849]	train-rmse:0.78405	valid-rmse:0.7777
[2850]	train-rmse:0.784029	valid-rmse:0.777688
[2851]	train-rmse:0.784006	valid-rmse:0.777685
[2852]	train-rmse:0.783986	valid-rmse:0.777681
[2853]	train-rmse:0.783968	valid-rmse:0.777667
[2854]	train-rmse:0.783951	valid-rmse:0.777658
[2855]	train-rmse:0.78393	valid-rmse:0.777644
[2856]	train-rmse:0.783912	valid-rmse:0.777638
[2857]	train-rmse:0.

[3012]	train-rmse:0.781022	valid-rmse:0.77722
[3013]	train-rmse:0.781	valid-rmse:0.777223
[3014]	train-rmse:0.780986	valid-rmse:0.777228
[3015]	train-rmse:0.780969	valid-rmse:0.777209
[3016]	train-rmse:0.780953	valid-rmse:0.777221
[3017]	train-rmse:0.780935	valid-rmse:0.777211
[3018]	train-rmse:0.78092	valid-rmse:0.777211
[3019]	train-rmse:0.780895	valid-rmse:0.777215
[3020]	train-rmse:0.780879	valid-rmse:0.777209
[3021]	train-rmse:0.78086	valid-rmse:0.777211
[3022]	train-rmse:0.780847	valid-rmse:0.777207
[3023]	train-rmse:0.780825	valid-rmse:0.777199
[3024]	train-rmse:0.780809	valid-rmse:0.777199
[3025]	train-rmse:0.780793	valid-rmse:0.777212
[3026]	train-rmse:0.780772	valid-rmse:0.777204
[3027]	train-rmse:0.780758	valid-rmse:0.777217
[3028]	train-rmse:0.780742	valid-rmse:0.777231
[3029]	train-rmse:0.780726	valid-rmse:0.77723
[3030]	train-rmse:0.780711	valid-rmse:0.777222
[3031]	train-rmse:0.78069	valid-rmse:0.777224
[3032]	train-rmse:0.780673	valid-rmse:0.777215
[3033]	train-rmse:0.7

[3188]	train-rmse:0.777954	valid-rmse:0.776458
[3189]	train-rmse:0.777939	valid-rmse:0.776454
[3190]	train-rmse:0.777921	valid-rmse:0.776458
[3191]	train-rmse:0.777907	valid-rmse:0.776453
[3192]	train-rmse:0.777887	valid-rmse:0.776446
[3193]	train-rmse:0.777869	valid-rmse:0.776443
[3194]	train-rmse:0.777851	valid-rmse:0.776446
[3195]	train-rmse:0.777837	valid-rmse:0.776439
[3196]	train-rmse:0.777819	valid-rmse:0.776436
[3197]	train-rmse:0.777795	valid-rmse:0.77643
[3198]	train-rmse:0.777778	valid-rmse:0.776428
[3199]	train-rmse:0.777764	valid-rmse:0.776424
[3200]	train-rmse:0.777744	valid-rmse:0.776429
[3201]	train-rmse:0.777726	valid-rmse:0.776425
[3202]	train-rmse:0.777712	valid-rmse:0.776422
[3203]	train-rmse:0.777695	valid-rmse:0.77642
[3204]	train-rmse:0.777677	valid-rmse:0.776424
[3205]	train-rmse:0.77766	valid-rmse:0.776432
[3206]	train-rmse:0.777644	valid-rmse:0.776418
[3207]	train-rmse:0.77763	valid-rmse:0.776413
[3208]	train-rmse:0.777617	valid-rmse:0.776418
[3209]	train-rmse

In [20]:
pickle.dump(xg_model, open( "pickled/xg_model", "wb"), protocol=4)

#xg_model = pickle.load( open( "pickled/xg_model", "rb" ) )

In [21]:
lg_features = best_cb_features


gc.collect()
lgtrain = lgbm.Dataset(x_train[lg_features], label=y_train)
lgval = lgbm.Dataset(x_val[lg_features], label=y_val)



#[0.00542047893814942, 29, 24, 0.39949465609514856, 1, 0.67943500, 10]
params = {
        "num_threads": 16,
        "device": "gpu",
        "verbosity": -1,
        #"zero_as_missing": "true",
        "boosting":'gbdt',
        "objective" : "regression",
        "metric" : "rmse",
        "seed": 42,
        #"max_bin": 10,#default 255
        #"num_leaves": 10, #default 31
        #"bagging_fraction": 0.7,
        #"bagging_freq": 1,
        #"min_data_in_leaf": 50000,
        #"feature_fraction": 0.7,
        #"lambda_l2": 3,
        #"max_depth": 2,
        #"min_gain_to_split": 10,
        "learning_rate" : 0.001,
        #"histogram_pool_size": 1000,
        #"categorical_column": [0,1,2,3,4]
}

evals_result = {}
model_lgb = lgbm.train(params, lgtrain, 20000, 
                      valid_sets=[lgval], 
                      early_stopping_rounds=100, 
                      verbose_eval=10, 
                      evals_result=evals_result)



Training until validation scores don't improve for 100 rounds.
[10]	valid_0's rmse: 0.900989
[20]	valid_0's rmse: 0.89913
[30]	valid_0's rmse: 0.897319
[40]	valid_0's rmse: 0.895552
[50]	valid_0's rmse: 0.893811
[60]	valid_0's rmse: 0.892121
[70]	valid_0's rmse: 0.890454
[80]	valid_0's rmse: 0.888839
[90]	valid_0's rmse: 0.887264
[100]	valid_0's rmse: 0.885691
[110]	valid_0's rmse: 0.884163
[120]	valid_0's rmse: 0.882629
[130]	valid_0's rmse: 0.88112
[140]	valid_0's rmse: 0.87962
[150]	valid_0's rmse: 0.87816
[160]	valid_0's rmse: 0.876703
[170]	valid_0's rmse: 0.875295
[180]	valid_0's rmse: 0.873887
[190]	valid_0's rmse: 0.872521
[200]	valid_0's rmse: 0.871154
[210]	valid_0's rmse: 0.869823
[220]	valid_0's rmse: 0.868482
[230]	valid_0's rmse: 0.867166
[240]	valid_0's rmse: 0.865872
[250]	valid_0's rmse: 0.864607
[260]	valid_0's rmse: 0.863334
[270]	valid_0's rmse: 0.862117
[280]	valid_0's rmse: 0.860869
[290]	valid_0's rmse: 0.859629
[300]	valid_0's rmse: 0.858423
[310]	valid_0's rmse

[2600]	valid_0's rmse: 0.777565
[2610]	valid_0's rmse: 0.777435
[2620]	valid_0's rmse: 0.777323
[2630]	valid_0's rmse: 0.777199
[2640]	valid_0's rmse: 0.777106
[2650]	valid_0's rmse: 0.776995
[2660]	valid_0's rmse: 0.776876
[2670]	valid_0's rmse: 0.776831
[2680]	valid_0's rmse: 0.776716
[2690]	valid_0's rmse: 0.776609
[2700]	valid_0's rmse: 0.776497
[2710]	valid_0's rmse: 0.776411
[2720]	valid_0's rmse: 0.776331
[2730]	valid_0's rmse: 0.776217
[2740]	valid_0's rmse: 0.776096
[2750]	valid_0's rmse: 0.775994
[2760]	valid_0's rmse: 0.775902
[2770]	valid_0's rmse: 0.775794
[2780]	valid_0's rmse: 0.77575
[2790]	valid_0's rmse: 0.775642
[2800]	valid_0's rmse: 0.77558
[2810]	valid_0's rmse: 0.775519
[2820]	valid_0's rmse: 0.775566
[2830]	valid_0's rmse: 0.775547
[2840]	valid_0's rmse: 0.775493
[2850]	valid_0's rmse: 0.775443
[2860]	valid_0's rmse: 0.775468
[2870]	valid_0's rmse: 0.775472
[2880]	valid_0's rmse: 0.775527
[2890]	valid_0's rmse: 0.775497
[2900]	valid_0's rmse: 0.775519
[2910]	val

In [22]:
pickle.dump(model_lgb, open( "pickled/model_lgb", "wb"), protocol=4)

#model_lgb = pickle.load( open( "pickled/model_lgb", "rb" ) )

In [47]:
pd.set_option('display.max_columns', None)  
pd.set_option('display.expand_frame_repr', False)
pd.set_option('max_colwidth', -1)
#x_train[cb_features].sample(10)
training.dtypes

item_id                                     int16  
shop_id                                     uint8  
date_block_num                              uint8  
item_category_id                            uint8  
month                                       uint8  
year                                        uint16 
item_first_block                            uint8  
item_last_block                             uint8  
is_first_two_blocks                         object 
is_last_two_blocks                          object 
item_units                                  float32
item_mean_units_block                       float32
item_day_units                              int16  
item_mean_units_day                         float32
item_max_units_block                        int16  
item_min_units_block                        int16  
item_max_units_day                          int16  
item_min_units_day                          int8   
item_turnover                               int32  
item_mean_tu

In [39]:
test = test.set_index('item_id').join(items.set_index('item_id'))
test['date_block_num'] = 34

In [36]:
transactions_items = pd.read_pickle("pickled/transactions_items")
transactions_items_dtypes = transactions_items.dtypes
test = pd.merge(test, transactions_items[transactions_items_columns].drop_duplicates('item_id'), on=['item_id'], how='left', copy=False)

del transactions_items
fillnas(test, transactions_items_columns, transactions_items_dtypes)
downcast(test, transactions_items_columns, transactions_items_dtypes)
gc.collect()

28

In [38]:
test.head()

Unnamed: 0,ID,shop_id,item_id,item_first_block,item_last_block,is_first_two_blocks,is_last_two_blocks,item_units,item_mean_units_block,item_day_units,...,item_price_amplitude,item_deviation_mean_category_price,item_first_two_blocks_units,item_last_two_blocks_units,item_fluctuation_units_first_last_blocks,item_first_two_blocks_mean_price,item_last_two_blocks_mean_price,item_fluctuation_price_first_last_blocks,item_share_of_total_units,item_share_of_total_turnover
0,0,5,5037,20,33,False,False,1270.0,126.292007,5,...,246.996002,27.636459,127.0,135.0,6.299212,2570.850342,1077.921509,-58.0714,0.060897,0.110578
1,1,5,5320,0,0,,,0.0,0.0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,2,5,5233,27,33,False,False,507.0,99.613335,1,...,100.166946,-46.963413,190.0,122.0,-35.789474,703.52301,901.857117,28.191566,0.024311,0.018165
3,3,5,5232,31,33,False,False,141.0,50.446156,7,...,100.166946,-45.054775,76.0,76.0,-0.0,895.618408,895.618408,-0.0,0.006761,0.004957
4,4,5,5268,0,0,,,0.0,0.0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [40]:
transactions_items_blocks = pd.read_pickle("pickled/transactions_items_blocks")
transactions_items_blocks_dtypes = transactions_items_blocks.dtypes

test = pd.merge(test, transactions_items_blocks[transactions_items_blocks_columns]\
                    .drop_duplicates(['item_id', 'date_block_num']), on=['item_id','date_block_num'], how='left', copy=False)

del transactions_items_blocks
test.fillna(0, inplace=True)
downcast(test, transactions_items_blocks_columns, transactions_items_blocks_dtypes)
gc.collect()

11238

In [41]:
transactions_categories = pd.read_pickle("pickled/transactions_categories")
transactions_categories_dtypes = transactions_categories.dtypes
test = pd.merge(test, transactions_categories[transactions_categories_columns]\
                    .drop_duplicates('item_category_id'), on=['item_category_id'], how='left', copy=False)

del transactions_categories
fillnas(test, transactions_categories_columns, transactions_categories_dtypes)
downcast(test, transactions_categories_columns, transactions_categories_dtypes)
gc.collect()

14

In [42]:
transactions_categories_blocks = pd.read_pickle("pickled/transactions_categories_blocks")
transactions_categories_blocks_dtypes = transactions_categories_blocks.dtypes
test = pd.merge(test, transactions_categories_blocks[transactions_categories_blocks_columns]\
                    .drop_duplicates(['item_category_id', 'date_block_num']), on=['item_category_id', 'date_block_num'], how='left', copy=False)

del transactions_categories_blocks
fillnas(test, transactions_categories_blocks_columns, transactions_categories_blocks_dtypes)
downcast(test, transactions_categories_blocks_columns, transactions_categories_blocks_dtypes)
gc.collect()

14

In [43]:
transactions_shops = pd.read_pickle("pickled/transactions_shops")
transactions_shops_dtypes = transactions_shops.dtypes
test = pd.merge(test, transactions_shops[transactions_shops_columns]\
                    .drop_duplicates('shop_id'), on=['shop_id'], how='left', copy=False)

del transactions_shops
fillnas(test, transactions_shops_columns, transactions_shops_dtypes)
downcast(test, transactions_shops_columns, transactions_shops_dtypes)
gc.collect()

14

In [45]:
transactions_shops_blocks = pd.read_pickle("pickled/transactions_shops_blocks")
transactions_shops_blocks_dtypes = transactions_shops_blocks.dtypes
test = pd.merge(test, transactions_shops_blocks[transactions_shops_blocks_columns]\
                    .drop_duplicates(['shop_id', 'date_block_num']), on=['shop_id', 'date_block_num'], how='left', copy=False)

del transactions_shops_blocks
fillnas(test, transactions_shops_blocks_columns, transactions_shops_blocks_dtypes)
downcast(test, transactions_shops_blocks_columns, transactions_shops_blocks_dtypes)
gc.collect()

14

In [46]:
transactions_shops_categories = pd.read_pickle("pickled/transactions_shops_categories")
transactions_shops_categories_dtypes = transactions_shops_categories.dtypes
test = pd.merge(test, transactions_shops_categories[transactions_shops_categories_columns]\
                    .drop_duplicates(['shop_id','item_category_id']), on=['shop_id','item_category_id'], how='left', copy=False)

del transactions_shops_categories
fillnas(test, transactions_shops_categories_columns, transactions_shops_categories_dtypes)
downcast(test, transactions_shops_categories_columns, transactions_shops_categories_dtypes)
gc.collect()

14

In [44]:
transactions_shops_categories_blocks = pd.read_pickle("pickled/transactions_shops_categories_blocks")
transactions_shops_categories_blocks_dtypes = transactions_shops_categories_blocks.dtypes
test = pd.merge(test, transactions_shops_categories_blocks[transactions_shops_categories_blocks_columns]\
                    .drop_duplicates(['shop_id','item_category_id','date_block_num']),\
                    on=['shop_id','item_category_id','date_block_num'], how='left', copy=False)

del transactions_shops_categories_blocks
fillnas(test, transactions_shops_categories_blocks_columns, transactions_shops_categories_blocks_dtypes)
downcast(test, transactions_shops_categories_blocks_columns, transactions_shops_categories_blocks_dtypes)
gc.collect()

14

In [49]:
gc.collect()
def lagged_name(lag_column, lag):
    return "%s_lag_%d" % (lag_column, lag)

lag_columns = [
 'item_block_units',
 'item_block_turnover',
 'item_mean_price_block',
 ]

idx_columns = ['item_id']

test = add_lag_features(test,lag_columns,idx_columns)

1
2
3


In [51]:
gc.collect()
def lagged_name(lag_column, lag):
    return "%s_lag_%d" % (lag_column, lag)

lag_columns = [
 'category_block_units',
 'category_block_turnover',
 'category_mean_price_block',
 'subcategory_block_units',
 'subcategory_block_turnover',
 'subcategory_mean_price_block',
 ]

idx_columns = ['item_category_id']

test = add_lag_features(test,lag_columns,idx_columns)

1
2
3


In [52]:
gc.collect()
def lagged_name(lag_column, lag):
    return "%s_lag_%d" % (lag_column, lag)

lag_columns = [
 'shop_block_units',
 'shop_block_turnover',
 'shop_mean_price_block',
 'area_block_units',
 'area_block_turnover',
 'area_mean_price_block'
 ]

idx_columns = ['shop_id']


test = add_lag_features(test,lag_columns,idx_columns)

1
2
3


In [3]:
#gc.collect()
#training.to_pickle("pickled/training_mid_lags")
#training = pd.read_pickle("pickled/training_mid_lags")

In [53]:
lag_columns = [
  'shop_category_block_units',
 'shop_category_turnover',
 'shop_category_mean_price_block'
 ]

idx_columns = ['shop_id','item_category_id']


test = add_lag_features(test,lag_columns,idx_columns)

1
2
3


In [54]:
test.drop(columns=['lagged_block'],inplace=True)

In [60]:
for i in range(1,13):
    test[str(i)] = False

test['11'] = True


In [83]:
cb_preds = cb_model.predict(test[cb_features])
cb_preds.clip(0,20,out=cb_preds)

array([0.3875993 , 0.31189891, 0.50810608, ..., 0.48477869, 0.48491959,
       0.47320142])

In [73]:
lg_preds = model_lgb.predict(test[lg_features])
lg_preds.clip(0,20,out=lg_preds)

array([0.33054925, 0.13510253, 0.42999917, ..., 0.40670206, 0.39667443,
       0.38160782])

In [74]:

xg_preds = xg_model.predict(xgb.DMatrix(test[xg_features]))
xg_preds.clip(0,20,out=xg_preds)

array([0.37465316, 0.21515614, 0.47788906, ..., 0.4456131 , 0.40560868,
       0.45044884], dtype=float32)

In [81]:
preds = np.mean(np.array([cb_preds,lg_preds,xg_preds]),axis=0)

In [84]:
submission = test.loc[:,['ID']]
submission['item_cnt_month'] = cb_preds.astype(int)
#submission['item_cnt_month'] = ensemble_preds.astype(int)


submission.to_csv('submission.csv', index=False)