In [1]:
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

In [2]:
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
%matplotlib inline 
from sklearn.ensemble import RandomForestRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.tree import export_graphviz
import matplotlib.pyplot as plt
import seaborn as sns

import lightgbm as lgbm
import gc
import xgboost as xgb
import pickle as pickle


from catboost import CatBoostRegressor


In [3]:
items           = pd.read_csv('items.csv',usecols=["item_id", "item_category_id"])
item_categories = pd.read_csv('item_categories.csv')
shops           = pd.read_csv('shops.csv')
sales_train     = pd.read_csv('sales_train.csv.gz')
test            = pd.read_csv('test.csv.gz')

In [4]:
items['item_id'] = pd.to_numeric(items['item_id'],downcast='unsigned')
items['item_category_id'] = pd.to_numeric(items['item_category_id'],downcast='unsigned')

In [5]:
sales_train['date'] = sales_train['date'].astype('category')
sales_train['date_block_num'] = pd.to_numeric(sales_train['date_block_num'],downcast='unsigned')
sales_train['shop_id'] = pd.to_numeric(sales_train['shop_id'],downcast='unsigned')
sales_train['item_price'] = sales_train['item_price'].astype('int')
sales_train['item_price'] = pd.to_numeric(sales_train['item_price'],downcast='unsigned')
sales_train['item_cnt_day'] = pd.to_numeric(sales_train['item_cnt_day'],downcast='signed')


In [6]:
transactions = sales_train
transactions[['day','month', 'year']] = transactions['date'].str.split('.', expand=True).astype(int)
transactions['day'] = pd.to_numeric(transactions['day'],downcast='unsigned')
transactions['month'] = pd.to_numeric(transactions['month'],downcast='unsigned')
transactions['year'] = pd.to_numeric(transactions['year'],downcast='unsigned')


In [7]:
transactions = transactions.set_index('item_id').join(items.set_index('item_id'))
transactions.reset_index(inplace=True)
transactions = transactions[transactions['year'] != 2013]
transactions['y'] = pd.to_numeric(transactions.groupby(['date_block_num', 'item_id', 'shop_id'])['item_cnt_day'].transform('sum')\
                .clip(0,20), downcast='unsigned')

In [8]:
transactions['y'].dtype

dtype('uint8')

In [9]:
len(transactions)

1668287

In [10]:
transactions['turnover'] = transactions['item_price'] * transactions['item_cnt_day']
transactions['turnover'] = pd.to_numeric(transactions['turnover'], downcast='unsigned')

In [11]:
transactions['item_first_block'] = pd.to_numeric(transactions.groupby('item_id')['date_block_num'].transform(np.min), downcast='unsigned')
transactions['item_last_block'] = pd.to_numeric(transactions.groupby('item_id')['date_block_num'].transform(np.max), downcast='unsigned')

transactions['is_first_two_blocks'] = \
                    transactions['date_block_num'].isin([transactions['item_first_block']+1,transactions['item_first_block']+2])


transactions['is_last_two_blocks'] = \
                transactions['date_block_num'].isin([transactions['item_last_block']-1,transactions['item_last_block']])
                                

In [12]:
number_of_items = transactions['item_id'].nunique()
print("number_of_items:", number_of_items)
number_of_categories = transactions['item_category_id'].nunique()
print("number_of_categories:", number_of_categories)
number_of_shops = transactions['shop_id'].nunique()
print("number_of_shops:", number_of_shops)
number_of_days = 365 + 365 - 30 - 31
print("number_of_days:", number_of_days)
number_of_blocks = transactions['date_block_num'].nunique()
print("number_of_blocks:", number_of_blocks)
total_sales = transactions['item_cnt_day'].sum()
print("total_sales:", total_sales)
total_turnover = transactions['turnover'].sum()
print("total_turnover:", total_turnover)
average_price = transactions['item_price'].mean()
print("average_price:", average_price)

number_of_items: 17054
number_of_categories: 79
number_of_shops: 55
number_of_days: 669
number_of_blocks: 22
total_sales: 2085473
total_turnover: 2181307117
average_price: 1015.4701882829513


#ITEM

-UNITS
item_units
item_block_units
item_mean_units_block
item_day_units
item_mean_units_day
item_max_units_block
item_min_units_block
item_max_units_day
item_min_units_day

-TURNOVER
item_turnover
item_block_turnover
item_mean_turnover_block
item_day_turnover
item_mean_turnover_day
item_max_turnover_block
item_min_turnover_block
item_max_turnover_day
item_min_turnover_day


-TIME
item_days_of_activity
item_blocks_of_activity
item_mean_day_between_activity
item_longest_stretch_days_without_activity
item_longest_stretch_blocks_without_activity
item_longest_stretch_block_with_activity
item_number_of_consecutive_days_with_activity
item_days_between_start_and_first_activity
item_blocks_between_start_and_first_activity
item_first_block
item_last_block
item_first_day
item_last_day
item_activity_on_all_blocks


-PRICE
item_mean_price
item_mean_price_block
item_min_price
item_max_price
item_number_different_prices
item_price_amplitude (%age min/max)
item_deviation_mean_category_price


-TREND
is_first_two_full_blocks (actually second/third to make sure we have a "full" block if this was a new release !!!!
is_last_two_blocks
item_first_two_blocks_units
item_last_two_blocks_units
item_fluctuation_units_first_last_blocks
item_first_two_blocks_mean_price
item_last_two_blocks_mean_price
item_fluctuation_price_first_last_blocks

-ENCODINGS
item_share_of_total_units
item_share_of_total_gross
item_share_of_category_units
item_share_of_category_turnover

In [13]:
gc.collect()
transactions_items = transactions.copy()
transactions_items_blocks = transactions.copy()

In [14]:
transactions_items_blocks['item_block_units'] = pd.to_numeric(transactions_items_blocks.groupby(['item_id','date_block_num'])['item_cnt_day'].transform(np.sum), downcast='unsigned')    
transactions_items_blocks['item_block_turnover'] = pd.to_numeric(transactions_items_blocks.groupby(['item_id','date_block_num'])['turnover'].transform(np.sum), downcast='unsigned')    
transactions_items_blocks['item_mean_price_block'] = pd.to_numeric(transactions_items_blocks.groupby(['item_id', 'date_block_num'])['item_price'].transform(np.mean), downcast='float')    

In [15]:
transactions_items['item_units'] = pd.to_numeric(transactions_items.groupby(['item_id'])['item_cnt_day'].transform(np.sum), downcast='unsigned') 
transactions_items['item_mean_units_block'] = pd.to_numeric(transactions_items_blocks.groupby(['item_id'])['item_block_units'].transform(np.mean), downcast='float') 
transactions_items['item_day_units'] = pd.to_numeric(transactions_items.groupby(['item_id','date'])['item_cnt_day'].transform(np.sum), downcast='unsigned') 
transactions_items['item_mean_units_day'] = pd.to_numeric(transactions_items.groupby(['item_id'])['item_day_units'].transform(np.mean), downcast='float') 
transactions_items['item_max_units_block'] = pd.to_numeric(transactions_items_blocks.groupby(['item_id'])['item_block_units'].transform(np.max), downcast='unsigned') 
transactions_items['item_min_units_block'] = pd.to_numeric(transactions_items_blocks.groupby(['item_id'])['item_block_units'].transform(np.min), downcast='unsigned') 
transactions_items['item_max_units_day'] = pd.to_numeric(transactions_items.groupby(['item_id'])['item_day_units'].transform(np.max), downcast='unsigned') 
transactions_items['item_min_units_day'] = pd.to_numeric(transactions_items.groupby(['item_id'])['item_day_units'].transform(np.min), downcast='unsigned') 

In [16]:
transactions_items['item_turnover'] = pd.to_numeric(transactions_items.groupby(['item_id'])['turnover'].transform(np.sum), downcast='unsigned') 
transactions_items['item_mean_turnover_block'] = pd.to_numeric(transactions_items_blocks.groupby(['item_id'])['item_block_turnover'].transform(np.mean), downcast='float') 
transactions_items['item_day_turnover'] = pd.to_numeric(transactions_items.groupby(['item_id','date'])['turnover'].transform(np.sum), downcast='unsigned') 
transactions_items['item_mean_turnover_day'] = pd.to_numeric(transactions_items.groupby(['item_id'])['turnover'].transform(np.mean), downcast='float') 
transactions_items['item_max_turnover_block'] = pd.to_numeric(transactions_items_blocks.groupby(['item_id'])['item_block_turnover'].transform(np.max), downcast='unsigned') 
transactions_items['item_min_turnover_block'] = pd.to_numeric(transactions_items_blocks.groupby(['item_id'])['item_block_turnover'].transform(np.min), downcast='unsigned') 
transactions_items['item_max_turnover_day'] = pd.to_numeric(transactions_items.groupby(['item_id'])['item_day_turnover'].transform(np.max), downcast='unsigned') 
transactions_items['item_min_turnover_day'] = pd.to_numeric(transactions_items.groupby(['item_id'])['item_day_turnover'].transform(np.min), downcast='unsigned') 

In [17]:
transactions_items['item_days_of_activity'] = pd.to_numeric(transactions_items.groupby(['item_id'])['date'].transform("nunique"), downcast='unsigned') 
transactions_items['item_blocks_of_activity'] = pd.to_numeric(transactions_items.groupby(['item_id'])['date_block_num'].transform("nunique"), downcast='unsigned') 

def get_number_of_days_since_start(day,month, year):
    days = 0
    if year == 2015:
        days = 365
    def is_even(num):
        return num % 2 == 0
    half_of_month = int(month/2)
    even = (30*half_of_month) + (31*half_of_month)
    if is_even(month):
        days = days + even - 30 - day
    else:
        days = days + even + day
    return days

transactions_items['item_days_since_start'] = pd.to_numeric(transactions_items.apply(lambda row: get_number_of_days_since_start(row['day'],row['month'], row['year']),axis=1), downcast='unsigned') 

def get_average_days_between_sales(days):
    days = sorted(np.unique(days))
    if len(days) == 0:
        return 9999
    if len(days) == 1:
        return 999
    return np.mean(np.ediff1d(days)) / len(days)

average_days_between_sales = transactions_items.groupby(['item_id'])['item_days_since_start'].apply(list).apply(lambda x: get_average_days_between_sales(x))

transactions_items['item_mean_day_between_activity'] = pd.to_numeric(transactions_items['item_id'].map(average_days_between_sales), downcast='unsigned') 


def get_max_stretch_without_sales_days(days):
    days = np.unique(days)
    max_stretch = 0
    len_days = len(days)
    for index,day in enumerate(sorted(days)):
        if index == len_days - 1:
            return max_stretch
        next_day = days[index+1]
        stretch = next_day - day
        if stretch > max_stretch:
            max_stretch = stretch
            

        
max_stretch_without_sales_day = transactions_items.groupby(['item_id'])['item_days_since_start'].apply(list).apply(lambda x: get_max_stretch_without_sales_days(x))

transactions_items['item_longest_stretch_days_without_activity'] = pd.to_numeric(transactions_items['item_id'].map(max_stretch_without_sales_day), downcast='unsigned') 

In [18]:
gc.collect()

def get_max_stretch_without_sales_block(blocks):
    blocks = np.unique(blocks)
    max_stretch = 0
    len_blocks = len(blocks)
    for index,block in enumerate(sorted(blocks)):
        if index == len_blocks - 1:
            return max_stretch
        next_block = blocks[index+1]
        stretch = next_block - block
        if stretch > max_stretch:
            max_stretch = stretch
            

        
item_longest_stretch_blocks_without_activity = transactions_items.groupby(['item_id'])['date_block_num']\
                                    .apply(list).apply(lambda x: get_max_stretch_without_sales_block(x))

transactions_items['item_longest_stretch_blocks_without_activity'] = pd.to_numeric(transactions_items['item_id'].map(item_longest_stretch_blocks_without_activity), downcast='unsigned') 



def get_longest_stretch(following_pairs, n=1,new_n=1):
    #print("following_pairs", following_pairs, " n: ", n, " new_n: ", new_n)
    len_pairs = len(following_pairs)
    if len_pairs == 0:
        return 0
    if len_pairs == 1:
        if new_n > n:
            return new_n
        return n
    if following_pairs[1][0] == following_pairs[0][1]:
        new_n+=1
    else:
        if new_n > n:
            n=new_n
        new_n=1
    return get_longest_stretch(following_pairs[1:], n,new_n)


assert(get_longest_stretch([]) == 0)
assert(get_longest_stretch([[1, 2], [2,3], [3, 4], [4,5] ,[8,9], [11, 12]]) == 4)
assert(get_longest_stretch([[-1, 0],[1, 2], [2,3], [3, 4], [4,5] ,[8,9], [11, 12]]) == 4)
assert(get_longest_stretch([[1, 2], [4,5] ,[8,9], [9,10],[10, 11]]) == 3)
assert(get_longest_stretch([[1, 2], [4,5] ,[8,9], [9,10],[10, 11],[20, 21], [25,26]]) == 3)
assert(get_longest_stretch([[1, 2], [4,5] ,[8,9], [9,10],[10, 11],[14, 15], [15,16]]) == 3)
assert(get_longest_stretch([[1, 2], [4,5] ,[8,9], [9,10],[10, 11],[14, 15], [15,16],[18,19] ,[22,23], [23,24],[24, 25]]) == 3)
assert(get_longest_stretch([[1, 2], [4,5], [7, 8]]) == 1)
assert(get_longest_stretch([[1, 2], [5, 6], [6, 7], [7,8], [14, 15]]) == 3)
assert(get_longest_stretch([[1, 2], [5, 6], [6, 7], [7,8], [14, 15], [15, 16]]) == 3)
assert(get_longest_stretch([[1, 2], [5, 6], [6, 7], [7,8], [14, 15], [15, 16], [16, 17]]) == 3)
assert(get_longest_stretch([[1, 2], [5, 6], [6, 7], [14, 15], [15, 16], [16, 17]]) == 3)
assert(get_longest_stretch([[1, 2], [5, 6], [6, 7], [7,8], [14, 15], [20, 21], [21, 22], [22,23],[23,24]]) == 4)


def get_following_pairs(pairs):
    pairs = np.unique(pairs)
    len_pairs = len(pairs)
    following = []
    for index,pair in enumerate(sorted(pairs)):
        if index == len_pairs - 1:
            return following
        next_pair = pairs[index+1]
        if next_pair == pair + 1:
            following.append([pair, next_pair])
        
assert(get_following_pairs([1,2,5,6,7,8,9,11,12,15]) == [[1, 2], [5, 6], [6, 7], [7, 8], [8, 9], [11, 12]])
assert(get_following_pairs([1,2,5,6,7,10]) == [[1, 2], [5, 6], [6, 7]])
assert(get_following_pairs([1,2,4,5,7,9,10]) == [[1, 2], [4, 5], [9,10]])
assert(get_following_pairs([1,2,4,5,7,9,10,11,12,15]) == [[1, 2], [4, 5], [9,10],[10,11],[11,12]])


item_longest_stretch_block_with_activity = transactions_items.groupby(['item_id'])['date_block_num']\
                                    .apply(list).apply(lambda x: get_longest_stretch(get_following_pairs(x)))

transactions_items['item_longest_stretch_block_with_activity'] = pd.to_numeric(transactions_items['item_id'].map(item_longest_stretch_block_with_activity), downcast='unsigned') 


item_number_of_consecutive_days_with_activity = transactions_items.groupby(['item_id'])['item_days_since_start']\
                                    .apply(list).apply(lambda x: len(get_following_pairs(x)))
    
transactions_items['item_number_of_consecutive_days_with_activity'] = pd.to_numeric(transactions_items['item_id'].map(item_number_of_consecutive_days_with_activity), downcast='unsigned') 

In [19]:
def get_units_between_first_and_last(units):
    return np.max(units) - np.min(units)

item_days_between_start_and_first_activity = transactions_items.groupby(['item_id'])['item_days_since_start'].apply(list).apply(lambda x: get_units_between_first_and_last(x))
transactions_items['item_days_between_start_and_first_activity'] = pd.to_numeric(transactions_items['item_id'].map(item_days_between_start_and_first_activity), downcast='unsigned') 

item_blocks_between_start_and_first_activity = transactions_items.groupby(['item_id'])['date_block_num'].apply(list).apply(lambda x: get_units_between_first_and_last(x))
transactions_items['item_blocks_between_start_and_first_activity'] = pd.to_numeric(transactions_items['item_id'].map(item_blocks_between_start_and_first_activity), downcast='unsigned') 

In [20]:

transactions_items['item_first_day'] = pd.to_numeric(transactions_items.groupby('item_id')['item_days_since_start'].transform(np.min), downcast='unsigned') 
transactions_items['item_last_day'] = pd.to_numeric(transactions_items.groupby('item_id')['item_days_since_start'].transform(np.max), downcast='unsigned') 

item_activity_on_all_blocks = transactions_items.groupby('item_id')['date_block_num'].nunique().apply(lambda x: x==number_of_blocks)
transactions_items['item_activity_on_all_blocks'] = transactions_items['item_id'].map(item_activity_on_all_blocks)

In [21]:
transactions_items['item_mean_price'] = pd.to_numeric(transactions_items.groupby('item_id')['item_price'].transform(np.mean), downcast='float') 
transactions_items['item_min_price'] = pd.to_numeric(transactions_items.groupby('item_id')['item_price'].transform(np.min), downcast='unsigned') 
transactions_items['item_max_price'] = pd.to_numeric(transactions_items.groupby('item_id')['item_price'].transform(np.max), downcast='unsigned') 
transactions_items['item_number_different_prices'] = pd.to_numeric(transactions_items.groupby('item_id')['item_price'].transform('nunique'), downcast='unsigned') 
transactions_items['item_price_amplitude'] = pd.to_numeric(((transactions_items['item_max_price'] - transactions_items['item_min_price'] ) / transactions_items['item_min_price']) * 100, downcast='float') 
transactions_items['category_mean_price'] = pd.to_numeric(transactions_items.groupby('item_category_id')['item_price'].transform(np.mean), downcast='float') 
transactions_items['item_deviation_mean_category_price'] =  pd.to_numeric(((transactions_items['item_mean_price'] - transactions_items['category_mean_price'] ) / transactions_items['category_mean_price']) * 100, downcast='float') 

In [22]:
item_first_two_blocks_units = transactions_items.groupby(['item_id','is_first_two_blocks'], as_index=False)['item_cnt_day'].sum()
item_first_two_blocks_units = item_first_two_blocks_units[item_first_two_blocks_units['is_first_two_blocks'] == True].set_index('item_id').iloc[:,1]
transactions_items['item_first_two_blocks_units'] = pd.to_numeric(transactions_items['item_id'].map(item_first_two_blocks_units), downcast='unsigned') 

item_last_two_blocks_units = transactions_items.groupby(['item_id','is_last_two_blocks'], as_index=False)['item_cnt_day'].sum()
item_last_two_blocks_units = item_last_two_blocks_units[item_last_two_blocks_units['is_last_two_blocks'] == True].set_index('item_id').iloc[:,1]
transactions_items['item_last_two_blocks_units'] = pd.to_numeric(transactions_items['item_id'].map(item_last_two_blocks_units), downcast='unsigned') 

transactions_items['item_fluctuation_units_first_last_blocks'] =  pd.to_numeric(((transactions_items['item_first_two_blocks_units'] - transactions_items['item_last_two_blocks_units'] ) / \
                                                             transactions_items['item_first_two_blocks_units']) * 100 * -1, downcast='float') 


item_first_two_blocks_mean_price = transactions_items.groupby(['item_id','is_first_two_blocks'], as_index=False)['item_price'].mean()
item_first_two_blocks_mean_price = item_first_two_blocks_mean_price[item_first_two_blocks_mean_price['is_first_two_blocks'] == True].set_index('item_id').iloc[:,1]
transactions_items['item_first_two_blocks_mean_price'] = pd.to_numeric(transactions_items['item_id'].map(item_first_two_blocks_mean_price), downcast='unsigned') 

item_last_two_blocks_mean_price = transactions_items.groupby(['item_id','is_last_two_blocks'], as_index=False)['item_price'].mean()
item_last_two_blocks_mean_price = item_last_two_blocks_mean_price[item_last_two_blocks_mean_price['is_last_two_blocks'] == True].set_index('item_id').iloc[:,1]
transactions_items['item_last_two_blocks_mean_price'] = pd.to_numeric(transactions_items['item_id'].map(item_last_two_blocks_mean_price), downcast='unsigned') 

transactions_items['item_fluctuation_price_first_last_blocks'] =  pd.to_numeric(((transactions_items['item_first_two_blocks_mean_price'] - transactions_items['item_last_two_blocks_mean_price'] ) / \
                                                             transactions_items['item_first_two_blocks_mean_price'])  * 100 * -1, downcast='float') 

In [23]:
transactions_items['item_share_of_total_units'] = pd.to_numeric(transactions_items['item_units'] * 100 / total_sales , downcast='float') 

transactions_items['item_share_of_total_turnover'] = pd.to_numeric(transactions_items['item_turnover'] * 100 / total_turnover, downcast='float') 

transactions_items['category_units'] = pd.to_numeric(transactions_items.groupby('item_category_id')['item_cnt_day'].transform(np.sum), downcast='unsigned') 
transactions_items['item_share_of_category_units'] = pd.to_numeric(transactions_items['item_units'] * 100 / transactions_items['category_units'], downcast='float') 

transactions_items['category_turnover'] = pd.to_numeric(transactions_items.groupby('item_category_id')['turnover'].transform(np.sum), downcast='unsigned') 
transactions_items['item_share_of_category_turnover'] = pd.to_numeric(transactions_items['item_turnover'] * 100 / transactions_items['category_turnover'], downcast='float') 


In [24]:
transactions_items.to_pickle("pickled/transactions_items")
transactions_items_blocks.to_pickle("pickled/transactions_items_blocks")

del transactions_items
del transactions_items_blocks
gc.collect()

336

#CATEGORY

-UNITS
category_units
category_block_units
category_mean_units_block
category_day_units
category_mean_units_day
category_max_units_block
category_min_units_block
category_max_units_day
category_min_units_day

-TURNOVER
category_turnover
category_block_turnover
category_mean_turnover_block
category_day_turnover
category_mean_turnover_day
category_max_turnover_block
category_min_turnover_block
category_max_turnover_day
category_min_turnover_day


-PRICE
category_mean_price
category_mean_price_block
category_min_price
category_max_price


-TREND
category_first_two_blocks_units
category_last_two_blocks_units
category_fluctuation_units_first_last_blocks
category_first_two_blocks_mean_price
category_last_two_blocks_mean_price
category_fluctuation_price_first_last_blocks

-SUBCATEGORY
subcategory
subcategory 1hot

-UNITS
subcategory_units
subcategory_block_units
subcategory_mean_units_block
subcategory_day_units
subcategory_mean_units_day
subcategory_max_units_block
subcategory_min_units_block
subcategory_max_units_day
subcategory_min_units_day

-TURNOVER
subcategory_turnover
subcategory_block_turnover
subcategory_mean_turnover_block
subcategory_day_turnover
subcategory_mean_turnover_day
subcategory_max_turnover_block
subcategory_min_turnover_block
subcategory_max_turnover_day
subcategory_min_turnover_day

-ENCODINGS
category_share_of_total_units
category_share_of_total_gross
subcategory_share_of_total_units
subcategory_share_of_total_gross

-TREND
subcategory_first_two_blocks_units
subcategory_last_two_blocks_units
subcategory_fluctuation_units_first_last_blocks
subcategory_first_two_blocks_mean_price
subcategory_last_two_blocks_mean_price
subcategory_fluctuation_price_first_last_blocks

In [25]:
gc.collect()
transactions_categories = transactions.copy()
transactions_categories_blocks = transactions.copy()

In [26]:
sub_cats = {}
for i in range(1,8):
    sub_cats[i] = "Accessories"
sub_cats[8] = "Tickets"
sub_cats[9] = "Delivery of goods"
for i in range(10,18):
    sub_cats[i] = "Consoles"
for i in range(18,25):
    sub_cats[i] = "Game for Consoles"
sub_cats[25] = "Accessories for Games"
sub_cats[26] = "Android Games"
sub_cats[27] = "MAC Games"
for i in range(28,32):
    sub_cats[i] = "PC Games"
for i in range(32,37):
    sub_cats[i] = "Payment Cards"
for i in range(37,40):
    sub_cats[i] = "Cinema - Blu-ray"
sub_cats[40] = "Cinema - DVD"
sub_cats[41] = "Cinema - Collectible"
for i in range(42,46):
    sub_cats[i] = "Audiobooks"
for i in range(46,55):
    sub_cats[i] = "Books"
for i in range(55,57):
    sub_cats[i] = "Music - CD"
sub_cats[57] = "Music - MP3"
sub_cats[58] = "Music - Vinyl"
sub_cats[59] = "Music - Music Video"
sub_cats[60] = "Music - Gift Edition"
for i in range(61,74):
    sub_cats[i] = "Gifts"
for i in range(73,79):
    sub_cats[i] = "Software"
sub_cats[79] = "Utility"
for i in range(80,84):
    sub_cats[i] = "Misc"
    
    
transactions_categories['subcategory'] = transactions_categories['item_category_id'].apply(lambda x: sub_cats[x]).astype('category')
transactions_categories_blocks['subcategory'] = transactions_categories_blocks['item_category_id'].apply(lambda x: sub_cats[x]).astype('category')

In [27]:
transactions_categories_blocks['category_block_units'] = pd.to_numeric(transactions_categories_blocks.groupby(['item_category_id','date_block_num'])['item_cnt_day'].transform(np.sum), downcast='unsigned') 
transactions_categories_blocks['category_block_turnover'] = pd.to_numeric(transactions_categories_blocks.groupby(['item_category_id','date_block_num'])['turnover'].transform(np.sum), downcast='unsigned') 
transactions_categories_blocks['category_mean_price_block'] = pd.to_numeric(transactions_categories_blocks.groupby(['item_category_id', 'date_block_num'])['item_price'].transform(np.mean), downcast='float') 
transactions_categories_blocks['subcategory_block_units'] = pd.to_numeric(transactions_categories_blocks.groupby(['subcategory','date_block_num'])['item_cnt_day'].transform(np.sum), downcast='unsigned') 
transactions_categories_blocks['subcategory_block_turnover'] = pd.to_numeric(transactions_categories_blocks.groupby(['subcategory','date_block_num'])['turnover'].transform(np.sum), downcast='unsigned') 
transactions_categories_blocks['subcategory_mean_price_block'] = pd.to_numeric(transactions_categories_blocks.groupby(['subcategory', 'date_block_num'])['item_price'].transform(np.mean), downcast='float') 


In [28]:
transactions_categories['category_units'] = pd.to_numeric(transactions_categories.groupby(['item_category_id'])['item_cnt_day'].transform(np.sum), downcast='unsigned') 
transactions_categories['category_mean_units_block'] = pd.to_numeric(transactions_categories_blocks.groupby(['item_category_id'])['category_block_units'].transform(np.mean), downcast='float') 
transactions_categories['category_day_units'] = pd.to_numeric(transactions_categories.groupby(['item_category_id','date'])['item_cnt_day'].transform(np.sum), downcast='unsigned') 
transactions_categories['category_mean_units_day'] = pd.to_numeric(transactions_categories.groupby(['item_category_id'])['category_day_units'].transform(np.mean), downcast='float') 
transactions_categories['category_max_units_block'] = pd.to_numeric(transactions_categories_blocks.groupby(['item_category_id'])['category_block_units'].transform(np.max), downcast='unsigned') 
transactions_categories['category_min_units_block'] = pd.to_numeric(transactions_categories_blocks.groupby(['item_category_id'])['category_block_units'].transform(np.min), downcast='unsigned') 
transactions_categories['category_max_units_day'] = pd.to_numeric(transactions_categories.groupby(['item_category_id'])['category_day_units'].transform(np.max), downcast='unsigned') 
transactions_categories['category_min_units_day'] = pd.to_numeric(transactions_categories.groupby(['item_category_id'])['category_day_units'].transform(np.min), downcast='unsigned') 

In [29]:
transactions_categories['category_turnover'] = pd.to_numeric(transactions_categories.groupby(['item_category_id'])['turnover'].transform(np.sum), downcast='unsigned') 
transactions_categories['category_mean_turnover_block'] = pd.to_numeric(transactions_categories_blocks.groupby(['item_category_id'])['category_block_turnover'].transform(np.mean), downcast='float') 
transactions_categories['category_day_turnover'] = pd.to_numeric(transactions_categories.groupby(['item_category_id','date'])['turnover'].transform(np.sum), downcast='unsigned') 
transactions_categories['category_mean_turnover_day'] = pd.to_numeric(transactions_categories.groupby(['item_category_id'])['category_day_turnover'].transform(np.mean), downcast='float') 
transactions_categories['category_max_turnover_block'] = pd.to_numeric(transactions_categories_blocks.groupby(['item_category_id'])['category_block_turnover'].transform(np.max), downcast='unsigned') 
transactions_categories['category_min_turnover_block'] = pd.to_numeric(transactions_categories_blocks.groupby(['item_category_id'])['category_block_turnover'].transform(np.min), downcast='unsigned') 
transactions_categories['category_max_turnover_day'] = pd.to_numeric(transactions_categories.groupby(['item_category_id'])['category_day_turnover'].transform(np.max), downcast='unsigned') 
transactions_categories['category_min_turnover_day'] = pd.to_numeric(transactions_categories.groupby(['item_category_id'])['category_day_turnover'].transform(np.min), downcast='unsigned') 

In [30]:
transactions_categories['category_mean_price'] = pd.to_numeric(transactions_categories.groupby('item_category_id')['item_price'].transform(np.mean), downcast='float') 
transactions_categories['category_min_price'] = pd.to_numeric(transactions_categories.groupby('item_category_id')['item_price'].transform(np.min), downcast='unsigned')
transactions_categories['category_max_price'] = pd.to_numeric(transactions_categories.groupby('item_category_id')['item_price'].transform(np.max), downcast='unsigned')

In [31]:
category_first_two_blocks_units = transactions_categories.groupby(['item_category_id','is_first_two_blocks'], as_index=False)['item_cnt_day'].sum()
category_first_two_blocks_units = category_first_two_blocks_units[category_first_two_blocks_units['is_first_two_blocks'] == True].set_index('item_category_id').iloc[:,1]
transactions_categories['category_first_two_blocks_units'] = pd.to_numeric(transactions_categories['item_category_id'].map(category_first_two_blocks_units), downcast='unsigned')

category_last_two_blocks_units = transactions_categories.groupby(['item_category_id','is_last_two_blocks'], as_index=False)['item_cnt_day'].sum()
category_last_two_blocks_units = category_last_two_blocks_units[category_last_two_blocks_units['is_last_two_blocks'] == True].set_index('item_category_id').iloc[:,1]
transactions_categories['category_last_two_blocks_units'] = pd.to_numeric(transactions_categories['item_category_id'].map(category_last_two_blocks_units), downcast='unsigned')

transactions_categories['category_fluctuation_units_first_last_blocks'] =  pd.to_numeric(((transactions_categories['category_first_two_blocks_units'] - transactions_categories['category_last_two_blocks_units'] ) / \
                                                             transactions_categories['category_first_two_blocks_units']) * 100 * -1, downcast='float') 


category_first_two_blocks_mean_price = transactions_categories.groupby(['item_category_id','is_first_two_blocks'], as_index=False)['item_price'].mean()
category_first_two_blocks_mean_price = category_first_two_blocks_mean_price[category_first_two_blocks_mean_price['is_first_two_blocks'] == True].set_index('item_category_id').iloc[:,1]
transactions_categories['category_first_two_blocks_mean_price'] = pd.to_numeric(transactions_categories['item_category_id'].map(category_first_two_blocks_mean_price), downcast='unsigned')

category_last_two_blocks_mean_price = transactions_categories.groupby(['item_category_id','is_last_two_blocks'], as_index=False)['item_price'].mean()
category_last_two_blocks_mean_price = category_last_two_blocks_mean_price[category_last_two_blocks_mean_price['is_last_two_blocks'] == True].set_index('item_category_id').iloc[:,1]
transactions_categories['category_last_two_blocks_mean_price'] = pd.to_numeric(transactions_categories['item_category_id'].map(category_last_two_blocks_mean_price), downcast='unsigned')

transactions_categories['category_fluctuation_price_first_last_blocks'] =  pd.to_numeric(((transactions_categories['category_first_two_blocks_mean_price'] - transactions_categories['category_last_two_blocks_mean_price'] ) / \
                                                             transactions_categories['category_first_two_blocks_mean_price'])  * 100 * -1, downcast='float') 

In [32]:
transactions_categories['video_game'] = transactions_categories["item_category_id"].isin(list(range(18,32)))
transactions_categories['gaming_old_gen'] = transactions_categories["item_category_id"].isin([10,11,15,18,19,23])
transactions_categories['gaming_new_gen'] = transactions_categories["item_category_id"].isin([12,14,16,20,22,24])
transactions_categories['pc_games'] = transactions_categories["item_category_id"].isin(list(range(27,32)))
transactions_categories['payment_cards'] = transactions_categories["item_category_id"].isin(list(range(32,37)))
transactions_categories['movies'] = transactions_categories["item_category_id"].isin(list(range(37,42)))
transactions_categories['movies_niche'] = transactions_categories["item_category_id"].isin([38,39])
transactions_categories['books'] = transactions_categories["item_category_id"].isin([42,55])
transactions_categories['music'] = transactions_categories["item_category_id"].isin(list(range(55,61)))
transactions_categories['music_CD'] = transactions_categories["item_category_id"].isin([55,56])
transactions_categories['music_vinyl'] = transactions_categories["item_category_id"].isin([58])
transactions_categories['gifts'] = transactions_categories["item_category_id"].isin(list(range(61,72)))
transactions_categories['software'] = transactions_categories["item_category_id"].isin(list(range(73,79)))

In [33]:
transactions_categories['subcategory_units'] = pd.to_numeric(transactions_categories.groupby(['subcategory'])['item_cnt_day'].transform(np.sum), downcast='unsigned')
transactions_categories['subcategory_mean_units_block'] = pd.to_numeric(transactions_categories_blocks.groupby(['subcategory'])['subcategory_block_units'].transform(np.mean), downcast='float') 
transactions_categories['subcategory_day_units'] = pd.to_numeric(transactions_categories.groupby(['subcategory','date'])['item_cnt_day'].transform(np.sum), downcast='unsigned')
transactions_categories['subcategory_mean_units_day'] = pd.to_numeric(transactions_categories.groupby(['subcategory'])['subcategory_day_units'].transform(np.mean), downcast='float') 
transactions_categories['subcategory_max_units_block'] = pd.to_numeric(transactions_categories_blocks.groupby(['subcategory'])['subcategory_block_units'].transform(np.max), downcast='unsigned')
transactions_categories['subcategory_min_units_block'] = pd.to_numeric(transactions_categories_blocks.groupby(['subcategory'])['subcategory_block_units'].transform(np.min), downcast='unsigned')
transactions_categories['subcategory_max_units_day'] = pd.to_numeric(transactions_categories.groupby(['subcategory'])['subcategory_day_units'].transform(np.max), downcast='unsigned')
transactions_categories['subcategory_min_units_day'] = pd.to_numeric(transactions_categories.groupby(['subcategory'])['subcategory_day_units'].transform(np.min), downcast='unsigned')

In [34]:
transactions_categories['subcategory_turnover'] = pd.to_numeric(transactions_categories.groupby(['subcategory'])['turnover'].transform(np.sum), downcast='unsigned')
transactions_categories['subcategory_mean_turnover_block'] = pd.to_numeric(transactions_categories_blocks.groupby(['subcategory'])['subcategory_block_turnover'].transform(np.mean), downcast='float') 
transactions_categories['subcategory_day_turnover'] = pd.to_numeric(transactions_categories.groupby(['subcategory','date'])['turnover'].transform(np.sum), downcast='unsigned')
transactions_categories['subcategory_mean_turnover_day'] = pd.to_numeric(transactions_categories.groupby(['subcategory'])['subcategory_day_turnover'].transform(np.mean), downcast='float') 
transactions_categories['subcategory_max_turnover_block'] = pd.to_numeric(transactions_categories_blocks.groupby(['subcategory'])['subcategory_block_turnover'].transform(np.max), downcast='unsigned')
transactions_categories['subcategory_min_turnover_block'] = pd.to_numeric(transactions_categories_blocks.groupby(['subcategory'])['subcategory_block_turnover'].transform(np.min), downcast='unsigned')
transactions_categories['subcategory_max_turnover_day'] = pd.to_numeric(transactions_categories.groupby(['subcategory'])['subcategory_day_turnover'].transform(np.max), downcast='unsigned')
transactions_categories['subcategory_min_turnover_day'] = pd.to_numeric(transactions_categories.groupby(['subcategory'])['subcategory_day_turnover'].transform(np.min), downcast='unsigned')

In [35]:
transactions_categories['category_share_of_total_units'] = pd.to_numeric(transactions_categories['category_units'] * 100 / total_sales , downcast='float') 
transactions_categories['category_share_of_total_turnover'] = pd.to_numeric(transactions_categories['category_turnover']* 100 / total_turnover, downcast='float') 

transactions_categories['subcategory_units'] = pd.to_numeric(transactions_categories.groupby("subcategory")['item_cnt_day'].transform(np.sum), downcast='unsigned')
transactions_categories['subcategory_share_of_total_units'] = pd.to_numeric(transactions_categories['subcategory_units'] * 100 / total_sales, downcast='float') 
transactions_categories['subcategory_turnover'] = pd.to_numeric(transactions_categories.groupby("subcategory")['turnover'].transform(np.sum), downcast='unsigned')
transactions_categories['subcategory_share_of_total_turnover'] = pd.to_numeric(transactions_categories['subcategory_turnover']* 100 / total_turnover, downcast='float') 

In [36]:
subcategory_first_two_blocks_units = transactions_categories.groupby(['subcategory','is_first_two_blocks'], as_index=False)['item_cnt_day'].sum()
subcategory_first_two_blocks_units = subcategory_first_two_blocks_units[subcategory_first_two_blocks_units['is_first_two_blocks'] == True].set_index('subcategory').iloc[:,1]
transactions_categories['subcategory_first_two_blocks_units'] = pd.to_numeric(transactions_categories['subcategory'].map(subcategory_first_two_blocks_units), downcast='unsigned')

subcategory_last_two_blocks_units = transactions_categories.groupby(['subcategory','is_last_two_blocks'], as_index=False)['item_cnt_day'].sum()
subcategory_last_two_blocks_units = subcategory_last_two_blocks_units[subcategory_last_two_blocks_units['is_last_two_blocks'] == True].set_index('subcategory').iloc[:,1]
transactions_categories['subcategory_last_two_blocks_units'] = pd.to_numeric(transactions_categories['subcategory'].map(subcategory_last_two_blocks_units), downcast='unsigned')

transactions_categories['subcategory_fluctuation_units_first_last_blocks'] =  pd.to_numeric(((transactions_categories['subcategory_first_two_blocks_units'] - transactions_categories['subcategory_last_two_blocks_units'] ) / \
                                                             transactions_categories['subcategory_first_two_blocks_units']) * 100 * -1, downcast='float') 


subcategory_first_two_blocks_mean_price = transactions_categories.groupby(['subcategory','is_first_two_blocks'], as_index=False)['item_price'].mean()
subcategory_first_two_blocks_mean_price = subcategory_first_two_blocks_mean_price[subcategory_first_two_blocks_mean_price['is_first_two_blocks'] == True].set_index('subcategory').iloc[:,1]
transactions_categories['subcategory_first_two_blocks_mean_price'] = pd.to_numeric(transactions_categories['subcategory'].map(subcategory_first_two_blocks_mean_price), downcast='unsigned')

subcategory_last_two_blocks_mean_price = transactions_categories.groupby(['subcategory','is_last_two_blocks'], as_index=False)['item_price'].mean()
subcategory_last_two_blocks_mean_price = subcategory_last_two_blocks_mean_price[subcategory_last_two_blocks_mean_price['is_last_two_blocks'] == True].set_index('subcategory').iloc[:,1]
transactions_categories['subcategory_last_two_blocks_mean_price'] = pd.to_numeric(transactions_categories['subcategory'].map(subcategory_last_two_blocks_mean_price), downcast='unsigned')

transactions_categories['subcategory_fluctuation_price_first_last_blocks'] =  pd.to_numeric(((transactions_categories['subcategory_first_two_blocks_mean_price'] - transactions_categories['subcategory_last_two_blocks_mean_price'] ) / \
                                                             transactions_categories['subcategory_first_two_blocks_mean_price'])  * 100 * -1, downcast='float') 

In [37]:

transactions_categories.to_pickle("pickled/transactions_categories")
transactions_categories_blocks.to_pickle("pickled/transactions_categories_blocks")

del transactions_categories
del transactions_categories_blocks
gc.collect()

147

#SHOP

-UNITS
shop_units
shop_block_units
shop_mean_units_block
shop_day_units
shop_mean_units_day
shop_max_units_block
shop_min_units_block
shop_max_units_day
shop_min_units_day

-TURNOVER
shop_turnover
shop_block_turnover
shop_mean_turnover_block
shop_day_turnover
shop_mean_turnover_day
shop_max_turnover_block
shop_min_turnover_block
shop_max_turnover_day
shop_min_turnover_day

-PRICE
shop_mean_price
shop_mean_price_block


-TREND
shop_first_two_blocks_units
shop_last_two_blocks_units
shop_fluctuation_units_first_last_blocks
shop_first_two_blocks_mean_price
shop_last_two_blocks_mean_price
shop_fluctuation_price_first_last_blocks

-ENCODINGS
shop_share_of_total_units
shop_share_of_total_gross

-MISC
shop_ids_TC
shop_ids_TRK
shop_ids_SEC
shop_ids_shopping_center
shop_ids_moscow

-CATEGORY
shop_top_category_units
shop_top_category_turnover
shop_top_subcategory_units
shop_top_subcategory_turnover

In [38]:
gc.collect()
transactions_shops = transactions.copy()
transactions_shops_blocks = transactions.copy()

In [39]:
shop_areas = {}
for i in range(0,2):
    shop_areas[i] = "Yakutsk"
shop_areas[2] = "Adygea"
shop_areas[3] = "Balashikha"
shop_areas[4] = "Volga"
shop_areas[5] = "Vologda"
for i in range(6,9):
    shop_areas[i] = "Voronezh"
shop_areas[9] = "Outbound Trading"
for i in range(10,12):
    shop_areas[i] = "Zhukovsky"
shop_areas[12] = "Online store emergency"
for i in range(13,15):
    shop_areas[i] = "Kazan"
shop_areas[15] = "Kaluga"
shop_areas[16] = "Kolomna"
for i in range(17,19):
    shop_areas[i] = "Krasnoyarsk"
shop_areas[19] = "Kursk"
for i in range(20,33):
    shop_areas[i] = "Moscow"
shop_areas[33] = "Mytishchi"
for i in range(34,36):
    shop_areas[i] = "N.Novgorod"
for i in range(36,38):
    shop_areas[i] = "Novosibirsk"
shop_areas[38] = "Omsk"
for i in range(39,42):
    shop_areas[i] = "RostovNaDonu"
for i in range(42,44):
    shop_areas[i] = "St. Petersburg"
for i in range(44,46):
    shop_areas[i] = "Samara"
shop_areas[46] = "Sergiev Posad"
shop_areas[47] = "Surgut"
shop_areas[48] = "Tomsk"
for i in range(49,52):
    shop_areas[i] = "Tyumen TC"
for i in range(52,54):
    shop_areas[i] = "Ufa"
shop_areas[54] = "Khimki"
shop_areas[55] = "Digital warehouse"
shop_areas[56] = "Chekhov"
for i in range(57,59):
    shop_areas[i] = "Yakutsk"
shop_areas[59] = "Yaroslavl"

transactions_shops['area'] = transactions_shops['shop_id'].apply(lambda x: shop_areas[x]).astype('category')

transactions_shops_blocks['area'] = transactions_shops['shop_id'].apply(lambda x: shop_areas[x]).astype('category')



In [40]:
transactions_shops_blocks['shop_block_units'] = pd.to_numeric(transactions_shops_blocks.groupby(['shop_id','date_block_num'])['item_cnt_day'].transform(np.sum), downcast='unsigned')
transactions_shops_blocks['shop_block_turnover'] = pd.to_numeric(transactions_shops_blocks.groupby(['shop_id','date_block_num'])['turnover'].transform(np.sum), downcast='unsigned')
transactions_shops_blocks['shop_mean_price_block'] = pd.to_numeric(transactions_shops_blocks.groupby(['shop_id', 'date_block_num'])['item_price'].transform(np.mean), downcast='float') 
transactions_shops_blocks['area_block_units'] = pd.to_numeric(transactions_shops_blocks.groupby(['area','date_block_num'])['item_cnt_day'].transform(np.sum), downcast='unsigned')
transactions_shops_blocks['area_block_turnover'] = pd.to_numeric(transactions_shops_blocks.groupby(['area','date_block_num'])['turnover'].transform(np.sum), downcast='unsigned')
transactions_shops_blocks['area_mean_price_block'] = pd.to_numeric(transactions_shops_blocks.groupby(['area', 'date_block_num'])['item_price'].transform(np.mean), downcast='float') 


In [41]:
transactions_shops['shop_units'] = pd.to_numeric(transactions_shops.groupby(['shop_id'])['item_cnt_day'].transform(np.sum), downcast='unsigned')
transactions_shops['shop_mean_units_block'] = pd.to_numeric(transactions_shops_blocks.groupby(['shop_id'])['shop_block_units'].transform(np.mean), downcast='float') 
transactions_shops['shop_day_units'] = pd.to_numeric(transactions_shops.groupby(['shop_id','date'])['item_cnt_day'].transform(np.sum), downcast='unsigned')
transactions_shops['shop_mean_units_day'] = pd.to_numeric(transactions_shops.groupby(['shop_id'])['shop_day_units'].transform(np.mean), downcast='float') 
transactions_shops['shop_max_units_block'] = pd.to_numeric(transactions_shops_blocks.groupby(['shop_id'])['shop_block_units'].transform(np.max), downcast='unsigned')
transactions_shops['shop_min_units_block'] = pd.to_numeric(transactions_shops_blocks.groupby(['shop_id'])['shop_block_units'].transform(np.min), downcast='unsigned')
transactions_shops['shop_max_units_day'] = pd.to_numeric(transactions_shops.groupby(['shop_id'])['shop_day_units'].transform(np.max), downcast='unsigned')
transactions_shops['shop_min_units_day'] = pd.to_numeric(transactions_shops.groupby(['shop_id'])['shop_day_units'].transform(np.min), downcast='unsigned')

In [42]:
transactions_shops['shop_turnover'] = pd.to_numeric(transactions_shops.groupby(['shop_id'])['turnover'].transform(np.sum), downcast='unsigned')
transactions_shops['shop_mean_turnover_block'] = pd.to_numeric(transactions_shops_blocks.groupby(['shop_id'])['shop_block_turnover'].transform(np.mean), downcast='float') 
transactions_shops['shop_day_turnover'] = pd.to_numeric(transactions_shops.groupby(['shop_id','date'])['turnover'].transform(np.sum), downcast='unsigned')
transactions_shops['shop_mean_turnover_day'] = pd.to_numeric(transactions_shops.groupby(['shop_id'])['shop_day_turnover'].transform(np.mean), downcast='float') 
transactions_shops['shop_max_turnover_block'] = pd.to_numeric(transactions_shops_blocks.groupby(['shop_id'])['shop_block_turnover'].transform(np.max), downcast='unsigned')
transactions_shops['shop_min_turnover_block'] = pd.to_numeric(transactions_shops_blocks.groupby(['shop_id'])['shop_block_turnover'].transform(np.min), downcast='unsigned')
transactions_shops['shop_max_turnover_day'] = pd.to_numeric(transactions_shops.groupby(['shop_id'])['shop_day_turnover'].transform(np.max), downcast='unsigned')
transactions_shops['shop_min_turnover_day'] = pd.to_numeric(transactions_shops.groupby(['shop_id'])['shop_day_turnover'].transform(np.min), downcast='unsigned')

In [43]:
transactions_shops['shop_mean_price'] = pd.to_numeric(transactions_shops.groupby('shop_id')['item_price'].transform(np.mean), downcast='float') 


In [44]:
shop_first_two_blocks_units = transactions_shops.groupby(['shop_id','is_first_two_blocks'], as_index=False)['item_cnt_day'].sum()
shop_first_two_blocks_units = shop_first_two_blocks_units[shop_first_two_blocks_units['is_first_two_blocks'] == True].set_index('shop_id').iloc[:,1]
transactions_shops['shop_first_two_blocks_units'] = pd.to_numeric(transactions_shops['shop_id'].map(shop_first_two_blocks_units), downcast='unsigned')

shop_last_two_blocks_units = transactions_shops.groupby(['shop_id','is_last_two_blocks'], as_index=False)['item_cnt_day'].sum()
shop_last_two_blocks_units = shop_last_two_blocks_units[shop_last_two_blocks_units['is_last_two_blocks'] == True].set_index('shop_id').iloc[:,1]
transactions_shops['shop_last_two_blocks_units'] = pd.to_numeric(transactions_shops['shop_id'].map(shop_last_two_blocks_units), downcast='unsigned')

transactions_shops['shop_fluctuation_units_first_last_blocks'] =  pd.to_numeric(((transactions_shops['shop_first_two_blocks_units'] - transactions_shops['shop_last_two_blocks_units'] ) / \
                                                             transactions_shops['shop_first_two_blocks_units']) * 100 * -1, downcast='float') 


shop_first_two_blocks_mean_price = transactions_shops.groupby(['shop_id','is_first_two_blocks'], as_index=False)['item_price'].mean()
shop_first_two_blocks_mean_price = shop_first_two_blocks_mean_price[shop_first_two_blocks_mean_price['is_first_two_blocks'] == True].set_index('shop_id').iloc[:,1]
transactions_shops['shop_first_two_blocks_mean_price'] = pd.to_numeric(transactions_shops['shop_id'].map(shop_first_two_blocks_mean_price), downcast='unsigned')

shop_last_two_blocks_mean_price = transactions_shops.groupby(['shop_id','is_last_two_blocks'], as_index=False)['item_price'].mean()
shop_last_two_blocks_mean_price = shop_last_two_blocks_mean_price[shop_last_two_blocks_mean_price['is_last_two_blocks'] == True].set_index('shop_id').iloc[:,1]
transactions_shops['shop_last_two_blocks_mean_price'] = pd.to_numeric(transactions_shops['shop_id'].map(shop_last_two_blocks_mean_price), downcast='unsigned')

transactions_shops['shop_fluctuation_price_first_last_blocks'] =  pd.to_numeric(((transactions_shops['shop_first_two_blocks_mean_price'] - transactions_shops['shop_last_two_blocks_mean_price'] ) / \
                                                             transactions_shops['shop_first_two_blocks_mean_price'])  * 100 * -1, downcast='float') 

In [45]:
transactions_shops['shop_share_of_units'] = pd.to_numeric(transactions_shops['shop_units'] * 100 / total_sales, downcast='float') 
transactions_shops['shop_share_of_turnover'] = pd.to_numeric(transactions_shops['shop_turnover'] * 100 / total_turnover, downcast='float') 

In [46]:
shop_ids_TC = [1,2,13,14,16,23,24,26,28,31,37,38,42,43,44,46,50,54,58]
shop_ids_TRK = [3,33,39,40]
shop_ids_SEC = [7,34,36,47,48,49,56]
shop_ids_shopping_center = [4,5,8,15,17,18,19,27,29,30,32,41,45,51,53,59]
shop_ids_moscow = list(range(20,33))


transactions_shops['shop_TC'] = transactions_shops['shop_id'].isin(shop_ids_TC)
transactions_shops['shop_TRK'] = transactions_shops['shop_id'].isin(shop_ids_TRK)
transactions_shops['shop_SEC'] = transactions_shops['shop_id'].isin(shop_ids_SEC)
transactions_shops['shop_shopping_center'] = transactions_shops['shop_id'].isin(shop_ids_shopping_center)
transactions_shops['shop_moscow'] = transactions_shops['shop_id'].isin(shop_ids_moscow)

In [47]:
a = transactions_shops.groupby(['shop_id', 'item_category_id'],as_index=False)['item_cnt_day'].sum()

b = pd.DataFrame(transactions_shops.groupby(['shop_id', 'item_category_id'],as_index=False)['item_cnt_day'].sum()\
                  .groupby(['shop_id'])['item_cnt_day'].max()).reset_index()

c = b.merge(a, on=['shop_id', 'item_cnt_day'],how='left')[['shop_id', 'item_category_id']].rename(columns={'item_category_id':'max_category_units'})

transactions_shops = transactions_shops.merge(c, on=['shop_id'], how='left')

transactions_shops['max_category_units'] = pd.to_numeric(transactions_shops['max_category_units'], downcast='unsigned')

In [48]:
a = transactions_shops.groupby(['shop_id', 'item_category_id'],as_index=False)['turnover'].sum()

b = pd.DataFrame(transactions_shops.groupby(['shop_id', 'item_category_id'],as_index=False)['turnover'].sum()\
                  .groupby(['shop_id'])['turnover'].max()).reset_index()

c = b.merge(a, on=['shop_id', 'turnover'],how='left')[['shop_id', 'item_category_id']].rename(columns={'item_category_id':'max_category_turnover'})

transactions_shops = transactions_shops.merge(c, on=['shop_id'], how='left')
transactions_shops['max_category_turnover'] = pd.to_numeric(transactions_shops['max_category_turnover'], downcast='unsigned')

-AREA
area



-UNITS
area_units
area_block_units
area_mean_units_block
area_day_units
area_mean_units_day
area_max_units_block
area_min_units_block
area_max_units_day
area_min_units_day

-TURNOVER
area_turnover
area_block_turnover
area_mean_turnover_block
area_day_turnover
area_mean_turnover_day
area_max_turnover_block
area_min_turnover_block
area_max_turnover_day
area_min_turnover_day

-PRICE
area_mean_price
area_mean_price_block


-TREND
area_first_two_blocks_units
area_last_two_blocks_units
area_fluctuation_units_first_last_blocks
area_first_two_blocks_mean_price
area_last_two_blocks_mean_price
area_fluctuation_price_first_last_blocks

-ENCODINGS
area_share_of_total_units
area_share_of_total_gross

In [49]:
transactions_shops['area_units'] = pd.to_numeric(transactions_shops.groupby(['area'])['item_cnt_day'].transform(np.sum), downcast='unsigned')
transactions_shops['area_mean_units_block'] = pd.to_numeric(transactions_shops_blocks.groupby(['area'])['area_block_units'].transform(np.mean), downcast='float') 
transactions_shops['area_day_units'] = pd.to_numeric(transactions_shops.groupby(['area','date'])['item_cnt_day'].transform(np.sum), downcast='unsigned')
transactions_shops['area_mean_units_day'] = pd.to_numeric(transactions_shops.groupby(['area'])['area_day_units'].transform(np.mean), downcast='float') 
transactions_shops['area_max_units_block'] = pd.to_numeric(transactions_shops_blocks.groupby(['area'])['area_block_units'].transform(np.max), downcast='unsigned')
transactions_shops['area_min_units_block'] = pd.to_numeric(transactions_shops_blocks.groupby(['area'])['area_block_units'].transform(np.min), downcast='unsigned')
transactions_shops['area_max_units_day'] = pd.to_numeric(transactions_shops.groupby(['area'])['area_day_units'].transform(np.max), downcast='unsigned')
transactions_shops['area_min_units_day'] = pd.to_numeric(transactions_shops.groupby(['area'])['area_day_units'].transform(np.min), downcast='unsigned')

In [50]:
transactions_shops['area_turnover'] = pd.to_numeric(transactions_shops.groupby(['area'])['turnover'].transform(np.sum), downcast='unsigned')
transactions_shops['area_mean_turnover_block'] = pd.to_numeric(transactions_shops_blocks.groupby(['area'])['area_block_turnover'].transform(np.mean), downcast='float') 
transactions_shops['area_day_turnover'] = pd.to_numeric(transactions_shops.groupby(['area','date'])['turnover'].transform(np.sum), downcast='unsigned')
transactions_shops['area_mean_turnover_day'] = pd.to_numeric(transactions_shops.groupby(['area'])['area_day_turnover'].transform(np.mean), downcast='float') 
transactions_shops['area_max_turnover_block'] = pd.to_numeric(transactions_shops_blocks.groupby(['area'])['area_block_turnover'].transform(np.max), downcast='unsigned')
transactions_shops['area_min_turnover_block'] = pd.to_numeric(transactions_shops_blocks.groupby(['area'])['area_block_turnover'].transform(np.min), downcast='unsigned')
transactions_shops['area_max_turnover_day'] = pd.to_numeric(transactions_shops.groupby(['area'])['area_day_turnover'].transform(np.max), downcast='unsigned')
transactions_shops['area_min_turnover_day'] = pd.to_numeric(transactions_shops.groupby(['area'])['area_day_turnover'].transform(np.min), downcast='unsigned')

In [51]:
transactions_shops['area_mean_price'] = pd.to_numeric(transactions_shops.groupby('area')['item_price'].transform(np.mean), downcast='float') 


In [52]:
area_first_two_blocks_units = transactions_shops.groupby(['area','is_first_two_blocks'], as_index=False)['item_cnt_day'].sum()
area_first_two_blocks_units = area_first_two_blocks_units[area_first_two_blocks_units['is_first_two_blocks'] == True].set_index('area').iloc[:,1]
transactions_shops['area_first_two_blocks_units'] = pd.to_numeric(transactions_shops['area'].map(area_first_two_blocks_units), downcast='unsigned')

area_last_two_blocks_units = transactions_shops.groupby(['area','is_last_two_blocks'], as_index=False)['item_cnt_day'].sum()
area_last_two_blocks_units = area_last_two_blocks_units[area_last_two_blocks_units['is_last_two_blocks'] == True].set_index('area').iloc[:,1]
transactions_shops['area_last_two_blocks_units'] = pd.to_numeric(transactions_shops['area'].map(area_last_two_blocks_units), downcast='unsigned')

transactions_shops['area_fluctuation_units_first_last_blocks'] =  pd.to_numeric(((transactions_shops['area_first_two_blocks_units'] - transactions_shops['area_last_two_blocks_units'] ) / \
                                                             transactions_shops['area_first_two_blocks_units']) * 100 * -1, downcast='float') 


area_first_two_blocks_mean_price = transactions_shops.groupby(['area','is_first_two_blocks'], as_index=False)['item_price'].mean()
area_first_two_blocks_mean_price = area_first_two_blocks_mean_price[area_first_two_blocks_mean_price['is_first_two_blocks'] == True].set_index('area').iloc[:,1]
transactions_shops['area_first_two_blocks_mean_price'] = pd.to_numeric(transactions_shops['area'].map(area_first_two_blocks_mean_price), downcast='unsigned')

area_last_two_blocks_mean_price = transactions_shops.groupby(['area','is_last_two_blocks'], as_index=False)['item_price'].mean()
area_last_two_blocks_mean_price = area_last_two_blocks_mean_price[area_last_two_blocks_mean_price['is_last_two_blocks'] == True].set_index('area').iloc[:,1]
transactions_shops['area_last_two_blocks_mean_price'] = pd.to_numeric(transactions_shops['area'].map(area_last_two_blocks_mean_price), downcast='unsigned')

transactions_shops['area_fluctuation_price_first_last_blocks'] =  pd.to_numeric(((transactions_shops['area_first_two_blocks_mean_price'] - transactions_shops['area_last_two_blocks_mean_price'] ) / \
                                                             transactions_shops['area_first_two_blocks_mean_price'])  * 100 * -1, downcast='float') 

In [53]:

transactions_shops.to_pickle("pickled/transactions_shops")
transactions_shops_blocks.to_pickle("pickled/transactions_shops_blocks")


del transactions_shops
del transactions_shops_blocks
gc.collect()

189

shop_category


-UNITS
shop_category_units
shop_category_block_units
shop_category_mean_units_block
shop_category_day_units
shop_category_mean_units_day
shop_category_max_units_block
shop_category_min_units_block
shop_category_max_units_day
shop_category_min_units_day

-TURNOVER
shop_category_turnover
shop_category_block_turnover
shop_category_mean_turnover_block
shop_category_day_turnover
shop_category_mean_turnover_day
shop_category_max_turnover_block
shop_category_min_turnover_block
shop_category_max_turnover_day
shop_category_min_turnover_day

-PRICE
shop_category_mean_price
shop_category_mean_price_block


-TREND
shop_category_first_two_blocks_units
shop_category_last_two_blocks_units
shop_category_fluctuation_units_first_last_blocks
shop_category_first_two_blocks_mean_price
shop_category_last_two_blocks_mean_price
shop_category_fluctuation_price_first_last_blocks

-ENCODINGS
shop_category_share_of_total_units
shop_category_share_of_total_gross

In [54]:
gc.collect()
transactions_shops_categories = transactions.copy()
transactions_shops_categories_blocks = transactions.copy()

In [55]:
transactions_shops_categories_blocks['shop_category_block_units'] = pd.to_numeric(transactions_shops_categories_blocks.groupby(['shop_id','item_category_id','date_block_num'])['item_cnt_day'].transform(np.sum), downcast='unsigned')
transactions_shops_categories_blocks['shop_category_block_turnover'] = pd.to_numeric(transactions_shops_categories_blocks.groupby(['shop_id','item_category_id','date_block_num'])['turnover'].transform(np.sum), downcast='unsigned')
transactions_shops_categories_blocks['shop_category_mean_price_block'] = pd.to_numeric(transactions_shops_categories_blocks.groupby(['shop_id','item_category_id', 'date_block_num'])['item_price'].transform(np.mean), downcast='float') 

In [56]:
transactions_shops_categories['shop_category_units'] = pd.to_numeric(transactions_shops_categories.groupby(['shop_id','item_category_id'])['item_cnt_day'].transform(np.sum), downcast='unsigned')
transactions_shops_categories['shop_category_mean_units_block'] = pd.to_numeric(transactions_shops_categories_blocks.groupby(['shop_id','item_category_id'])['shop_category_block_units'].transform(np.mean), downcast='float') 
transactions_shops_categories['shop_category_day_units'] = pd.to_numeric(transactions_shops_categories.groupby(['shop_id','item_category_id','date'])['item_cnt_day'].transform(np.sum), downcast='unsigned')
transactions_shops_categories['shop_category_mean_units_day'] = pd.to_numeric(transactions_shops_categories.groupby(['shop_id','item_category_id'])['shop_category_day_units'].transform(np.mean), downcast='float') 
transactions_shops_categories['shop_category_max_units_block'] = pd.to_numeric(transactions_shops_categories_blocks.groupby(['shop_id','item_category_id'])['shop_category_block_units'].transform(np.max), downcast='unsigned')
transactions_shops_categories['shop_category_min_units_block'] = pd.to_numeric(transactions_shops_categories_blocks.groupby(['shop_id','item_category_id'])['shop_category_block_units'].transform(np.min), downcast='unsigned')
transactions_shops_categories['shop_category_max_units_day'] = pd.to_numeric(transactions_shops_categories.groupby(['shop_id','item_category_id'])['shop_category_day_units'].transform(np.max), downcast='unsigned')
transactions_shops_categories['shop_category_min_units_day'] = pd.to_numeric(transactions_shops_categories.groupby(['shop_id','item_category_id'])['shop_category_day_units'].transform(np.min), downcast='unsigned')


In [57]:
transactions_shops_categories['shop_category_turnover'] = pd.to_numeric(transactions_shops_categories.groupby(['shop_id','item_category_id'])['turnover'].transform(np.sum), downcast='unsigned')
transactions_shops_categories['shop_category_mean_turnover_block'] = pd.to_numeric(transactions_shops_categories_blocks.groupby(['shop_id','item_category_id'])['shop_category_block_turnover'].transform(np.mean), downcast='float') 
transactions_shops_categories['shop_category_day_turnover'] = pd.to_numeric(transactions_shops_categories.groupby(['shop_id','item_category_id','date'])['turnover'].transform(np.sum), downcast='unsigned')
transactions_shops_categories['shop_category_mean_turnover_day'] = pd.to_numeric(transactions_shops_categories.groupby(['shop_id','item_category_id'])['shop_category_day_turnover'].transform(np.mean), downcast='float') 
transactions_shops_categories['shop_category_max_turnover_block'] = pd.to_numeric(transactions_shops_categories_blocks.groupby(['shop_id','item_category_id'])['shop_category_block_turnover'].transform(np.max), downcast='unsigned')
transactions_shops_categories['shop_category_min_turnover_block'] = pd.to_numeric(transactions_shops_categories_blocks.groupby(['shop_id','item_category_id'])['shop_category_block_turnover'].transform(np.min), downcast='unsigned')
transactions_shops_categories['shop_category_max_turnover_day'] = pd.to_numeric(transactions_shops_categories.groupby(['shop_id','item_category_id'])['shop_category_day_turnover'].transform(np.max), downcast='unsigned')
transactions_shops_categories['shop_category_min_turnover_day'] = pd.to_numeric(transactions_shops_categories.groupby(['shop_id','item_category_id'])['shop_category_day_turnover'].transform(np.min), downcast='unsigned')

In [58]:
transactions_shops_categories['shop_category_mean_price'] = pd.to_numeric(transactions_shops_categories.groupby(['shop_id','item_category_id'])['item_price'].transform(np.mean), downcast='float') 


In [59]:

transactions_shops_categories.to_pickle("pickled/transactions_shops_categories")
transactions_shops_categories_blocks.to_pickle("pickled/transactions_shops_categories_blocks")


del transactions_shops_categories
del transactions_shops_categories_blocks
gc.collect()

245

In [60]:
###
#DEBUG
###


pd.set_option('display.max_columns', None)  
pd.set_option('display.expand_frame_repr', False)
pd.set_option('max_colwidth', -1)
#transactions.sample(10).sort_values(by=['item_units'], ascending=False)
#transactions[transactions['item_category_id'] == 58].sample(10).sort_values(by=['total_sales_units'], ascending=False)

In [61]:
train_item_ids = transactions['item_id'].unique()
train_shop_ids = transactions['shop_id'].unique()
test_item_ids = test['item_id'].unique()
test_shop_ids = test['shop_id'].unique()
train_blocks = transactions['date_block_num'].unique()

all_item_ids = np.unique(np.append(test_item_ids,train_item_ids))
all_shop_ids = np.unique(np.append(train_shop_ids,test_shop_ids))

In [62]:
combinations = []
for shop in all_shop_ids:
    #get all article ids ever associated to this shop
    train_ids = transactions[transactions['shop_id'] == shop]['item_id'].unique()
    test_ids = test[test['shop_id'] == shop]['item_id'].unique()
    all_shop = np.unique(np.append(train_ids, test_ids))
    all_shop_combo = [[item, shop, block] for item in all_shop for block in train_blocks]
    for combo in all_shop_combo:
        combinations.append(combo)

In [63]:
all_combos = pd.DataFrame(np.unique(np.vstack([combinations]), axis=0), columns=['item_id', 'shop_id', 'date_block_num'])

In [64]:
all_combos.head()

Unnamed: 0,item_id,shop_id,date_block_num
0,0.0,54.0,12.0
1,0.0,54.0,13.0
2,0.0,54.0,14.0
3,0.0,54.0,15.0
4,0.0,54.0,16.0


In [65]:
all_combos['item_id'] = pd.to_numeric(all_combos['item_id'], downcast='unsigned')
all_combos['shop_id'] = pd.to_numeric(all_combos['shop_id'], downcast='unsigned')
all_combos['date_block_num'] = pd.to_numeric(all_combos['date_block_num'], downcast='unsigned')

In [66]:
len(all_combos)

8333930

In [67]:
all_combos = pd.merge(all_combos, items[['item_id', 'item_category_id']], on='item_id', how='left')

In [68]:
dates = transactions[['date_block_num', 'month', 'year']].drop_duplicates(['date_block_num', 'month', 'year'])

dates_dict = {}

for index,row in dates.iterrows():
    dates_dict[row['date_block_num']] = {"month": row['month'], "year": row['year']}
    
dates_dict

{20: {'month': 9, 'year': 2014},
 15: {'month': 4, 'year': 2014},
 18: {'month': 7, 'year': 2014},
 19: {'month': 8, 'year': 2014},
 21: {'month': 10, 'year': 2014},
 22: {'month': 11, 'year': 2014},
 23: {'month': 12, 'year': 2014},
 24: {'month': 1, 'year': 2015},
 27: {'month': 4, 'year': 2015},
 25: {'month': 2, 'year': 2015},
 12: {'month': 1, 'year': 2014},
 14: {'month': 3, 'year': 2014},
 16: {'month': 5, 'year': 2014},
 17: {'month': 6, 'year': 2014},
 13: {'month': 2, 'year': 2014},
 26: {'month': 3, 'year': 2015},
 28: {'month': 5, 'year': 2015},
 29: {'month': 6, 'year': 2015},
 30: {'month': 7, 'year': 2015},
 31: {'month': 8, 'year': 2015},
 32: {'month': 9, 'year': 2015},
 33: {'month': 10, 'year': 2015}}

In [69]:
all_combos['month'] = pd.to_numeric(all_combos['date_block_num'].apply(lambda block: dates_dict[block]['month']), downcast='unsigned')
all_combos['year'] = pd.to_numeric(all_combos['date_block_num'].apply(lambda block: dates_dict[block]['year']), downcast='unsigned')

In [70]:
def downcast(df, columns, dtypes):
    for column in columns:
        dtype = dtypes[column].kind
        if dtype in ['O', 'b']:
            continue  
        if dtype == 'u':
            df[column] = pd.to_numeric(df[column].astype(int), downcast='unsigned')
        elif dtype == 'i':
            df[column] = pd.to_numeric(df[column].astype(int), downcast='signed')
        else:
            df[column] = pd.to_numeric(df[column], downcast='float')

In [71]:
def fillnas(df, columns, dtypes):
    for column in columns:
        dtype = dtypes[column].kind
        if dtype in ['O', 'b']:
            continue  
        df[column].fillna(0, inplace=True)

In [72]:
#del training

In [73]:
transactions_items_columns = ['item_id', 'item_first_block',
       'item_last_block', 'is_first_two_blocks', 'is_last_two_blocks',
       'item_units', 'item_mean_units_block', 'item_day_units',
       'item_mean_units_day', 'item_max_units_block',
       'item_min_units_block', 'item_max_units_day', 'item_min_units_day',
       'item_turnover', 'item_mean_turnover_block', 'item_day_turnover',
       'item_mean_turnover_day', 'item_max_turnover_block',
       'item_min_turnover_block', 'item_max_turnover_day',
       'item_min_turnover_day', 'item_days_of_activity',
       'item_blocks_of_activity', 'item_days_since_start',
       'item_mean_day_between_activity',
       'item_longest_stretch_days_without_activity',
       'item_longest_stretch_blocks_without_activity',
       'item_longest_stretch_block_with_activity',
       'item_number_of_consecutive_days_with_activity',
       'item_days_between_start_and_first_activity',
       'item_blocks_between_start_and_first_activity', 'item_first_day',
       'item_last_day', 'item_activity_on_all_blocks', 'item_mean_price',
       'item_min_price', 'item_max_price', 'item_number_different_prices',
       'item_price_amplitude', 
       'item_deviation_mean_category_price',
       'item_first_two_blocks_units', 'item_last_two_blocks_units',
       'item_fluctuation_units_first_last_blocks',
       'item_first_two_blocks_mean_price',
       'item_last_two_blocks_mean_price',
       'item_fluctuation_price_first_last_blocks',
       'item_share_of_total_units', 'item_share_of_total_turnover']

In [74]:
transactions_items = pd.read_pickle("pickled/transactions_items")
transactions_items_dtypes = transactions_items.dtypes
training = pd.merge(all_combos, transactions_items[transactions_items_columns].drop_duplicates('item_id'), on=['item_id'], how='left', copy=False)

del transactions_items
fillnas(training, transactions_items_columns, transactions_items_dtypes)
downcast(training, transactions_items_columns, transactions_items_dtypes)
gc.collect()

42

In [75]:
transactions_items_blocks_columns =  [ 'date_block_num', 'item_id', 'item_block_units',
 'item_block_turnover',
 'item_mean_price_block']

In [76]:
transactions_items_blocks = pd.read_pickle("pickled/transactions_items_blocks")
transactions_items_blocks_dtypes = transactions_items_blocks.dtypes

training = pd.merge(training, transactions_items_blocks[transactions_items_blocks_columns]\
                    .drop_duplicates(['item_id', 'date_block_num']), on=['item_id','date_block_num'], how='left', copy=False)

del transactions_items_blocks
training.fillna(0, inplace=True)
downcast(training, transactions_items_blocks_columns, transactions_items_blocks_dtypes)
gc.collect()

14

In [77]:
gc.collect()
training.info(memory_usage='deep')

<class 'pandas.core.frame.DataFrame'>
Int64Index: 8333930 entries, 0 to 8333929
Data columns (total 56 columns):
item_id                                          int16
shop_id                                          uint8
date_block_num                                   uint8
item_category_id                                 uint8
month                                            uint8
year                                             uint16
item_first_block                                 uint8
item_last_block                                  uint8
is_first_two_blocks                              object
is_last_two_blocks                               object
item_units                                       float32
item_mean_units_block                            float32
item_day_units                                   int16
item_mean_units_day                              float32
item_max_units_block                             int16
item_min_units_block                             int1

In [78]:
transactions_categories_columns = [
       'item_category_id',
       'category_units', 'category_mean_units_block',
       'category_day_units', 'category_mean_units_day',
       'category_max_units_block', 'category_min_units_block',
       'category_max_units_day', 'category_min_units_day',
       'category_turnover', 'category_mean_turnover_block',
       'category_day_turnover', 'category_mean_turnover_day',
       'category_max_turnover_block', 'category_min_turnover_block',
       'category_max_turnover_day', 'category_min_turnover_day',
       'category_mean_price', 'category_min_price', 'category_max_price',
       'category_first_two_blocks_units',
       'category_last_two_blocks_units',
       'category_fluctuation_units_first_last_blocks',
       'category_first_two_blocks_mean_price',
       'category_last_two_blocks_mean_price',
       'category_fluctuation_price_first_last_blocks', 'subcategory',
       'video_game', 'gaming_old_gen', 'gaming_new_gen', 'pc_games',
       'payment_cards', 'movies', 'movies_niche', 'books', 'music',
       'music_CD', 'music_vinyl', 'gifts', 'software',
       'subcategory_units',
       'subcategory_mean_units_block', 'subcategory_day_units',
       'subcategory_mean_units_day', 'subcategory_max_units_block',
       'subcategory_min_units_block', 'subcategory_max_units_day',
       'subcategory_min_units_day', 'subcategory_turnover', 'subcategory_mean_turnover_block',
       'subcategory_day_turnover', 'subcategory_mean_turnover_day',
       'subcategory_max_turnover_block', 'subcategory_min_turnover_block',
       'subcategory_max_turnover_day', 'subcategory_min_turnover_day',
       'category_share_of_total_units',
       'category_share_of_total_turnover',
       'subcategory_share_of_total_units',
       'subcategory_share_of_total_turnover',
       'subcategory_first_two_blocks_units',
       'subcategory_last_two_blocks_units',
       'subcategory_fluctuation_units_first_last_blocks',
       'subcategory_first_two_blocks_mean_price',
       'subcategory_last_two_blocks_mean_price',
       'subcategory_fluctuation_price_first_last_blocks']


In [79]:
training.columns.values

array(['item_id', 'shop_id', 'date_block_num', 'item_category_id',
       'month', 'year', 'item_first_block', 'item_last_block',
       'is_first_two_blocks', 'is_last_two_blocks', 'item_units',
       'item_mean_units_block', 'item_day_units', 'item_mean_units_day',
       'item_max_units_block', 'item_min_units_block',
       'item_max_units_day', 'item_min_units_day', 'item_turnover',
       'item_mean_turnover_block', 'item_day_turnover',
       'item_mean_turnover_day', 'item_max_turnover_block',
       'item_min_turnover_block', 'item_max_turnover_day',
       'item_min_turnover_day', 'item_days_of_activity',
       'item_blocks_of_activity', 'item_days_since_start',
       'item_mean_day_between_activity',
       'item_longest_stretch_days_without_activity',
       'item_longest_stretch_blocks_without_activity',
       'item_longest_stretch_block_with_activity',
       'item_number_of_consecutive_days_with_activity',
       'item_days_between_start_and_first_activity',
       '

In [80]:
transactions_categories = pd.read_pickle("pickled/transactions_categories")
transactions_categories_dtypes = transactions_categories.dtypes
training = pd.merge(training, transactions_categories[transactions_categories_columns]\
                    .drop_duplicates('item_category_id'), on=['item_category_id'], how='left', copy=False)

del transactions_categories
fillnas(training, transactions_categories_columns, transactions_categories_dtypes)
downcast(training, transactions_categories_columns, transactions_categories_dtypes)
gc.collect()

21

In [81]:
transactions_categories_blocks_columns = ['item_category_id', 'date_block_num', 'category_block_units',
 'category_block_turnover',
 'category_mean_price_block', 'subcategory_block_units',
 'subcategory_block_turnover',
 'subcategory_mean_price_block']

In [82]:
transactions_categories_blocks = pd.read_pickle("pickled/transactions_categories_blocks")
transactions_categories_blocks_dtypes = transactions_categories_blocks.dtypes
training = pd.merge(training, transactions_categories_blocks[transactions_categories_blocks_columns]\
                    .drop_duplicates(['item_category_id', 'date_block_num']), on=['item_category_id', 'date_block_num'], how='left', copy=False)

del transactions_categories_blocks
fillnas(training, transactions_categories_blocks_columns, transactions_categories_blocks_dtypes)
downcast(training, transactions_categories_blocks_columns, transactions_categories_blocks_dtypes)
gc.collect()

14

In [83]:
transactions_shops_columns = ['shop_id', 
       'shop_units', 'shop_mean_units_block', 'shop_day_units',
       'shop_mean_units_day', 'shop_max_units_block',
       'shop_min_units_block', 'shop_max_units_day', 'shop_min_units_day',
       'shop_turnover', 'shop_mean_turnover_block', 'shop_day_turnover',
       'shop_mean_turnover_day', 'shop_max_turnover_block',
       'shop_min_turnover_block', 'shop_max_turnover_day',
       'shop_min_turnover_day', 'shop_mean_price',
       'shop_first_two_blocks_units', 'shop_last_two_blocks_units',
       'shop_fluctuation_units_first_last_blocks',
       'shop_first_two_blocks_mean_price',
       'shop_last_two_blocks_mean_price',
       'shop_fluctuation_price_first_last_blocks', 'shop_share_of_units',
       'shop_share_of_turnover', 'shop_TC', 'shop_TRK', 'shop_SEC',
       'shop_shopping_center', 'shop_moscow', 'max_category_units',
       'max_category_turnover', 'area', 'area_units',
       'area_mean_units_block', 'area_day_units', 'area_mean_units_day',
       'area_max_units_block', 'area_min_units_block',
       'area_max_units_day', 'area_min_units_day', 'area_turnover',
     'area_mean_turnover_block',
       'area_day_turnover', 'area_mean_turnover_day',
       'area_max_turnover_block', 'area_min_turnover_block',
       'area_max_turnover_day', 'area_min_turnover_day',
       'area_mean_price',
       'area_first_two_blocks_units', 'area_last_two_blocks_units',
       'area_fluctuation_units_first_last_blocks',
       'area_first_two_blocks_mean_price',
       'area_last_two_blocks_mean_price',
       'area_fluctuation_price_first_last_blocks']

In [84]:
transactions_shops = pd.read_pickle("pickled/transactions_shops")
transactions_shops_dtypes = transactions_shops.dtypes
training = pd.merge(training, transactions_shops[transactions_shops_columns]\
                    .drop_duplicates('shop_id'), on=['shop_id'], how='left', copy=False)

del transactions_shops
fillnas(training, transactions_shops_columns, transactions_shops_dtypes)
downcast(training, transactions_shops_columns, transactions_shops_dtypes)
gc.collect()

14

In [85]:
transactions_shops_blocks_columns = ['shop_id', 'date_block_num',  'shop_block_units',
 'shop_block_turnover',
 'shop_mean_price_block', 'area_block_units',
 'area_block_turnover',
 'area_mean_price_block']

In [86]:
transactions_shops_blocks = pd.read_pickle("pickled/transactions_shops_blocks")
transactions_shops_blocks_dtypes = transactions_shops_blocks.dtypes
training = pd.merge(training, transactions_shops_blocks[transactions_shops_blocks_columns]\
                    .drop_duplicates(['shop_id', 'date_block_num']), on=['shop_id', 'date_block_num'], how='left', copy=False)

del transactions_shops_blocks
fillnas(training, transactions_shops_blocks_columns, transactions_shops_blocks_dtypes)
downcast(training, transactions_shops_blocks_columns, transactions_shops_blocks_dtypes)
gc.collect()

14

In [87]:
transactions_shops_categories_columns = [ 'shop_id', 
       'item_category_id',
       'shop_category_units', 'shop_category_mean_units_block',
       'shop_category_day_units', 'shop_category_mean_units_day',
       'shop_category_max_units_block', 'shop_category_min_units_block',
       'shop_category_max_units_day', 'shop_category_min_units_day',
       'shop_category_turnover', 'shop_category_mean_turnover_block',
       'shop_category_day_turnover', 'shop_category_mean_turnover_day',
       'shop_category_max_turnover_block',
       'shop_category_min_turnover_block',
       'shop_category_max_turnover_day', 'shop_category_min_turnover_day',
       'shop_category_mean_price']

In [88]:
transactions_shops_categories = pd.read_pickle("pickled/transactions_shops_categories")
transactions_shops_categories_dtypes = transactions_shops_categories.dtypes
training = pd.merge(training, transactions_shops_categories[transactions_shops_categories_columns]\
                    .drop_duplicates(['shop_id','item_category_id']), on=['shop_id','item_category_id'], how='left', copy=False)

del transactions_shops_categories
fillnas(training, transactions_shops_categories_columns, transactions_shops_categories_dtypes)
downcast(training, transactions_shops_categories_columns, transactions_shops_categories_dtypes)
gc.collect()

14

In [89]:
transactions_shops_categories_blocks_columns = ['shop_id', 'item_category_id', 'date_block_num',   'shop_category_block_units',
 'shop_category_block_turnover',
 'shop_category_mean_price_block']

In [90]:
transactions_shops_categories_blocks = pd.read_pickle("pickled/transactions_shops_categories_blocks")
transactions_shops_categories_blocks_dtypes = transactions_shops_categories_blocks.dtypes
training = pd.merge(training, transactions_shops_categories_blocks[transactions_shops_categories_blocks_columns]\
                    .drop_duplicates(['shop_id','item_category_id','date_block_num']),\
                    on=['shop_id','item_category_id','date_block_num'], how='left', copy=False)

del transactions_shops_categories_blocks
fillnas(training, transactions_shops_categories_blocks_columns, transactions_shops_categories_blocks_dtypes)
downcast(training, transactions_shops_categories_blocks_columns, transactions_shops_categories_blocks_dtypes)
gc.collect()

14

In [91]:
len(training)

8333930

In [118]:

#training.to_pickle("pickled/training_pre_lags")
training = pd.read_pickle("pickled/training_pre_lags")

In [119]:
lag_columns = [
 'item_block_units',
 'item_block_turnover',
 'item_mean_price_block',
 'category_block_units',
 'category_block_turnover',
 'category_mean_price_block',
 'subcategory_block_units',
 'subcategory_block_turnover',
 'subcategory_mean_price_block',
 'shop_block_units',
 'shop_block_turnover',
 'shop_mean_price_block',
 'area_block_units',
 'area_block_turnover',
 'area_mean_price_block',
 'shop_category_block_units',
 'shop_category_turnover',
 'shop_category_mean_price_block'
 ]

In [8]:
def downcast_lags(df, lagged_names):
    for lagged_name in lagged_names:
        df[lagged_name].fillna(0,inplace=True)    
    for column in lagged_names:
        if "mean" in column:
            df[column] = pd.to_numeric(df[column], downcast='float')
        else:
            df[column] = pd.to_numeric(df[column].astype(int), downcast='unsigned')
    return df

In [9]:
lags = [1,2,3]


def add_lag_features(df, lag_columns, idx_columns):

    gc.collect()
    def lagged_name(lag_column, lag):
        return "%s_lag_%d" % (lag_column, lag)

    merge_columns = ['lagged_block'] + idx_columns

    for lag in lags:
        print(lag)
        lagged = df[['date_block_num'] + idx_columns + lag_columns].copy()
        lagged.rename(columns={'date_block_num':'lagged_block'},inplace=True)
        df['lagged_block'] = df['date_block_num'] - lag
        lagged_names = [lagged_name(c,lag) for c in lag_columns]
        lag_mapping = dict(zip(lag_columns, lagged_names))
        lagged.rename(columns=lag_mapping,inplace=True)
        df = pd.merge(df.set_index(merge_columns),lagged[lagged_names+merge_columns]\
                        .drop_duplicates(lagged_names+merge_columns)\
                        .set_index(merge_columns)\
                        ,on=merge_columns,how='left',copy=False)
        gc.collect()
        df.reset_index(inplace=True)
    
        df = downcast_lags(df, lagged_names)
        del lagged
        gc.collect()
        
    return df

In [122]:
gc.collect()
def lagged_name(lag_column, lag):
    return "%s_lag_%d" % (lag_column, lag)

lag_columns = [
 'item_block_units',
 'item_block_turnover',
 'item_mean_price_block',
 ]

idx_columns = ['item_id']

training = add_lag_features(training,lag_columns,idx_columns)

1
2
3


In [127]:
training.columns.values

array(['lagged_block', 'item_category_id', 'item_id', 'shop_id',
       'date_block_num', 'month', 'year', 'item_first_block',
       'item_last_block', 'is_first_two_blocks', 'is_last_two_blocks',
       'item_units', 'item_mean_units_block', 'item_day_units',
       'item_mean_units_day', 'item_max_units_block',
       'item_min_units_block', 'item_max_units_day', 'item_min_units_day',
       'item_turnover', 'item_mean_turnover_block', 'item_day_turnover',
       'item_mean_turnover_day', 'item_max_turnover_block',
       'item_min_turnover_block', 'item_max_turnover_day',
       'item_min_turnover_day', 'item_days_of_activity',
       'item_blocks_of_activity', 'item_days_since_start',
       'item_mean_day_between_activity',
       'item_longest_stretch_days_without_activity',
       'item_longest_stretch_blocks_without_activity',
       'item_longest_stretch_block_with_activity',
       'item_number_of_consecutive_days_with_activity',
       'item_days_between_start_and_first_act

In [124]:

pd.set_option('display.max_columns', None)  
pd.set_option('display.expand_frame_repr', False)
pd.set_option('max_colwidth', -1)
training[(training['item_id'] == 30) & (training['shop_id'] == 30)]\
                .drop_duplicates(['item_id', 'date_block_num'])[['item_id','shop_id','date_block_num','item_block_units','item_block_turnover',\
                'item_block_units_lag_1',
 'item_block_turnover_lag_1', 'item_mean_price_block_lag_1',
 'item_block_units_lag_2', 'item_block_turnover_lag_2',
 'item_mean_price_block_lag_2', 'item_block_units_lag_3',
 'item_block_turnover_lag_3' ,'item_mean_price_block_lag_3']]

Unnamed: 0,item_id,shop_id,date_block_num,item_block_units,item_block_turnover,item_block_units_lag_1,item_block_turnover_lag_1,item_mean_price_block_lag_1,item_block_units_lag_2,item_block_turnover_lag_2,item_mean_price_block_lag_2,item_block_units_lag_3,item_block_turnover_lag_3,item_mean_price_block_lag_3
1210,30,30,12,58,9802,0,0,0.0,0,0,0.0,0,0,0.0
1211,30,30,13,24,3986,58,9802,169.0,0,0,0.0,0,0,0.0
1212,30,30,14,31,5239,24,3986,166.083328,58,9802,169.0,0,0,0.0
1213,30,30,15,21,3479,31,5239,169.0,24,3986,166.083328,58,9802,169.0
1214,30,30,16,16,2634,21,3479,165.666672,31,5239,169.0,24,3986,166.083328
1215,30,30,17,13,2197,16,2634,164.625,21,3479,165.666672,31,5239,169.0
1216,30,30,18,13,2127,13,2197,169.0,16,2634,164.625,21,3479,165.666672
1217,30,30,19,12,2028,13,2127,163.615387,13,2197,169.0,16,2634,164.625
1218,30,30,20,11,1859,12,2028,169.0,13,2127,163.615387,13,2197,169.0
1219,30,30,21,13,2197,11,1859,169.0,12,2028,169.0,13,2127,163.615387


In [125]:
gc.collect()
def lagged_name(lag_column, lag):
    return "%s_lag_%d" % (lag_column, lag)

lag_columns = [
 'category_block_units',
 'category_block_turnover',
 'category_mean_price_block',
 'subcategory_block_units',
 'subcategory_block_turnover',
 'subcategory_mean_price_block',
 ]

idx_columns = ['item_category_id']

training = add_lag_features(training,lag_columns,idx_columns)

1
2
3


In [10]:
gc.collect()
#training.to_pickle("pickled/training_mid_lags")
training = pd.read_pickle("pickled/training_mid_lags")

In [11]:
gc.collect()
def lagged_name(lag_column, lag):
    return "%s_lag_%d" % (lag_column, lag)

lag_columns = [
 'shop_block_units',
 'shop_block_turnover',
 'shop_mean_price_block',
 'area_block_units',
 'area_block_turnover',
 'area_mean_price_block'
 ]

idx_columns = ['shop_id']


training = add_lag_features(training,lag_columns,idx_columns)

1
2
3


In [12]:
lag_columns = [
  'shop_category_block_units',
 'shop_category_turnover',
 'shop_category_mean_price_block'
 ]

idx_columns = ['shop_id','item_category_id']


training = add_lag_features(training,lag_columns,idx_columns)

1
2
3


In [13]:
training.drop(columns=['lagged_block'],inplace=True)

In [14]:
training.columns.values

array(['shop_id', 'item_category_id', 'item_id', 'date_block_num',
       'month', 'year', 'item_first_block', 'item_last_block',
       'is_first_two_blocks', 'is_last_two_blocks', 'item_units',
       'item_mean_units_block', 'item_day_units', 'item_mean_units_day',
       'item_max_units_block', 'item_min_units_block',
       'item_max_units_day', 'item_min_units_day', 'item_turnover',
       'item_mean_turnover_block', 'item_day_turnover',
       'item_mean_turnover_day', 'item_max_turnover_block',
       'item_min_turnover_block', 'item_max_turnover_day',
       'item_min_turnover_day', 'item_days_of_activity',
       'item_blocks_of_activity', 'item_days_since_start',
       'item_mean_day_between_activity',
       'item_longest_stretch_days_without_activity',
       'item_longest_stretch_blocks_without_activity',
       'item_longest_stretch_block_with_activity',
       'item_number_of_consecutive_days_with_activity',
       'item_days_between_start_and_first_activity',
       '

In [16]:
gc.collect()
training.to_pickle("pickled/training_post_lags")

In [17]:
gc.collect()
training.info(memory_usage='deep')

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8333930 entries, 0 to 8333929
Columns: 263 entries, shop_id to shop_category_mean_price_block_lag_3
dtypes: bool(5), category(2), float32(96), int16(22), int32(53), int64(1), int8(2), object(16), uint16(31), uint32(23), uint64(2), uint8(10)
memory usage: 10.5 GB


In [18]:
training.dtypes

shop_id                                  uint64
item_category_id                         uint64
item_id                                   int64
date_block_num                            uint8
month                                     uint8
year                                     uint16
item_first_block                          uint8
item_last_block                           uint8
is_first_two_blocks                      object
is_last_two_blocks                       object
item_units                              float32
item_mean_units_block                   float32
item_day_units                            int16
item_mean_units_day                     float32
item_max_units_block                      int16
item_min_units_block                      int16
item_max_units_day                        int16
item_min_units_day                         int8
item_turnover                             int32
item_mean_turnover_block                float32
item_day_turnover                       

In [19]:
cols = ['shop_id','item_id', 'date_block_num']
training = pd.merge(training.set_index(cols), transactions[cols+['y']].drop_duplicates(cols).set_index(cols), on=cols, how='left', copy=False)

transactions.reset_index(inplace=True)
training.reset_index(inplace=True)

In [20]:
training['y'] = training['y'].fillna(0)

In [3]:
#training.to_pickle("pickled/training_pre_catboost")
training = pd.read_pickle("pickled/training")

In [23]:
gc.collect()

189

In [34]:
pd.set_option('display.max_rows', 300)
for col in training.columns:
    if training.dtypes[col].kind == 'b':
        print(col, training.dtypes[col])

shop_TC bool
shop_TRK bool
shop_SEC bool
shop_shopping_center bool
shop_moscow bool
1 bool
2 bool
3 bool
4 bool
5 bool
6 bool
7 bool
8 bool
9 bool
10 bool
11 bool
12 bool


In [None]:
for m in range(1,13):
    training[str(m)] = training['month'] == m

In [5]:
x_train = training[training['date_block_num'] < 33]
y_train = x_train['y']
#x_train = x_train.drop(columns=['y'])

x_val = training[training['date_block_num'] == 33]
y_val = x_val['y']
#x_val = x_val.drop(columns=['y'])


In [6]:
del training
gc.collect()

95

In [7]:
x_train.head()

Unnamed: 0,shop_id,item_id,date_block_num,item_category_id,month,year,item_first_block,item_last_block,is_first_two_blocks,is_last_two_blocks,...,3,4,5,6,7,8,9,10,11,12
0,54,0,12,40,1,2014,20,20,0.0,1.0,...,False,False,False,False,False,False,False,False,False,False
1,54,0,13,40,2,2014,20,20,0.0,1.0,...,False,False,False,False,False,False,False,False,False,False
2,54,0,14,40,3,2014,20,20,0.0,1.0,...,True,False,False,False,False,False,False,False,False,False
3,54,0,15,40,4,2014,20,20,0.0,1.0,...,False,True,False,False,False,False,False,False,False,False
4,54,0,16,40,5,2014,20,20,0.0,1.0,...,False,False,True,False,False,False,False,False,False,False


In [36]:
training.columns.values

array(['shop_id', 'item_id', 'date_block_num', 'item_category_id',
       'month', 'year', 'item_first_block', 'item_last_block',
       'is_first_two_blocks', 'is_last_two_blocks', 'item_units',
       'item_mean_units_block', 'item_day_units', 'item_mean_units_day',
       'item_max_units_block', 'item_min_units_block',
       'item_max_units_day', 'item_min_units_day', 'item_turnover',
       'item_mean_turnover_block', 'item_day_turnover',
       'item_mean_turnover_day', 'item_max_turnover_block',
       'item_min_turnover_block', 'item_max_turnover_day',
       'item_min_turnover_day', 'item_days_of_activity',
       'item_blocks_of_activity', 'item_days_since_start',
       'item_mean_day_between_activity',
       'item_longest_stretch_days_without_activity',
       'item_longest_stretch_blocks_without_activity',
       'item_longest_stretch_block_with_activity',
       'item_number_of_consecutive_days_with_activity',
       'item_days_between_start_and_first_activity',
       '

In [9]:
cb_features = [
       'item_first_block', 'item_last_block',
       'is_first_two_blocks', 'is_last_two_blocks', 'item_units',
       'item_mean_units_block', 'item_mean_units_day',
       'item_max_units_block', 'item_min_units_block',
       'item_max_units_day', 'item_min_units_day', 'item_turnover',
       'item_mean_turnover_block',
       'item_mean_turnover_day', 'item_max_turnover_block',
       'item_min_turnover_block', 'item_max_turnover_day',
       'item_min_turnover_day', 'item_days_of_activity',
       'item_blocks_of_activity', 'item_days_since_start',
       'item_mean_day_between_activity',
       'item_longest_stretch_days_without_activity',
       'item_longest_stretch_blocks_without_activity',
       'item_longest_stretch_block_with_activity',
       'item_number_of_consecutive_days_with_activity',
       'item_days_between_start_and_first_activity',
       'item_blocks_between_start_and_first_activity', 'item_first_day',
       'item_last_day', 'item_activity_on_all_blocks', 'item_mean_price',
       'item_min_price', 'item_max_price', 'item_number_different_prices',
       'item_price_amplitude', 'item_deviation_mean_category_price',
       'item_first_two_blocks_units', 'item_last_two_blocks_units',
       'item_fluctuation_units_first_last_blocks',
       'item_first_two_blocks_mean_price',
       'item_last_two_blocks_mean_price',
       'item_fluctuation_price_first_last_blocks',
       'item_share_of_total_units', 'item_share_of_total_turnover',
       'category_units', 'category_mean_units_block',
       'category_mean_units_day',
       'category_max_units_block', 'category_min_units_block',
       'category_max_units_day', 'category_min_units_day',
       'category_turnover', 'category_mean_turnover_block',
       'category_mean_turnover_day',
       'category_max_turnover_block', 'category_min_turnover_block',
       'category_max_turnover_day', 'category_min_turnover_day',
       'category_mean_price', 'category_min_price', 'category_max_price',
       'category_first_two_blocks_units',
       'category_last_two_blocks_units',
       'category_fluctuation_units_first_last_blocks',
       'category_first_two_blocks_mean_price',
       'category_last_two_blocks_mean_price',
       'category_fluctuation_price_first_last_blocks',
       'video_game', 'gaming_old_gen', 'gaming_new_gen', 'pc_games',
       'payment_cards', 'movies', 'movies_niche', 'books', 'music',
       'music_CD', 'music_vinyl', 'gifts', 'software',
       'subcategory_units', 'subcategory_mean_units_block',
       'subcategory_mean_units_day',
       'subcategory_max_units_block', 'subcategory_min_units_block',
       'subcategory_max_units_day', 'subcategory_min_units_day',
       'subcategory_turnover', 'subcategory_mean_turnover_block',
       'subcategory_mean_turnover_day',
       'subcategory_max_turnover_block', 'subcategory_min_turnover_block',
       'subcategory_max_turnover_day', 'subcategory_min_turnover_day',
       'category_share_of_total_units',
       'category_share_of_total_turnover',
       'subcategory_share_of_total_units',
       'subcategory_share_of_total_turnover',
       'subcategory_first_two_blocks_units',
       'subcategory_last_two_blocks_units',
       'subcategory_fluctuation_units_first_last_blocks',
       'subcategory_first_two_blocks_mean_price',
       'subcategory_last_two_blocks_mean_price',
       'subcategory_fluctuation_price_first_last_blocks',
       'shop_units', 'shop_mean_units_block', 
       'shop_mean_units_day', 'shop_max_units_block',
       'shop_min_units_block', 'shop_max_units_day', 'shop_min_units_day',
       'shop_turnover', 'shop_mean_turnover_block', 
       'shop_mean_turnover_day', 'shop_max_turnover_block',
       'shop_min_turnover_block', 'shop_max_turnover_day',
       'shop_min_turnover_day', 'shop_mean_price',
       'shop_first_two_blocks_units', 'shop_last_two_blocks_units',
       'shop_fluctuation_units_first_last_blocks',
       'shop_first_two_blocks_mean_price',
       'shop_last_two_blocks_mean_price',
       'shop_fluctuation_price_first_last_blocks', 'shop_share_of_units',
       'shop_share_of_turnover', 'shop_TC', 'shop_TRK', 'shop_SEC',
       'shop_shopping_center', 'shop_moscow', 'max_category_units',
       'max_category_turnover', 'area_units',
       'area_mean_units_block', 'area_mean_units_day',
       'area_max_units_block', 'area_min_units_block',
       'area_max_units_day', 'area_min_units_day', 'area_turnover',
       'area_mean_turnover_block',
       'area_mean_turnover_day', 'area_max_turnover_block',
       'area_min_turnover_block', 'area_max_turnover_day',
       'area_min_turnover_day', 'area_mean_price',
       'area_first_two_blocks_units', 'area_last_two_blocks_units',
       'area_fluctuation_units_first_last_blocks',
       'area_first_two_blocks_mean_price',
       'area_last_two_blocks_mean_price',
       'area_fluctuation_price_first_last_blocks',
       'shop_category_units', 'shop_category_mean_units_block',
       'shop_category_mean_units_day',
       'shop_category_max_units_block', 'shop_category_min_units_block',
       'shop_category_max_units_day', 'shop_category_min_units_day',
       'shop_category_turnover', 'shop_category_mean_turnover_block',
       'shop_category_mean_turnover_day',
       'shop_category_max_turnover_block',
       'shop_category_min_turnover_block',
       'shop_category_max_turnover_day', 'shop_category_min_turnover_day',
       'shop_category_mean_price',
       'item_block_units_lag_1', 'item_block_turnover_lag_1',
       'item_mean_price_block_lag_1', 'item_block_units_lag_2',
       'item_block_turnover_lag_2', 'item_mean_price_block_lag_2',
       'item_block_units_lag_3', 'item_block_turnover_lag_3',
       'item_mean_price_block_lag_3', 'category_block_units_lag_1',
       'category_block_turnover_lag_1', 'category_mean_price_block_lag_1',
       'subcategory_block_units_lag_1',
       'subcategory_block_turnover_lag_1',
       'subcategory_mean_price_block_lag_1', 'category_block_units_lag_2',
       'category_block_turnover_lag_2', 'category_mean_price_block_lag_2',
       'subcategory_block_units_lag_2',
       'subcategory_block_turnover_lag_2',
       'subcategory_mean_price_block_lag_2', 'category_block_units_lag_3',
       'category_block_turnover_lag_3', 'category_mean_price_block_lag_3',
       'subcategory_block_units_lag_3',
       'subcategory_block_turnover_lag_3',
       'subcategory_mean_price_block_lag_3', 'shop_block_units_lag_1',
       'shop_block_turnover_lag_1', 'shop_mean_price_block_lag_1',
       'area_block_units_lag_1', 'area_block_turnover_lag_1',
       'area_mean_price_block_lag_1', 'shop_block_units_lag_2',
       'shop_block_turnover_lag_2', 'shop_mean_price_block_lag_2',
       'area_block_units_lag_2', 'area_block_turnover_lag_2',
       'area_mean_price_block_lag_2', 'shop_block_units_lag_3',
       'shop_block_turnover_lag_3', 'shop_mean_price_block_lag_3',
       'area_block_units_lag_3', 'area_block_turnover_lag_3',
       'area_mean_price_block_lag_3',
       '1', '2', '3', '4', '5', '6','7', '8', '9', '10', '11', '12']


In [35]:
cb_model = CatBoostRegressor(iterations=70000,
                             learning_rate=0.001,
                             eval_metric='RMSE',
                             #thread_count=16,
                             task_type = "GPU",
                             use_best_model=True,
                             #l2_leaf_reg = 1000,
                             od_type = "Iter",
                             od_wait = 30,
                             #random_strength = 10,
                             #bagging_temperature = 1,
                             #one_hot_max_size = 2,
                             random_seed = 42)

#drops = ['subcategory','area']
#x_train = x_train.drop(columns=drops)
#x_val = x_val.drop(columns=drops)


cb_model.fit(x_train[cb_features], y_train, #cat_features=categorical_features_indices,
             eval_set=(x_val[cb_features],y_val),
             #cat_features=categorical_features_pos,         
             verbose=True)

0:	learn: 1.0781136	test: 0.9237295	best: 0.9237295 (0)	total: 112ms	remaining: 2h 11m 6s
1:	learn: 1.0777911	test: 0.9235398	best: 0.9235398 (1)	total: 210ms	remaining: 2h 2m 27s
2:	learn: 1.0774677	test: 0.9233501	best: 0.9233501 (2)	total: 305ms	remaining: 1h 58m 45s
3:	learn: 1.0771454	test: 0.9231610	best: 0.9231610 (3)	total: 396ms	remaining: 1h 55m 32s
4:	learn: 1.0768224	test: 0.9229725	best: 0.9229725 (4)	total: 488ms	remaining: 1h 53m 50s
5:	learn: 1.0765008	test: 0.9227837	best: 0.9227837 (5)	total: 580ms	remaining: 1h 52m 44s
6:	learn: 1.0761802	test: 0.9225964	best: 0.9225964 (6)	total: 671ms	remaining: 1h 51m 53s
7:	learn: 1.0758589	test: 0.9224089	best: 0.9224089 (7)	total: 763ms	remaining: 1h 51m 15s
8:	learn: 1.0755391	test: 0.9222216	best: 0.9222216 (8)	total: 855ms	remaining: 1h 50m 48s
9:	learn: 1.0752208	test: 0.9220348	best: 0.9220348 (9)	total: 946ms	remaining: 1h 50m 18s
10:	learn: 1.0749020	test: 0.9218485	best: 0.9218485 (10)	total: 1.04s	remaining: 1h 49m 54s

90:	learn: 1.0508816	test: 0.9083246	best: 0.9083246 (90)	total: 8.28s	remaining: 1h 45m 57s
91:	learn: 1.0505935	test: 0.9081286	best: 0.9081286 (91)	total: 8.37s	remaining: 1h 45m 57s
92:	learn: 1.0503102	test: 0.9079697	best: 0.9079697 (92)	total: 8.46s	remaining: 1h 45m 58s
93:	learn: 1.0500240	test: 0.9077756	best: 0.9077756 (93)	total: 8.55s	remaining: 1h 45m 57s
94:	learn: 1.0497431	test: 0.9076400	best: 0.9076400 (94)	total: 8.64s	remaining: 1h 45m 57s
95:	learn: 1.0494643	test: 0.9074814	best: 0.9074814 (95)	total: 8.73s	remaining: 1h 45m 56s
96:	learn: 1.0491837	test: 0.9073466	best: 0.9073466 (96)	total: 8.82s	remaining: 1h 45m 56s
97:	learn: 1.0488991	test: 0.9071539	best: 0.9071539 (97)	total: 8.91s	remaining: 1h 45m 56s
98:	learn: 1.0486140	test: 0.9069603	best: 0.9069603 (98)	total: 9s	remaining: 1h 45m 57s
99:	learn: 1.0483352	test: 0.9068256	best: 0.9068256 (99)	total: 9.09s	remaining: 1h 45m 57s
100:	learn: 1.0480523	test: 0.9066334	best: 0.9066334 (100)	total: 9.18s	

177:	learn: 1.0275876	test: 0.8947875	best: 0.8947875 (177)	total: 16.1s	remaining: 1h 45m 24s
178:	learn: 1.0273351	test: 0.8946192	best: 0.8946192 (178)	total: 16.2s	remaining: 1h 45m 24s
179:	learn: 1.0270864	test: 0.8945028	best: 0.8945028 (179)	total: 16.3s	remaining: 1h 45m 24s
180:	learn: 1.0268328	test: 0.8943591	best: 0.8943591 (180)	total: 16.4s	remaining: 1h 45m 23s
181:	learn: 1.0265819	test: 0.8942240	best: 0.8942240 (181)	total: 16.5s	remaining: 1h 45m 23s
182:	learn: 1.0263319	test: 0.8940597	best: 0.8940597 (182)	total: 16.6s	remaining: 1h 45m 22s
183:	learn: 1.0260782	test: 0.8939177	best: 0.8939177 (183)	total: 16.7s	remaining: 1h 45m 20s
184:	learn: 1.0258298	test: 0.8937518	best: 0.8937518 (184)	total: 16.8s	remaining: 1h 45m 25s
185:	learn: 1.0255792	test: 0.8936173	best: 0.8936173 (185)	total: 16.9s	remaining: 1h 45m 25s
186:	learn: 1.0253344	test: 0.8935020	best: 0.8935020 (186)	total: 16.9s	remaining: 1h 45m 25s
187:	learn: 1.0250835	test: 0.8933331	best: 0.8933

265:	learn: 1.0067841	test: 0.8833964	best: 0.8833964 (265)	total: 24.2s	remaining: 1h 45m 37s
266:	learn: 1.0065607	test: 0.8832761	best: 0.8832761 (266)	total: 24.3s	remaining: 1h 45m 36s
267:	learn: 1.0063416	test: 0.8831749	best: 0.8831749 (267)	total: 24.4s	remaining: 1h 45m 36s
268:	learn: 1.0061245	test: 0.8830607	best: 0.8830607 (268)	total: 24.4s	remaining: 1h 45m 36s
269:	learn: 1.0059052	test: 0.8829239	best: 0.8829239 (269)	total: 24.5s	remaining: 1h 45m 35s
270:	learn: 1.0056820	test: 0.8828049	best: 0.8828049 (270)	total: 24.6s	remaining: 1h 45m 34s
271:	learn: 1.0054632	test: 0.8826969	best: 0.8826969 (271)	total: 24.7s	remaining: 1h 45m 34s
272:	learn: 1.0052483	test: 0.8825883	best: 0.8825883 (272)	total: 24.8s	remaining: 1h 45m 35s
273:	learn: 1.0050264	test: 0.8824652	best: 0.8824652 (273)	total: 24.9s	remaining: 1h 45m 34s
274:	learn: 1.0048044	test: 0.8823470	best: 0.8823470 (274)	total: 25s	remaining: 1h 45m 33s
275:	learn: 1.0045857	test: 0.8822271	best: 0.882227

353:	learn: 0.9883717	test: 0.8737912	best: 0.8737912 (353)	total: 32.5s	remaining: 1h 46m 37s
354:	learn: 0.9881800	test: 0.8737015	best: 0.8737015 (354)	total: 32.6s	remaining: 1h 46m 37s
355:	learn: 0.9879843	test: 0.8735918	best: 0.8735918 (355)	total: 32.7s	remaining: 1h 46m 37s
356:	learn: 0.9877912	test: 0.8734989	best: 0.8734989 (356)	total: 32.8s	remaining: 1h 46m 37s
357:	learn: 0.9875877	test: 0.8733956	best: 0.8733956 (357)	total: 32.9s	remaining: 1h 46m 40s
358:	learn: 0.9873937	test: 0.8732835	best: 0.8732835 (358)	total: 33s	remaining: 1h 46m 41s
359:	learn: 0.9871918	test: 0.8731845	best: 0.8731845 (359)	total: 33.1s	remaining: 1h 46m 45s
360:	learn: 0.9869957	test: 0.8730834	best: 0.8730834 (360)	total: 33.2s	remaining: 1h 46m 47s
361:	learn: 0.9868020	test: 0.8729822	best: 0.8729822 (361)	total: 33.3s	remaining: 1h 46m 47s
362:	learn: 0.9866107	test: 0.8728916	best: 0.8728916 (362)	total: 33.4s	remaining: 1h 46m 46s
363:	learn: 0.9864174	test: 0.8727962	best: 0.872796

441:	learn: 0.9721136	test: 0.8654628	best: 0.8654628 (441)	total: 41s	remaining: 1h 47m 38s
442:	learn: 0.9719347	test: 0.8653774	best: 0.8653774 (442)	total: 41.2s	remaining: 1h 47m 41s
443:	learn: 0.9717648	test: 0.8653025	best: 0.8653025 (443)	total: 41.2s	remaining: 1h 47m 41s
444:	learn: 0.9715930	test: 0.8652121	best: 0.8652121 (444)	total: 41.3s	remaining: 1h 47m 42s
445:	learn: 0.9714200	test: 0.8651028	best: 0.8651028 (445)	total: 41.4s	remaining: 1h 47m 41s
446:	learn: 0.9712488	test: 0.8650083	best: 0.8650083 (446)	total: 41.5s	remaining: 1h 47m 41s
447:	learn: 0.9710792	test: 0.8649354	best: 0.8649354 (447)	total: 41.6s	remaining: 1h 47m 41s
448:	learn: 0.9709088	test: 0.8648412	best: 0.8648412 (448)	total: 41.7s	remaining: 1h 47m 41s
449:	learn: 0.9707319	test: 0.8647542	best: 0.8647542 (449)	total: 41.8s	remaining: 1h 47m 44s
450:	learn: 0.9705608	test: 0.8646767	best: 0.8646767 (450)	total: 41.9s	remaining: 1h 47m 45s
451:	learn: 0.9703900	test: 0.8645903	best: 0.864590

530:	learn: 0.9575741	test: 0.8581947	best: 0.8581947 (530)	total: 50s	remaining: 1h 48m 56s
531:	learn: 0.9574252	test: 0.8581265	best: 0.8581265 (531)	total: 50.1s	remaining: 1h 48m 56s
532:	learn: 0.9572721	test: 0.8580419	best: 0.8580419 (532)	total: 50.2s	remaining: 1h 48m 57s
533:	learn: 0.9571167	test: 0.8579622	best: 0.8579622 (533)	total: 50.3s	remaining: 1h 48m 57s
534:	learn: 0.9569648	test: 0.8578778	best: 0.8578778 (534)	total: 50.4s	remaining: 1h 48m 59s
535:	learn: 0.9568141	test: 0.8577901	best: 0.8577901 (535)	total: 50.5s	remaining: 1h 48m 59s
536:	learn: 0.9566572	test: 0.8577164	best: 0.8577164 (536)	total: 50.6s	remaining: 1h 49m
537:	learn: 0.9565001	test: 0.8576472	best: 0.8576472 (537)	total: 50.7s	remaining: 1h 49m 2s
538:	learn: 0.9563440	test: 0.8575769	best: 0.8575769 (538)	total: 50.8s	remaining: 1h 49m 4s
539:	learn: 0.9561933	test: 0.8574978	best: 0.8574978 (539)	total: 50.9s	remaining: 1h 49m 6s
540:	learn: 0.9560418	test: 0.8574205	best: 0.8574205 (540)

617:	learn: 0.9450325	test: 0.8520095	best: 0.8520095 (617)	total: 58.6s	remaining: 1h 49m 40s
618:	learn: 0.9448971	test: 0.8519479	best: 0.8519479 (618)	total: 58.7s	remaining: 1h 49m 41s
619:	learn: 0.9447576	test: 0.8518833	best: 0.8518833 (619)	total: 58.8s	remaining: 1h 49m 41s
620:	learn: 0.9446195	test: 0.8518141	best: 0.8518141 (620)	total: 58.9s	remaining: 1h 49m 42s
621:	learn: 0.9444863	test: 0.8517616	best: 0.8517616 (621)	total: 59s	remaining: 1h 49m 42s
622:	learn: 0.9443516	test: 0.8516900	best: 0.8516900 (622)	total: 59.1s	remaining: 1h 49m 41s
623:	learn: 0.9442164	test: 0.8516173	best: 0.8516173 (623)	total: 59.2s	remaining: 1h 49m 41s
624:	learn: 0.9440781	test: 0.8515531	best: 0.8515531 (624)	total: 59.3s	remaining: 1h 49m 42s
625:	learn: 0.9439498	test: 0.8514831	best: 0.8514831 (625)	total: 59.4s	remaining: 1h 49m 42s
626:	learn: 0.9438143	test: 0.8514092	best: 0.8514092 (626)	total: 59.5s	remaining: 1h 49m 42s
627:	learn: 0.9436796	test: 0.8513387	best: 0.851338

705:	learn: 0.9336839	test: 0.8462790	best: 0.8462790 (705)	total: 1m 7s	remaining: 1h 50m 11s
706:	learn: 0.9335638	test: 0.8462260	best: 0.8462260 (706)	total: 1m 7s	remaining: 1h 50m 12s
707:	learn: 0.9334460	test: 0.8461618	best: 0.8461618 (707)	total: 1m 7s	remaining: 1h 50m 12s
708:	learn: 0.9333277	test: 0.8460978	best: 0.8460978 (708)	total: 1m 7s	remaining: 1h 50m 12s
709:	learn: 0.9332116	test: 0.8460549	best: 0.8460549 (709)	total: 1m 7s	remaining: 1h 50m 12s
710:	learn: 0.9330921	test: 0.8460086	best: 0.8460086 (710)	total: 1m 7s	remaining: 1h 50m 13s
711:	learn: 0.9329741	test: 0.8459637	best: 0.8459637 (711)	total: 1m 7s	remaining: 1h 50m 13s
712:	learn: 0.9328522	test: 0.8458974	best: 0.8458974 (712)	total: 1m 8s	remaining: 1h 50m 13s
713:	learn: 0.9327320	test: 0.8458311	best: 0.8458311 (713)	total: 1m 8s	remaining: 1h 50m 13s
714:	learn: 0.9326110	test: 0.8457738	best: 0.8457738 (714)	total: 1m 8s	remaining: 1h 50m 13s
715:	learn: 0.9324897	test: 0.8456890	best: 0.8456

791:	learn: 0.9237842	test: 0.8408628	best: 0.8408628 (791)	total: 1m 15s	remaining: 1h 50m 29s
792:	learn: 0.9236690	test: 0.8407944	best: 0.8407944 (792)	total: 1m 15s	remaining: 1h 50m 30s
793:	learn: 0.9235598	test: 0.8407354	best: 0.8407354 (793)	total: 1m 16s	remaining: 1h 50m 30s
794:	learn: 0.9234481	test: 0.8406654	best: 0.8406654 (794)	total: 1m 16s	remaining: 1h 50m 31s
795:	learn: 0.9233415	test: 0.8406161	best: 0.8406161 (795)	total: 1m 16s	remaining: 1h 50m 32s
796:	learn: 0.9232323	test: 0.8405561	best: 0.8405561 (796)	total: 1m 16s	remaining: 1h 50m 32s
797:	learn: 0.9231234	test: 0.8404979	best: 0.8404979 (797)	total: 1m 16s	remaining: 1h 50m 33s
798:	learn: 0.9230158	test: 0.8404393	best: 0.8404393 (798)	total: 1m 16s	remaining: 1h 50m 33s
799:	learn: 0.9229061	test: 0.8403704	best: 0.8403704 (799)	total: 1m 16s	remaining: 1h 50m 33s
800:	learn: 0.9227985	test: 0.8403122	best: 0.8403122 (800)	total: 1m 16s	remaining: 1h 50m 33s
801:	learn: 0.9226948	test: 0.8402603	be

878:	learn: 0.9147432	test: 0.8359811	best: 0.8359811 (878)	total: 1m 24s	remaining: 1h 50m 52s
879:	learn: 0.9146406	test: 0.8359301	best: 0.8359301 (879)	total: 1m 24s	remaining: 1h 50m 52s
880:	learn: 0.9145415	test: 0.8358848	best: 0.8358848 (880)	total: 1m 24s	remaining: 1h 50m 52s
881:	learn: 0.9144485	test: 0.8358530	best: 0.8358530 (881)	total: 1m 24s	remaining: 1h 50m 52s
882:	learn: 0.9143479	test: 0.8357799	best: 0.8357799 (882)	total: 1m 24s	remaining: 1h 50m 52s
883:	learn: 0.9142498	test: 0.8357129	best: 0.8357129 (883)	total: 1m 25s	remaining: 1h 50m 52s
884:	learn: 0.9141519	test: 0.8356727	best: 0.8356727 (884)	total: 1m 25s	remaining: 1h 50m 53s
885:	learn: 0.9140508	test: 0.8356138	best: 0.8356138 (885)	total: 1m 25s	remaining: 1h 50m 52s
886:	learn: 0.9139544	test: 0.8355469	best: 0.8355469 (886)	total: 1m 25s	remaining: 1h 50m 51s
887:	learn: 0.9138553	test: 0.8354751	best: 0.8354751 (887)	total: 1m 25s	remaining: 1h 50m 51s
888:	learn: 0.9137590	test: 0.8354238	be

964:	learn: 0.9066727	test: 0.8315569	best: 0.8315569 (964)	total: 1m 33s	remaining: 1h 51m
965:	learn: 0.9065845	test: 0.8315048	best: 0.8315048 (965)	total: 1m 33s	remaining: 1h 51m
966:	learn: 0.9064945	test: 0.8314584	best: 0.8314584 (966)	total: 1m 33s	remaining: 1h 51m 1s
967:	learn: 0.9064066	test: 0.8314005	best: 0.8314005 (967)	total: 1m 33s	remaining: 1h 51m
968:	learn: 0.9063206	test: 0.8313734	best: 0.8313734 (968)	total: 1m 33s	remaining: 1h 51m 1s
969:	learn: 0.9062268	test: 0.8313217	best: 0.8313217 (969)	total: 1m 33s	remaining: 1h 51m 2s
970:	learn: 0.9061392	test: 0.8312785	best: 0.8312785 (970)	total: 1m 33s	remaining: 1h 51m 1s
971:	learn: 0.9060584	test: 0.8312334	best: 0.8312334 (971)	total: 1m 33s	remaining: 1h 51m 2s
972:	learn: 0.9059675	test: 0.8311853	best: 0.8311853 (972)	total: 1m 33s	remaining: 1h 51m 2s
973:	learn: 0.9058840	test: 0.8311473	best: 0.8311473 (973)	total: 1m 34s	remaining: 1h 51m 3s
974:	learn: 0.9057996	test: 0.8311103	best: 0.8311103 (974)

1051:	learn: 0.8992946	test: 0.8275889	best: 0.8275889 (1051)	total: 1m 42s	remaining: 1h 51m 26s
1052:	learn: 0.8992145	test: 0.8275259	best: 0.8275259 (1052)	total: 1m 42s	remaining: 1h 51m 26s
1053:	learn: 0.8991404	test: 0.8275016	best: 0.8275016 (1053)	total: 1m 42s	remaining: 1h 51m 26s
1054:	learn: 0.8990664	test: 0.8274749	best: 0.8274749 (1054)	total: 1m 42s	remaining: 1h 51m 26s
1055:	learn: 0.8989827	test: 0.8274324	best: 0.8274324 (1055)	total: 1m 42s	remaining: 1h 51m 27s
1056:	learn: 0.8989050	test: 0.8273844	best: 0.8273844 (1056)	total: 1m 42s	remaining: 1h 51m 27s
1057:	learn: 0.8988228	test: 0.8273419	best: 0.8273419 (1057)	total: 1m 42s	remaining: 1h 51m 28s
1058:	learn: 0.8987466	test: 0.8273065	best: 0.8273065 (1058)	total: 1m 42s	remaining: 1h 51m 28s
1059:	learn: 0.8986676	test: 0.8272704	best: 0.8272704 (1059)	total: 1m 42s	remaining: 1h 51m 29s
1060:	learn: 0.8985858	test: 0.8272274	best: 0.8272274 (1060)	total: 1m 42s	remaining: 1h 51m 29s
1061:	learn: 0.89850

1135:	learn: 0.8929045	test: 0.8241027	best: 0.8241027 (1135)	total: 1m 50s	remaining: 1h 51m 47s
1136:	learn: 0.8928317	test: 0.8240655	best: 0.8240655 (1136)	total: 1m 50s	remaining: 1h 51m 47s
1137:	learn: 0.8927564	test: 0.8240272	best: 0.8240272 (1137)	total: 1m 50s	remaining: 1h 51m 48s
1138:	learn: 0.8926862	test: 0.8239969	best: 0.8239969 (1138)	total: 1m 50s	remaining: 1h 51m 48s
1139:	learn: 0.8926150	test: 0.8239572	best: 0.8239572 (1139)	total: 1m 51s	remaining: 1h 51m 48s
1140:	learn: 0.8925397	test: 0.8239195	best: 0.8239195 (1140)	total: 1m 51s	remaining: 1h 51m 48s
1141:	learn: 0.8924655	test: 0.8238812	best: 0.8238812 (1141)	total: 1m 51s	remaining: 1h 51m 49s
1142:	learn: 0.8923945	test: 0.8238421	best: 0.8238421 (1142)	total: 1m 51s	remaining: 1h 51m 49s
1143:	learn: 0.8923219	test: 0.8238040	best: 0.8238040 (1143)	total: 1m 51s	remaining: 1h 51m 49s
1144:	learn: 0.8922501	test: 0.8237644	best: 0.8237644 (1144)	total: 1m 51s	remaining: 1h 51m 49s
1145:	learn: 0.89216

1221:	learn: 0.8868768	test: 0.8208555	best: 0.8208555 (1221)	total: 1m 59s	remaining: 1h 51m 59s
1222:	learn: 0.8868077	test: 0.8208118	best: 0.8208118 (1222)	total: 1m 59s	remaining: 1h 51m 59s
1223:	learn: 0.8867435	test: 0.8207641	best: 0.8207641 (1223)	total: 1m 59s	remaining: 1h 51m 59s
1224:	learn: 0.8866821	test: 0.8207434	best: 0.8207434 (1224)	total: 1m 59s	remaining: 1h 51m 58s
1225:	learn: 0.8866150	test: 0.8207100	best: 0.8207100 (1225)	total: 1m 59s	remaining: 1h 51m 59s
1226:	learn: 0.8865503	test: 0.8206771	best: 0.8206771 (1226)	total: 1m 59s	remaining: 1h 51m 58s
1227:	learn: 0.8864880	test: 0.8206311	best: 0.8206311 (1227)	total: 1m 59s	remaining: 1h 51m 58s
1228:	learn: 0.8864249	test: 0.8205972	best: 0.8205972 (1228)	total: 2m	remaining: 1h 51m 58s
1229:	learn: 0.8863579	test: 0.8205620	best: 0.8205620 (1229)	total: 2m	remaining: 1h 51m 58s
1230:	learn: 0.8862965	test: 0.8205280	best: 0.8205280 (1230)	total: 2m	remaining: 1h 51m 58s
1231:	learn: 0.8862271	test: 0.8

1307:	learn: 0.8814363	test: 0.8179425	best: 0.8179425 (1307)	total: 2m 8s	remaining: 1h 52m 6s
1308:	learn: 0.8813804	test: 0.8179296	best: 0.8179296 (1308)	total: 2m 8s	remaining: 1h 52m 7s
1309:	learn: 0.8813239	test: 0.8178861	best: 0.8178861 (1309)	total: 2m 8s	remaining: 1h 52m 6s
1310:	learn: 0.8812559	test: 0.8178550	best: 0.8178550 (1310)	total: 2m 8s	remaining: 1h 52m 6s
1311:	learn: 0.8811967	test: 0.8177998	best: 0.8177998 (1311)	total: 2m 8s	remaining: 1h 52m 7s
1312:	learn: 0.8811366	test: 0.8177570	best: 0.8177570 (1312)	total: 2m 8s	remaining: 1h 52m 7s
1313:	learn: 0.8810763	test: 0.8177273	best: 0.8177273 (1313)	total: 2m 8s	remaining: 1h 52m 7s
1314:	learn: 0.8810148	test: 0.8176975	best: 0.8176975 (1314)	total: 2m 8s	remaining: 1h 52m 8s
1315:	learn: 0.8809577	test: 0.8176556	best: 0.8176556 (1315)	total: 2m 8s	remaining: 1h 52m 7s
1316:	learn: 0.8809024	test: 0.8176278	best: 0.8176278 (1316)	total: 2m 9s	remaining: 1h 52m 7s
1317:	learn: 0.8808354	test: 0.8175966	b

1394:	learn: 0.8764292	test: 0.8152502	best: 0.8152502 (1394)	total: 2m 16s	remaining: 1h 52m 11s
1395:	learn: 0.8763775	test: 0.8152231	best: 0.8152231 (1395)	total: 2m 16s	remaining: 1h 52m 11s
1396:	learn: 0.8763262	test: 0.8151968	best: 0.8151968 (1396)	total: 2m 17s	remaining: 1h 52m 11s
1397:	learn: 0.8762716	test: 0.8151462	best: 0.8151462 (1397)	total: 2m 17s	remaining: 1h 52m 11s
1398:	learn: 0.8762181	test: 0.8151118	best: 0.8151118 (1398)	total: 2m 17s	remaining: 1h 52m 10s
1399:	learn: 0.8761662	test: 0.8150857	best: 0.8150857 (1399)	total: 2m 17s	remaining: 1h 52m 10s
1400:	learn: 0.8761132	test: 0.8150620	best: 0.8150620 (1400)	total: 2m 17s	remaining: 1h 52m 10s
1401:	learn: 0.8760607	test: 0.8150342	best: 0.8150342 (1401)	total: 2m 17s	remaining: 1h 52m 10s
1402:	learn: 0.8759971	test: 0.8149992	best: 0.8149992 (1402)	total: 2m 17s	remaining: 1h 52m 10s
1403:	learn: 0.8759468	test: 0.8149788	best: 0.8149788 (1403)	total: 2m 17s	remaining: 1h 52m 11s
1404:	learn: 0.87589

1480:	learn: 0.8719553	test: 0.8128169	best: 0.8128169 (1480)	total: 2m 25s	remaining: 1h 52m 12s
1481:	learn: 0.8719030	test: 0.8127767	best: 0.8127767 (1481)	total: 2m 25s	remaining: 1h 52m 12s
1482:	learn: 0.8718543	test: 0.8127327	best: 0.8127327 (1482)	total: 2m 25s	remaining: 1h 52m 12s
1483:	learn: 0.8717967	test: 0.8127089	best: 0.8127089 (1483)	total: 2m 25s	remaining: 1h 52m 13s
1484:	learn: 0.8717504	test: 0.8126829	best: 0.8126829 (1484)	total: 2m 25s	remaining: 1h 52m 13s
1485:	learn: 0.8716958	test: 0.8126546	best: 0.8126546 (1485)	total: 2m 26s	remaining: 1h 52m 13s
1486:	learn: 0.8716464	test: 0.8126317	best: 0.8126317 (1486)	total: 2m 26s	remaining: 1h 52m 14s
1487:	learn: 0.8715939	test: 0.8125919	best: 0.8125919 (1487)	total: 2m 26s	remaining: 1h 52m 14s
1488:	learn: 0.8715398	test: 0.8125714	best: 0.8125714 (1488)	total: 2m 26s	remaining: 1h 52m 14s
1489:	learn: 0.8714940	test: 0.8125483	best: 0.8125483 (1489)	total: 2m 26s	remaining: 1h 52m 14s
1490:	learn: 0.87144

1564:	learn: 0.8678864	test: 0.8107965	best: 0.8107965 (1564)	total: 2m 34s	remaining: 1h 52m 16s
1565:	learn: 0.8678422	test: 0.8107677	best: 0.8107677 (1565)	total: 2m 34s	remaining: 1h 52m 16s
1566:	learn: 0.8677990	test: 0.8107461	best: 0.8107461 (1566)	total: 2m 34s	remaining: 1h 52m 16s
1567:	learn: 0.8677567	test: 0.8107225	best: 0.8107225 (1567)	total: 2m 34s	remaining: 1h 52m 16s
1568:	learn: 0.8677139	test: 0.8107040	best: 0.8107040 (1568)	total: 2m 34s	remaining: 1h 52m 16s
1569:	learn: 0.8676668	test: 0.8106770	best: 0.8106770 (1569)	total: 2m 34s	remaining: 1h 52m 16s
1570:	learn: 0.8676224	test: 0.8106582	best: 0.8106582 (1570)	total: 2m 34s	remaining: 1h 52m 15s
1571:	learn: 0.8675767	test: 0.8106175	best: 0.8106175 (1571)	total: 2m 34s	remaining: 1h 52m 15s
1572:	learn: 0.8675285	test: 0.8105935	best: 0.8105935 (1572)	total: 2m 34s	remaining: 1h 52m 16s
1573:	learn: 0.8674874	test: 0.8105856	best: 0.8105856 (1573)	total: 2m 34s	remaining: 1h 52m 16s
1574:	learn: 0.86744

1648:	learn: 0.8641493	test: 0.8090510	best: 0.8090510 (1648)	total: 2m 42s	remaining: 1h 52m 15s
1649:	learn: 0.8641065	test: 0.8090326	best: 0.8090326 (1649)	total: 2m 42s	remaining: 1h 52m 15s
1650:	learn: 0.8640654	test: 0.8090046	best: 0.8090046 (1650)	total: 2m 42s	remaining: 1h 52m 15s
1651:	learn: 0.8640239	test: 0.8089767	best: 0.8089767 (1651)	total: 2m 42s	remaining: 1h 52m 15s
1652:	learn: 0.8639804	test: 0.8089543	best: 0.8089543 (1652)	total: 2m 42s	remaining: 1h 52m 15s
1653:	learn: 0.8639363	test: 0.8089332	best: 0.8089332 (1653)	total: 2m 42s	remaining: 1h 52m 15s
1654:	learn: 0.8638951	test: 0.8089200	best: 0.8089200 (1654)	total: 2m 43s	remaining: 1h 52m 15s
1655:	learn: 0.8638517	test: 0.8089010	best: 0.8089010 (1655)	total: 2m 43s	remaining: 1h 52m 15s
1656:	learn: 0.8638088	test: 0.8088799	best: 0.8088799 (1656)	total: 2m 43s	remaining: 1h 52m 15s
1657:	learn: 0.8637669	test: 0.8088444	best: 0.8088444 (1657)	total: 2m 43s	remaining: 1h 52m 14s
1658:	learn: 0.86372

1734:	learn: 0.8606565	test: 0.8074631	best: 0.8074631 (1734)	total: 2m 51s	remaining: 1h 52m 16s
1735:	learn: 0.8606177	test: 0.8074506	best: 0.8074506 (1735)	total: 2m 51s	remaining: 1h 52m 16s
1736:	learn: 0.8605752	test: 0.8074365	best: 0.8074365 (1736)	total: 2m 51s	remaining: 1h 52m 16s
1737:	learn: 0.8605375	test: 0.8074257	best: 0.8074257 (1737)	total: 2m 51s	remaining: 1h 52m 16s
1738:	learn: 0.8605011	test: 0.8074114	best: 0.8074114 (1738)	total: 2m 51s	remaining: 1h 52m 16s
1739:	learn: 0.8604567	test: 0.8073830	best: 0.8073830 (1739)	total: 2m 51s	remaining: 1h 52m 17s
1740:	learn: 0.8604171	test: 0.8073655	best: 0.8073655 (1740)	total: 2m 51s	remaining: 1h 52m 17s
1741:	learn: 0.8603774	test: 0.8073365	best: 0.8073365 (1741)	total: 2m 51s	remaining: 1h 52m 16s
1742:	learn: 0.8603417	test: 0.8073256	best: 0.8073256 (1742)	total: 2m 52s	remaining: 1h 52m 16s
1743:	learn: 0.8603052	test: 0.8073148	best: 0.8073148 (1743)	total: 2m 52s	remaining: 1h 52m 16s
1744:	learn: 0.86026

1819:	learn: 0.8574910	test: 0.8061405	best: 0.8061405 (1819)	total: 2m 59s	remaining: 1h 52m 17s
1820:	learn: 0.8574559	test: 0.8061321	best: 0.8061321 (1820)	total: 2m 59s	remaining: 1h 52m 17s
1821:	learn: 0.8574221	test: 0.8061157	best: 0.8061157 (1821)	total: 3m	remaining: 1h 52m 17s
1822:	learn: 0.8573873	test: 0.8060883	best: 0.8060883 (1822)	total: 3m	remaining: 1h 52m 17s
1823:	learn: 0.8573551	test: 0.8060824	best: 0.8060824 (1823)	total: 3m	remaining: 1h 52m 17s
1824:	learn: 0.8573202	test: 0.8060672	best: 0.8060672 (1824)	total: 3m	remaining: 1h 52m 17s
1825:	learn: 0.8572824	test: 0.8060537	best: 0.8060537 (1825)	total: 3m	remaining: 1h 52m 17s
1826:	learn: 0.8572509	test: 0.8060476	best: 0.8060476 (1826)	total: 3m	remaining: 1h 52m 17s
1827:	learn: 0.8572117	test: 0.8060263	best: 0.8060263 (1827)	total: 3m	remaining: 1h 52m 17s
1828:	learn: 0.8571748	test: 0.8060109	best: 0.8060109 (1828)	total: 3m	remaining: 1h 52m 17s
1829:	learn: 0.8571353	test: 0.8059898	best: 0.80598

1904:	learn: 0.8546030	test: 0.8050538	best: 0.8050538 (1904)	total: 3m 8s	remaining: 1h 52m 21s
1905:	learn: 0.8545700	test: 0.8050439	best: 0.8050439 (1905)	total: 3m 8s	remaining: 1h 52m 21s
1906:	learn: 0.8545409	test: 0.8050339	best: 0.8050339 (1906)	total: 3m 8s	remaining: 1h 52m 21s
1907:	learn: 0.8545092	test: 0.8050131	best: 0.8050131 (1907)	total: 3m 8s	remaining: 1h 52m 20s
1908:	learn: 0.8544772	test: 0.8049897	best: 0.8049897 (1908)	total: 3m 8s	remaining: 1h 52m 20s
1909:	learn: 0.8544457	test: 0.8049749	best: 0.8049749 (1909)	total: 3m 9s	remaining: 1h 52m 20s
1910:	learn: 0.8544097	test: 0.8049557	best: 0.8049557 (1910)	total: 3m 9s	remaining: 1h 52m 21s
1911:	learn: 0.8543724	test: 0.8049372	best: 0.8049372 (1911)	total: 3m 9s	remaining: 1h 52m 21s
1912:	learn: 0.8543379	test: 0.8049252	best: 0.8049252 (1912)	total: 3m 9s	remaining: 1h 52m 21s
1913:	learn: 0.8543063	test: 0.8049115	best: 0.8049115 (1913)	total: 3m 9s	remaining: 1h 52m 21s
1914:	learn: 0.8542746	test: 0

1988:	learn: 0.8519603	test: 0.8039013	best: 0.8039013 (1988)	total: 3m 17s	remaining: 1h 52m 20s
1989:	learn: 0.8519293	test: 0.8038934	best: 0.8038934 (1989)	total: 3m 17s	remaining: 1h 52m 20s
1990:	learn: 0.8518986	test: 0.8038754	best: 0.8038754 (1990)	total: 3m 17s	remaining: 1h 52m 20s
1991:	learn: 0.8518619	test: 0.8038654	best: 0.8038654 (1991)	total: 3m 17s	remaining: 1h 52m 20s
1992:	learn: 0.8518294	test: 0.8038572	best: 0.8038572 (1992)	total: 3m 17s	remaining: 1h 52m 20s
1993:	learn: 0.8517990	test: 0.8038479	best: 0.8038479 (1993)	total: 3m 17s	remaining: 1h 52m 20s
1994:	learn: 0.8517706	test: 0.8038296	best: 0.8038296 (1994)	total: 3m 17s	remaining: 1h 52m 20s
1995:	learn: 0.8517402	test: 0.8038202	best: 0.8038202 (1995)	total: 3m 17s	remaining: 1h 52m 20s
1996:	learn: 0.8517074	test: 0.8038017	best: 0.8038017 (1996)	total: 3m 17s	remaining: 1h 52m 20s
1997:	learn: 0.8516791	test: 0.8037880	best: 0.8037880 (1997)	total: 3m 18s	remaining: 1h 52m 21s
1998:	learn: 0.85165

2072:	learn: 0.8495026	test: 0.8030799	best: 0.8030799 (2072)	total: 3m 25s	remaining: 1h 52m 22s
2073:	learn: 0.8494766	test: 0.8030717	best: 0.8030717 (2073)	total: 3m 25s	remaining: 1h 52m 22s
2074:	learn: 0.8494463	test: 0.8030474	best: 0.8030474 (2074)	total: 3m 25s	remaining: 1h 52m 23s
2075:	learn: 0.8494141	test: 0.8030347	best: 0.8030347 (2075)	total: 3m 26s	remaining: 1h 52m 23s
2076:	learn: 0.8493899	test: 0.8030323	best: 0.8030323 (2076)	total: 3m 26s	remaining: 1h 52m 23s
2077:	learn: 0.8493628	test: 0.8030230	best: 0.8030230 (2077)	total: 3m 26s	remaining: 1h 52m 22s
2078:	learn: 0.8493313	test: 0.8030050	best: 0.8030050 (2078)	total: 3m 26s	remaining: 1h 52m 23s
2079:	learn: 0.8493043	test: 0.8029895	best: 0.8029895 (2079)	total: 3m 26s	remaining: 1h 52m 23s
2080:	learn: 0.8492801	test: 0.8029861	best: 0.8029861 (2080)	total: 3m 26s	remaining: 1h 52m 23s
2081:	learn: 0.8492479	test: 0.8029677	best: 0.8029677 (2081)	total: 3m 26s	remaining: 1h 52m 23s
2082:	learn: 0.84921

2156:	learn: 0.8471619	test: 0.8020807	best: 0.8020807 (2156)	total: 3m 34s	remaining: 1h 52m 23s
2157:	learn: 0.8471370	test: 0.8020755	best: 0.8020755 (2157)	total: 3m 34s	remaining: 1h 52m 23s
2158:	learn: 0.8471103	test: 0.8020551	best: 0.8020551 (2158)	total: 3m 34s	remaining: 1h 52m 22s
2159:	learn: 0.8470732	test: 0.8020461	best: 0.8020461 (2159)	total: 3m 34s	remaining: 1h 52m 23s
2160:	learn: 0.8470468	test: 0.8020250	best: 0.8020250 (2160)	total: 3m 34s	remaining: 1h 52m 23s
2161:	learn: 0.8470215	test: 0.8020205	best: 0.8020205 (2161)	total: 3m 34s	remaining: 1h 52m 23s
2162:	learn: 0.8469989	test: 0.8020188	best: 0.8020188 (2162)	total: 3m 35s	remaining: 1h 52m 23s
2163:	learn: 0.8469742	test: 0.8020079	best: 0.8020079 (2163)	total: 3m 35s	remaining: 1h 52m 23s
2164:	learn: 0.8469354	test: 0.8019982	best: 0.8019982 (2164)	total: 3m 35s	remaining: 1h 52m 23s
2165:	learn: 0.8469065	test: 0.8019821	best: 0.8019821 (2165)	total: 3m 35s	remaining: 1h 52m 24s
2166:	learn: 0.84687

2241:	learn: 0.8448435	test: 0.8012820	best: 0.8012820 (2241)	total: 3m 43s	remaining: 1h 52m 25s
2242:	learn: 0.8448080	test: 0.8012717	best: 0.8012717 (2242)	total: 3m 43s	remaining: 1h 52m 26s
2243:	learn: 0.8447788	test: 0.8012598	best: 0.8012598 (2243)	total: 3m 43s	remaining: 1h 52m 25s
2244:	learn: 0.8447497	test: 0.8012479	best: 0.8012479 (2244)	total: 3m 43s	remaining: 1h 52m 25s
2245:	learn: 0.8447266	test: 0.8012466	best: 0.8012466 (2245)	total: 3m 43s	remaining: 1h 52m 26s
2246:	learn: 0.8446972	test: 0.8012315	best: 0.8012315 (2246)	total: 3m 43s	remaining: 1h 52m 26s
2247:	learn: 0.8446684	test: 0.8012138	best: 0.8012138 (2247)	total: 3m 43s	remaining: 1h 52m 26s
2248:	learn: 0.8446449	test: 0.8012063	best: 0.8012063 (2248)	total: 3m 43s	remaining: 1h 52m 26s
2249:	learn: 0.8446179	test: 0.8011901	best: 0.8011901 (2249)	total: 3m 44s	remaining: 1h 52m 27s
2250:	learn: 0.8445926	test: 0.8011831	best: 0.8011831 (2250)	total: 3m 44s	remaining: 1h 52m 27s
2251:	learn: 0.84456

2325:	learn: 0.8425401	test: 0.8001802	best: 0.8001802 (2325)	total: 3m 52s	remaining: 1h 52m 35s
2326:	learn: 0.8425158	test: 0.8001652	best: 0.8001652 (2326)	total: 3m 52s	remaining: 1h 52m 35s
2327:	learn: 0.8424890	test: 0.8001536	best: 0.8001536 (2327)	total: 3m 52s	remaining: 1h 52m 35s
2328:	learn: 0.8424541	test: 0.8001419	best: 0.8001419 (2328)	total: 3m 52s	remaining: 1h 52m 35s
2329:	learn: 0.8424331	test: 0.8001402	best: 0.8001402 (2329)	total: 3m 52s	remaining: 1h 52m 34s
2330:	learn: 0.8424100	test: 0.8001382	best: 0.8001382 (2330)	total: 3m 52s	remaining: 1h 52m 35s
2331:	learn: 0.8423774	test: 0.8001303	best: 0.8001303 (2331)	total: 3m 52s	remaining: 1h 52m 35s
2332:	learn: 0.8423508	test: 0.8001192	best: 0.8001192 (2332)	total: 3m 52s	remaining: 1h 52m 35s
2333:	learn: 0.8423288	test: 0.8001111	best: 0.8001111 (2333)	total: 3m 53s	remaining: 1h 52m 35s
2334:	learn: 0.8423050	test: 0.8000463	best: 0.8000463 (2334)	total: 3m 53s	remaining: 1h 52m 35s
2335:	learn: 0.84228

2410:	learn: 0.8403602	test: 0.7992819	best: 0.7992819 (2410)	total: 4m 1s	remaining: 1h 52m 41s
2411:	learn: 0.8403393	test: 0.7992754	best: 0.7992754 (2411)	total: 4m 1s	remaining: 1h 52m 41s
2412:	learn: 0.8403185	test: 0.7992362	best: 0.7992362 (2412)	total: 4m 1s	remaining: 1h 52m 41s
2413:	learn: 0.8402921	test: 0.7992238	best: 0.7992238 (2413)	total: 4m 1s	remaining: 1h 52m 42s
2414:	learn: 0.8402655	test: 0.7992189	best: 0.7992189 (2414)	total: 4m 1s	remaining: 1h 52m 42s
2415:	learn: 0.8402363	test: 0.7992032	best: 0.7992032 (2415)	total: 4m 1s	remaining: 1h 52m 42s
2416:	learn: 0.8402141	test: 0.7991950	best: 0.7991950 (2416)	total: 4m 1s	remaining: 1h 52m 43s
2417:	learn: 0.8401897	test: 0.7991848	best: 0.7991848 (2417)	total: 4m 1s	remaining: 1h 52m 42s
2418:	learn: 0.8401647	test: 0.7991786	best: 0.7991786 (2418)	total: 4m 2s	remaining: 1h 52m 43s
2419:	learn: 0.8401415	test: 0.7991731	best: 0.7991731 (2419)	total: 4m 2s	remaining: 1h 52m 43s
2420:	learn: 0.8401160	test: 0

2497:	learn: 0.8382627	test: 0.7982939	best: 0.7982939 (2497)	total: 4m 10s	remaining: 1h 52m 48s
2498:	learn: 0.8382391	test: 0.7982934	best: 0.7982934 (2498)	total: 4m 10s	remaining: 1h 52m 48s
2499:	learn: 0.8382188	test: 0.7982858	best: 0.7982858 (2499)	total: 4m 10s	remaining: 1h 52m 48s
2500:	learn: 0.8381950	test: 0.7982759	best: 0.7982759 (2500)	total: 4m 10s	remaining: 1h 52m 48s
2501:	learn: 0.8381748	test: 0.7982295	best: 0.7982295 (2501)	total: 4m 10s	remaining: 1h 52m 48s
2502:	learn: 0.8381499	test: 0.7982232	best: 0.7982232 (2502)	total: 4m 11s	remaining: 1h 52m 49s
2503:	learn: 0.8381291	test: 0.7982139	best: 0.7982139 (2503)	total: 4m 11s	remaining: 1h 52m 49s
2504:	learn: 0.8380985	test: 0.7982054	best: 0.7982054 (2504)	total: 4m 11s	remaining: 1h 52m 49s
2505:	learn: 0.8380749	test: 0.7981952	best: 0.7981952 (2505)	total: 4m 11s	remaining: 1h 52m 49s
2506:	learn: 0.8380484	test: 0.7981801	best: 0.7981801 (2506)	total: 4m 11s	remaining: 1h 52m 49s
2507:	learn: 0.83802

2582:	learn: 0.8363406	test: 0.7973334	best: 0.7973334 (2582)	total: 4m 19s	remaining: 1h 52m 55s
2583:	learn: 0.8363224	test: 0.7973291	best: 0.7973291 (2583)	total: 4m 19s	remaining: 1h 52m 55s
2584:	learn: 0.8362994	test: 0.7973198	best: 0.7973198 (2584)	total: 4m 19s	remaining: 1h 52m 55s
2585:	learn: 0.8362803	test: 0.7973067	best: 0.7973067 (2585)	total: 4m 19s	remaining: 1h 52m 55s
2586:	learn: 0.8362570	test: 0.7972981	best: 0.7972981 (2586)	total: 4m 20s	remaining: 1h 52m 56s
2587:	learn: 0.8362393	test: 0.7972939	best: 0.7972939 (2587)	total: 4m 20s	remaining: 1h 52m 56s
2588:	learn: 0.8362152	test: 0.7972813	best: 0.7972813 (2588)	total: 4m 20s	remaining: 1h 52m 56s
2589:	learn: 0.8361931	test: 0.7972721	best: 0.7972721 (2589)	total: 4m 20s	remaining: 1h 52m 56s
2590:	learn: 0.8361703	test: 0.7972668	best: 0.7972668 (2590)	total: 4m 20s	remaining: 1h 52m 56s
2591:	learn: 0.8361469	test: 0.7972565	best: 0.7972565 (2591)	total: 4m 20s	remaining: 1h 52m 57s
2592:	learn: 0.83612

2667:	learn: 0.8344567	test: 0.7966996	best: 0.7966996 (2667)	total: 4m 28s	remaining: 1h 53m 4s
2668:	learn: 0.8344293	test: 0.7966854	best: 0.7966854 (2668)	total: 4m 28s	remaining: 1h 53m 4s
2669:	learn: 0.8344058	test: 0.7966730	best: 0.7966730 (2669)	total: 4m 29s	remaining: 1h 53m 4s
2670:	learn: 0.8343776	test: 0.7966589	best: 0.7966589 (2670)	total: 4m 29s	remaining: 1h 53m 4s
2671:	learn: 0.8343608	test: 0.7966551	best: 0.7966551 (2671)	total: 4m 29s	remaining: 1h 53m 5s
2672:	learn: 0.8343432	test: 0.7966248	best: 0.7966248 (2672)	total: 4m 29s	remaining: 1h 53m 4s
2673:	learn: 0.8343259	test: 0.7966207	best: 0.7966207 (2673)	total: 4m 29s	remaining: 1h 53m 4s
2674:	learn: 0.8343092	test: 0.7966179	best: 0.7966179 (2674)	total: 4m 29s	remaining: 1h 53m 4s
2675:	learn: 0.8342863	test: 0.7966055	best: 0.7966055 (2675)	total: 4m 29s	remaining: 1h 53m 4s
2676:	learn: 0.8342655	test: 0.7965933	best: 0.7965933 (2676)	total: 4m 29s	remaining: 1h 53m 5s
2677:	learn: 0.8342461	test: 0

2752:	learn: 0.8327643	test: 0.7959676	best: 0.7959676 (2752)	total: 4m 37s	remaining: 1h 53m 7s
2753:	learn: 0.8327421	test: 0.7959554	best: 0.7959554 (2753)	total: 4m 37s	remaining: 1h 53m 6s
2754:	learn: 0.8327157	test: 0.7959451	best: 0.7959451 (2754)	total: 4m 38s	remaining: 1h 53m 7s
2755:	learn: 0.8326951	test: 0.7959452	best: 0.7959451 (2754)	total: 4m 38s	remaining: 1h 53m 7s
2756:	learn: 0.8326731	test: 0.7959337	best: 0.7959337 (2756)	total: 4m 38s	remaining: 1h 53m 7s
2757:	learn: 0.8326490	test: 0.7959203	best: 0.7959203 (2757)	total: 4m 38s	remaining: 1h 53m 7s
2758:	learn: 0.8326224	test: 0.7959141	best: 0.7959141 (2758)	total: 4m 38s	remaining: 1h 53m 8s
2759:	learn: 0.8325990	test: 0.7959011	best: 0.7959011 (2759)	total: 4m 38s	remaining: 1h 53m 8s
2760:	learn: 0.8325833	test: 0.7958973	best: 0.7958973 (2760)	total: 4m 38s	remaining: 1h 53m 8s
2761:	learn: 0.8325616	test: 0.7958989	best: 0.7958973 (2760)	total: 4m 38s	remaining: 1h 53m 8s
2762:	learn: 0.8325430	test: 0

2838:	learn: 0.8310647	test: 0.7953506	best: 0.7953506 (2838)	total: 4m 47s	remaining: 1h 53m 12s
2839:	learn: 0.8310492	test: 0.7953488	best: 0.7953488 (2839)	total: 4m 47s	remaining: 1h 53m 12s
2840:	learn: 0.8310218	test: 0.7953378	best: 0.7953378 (2840)	total: 4m 47s	remaining: 1h 53m 12s
2841:	learn: 0.8310027	test: 0.7953298	best: 0.7953298 (2841)	total: 4m 47s	remaining: 1h 53m 12s
2842:	learn: 0.8309826	test: 0.7953047	best: 0.7953047 (2842)	total: 4m 47s	remaining: 1h 53m 12s
2843:	learn: 0.8309682	test: 0.7953055	best: 0.7953047 (2842)	total: 4m 47s	remaining: 1h 53m 12s
2844:	learn: 0.8309474	test: 0.7952775	best: 0.7952775 (2844)	total: 4m 47s	remaining: 1h 53m 12s
2845:	learn: 0.8309310	test: 0.7952701	best: 0.7952701 (2845)	total: 4m 47s	remaining: 1h 53m 12s
2846:	learn: 0.8309087	test: 0.7952581	best: 0.7952581 (2846)	total: 4m 47s	remaining: 1h 53m 13s
2847:	learn: 0.8308865	test: 0.7952460	best: 0.7952460 (2847)	total: 4m 48s	remaining: 1h 53m 13s
2848:	learn: 0.83087

2924:	learn: 0.8294895	test: 0.7947282	best: 0.7947282 (2924)	total: 4m 56s	remaining: 1h 53m 17s
2925:	learn: 0.8294749	test: 0.7947256	best: 0.7947256 (2925)	total: 4m 56s	remaining: 1h 53m 17s
2926:	learn: 0.8294561	test: 0.7947231	best: 0.7947231 (2926)	total: 4m 56s	remaining: 1h 53m 17s
2927:	learn: 0.8294365	test: 0.7947122	best: 0.7947122 (2927)	total: 4m 56s	remaining: 1h 53m 17s
2928:	learn: 0.8294226	test: 0.7947094	best: 0.7947094 (2928)	total: 4m 56s	remaining: 1h 53m 17s
2929:	learn: 0.8294005	test: 0.7946975	best: 0.7946975 (2929)	total: 4m 56s	remaining: 1h 53m 17s
2930:	learn: 0.8293789	test: 0.7946850	best: 0.7946850 (2930)	total: 4m 57s	remaining: 1h 53m 17s
2931:	learn: 0.8293604	test: 0.7946790	best: 0.7946790 (2931)	total: 4m 57s	remaining: 1h 53m 18s
2932:	learn: 0.8293417	test: 0.7946724	best: 0.7946724 (2932)	total: 4m 57s	remaining: 1h 53m 18s
2933:	learn: 0.8293225	test: 0.7946617	best: 0.7946617 (2933)	total: 4m 57s	remaining: 1h 53m 17s
2934:	learn: 0.82930

3010:	learn: 0.8279377	test: 0.7940233	best: 0.7940233 (3010)	total: 5m 5s	remaining: 1h 53m 24s
3011:	learn: 0.8279223	test: 0.7940199	best: 0.7940199 (3011)	total: 5m 5s	remaining: 1h 53m 24s
3012:	learn: 0.8278996	test: 0.7940134	best: 0.7940134 (3012)	total: 5m 6s	remaining: 1h 53m 24s
3013:	learn: 0.8278772	test: 0.7940076	best: 0.7940076 (3013)	total: 5m 6s	remaining: 1h 53m 24s
3014:	learn: 0.8278650	test: 0.7940080	best: 0.7940076 (3013)	total: 5m 6s	remaining: 1h 53m 24s
3015:	learn: 0.8278496	test: 0.7939839	best: 0.7939839 (3015)	total: 5m 6s	remaining: 1h 53m 24s
3016:	learn: 0.8278268	test: 0.7939754	best: 0.7939754 (3016)	total: 5m 6s	remaining: 1h 53m 25s
3017:	learn: 0.8278099	test: 0.7939735	best: 0.7939735 (3017)	total: 5m 6s	remaining: 1h 53m 25s
3018:	learn: 0.8277882	test: 0.7939678	best: 0.7939678 (3018)	total: 5m 6s	remaining: 1h 53m 25s
3019:	learn: 0.8277698	test: 0.7939628	best: 0.7939628 (3019)	total: 5m 6s	remaining: 1h 53m 26s
3020:	learn: 0.8277449	test: 0

3095:	learn: 0.8263535	test: 0.7935073	best: 0.7935073 (3095)	total: 5m 15s	remaining: 1h 53m 37s
3096:	learn: 0.8263372	test: 0.7935064	best: 0.7935064 (3096)	total: 5m 15s	remaining: 1h 53m 37s
3097:	learn: 0.8263188	test: 0.7934833	best: 0.7934833 (3097)	total: 5m 15s	remaining: 1h 53m 38s
3098:	learn: 0.8262976	test: 0.7934781	best: 0.7934781 (3098)	total: 5m 15s	remaining: 1h 53m 38s
3099:	learn: 0.8262795	test: 0.7934677	best: 0.7934677 (3099)	total: 5m 15s	remaining: 1h 53m 38s
3100:	learn: 0.8262616	test: 0.7934569	best: 0.7934569 (3100)	total: 5m 16s	remaining: 1h 53m 38s
3101:	learn: 0.8262497	test: 0.7934573	best: 0.7934569 (3100)	total: 5m 16s	remaining: 1h 53m 38s
3102:	learn: 0.8262344	test: 0.7934483	best: 0.7934483 (3102)	total: 5m 16s	remaining: 1h 53m 38s
3103:	learn: 0.8262126	test: 0.7934431	best: 0.7934431 (3103)	total: 5m 16s	remaining: 1h 53m 39s
3104:	learn: 0.8261919	test: 0.7934377	best: 0.7934377 (3104)	total: 5m 16s	remaining: 1h 53m 39s
3105:	learn: 0.82617

3180:	learn: 0.8248575	test: 0.7930226	best: 0.7930226 (3180)	total: 5m 25s	remaining: 1h 53m 50s
3181:	learn: 0.8248339	test: 0.7930144	best: 0.7930144 (3181)	total: 5m 25s	remaining: 1h 53m 50s
3182:	learn: 0.8248158	test: 0.7930127	best: 0.7930127 (3182)	total: 5m 25s	remaining: 1h 53m 50s
3183:	learn: 0.8247930	test: 0.7930045	best: 0.7930045 (3183)	total: 5m 25s	remaining: 1h 53m 50s
3184:	learn: 0.8247721	test: 0.7929992	best: 0.7929992 (3184)	total: 5m 25s	remaining: 1h 53m 50s
3185:	learn: 0.8247481	test: 0.7929847	best: 0.7929847 (3185)	total: 5m 25s	remaining: 1h 53m 50s
3186:	learn: 0.8247301	test: 0.7929828	best: 0.7929828 (3186)	total: 5m 25s	remaining: 1h 53m 50s
3187:	learn: 0.8247139	test: 0.7929789	best: 0.7929789 (3187)	total: 5m 25s	remaining: 1h 53m 50s
3188:	learn: 0.8246920	test: 0.7929665	best: 0.7929665 (3188)	total: 5m 26s	remaining: 1h 53m 50s
3189:	learn: 0.8246753	test: 0.7929559	best: 0.7929559 (3189)	total: 5m 26s	remaining: 1h 53m 50s
3190:	learn: 0.82466

3266:	learn: 0.8232506	test: 0.7924119	best: 0.7924119 (3266)	total: 5m 35s	remaining: 1h 54m 3s
3267:	learn: 0.8232328	test: 0.7924034	best: 0.7924034 (3267)	total: 5m 35s	remaining: 1h 54m 3s
3268:	learn: 0.8232135	test: 0.7923977	best: 0.7923977 (3268)	total: 5m 35s	remaining: 1h 54m 4s
3269:	learn: 0.8231952	test: 0.7923883	best: 0.7923883 (3269)	total: 5m 35s	remaining: 1h 54m 4s
3270:	learn: 0.8231794	test: 0.7923886	best: 0.7923883 (3269)	total: 5m 35s	remaining: 1h 54m 4s
3271:	learn: 0.8231626	test: 0.7923670	best: 0.7923670 (3271)	total: 5m 35s	remaining: 1h 54m 4s
3272:	learn: 0.8231418	test: 0.7923592	best: 0.7923592 (3272)	total: 5m 35s	remaining: 1h 54m 4s
3273:	learn: 0.8231285	test: 0.7923563	best: 0.7923563 (3273)	total: 5m 35s	remaining: 1h 54m 4s
3274:	learn: 0.8231058	test: 0.7923417	best: 0.7923417 (3274)	total: 5m 35s	remaining: 1h 54m 5s
3275:	learn: 0.8230894	test: 0.7923276	best: 0.7923276 (3275)	total: 5m 36s	remaining: 1h 54m 5s
3276:	learn: 0.8230732	test: 0

3351:	learn: 0.8217977	test: 0.7918641	best: 0.7918641 (3351)	total: 5m 44s	remaining: 1h 54m 12s
3352:	learn: 0.8217813	test: 0.7918508	best: 0.7918508 (3352)	total: 5m 44s	remaining: 1h 54m 12s
3353:	learn: 0.8217701	test: 0.7918527	best: 0.7918508 (3352)	total: 5m 44s	remaining: 1h 54m 12s
3354:	learn: 0.8217543	test: 0.7918437	best: 0.7918437 (3354)	total: 5m 44s	remaining: 1h 54m 12s
3355:	learn: 0.8217367	test: 0.7918335	best: 0.7918335 (3355)	total: 5m 45s	remaining: 1h 54m 12s
3356:	learn: 0.8217197	test: 0.7918307	best: 0.7918307 (3356)	total: 5m 45s	remaining: 1h 54m 12s
3357:	learn: 0.8216988	test: 0.7918247	best: 0.7918247 (3357)	total: 5m 45s	remaining: 1h 54m 12s
3358:	learn: 0.8216821	test: 0.7918197	best: 0.7918197 (3358)	total: 5m 45s	remaining: 1h 54m 13s
3359:	learn: 0.8216651	test: 0.7918182	best: 0.7918182 (3359)	total: 5m 45s	remaining: 1h 54m 12s
3360:	learn: 0.8216476	test: 0.7918088	best: 0.7918088 (3360)	total: 5m 45s	remaining: 1h 54m 13s
3361:	learn: 0.82163

3435:	learn: 0.8204314	test: 0.7913296	best: 0.7913289 (3434)	total: 5m 54s	remaining: 1h 54m 25s
3436:	learn: 0.8204157	test: 0.7913164	best: 0.7913164 (3436)	total: 5m 54s	remaining: 1h 54m 25s
3437:	learn: 0.8203990	test: 0.7913063	best: 0.7913063 (3437)	total: 5m 54s	remaining: 1h 54m 26s
3438:	learn: 0.8203839	test: 0.7913035	best: 0.7913035 (3438)	total: 5m 54s	remaining: 1h 54m 26s
3439:	learn: 0.8203689	test: 0.7912983	best: 0.7912983 (3439)	total: 5m 54s	remaining: 1h 54m 26s
3440:	learn: 0.8203502	test: 0.7912981	best: 0.7912981 (3440)	total: 5m 54s	remaining: 1h 54m 26s
3441:	learn: 0.8203350	test: 0.7912769	best: 0.7912769 (3441)	total: 5m 55s	remaining: 1h 54m 26s
3442:	learn: 0.8203167	test: 0.7912776	best: 0.7912769 (3441)	total: 5m 55s	remaining: 1h 54m 26s
3443:	learn: 0.8203019	test: 0.7912715	best: 0.7912715 (3443)	total: 5m 55s	remaining: 1h 54m 26s
3444:	learn: 0.8202818	test: 0.7912648	best: 0.7912648 (3444)	total: 5m 55s	remaining: 1h 54m 26s
3445:	learn: 0.82026

3521:	learn: 0.8190818	test: 0.7906220	best: 0.7906220 (3521)	total: 6m 4s	remaining: 1h 54m 38s
3522:	learn: 0.8190641	test: 0.7906180	best: 0.7906180 (3522)	total: 6m 4s	remaining: 1h 54m 38s
3523:	learn: 0.8190469	test: 0.7906080	best: 0.7906080 (3523)	total: 6m 4s	remaining: 1h 54m 38s
3524:	learn: 0.8190278	test: 0.7906051	best: 0.7906051 (3524)	total: 6m 4s	remaining: 1h 54m 39s
3525:	learn: 0.8190127	test: 0.7905846	best: 0.7905846 (3525)	total: 6m 4s	remaining: 1h 54m 39s
3526:	learn: 0.8189984	test: 0.7905783	best: 0.7905783 (3526)	total: 6m 5s	remaining: 1h 54m 39s
3527:	learn: 0.8189835	test: 0.7905539	best: 0.7905539 (3527)	total: 6m 5s	remaining: 1h 54m 39s
3528:	learn: 0.8189736	test: 0.7905610	best: 0.7905539 (3527)	total: 6m 5s	remaining: 1h 54m 39s
3529:	learn: 0.8189571	test: 0.7905491	best: 0.7905491 (3529)	total: 6m 5s	remaining: 1h 54m 40s
3530:	learn: 0.8189392	test: 0.7905415	best: 0.7905415 (3530)	total: 6m 5s	remaining: 1h 54m 40s
3531:	learn: 0.8189282	test: 0

3607:	learn: 0.8178189	test: 0.7900438	best: 0.7900438 (3607)	total: 6m 14s	remaining: 1h 54m 51s
3608:	learn: 0.8178058	test: 0.7900393	best: 0.7900393 (3608)	total: 6m 14s	remaining: 1h 54m 51s
3609:	learn: 0.8177952	test: 0.7900321	best: 0.7900321 (3609)	total: 6m 14s	remaining: 1h 54m 51s
3610:	learn: 0.8177820	test: 0.7900266	best: 0.7900266 (3610)	total: 6m 14s	remaining: 1h 54m 51s
3611:	learn: 0.8177667	test: 0.7900204	best: 0.7900204 (3611)	total: 6m 14s	remaining: 1h 54m 51s
3612:	learn: 0.8177500	test: 0.7900139	best: 0.7900139 (3612)	total: 6m 15s	remaining: 1h 54m 51s
3613:	learn: 0.8177396	test: 0.7900069	best: 0.7900069 (3613)	total: 6m 15s	remaining: 1h 54m 51s
3614:	learn: 0.8177289	test: 0.7900039	best: 0.7900039 (3614)	total: 6m 15s	remaining: 1h 54m 51s
3615:	learn: 0.8177147	test: 0.7899992	best: 0.7899992 (3615)	total: 6m 15s	remaining: 1h 54m 51s
3616:	learn: 0.8176969	test: 0.7899990	best: 0.7899990 (3616)	total: 6m 15s	remaining: 1h 54m 51s
3617:	learn: 0.81768

3692:	learn: 0.8165973	test: 0.7896406	best: 0.7896342 (3691)	total: 6m 24s	remaining: 1h 54m 59s
3693:	learn: 0.8165834	test: 0.7896301	best: 0.7896301 (3693)	total: 6m 24s	remaining: 1h 54m 59s
3694:	learn: 0.8165693	test: 0.7896184	best: 0.7896184 (3694)	total: 6m 24s	remaining: 1h 55m
3695:	learn: 0.8165592	test: 0.7896126	best: 0.7896126 (3695)	total: 6m 24s	remaining: 1h 54m 59s
3696:	learn: 0.8165454	test: 0.7896050	best: 0.7896050 (3696)	total: 6m 24s	remaining: 1h 55m
3697:	learn: 0.8165294	test: 0.7896002	best: 0.7896002 (3697)	total: 6m 24s	remaining: 1h 55m
3698:	learn: 0.8165139	test: 0.7895955	best: 0.7895955 (3698)	total: 6m 24s	remaining: 1h 55m
3699:	learn: 0.8165015	test: 0.7895912	best: 0.7895912 (3699)	total: 6m 25s	remaining: 1h 55m
3700:	learn: 0.8164942	test: 0.7895889	best: 0.7895889 (3700)	total: 6m 25s	remaining: 1h 55m
3701:	learn: 0.8164795	test: 0.7895837	best: 0.7895837 (3701)	total: 6m 25s	remaining: 1h 55m
3702:	learn: 0.8164665	test: 0.7895843	best: 0.7

3779:	learn: 0.8154258	test: 0.7891688	best: 0.7891682 (3778)	total: 6m 34s	remaining: 1h 55m 7s
3780:	learn: 0.8154102	test: 0.7891682	best: 0.7891682 (3780)	total: 6m 34s	remaining: 1h 55m 7s
3781:	learn: 0.8153970	test: 0.7891614	best: 0.7891614 (3781)	total: 6m 34s	remaining: 1h 55m 7s
3782:	learn: 0.8153851	test: 0.7891598	best: 0.7891598 (3782)	total: 6m 34s	remaining: 1h 55m 7s
3783:	learn: 0.8153680	test: 0.7891596	best: 0.7891596 (3783)	total: 6m 34s	remaining: 1h 55m 7s
3784:	learn: 0.8153561	test: 0.7891554	best: 0.7891554 (3784)	total: 6m 34s	remaining: 1h 55m 7s
3785:	learn: 0.8153456	test: 0.7891487	best: 0.7891487 (3785)	total: 6m 34s	remaining: 1h 55m 7s
3786:	learn: 0.8153370	test: 0.7891471	best: 0.7891471 (3786)	total: 6m 35s	remaining: 1h 55m 7s
3787:	learn: 0.8153288	test: 0.7891448	best: 0.7891448 (3787)	total: 6m 35s	remaining: 1h 55m 7s
3788:	learn: 0.8153133	test: 0.7891428	best: 0.7891428 (3788)	total: 6m 35s	remaining: 1h 55m 7s
3789:	learn: 0.8153040	test: 0

3864:	learn: 0.8143293	test: 0.7888177	best: 0.7888177 (3864)	total: 6m 43s	remaining: 1h 55m 12s
3865:	learn: 0.8143190	test: 0.7888112	best: 0.7888112 (3865)	total: 6m 44s	remaining: 1h 55m 12s
3866:	learn: 0.8143041	test: 0.7888073	best: 0.7888073 (3866)	total: 6m 44s	remaining: 1h 55m 12s
3867:	learn: 0.8142958	test: 0.7888043	best: 0.7888043 (3867)	total: 6m 44s	remaining: 1h 55m 12s
3868:	learn: 0.8142846	test: 0.7888005	best: 0.7888005 (3868)	total: 6m 44s	remaining: 1h 55m 12s
3869:	learn: 0.8142729	test: 0.7887981	best: 0.7887981 (3869)	total: 6m 44s	remaining: 1h 55m 12s
3870:	learn: 0.8142640	test: 0.7887967	best: 0.7887967 (3870)	total: 6m 44s	remaining: 1h 55m 12s
3871:	learn: 0.8142479	test: 0.7887963	best: 0.7887963 (3871)	total: 6m 44s	remaining: 1h 55m 12s
3872:	learn: 0.8142329	test: 0.7887968	best: 0.7887963 (3871)	total: 6m 44s	remaining: 1h 55m 12s
3873:	learn: 0.8142227	test: 0.7887931	best: 0.7887931 (3873)	total: 6m 45s	remaining: 1h 55m 13s
3874:	learn: 0.81421

3948:	learn: 0.8133789	test: 0.7885403	best: 0.7885403 (3948)	total: 6m 53s	remaining: 1h 55m 15s
3949:	learn: 0.8133685	test: 0.7885366	best: 0.7885366 (3949)	total: 6m 53s	remaining: 1h 55m 16s
3950:	learn: 0.8133586	test: 0.7885330	best: 0.7885330 (3950)	total: 6m 53s	remaining: 1h 55m 16s
3951:	learn: 0.8133480	test: 0.7885267	best: 0.7885267 (3951)	total: 6m 53s	remaining: 1h 55m 16s
3952:	learn: 0.8133373	test: 0.7885243	best: 0.7885243 (3952)	total: 6m 53s	remaining: 1h 55m 16s
3953:	learn: 0.8133202	test: 0.7885255	best: 0.7885243 (3952)	total: 6m 54s	remaining: 1h 55m 16s
3954:	learn: 0.8133103	test: 0.7885227	best: 0.7885227 (3954)	total: 6m 54s	remaining: 1h 55m 16s
3955:	learn: 0.8132990	test: 0.7885217	best: 0.7885217 (3955)	total: 6m 54s	remaining: 1h 55m 16s
3956:	learn: 0.8132852	test: 0.7885163	best: 0.7885163 (3956)	total: 6m 54s	remaining: 1h 55m 16s
3957:	learn: 0.8132789	test: 0.7885139	best: 0.7885139 (3957)	total: 6m 54s	remaining: 1h 55m 16s
3958:	learn: 0.81326

4034:	learn: 0.8123865	test: 0.7882447	best: 0.7882437 (4033)	total: 7m 3s	remaining: 1h 55m 20s
4035:	learn: 0.8123804	test: 0.7882424	best: 0.7882424 (4035)	total: 7m 3s	remaining: 1h 55m 20s
4036:	learn: 0.8123667	test: 0.7882371	best: 0.7882371 (4036)	total: 7m 3s	remaining: 1h 55m 20s
4037:	learn: 0.8123577	test: 0.7882355	best: 0.7882355 (4037)	total: 7m 3s	remaining: 1h 55m 20s
4038:	learn: 0.8123478	test: 0.7882353	best: 0.7882353 (4038)	total: 7m 3s	remaining: 1h 55m 20s
4039:	learn: 0.8123379	test: 0.7882337	best: 0.7882337 (4039)	total: 7m 3s	remaining: 1h 55m 20s
4040:	learn: 0.8123221	test: 0.7882333	best: 0.7882333 (4040)	total: 7m 3s	remaining: 1h 55m 20s
4041:	learn: 0.8123086	test: 0.7882282	best: 0.7882282 (4041)	total: 7m 4s	remaining: 1h 55m 20s
4042:	learn: 0.8122950	test: 0.7882230	best: 0.7882230 (4042)	total: 7m 4s	remaining: 1h 55m 20s
4043:	learn: 0.8122798	test: 0.7882214	best: 0.7882214 (4043)	total: 7m 4s	remaining: 1h 55m 21s
4044:	learn: 0.8122715	test: 0

4119:	learn: 0.8114566	test: 0.7878942	best: 0.7878942 (4119)	total: 7m 13s	remaining: 1h 55m 24s
4120:	learn: 0.8114457	test: 0.7878933	best: 0.7878933 (4120)	total: 7m 13s	remaining: 1h 55m 24s
4121:	learn: 0.8114360	test: 0.7878913	best: 0.7878913 (4121)	total: 7m 13s	remaining: 1h 55m 24s
4122:	learn: 0.8114282	test: 0.7878876	best: 0.7878876 (4122)	total: 7m 13s	remaining: 1h 55m 23s
4123:	learn: 0.8114213	test: 0.7878841	best: 0.7878841 (4123)	total: 7m 13s	remaining: 1h 55m 23s
4124:	learn: 0.8114127	test: 0.7878841	best: 0.7878841 (4123)	total: 7m 13s	remaining: 1h 55m 23s
4125:	learn: 0.8114027	test: 0.7878840	best: 0.7878840 (4125)	total: 7m 13s	remaining: 1h 55m 23s
4126:	learn: 0.8113981	test: 0.7878820	best: 0.7878820 (4126)	total: 7m 13s	remaining: 1h 55m 23s
4127:	learn: 0.8113893	test: 0.7878804	best: 0.7878804 (4127)	total: 7m 13s	remaining: 1h 55m 23s
4128:	learn: 0.8113836	test: 0.7878817	best: 0.7878804 (4127)	total: 7m 14s	remaining: 1h 55m 23s
4129:	learn: 0.81137

4204:	learn: 0.8106183	test: 0.7876890	best: 0.7876890 (4204)	total: 7m 22s	remaining: 1h 55m 23s
4205:	learn: 0.8106046	test: 0.7876875	best: 0.7876875 (4205)	total: 7m 22s	remaining: 1h 55m 23s
4206:	learn: 0.8106006	test: 0.7876853	best: 0.7876853 (4206)	total: 7m 22s	remaining: 1h 55m 23s
4207:	learn: 0.8105964	test: 0.7876901	best: 0.7876853 (4206)	total: 7m 22s	remaining: 1h 55m 23s
4208:	learn: 0.8105875	test: 0.7876804	best: 0.7876804 (4208)	total: 7m 22s	remaining: 1h 55m 23s
4209:	learn: 0.8105789	test: 0.7876789	best: 0.7876789 (4209)	total: 7m 23s	remaining: 1h 55m 23s
4210:	learn: 0.8105641	test: 0.7876737	best: 0.7876737 (4210)	total: 7m 23s	remaining: 1h 55m 23s
4211:	learn: 0.8105589	test: 0.7876712	best: 0.7876712 (4211)	total: 7m 23s	remaining: 1h 55m 23s
4212:	learn: 0.8105508	test: 0.7876665	best: 0.7876665 (4212)	total: 7m 23s	remaining: 1h 55m 23s
4213:	learn: 0.8105405	test: 0.7876556	best: 0.7876556 (4213)	total: 7m 23s	remaining: 1h 55m 23s
4214:	learn: 0.81052

4289:	learn: 0.8097928	test: 0.7874871	best: 0.7874871 (4289)	total: 7m 31s	remaining: 1h 55m 21s
4290:	learn: 0.8097804	test: 0.7874877	best: 0.7874871 (4289)	total: 7m 32s	remaining: 1h 55m 22s
4291:	learn: 0.8097687	test: 0.7874900	best: 0.7874871 (4289)	total: 7m 32s	remaining: 1h 55m 22s
4292:	learn: 0.8097611	test: 0.7874976	best: 0.7874871 (4289)	total: 7m 32s	remaining: 1h 55m 22s
4293:	learn: 0.8097479	test: 0.7874959	best: 0.7874871 (4289)	total: 7m 32s	remaining: 1h 55m 22s
4294:	learn: 0.8097375	test: 0.7874955	best: 0.7874871 (4289)	total: 7m 32s	remaining: 1h 55m 22s
4295:	learn: 0.8097284	test: 0.7874940	best: 0.7874871 (4289)	total: 7m 32s	remaining: 1h 55m 22s
4296:	learn: 0.8097136	test: 0.7874955	best: 0.7874871 (4289)	total: 7m 32s	remaining: 1h 55m 22s
4297:	learn: 0.8097050	test: 0.7874915	best: 0.7874871 (4289)	total: 7m 32s	remaining: 1h 55m 22s
4298:	learn: 0.8096900	test: 0.7874928	best: 0.7874871 (4289)	total: 7m 32s	remaining: 1h 55m 22s
4299:	learn: 0.80968

4373:	learn: 0.8089636	test: 0.7873161	best: 0.7873093 (4368)	total: 7m 41s	remaining: 1h 55m 22s
4374:	learn: 0.8089547	test: 0.7873148	best: 0.7873093 (4368)	total: 7m 41s	remaining: 1h 55m 22s
4375:	learn: 0.8089421	test: 0.7872971	best: 0.7872971 (4375)	total: 7m 41s	remaining: 1h 55m 22s
4376:	learn: 0.8089319	test: 0.7872958	best: 0.7872958 (4376)	total: 7m 41s	remaining: 1h 55m 22s
4377:	learn: 0.8089241	test: 0.7872894	best: 0.7872894 (4377)	total: 7m 41s	remaining: 1h 55m 22s
4378:	learn: 0.8089157	test: 0.7872802	best: 0.7872802 (4378)	total: 7m 41s	remaining: 1h 55m 22s
4379:	learn: 0.8089072	test: 0.7872757	best: 0.7872757 (4379)	total: 7m 42s	remaining: 1h 55m 22s
4380:	learn: 0.8088978	test: 0.7872700	best: 0.7872700 (4380)	total: 7m 42s	remaining: 1h 55m 22s
4381:	learn: 0.8088865	test: 0.7872657	best: 0.7872657 (4381)	total: 7m 42s	remaining: 1h 55m 22s
4382:	learn: 0.8088781	test: 0.7872649	best: 0.7872649 (4382)	total: 7m 42s	remaining: 1h 55m 22s
4383:	learn: 0.80886

4458:	learn: 0.8081096	test: 0.7871268	best: 0.7871260 (4457)	total: 7m 50s	remaining: 1h 55m 22s
4459:	learn: 0.8081005	test: 0.7871285	best: 0.7871260 (4457)	total: 7m 51s	remaining: 1h 55m 22s
4460:	learn: 0.8080873	test: 0.7871350	best: 0.7871260 (4457)	total: 7m 51s	remaining: 1h 55m 22s
4461:	learn: 0.8080793	test: 0.7871283	best: 0.7871260 (4457)	total: 7m 51s	remaining: 1h 55m 22s
4462:	learn: 0.8080651	test: 0.7871300	best: 0.7871260 (4457)	total: 7m 51s	remaining: 1h 55m 22s
4463:	learn: 0.8080535	test: 0.7871262	best: 0.7871260 (4457)	total: 7m 51s	remaining: 1h 55m 22s
4464:	learn: 0.8080424	test: 0.7871267	best: 0.7871260 (4457)	total: 7m 51s	remaining: 1h 55m 23s
4465:	learn: 0.8080356	test: 0.7871261	best: 0.7871260 (4457)	total: 7m 51s	remaining: 1h 55m 22s
4466:	learn: 0.8080289	test: 0.7871247	best: 0.7871247 (4466)	total: 7m 51s	remaining: 1h 55m 22s
4467:	learn: 0.8080252	test: 0.7871234	best: 0.7871234 (4467)	total: 7m 51s	remaining: 1h 55m 22s
4468:	learn: 0.80801

4543:	learn: 0.8072185	test: 0.7869290	best: 0.7869290 (4543)	total: 8m	remaining: 1h 55m 21s
4544:	learn: 0.8072118	test: 0.7869294	best: 0.7869290 (4543)	total: 8m	remaining: 1h 55m 20s
4545:	learn: 0.8071994	test: 0.7869280	best: 0.7869280 (4545)	total: 8m	remaining: 1h 55m 21s
4546:	learn: 0.8071930	test: 0.7869281	best: 0.7869280 (4545)	total: 8m	remaining: 1h 55m 20s
4547:	learn: 0.8071893	test: 0.7869271	best: 0.7869271 (4547)	total: 8m	remaining: 1h 55m 20s
4548:	learn: 0.8071812	test: 0.7869247	best: 0.7869247 (4548)	total: 8m	remaining: 1h 55m 20s
4549:	learn: 0.8071724	test: 0.7869263	best: 0.7869247 (4548)	total: 8m 1s	remaining: 1h 55m 20s
4550:	learn: 0.8071613	test: 0.7869274	best: 0.7869247 (4548)	total: 8m 1s	remaining: 1h 55m 20s
4551:	learn: 0.8071465	test: 0.7869211	best: 0.7869211 (4551)	total: 8m 1s	remaining: 1h 55m 20s
4552:	learn: 0.8071339	test: 0.7869192	best: 0.7869192 (4552)	total: 8m 1s	remaining: 1h 55m 20s
4553:	learn: 0.8071198	test: 0.7869205	best: 0.7

4629:	learn: 0.8063421	test: 0.7868119	best: 0.7868083 (4625)	total: 8m 10s	remaining: 1h 55m 18s
4630:	learn: 0.8063355	test: 0.7868112	best: 0.7868083 (4625)	total: 8m 10s	remaining: 1h 55m 18s
4631:	learn: 0.8063293	test: 0.7868109	best: 0.7868083 (4625)	total: 8m 10s	remaining: 1h 55m 18s
4632:	learn: 0.8063215	test: 0.7868056	best: 0.7868056 (4632)	total: 8m 10s	remaining: 1h 55m 18s
4633:	learn: 0.8063066	test: 0.7867993	best: 0.7867993 (4633)	total: 8m 10s	remaining: 1h 55m 17s
4634:	learn: 0.8062944	test: 0.7867992	best: 0.7867992 (4634)	total: 8m 10s	remaining: 1h 55m 18s
4635:	learn: 0.8062864	test: 0.7867980	best: 0.7867980 (4635)	total: 8m 10s	remaining: 1h 55m 18s
4636:	learn: 0.8062792	test: 0.7867986	best: 0.7867980 (4635)	total: 8m 10s	remaining: 1h 55m 17s
4637:	learn: 0.8062658	test: 0.7867922	best: 0.7867922 (4637)	total: 8m 10s	remaining: 1h 55m 17s
4638:	learn: 0.8062575	test: 0.7867915	best: 0.7867915 (4638)	total: 8m 10s	remaining: 1h 55m 17s
4639:	learn: 0.80624

4713:	learn: 0.8054593	test: 0.7866149	best: 0.7866081 (4712)	total: 8m 19s	remaining: 1h 55m 15s
4714:	learn: 0.8054459	test: 0.7866145	best: 0.7866081 (4712)	total: 8m 19s	remaining: 1h 55m 15s
4715:	learn: 0.8054335	test: 0.7866205	best: 0.7866081 (4712)	total: 8m 19s	remaining: 1h 55m 15s
4716:	learn: 0.8054234	test: 0.7866274	best: 0.7866081 (4712)	total: 8m 19s	remaining: 1h 55m 14s
4717:	learn: 0.8054154	test: 0.7866266	best: 0.7866081 (4712)	total: 8m 19s	remaining: 1h 55m 14s
4718:	learn: 0.8054095	test: 0.7866267	best: 0.7866081 (4712)	total: 8m 19s	remaining: 1h 55m 14s
4719:	learn: 0.8053962	test: 0.7866208	best: 0.7866081 (4712)	total: 8m 19s	remaining: 1h 55m 14s
4720:	learn: 0.8053885	test: 0.7866198	best: 0.7866081 (4712)	total: 8m 20s	remaining: 1h 55m 14s
4721:	learn: 0.8053763	test: 0.7866207	best: 0.7866081 (4712)	total: 8m 20s	remaining: 1h 55m 14s
4722:	learn: 0.8053677	test: 0.7866013	best: 0.7866013 (4722)	total: 8m 20s	remaining: 1h 55m 14s
4723:	learn: 0.80536

4798:	learn: 0.8045903	test: 0.7864099	best: 0.7864015 (4794)	total: 8m 28s	remaining: 1h 55m 12s
4799:	learn: 0.8045782	test: 0.7864164	best: 0.7864015 (4794)	total: 8m 28s	remaining: 1h 55m 12s
4800:	learn: 0.8045685	test: 0.7864165	best: 0.7864015 (4794)	total: 8m 29s	remaining: 1h 55m 12s
4801:	learn: 0.8045523	test: 0.7864023	best: 0.7864015 (4794)	total: 8m 29s	remaining: 1h 55m 12s
4802:	learn: 0.8045437	test: 0.7864033	best: 0.7864015 (4794)	total: 8m 29s	remaining: 1h 55m 12s
4803:	learn: 0.8045336	test: 0.7864105	best: 0.7864015 (4794)	total: 8m 29s	remaining: 1h 55m 12s
4804:	learn: 0.8045234	test: 0.7864018	best: 0.7864015 (4794)	total: 8m 29s	remaining: 1h 55m 12s
4805:	learn: 0.8045176	test: 0.7864013	best: 0.7864013 (4805)	total: 8m 29s	remaining: 1h 55m 12s
4806:	learn: 0.8045045	test: 0.7864033	best: 0.7864013 (4805)	total: 8m 29s	remaining: 1h 55m 12s
4807:	learn: 0.8044904	test: 0.7864045	best: 0.7864013 (4805)	total: 8m 29s	remaining: 1h 55m 12s
4808:	learn: 0.80447

4883:	learn: 0.8036900	test: 0.7863210	best: 0.7863184 (4881)	total: 8m 38s	remaining: 1h 55m 9s
4884:	learn: 0.8036765	test: 0.7863155	best: 0.7863155 (4884)	total: 8m 38s	remaining: 1h 55m 9s
4885:	learn: 0.8036668	test: 0.7863119	best: 0.7863119 (4885)	total: 8m 38s	remaining: 1h 55m 9s
4886:	learn: 0.8036554	test: 0.7863118	best: 0.7863118 (4886)	total: 8m 38s	remaining: 1h 55m 10s
4887:	learn: 0.8036477	test: 0.7863024	best: 0.7863024 (4887)	total: 8m 38s	remaining: 1h 55m 10s
4888:	learn: 0.8036354	test: 0.7863102	best: 0.7863024 (4887)	total: 8m 38s	remaining: 1h 55m 10s
4889:	learn: 0.8036233	test: 0.7863178	best: 0.7863024 (4887)	total: 8m 38s	remaining: 1h 55m 10s
4890:	learn: 0.8036163	test: 0.7863135	best: 0.7863024 (4887)	total: 8m 39s	remaining: 1h 55m 10s
4891:	learn: 0.8036066	test: 0.7863207	best: 0.7863024 (4887)	total: 8m 39s	remaining: 1h 55m 9s
4892:	learn: 0.8035971	test: 0.7863279	best: 0.7863024 (4887)	total: 8m 39s	remaining: 1h 55m 9s
4893:	learn: 0.8035897	te

4968:	learn: 0.8028775	test: 0.7862440	best: 0.7862440 (4968)	total: 8m 47s	remaining: 1h 55m 6s
4969:	learn: 0.8028621	test: 0.7862417	best: 0.7862417 (4969)	total: 8m 47s	remaining: 1h 55m 6s
4970:	learn: 0.8028514	test: 0.7862383	best: 0.7862383 (4970)	total: 8m 47s	remaining: 1h 55m 6s
4971:	learn: 0.8028387	test: 0.7862427	best: 0.7862383 (4970)	total: 8m 48s	remaining: 1h 55m 6s
4972:	learn: 0.8028292	test: 0.7862391	best: 0.7862383 (4970)	total: 8m 48s	remaining: 1h 55m 6s
4973:	learn: 0.8028217	test: 0.7862381	best: 0.7862381 (4973)	total: 8m 48s	remaining: 1h 55m 6s
4974:	learn: 0.8028089	test: 0.7862374	best: 0.7862374 (4974)	total: 8m 48s	remaining: 1h 55m 6s
4975:	learn: 0.8027971	test: 0.7862436	best: 0.7862374 (4974)	total: 8m 48s	remaining: 1h 55m 6s
4976:	learn: 0.8027889	test: 0.7862414	best: 0.7862374 (4974)	total: 8m 48s	remaining: 1h 55m 6s
4977:	learn: 0.8027789	test: 0.7862386	best: 0.7862374 (4974)	total: 8m 48s	remaining: 1h 55m 6s
4978:	learn: 0.8027727	test: 0

<catboost.core.CatBoostRegressor at 0x11993b0f9e8>

In [7]:
#training.to_pickle("pickled/training")
#training = pd.read_pickle("pickled/training")

#pickle.dump(cb_model, open( "pickled/cb_model", "wb"), protocol=4)

cb_model = pickle.load( open( "pickled/cb_model", "rb" ) )

In [10]:
np.array(cb_features)[np.argsort(cb_model.get_feature_importance())[::-1]]

array(['item_block_units_lag_1', 'shop_category_units',
       'item_share_of_total_units', 'shop_first_two_blocks_units',
       'shop_share_of_units', 'item_mean_price_block_lag_2',
       'item_first_two_blocks_units', 'shop_max_turnover_block',
       'item_number_of_consecutive_days_with_activity', 'item_units',
       'shop_category_turnover', 'shop_max_units_block',
       'item_last_two_blocks_units', 'item_days_of_activity',
       'item_mean_units_block', 'shop_block_units_lag_1',
       'category_mean_turnover_day', '12', 'item_first_day',
       'shop_min_turnover_block', 'shop_block_turnover_lag_1',
       'item_block_turnover_lag_1', 'item_first_block',
       'item_block_units_lag_2', 'shop_category_max_units_block',
       'subcategory_block_units_lag_1', '11',
       'item_mean_price_block_lag_1', 'area_mean_price_block_lag_1',
       'item_block_turnover_lag_2',
       'shop_fluctuation_units_first_last_blocks',
       'shop_category_max_turnover_block', 'shop_turnove

In [13]:
xg_features = np.array(cb_features)[np.argsort(cb_model.get_feature_importance())[::-1]][0:31]
xg_features

array(['item_block_units_lag_1', 'shop_category_units',
       'item_share_of_total_units', 'shop_first_two_blocks_units',
       'shop_share_of_units', 'item_mean_price_block_lag_2',
       'item_first_two_blocks_units', 'shop_max_turnover_block',
       'item_number_of_consecutive_days_with_activity', 'item_units',
       'shop_category_turnover', 'shop_max_units_block',
       'item_last_two_blocks_units', 'item_days_of_activity',
       'item_mean_units_block', 'shop_block_units_lag_1',
       'category_mean_turnover_day', '12', 'item_first_day',
       'shop_min_turnover_block', 'shop_block_turnover_lag_1',
       'item_block_turnover_lag_1', 'item_first_block',
       'item_block_units_lag_2', 'shop_category_max_units_block',
       'subcategory_block_units_lag_1', '11',
       'item_mean_price_block_lag_1', 'area_mean_price_block_lag_1',
       'item_block_turnover_lag_2',
       'shop_fluctuation_units_first_last_blocks'], dtype='<U47')

In [None]:
gc.collect()
params =   {
    'objective' : 'reg:linear',
    #'tree_method':'gpu_hist',
    #'gpu_id': 0,
    'learning_rate': 0.001, 
    #'gamma' : 0.3, 
    #'min_child_weight' : 3,
    #'nthread' : 16,
    #'max_depth' : 30,
    #'subsample' : 0.9, 
    #'colsample_bytree' : 0.8, 
    'seed':42, 
    'eval_metric' : "rmse",
    'num_boost_round' : 300,
    #'n_estimators':999,
    #'max_leaves': 300
}


tr_data = xgb.DMatrix(x_train[xg_features], y_train)
va_data = xgb.DMatrix(x_val[xg_features], y_val)
watchlist = [(tr_data, 'train'), (va_data, 'valid')]

xg_model = xgb.train(params, tr_data, 300, watchlist, maximize=False, early_stopping_rounds = 30, verbose_eval=True)

In [None]:
gc.collect()
lgtrain = lgbm.Dataset(x_train[cb_features], label=y_train)
lgval = lgbm.Dataset(x_val[cb_features], label=y_val)



#[0.00542047893814942, 29, 24, 0.39949465609514856, 1, 0.67943500, 10]
params = {
        "num_threads": 16,
        "verbosity": -1,
        #"zero_as_missing": "true",
        "boosting":'gbdt',
        "objective" : "regression",
        "metric" : "rmse",
        "seed": 42,
        #"max_bin": 10,#default 255
        #"num_leaves": 10, #default 31
        #"bagging_fraction": 0.7,
        #"bagging_freq": 1,
        #"min_data_in_leaf": 50000,
        #"feature_fraction": 0.7,
        #"lambda_l2": 3,
        #"max_depth": 2,
        #"min_gain_to_split": 10,
        "learning_rate" : 0.001,
        "histogram_pool_size": 1000,
        #"categorical_column": [0,1,2,3,4]
}

evals_result = {}
model_lgb = lgbm.train(params, lgtrain, 1000, 
                      valid_sets=[lgval], 
                      early_stopping_rounds=100, 
                      verbose_eval=100, 
                      evals_result=evals_result)



In [27]:
pd.set_option('display.max_columns', None)  
pd.set_option('display.expand_frame_repr', False)
pd.set_option('max_colwidth', -1)
x_train[cb_features].sample(10)

Unnamed: 0,item_first_block,item_last_block,is_first_two_blocks,is_last_two_blocks,item_units,item_mean_units_block,item_mean_units_day,item_max_units_block,item_min_units_block,item_max_units_day,item_min_units_day,item_turnover,item_mean_turnover_block,item_mean_turnover_day,item_max_turnover_block,item_min_turnover_block,item_max_turnover_day,item_min_turnover_day,item_days_of_activity,item_blocks_of_activity,item_days_since_start,item_mean_day_between_activity,item_longest_stretch_days_without_activity,item_longest_stretch_blocks_without_activity,item_longest_stretch_block_with_activity,item_number_of_consecutive_days_with_activity,item_days_between_start_and_first_activity,item_blocks_between_start_and_first_activity,item_first_day,item_last_day,item_activity_on_all_blocks,item_mean_price,item_min_price,item_max_price,item_number_different_prices,item_price_amplitude,item_deviation_mean_category_price,item_first_two_blocks_units,item_last_two_blocks_units,item_fluctuation_units_first_last_blocks,item_first_two_blocks_mean_price,item_last_two_blocks_mean_price,item_fluctuation_price_first_last_blocks,item_share_of_total_units,item_share_of_total_turnover,category_units,category_mean_units_block,category_mean_units_day,category_max_units_block,category_min_units_block,category_max_units_day,category_min_units_day,category_turnover,category_mean_turnover_block,category_mean_turnover_day,category_max_turnover_block,category_min_turnover_block,category_max_turnover_day,category_min_turnover_day,category_mean_price,category_min_price,category_max_price,category_first_two_blocks_units,category_last_two_blocks_units,category_fluctuation_units_first_last_blocks,category_first_two_blocks_mean_price,category_last_two_blocks_mean_price,category_fluctuation_price_first_last_blocks,video_game,gaming_old_gen,gaming_new_gen,pc_games,payment_cards,movies,movies_niche,books,music,music_CD,music_vinyl,gifts,software,subcategory_units,subcategory_mean_units_block,subcategory_mean_units_day,subcategory_max_units_block,subcategory_min_units_block,subcategory_max_units_day,subcategory_min_units_day,subcategory_turnover,subcategory_mean_turnover_block,subcategory_mean_turnover_day,subcategory_max_turnover_block,subcategory_min_turnover_block,subcategory_max_turnover_day,subcategory_min_turnover_day,category_share_of_total_units,category_share_of_total_turnover,subcategory_share_of_total_units,subcategory_share_of_total_turnover,subcategory_first_two_blocks_units,subcategory_last_two_blocks_units,subcategory_fluctuation_units_first_last_blocks,subcategory_first_two_blocks_mean_price,subcategory_last_two_blocks_mean_price,subcategory_fluctuation_price_first_last_blocks,shop_units,shop_mean_units_block,shop_mean_units_day,shop_max_units_block,shop_min_units_block,shop_max_units_day,shop_min_units_day,shop_turnover,shop_mean_turnover_block,shop_mean_turnover_day,shop_max_turnover_block,shop_min_turnover_block,shop_max_turnover_day,shop_min_turnover_day,shop_mean_price,shop_first_two_blocks_units,shop_last_two_blocks_units,shop_fluctuation_units_first_last_blocks,shop_first_two_blocks_mean_price,shop_last_two_blocks_mean_price,shop_fluctuation_price_first_last_blocks,shop_share_of_units,shop_share_of_turnover,shop_TC,shop_TRK,shop_SEC,shop_shopping_center,shop_moscow,max_category_units,max_category_turnover,area_units,area_mean_units_block,area_mean_units_day,area_max_units_block,area_min_units_block,area_max_units_day,area_min_units_day,area_turnover,area_mean_turnover_block,area_mean_turnover_day,area_max_turnover_block,area_min_turnover_block,area_max_turnover_day,area_min_turnover_day,area_mean_price,area_first_two_blocks_units,area_last_two_blocks_units,area_fluctuation_units_first_last_blocks,area_first_two_blocks_mean_price,area_last_two_blocks_mean_price,area_fluctuation_price_first_last_blocks,shop_category_units,shop_category_mean_units_block,shop_category_mean_units_day,shop_category_max_units_block,shop_category_min_units_block,shop_category_max_units_day,shop_category_min_units_day,shop_category_turnover,shop_category_mean_turnover_block,shop_category_mean_turnover_day,shop_category_max_turnover_block,shop_category_min_turnover_block,shop_category_max_turnover_day,shop_category_min_turnover_day,shop_category_mean_price,item_block_units_lag_1,item_block_turnover_lag_1,item_mean_price_block_lag_1,item_block_units_lag_2,item_block_turnover_lag_2,item_mean_price_block_lag_2,item_block_units_lag_3,item_block_turnover_lag_3,item_mean_price_block_lag_3,category_block_units_lag_1,category_block_turnover_lag_1,category_mean_price_block_lag_1,subcategory_block_units_lag_1,subcategory_block_turnover_lag_1,subcategory_mean_price_block_lag_1,category_block_units_lag_2,category_block_turnover_lag_2,category_mean_price_block_lag_2,subcategory_block_units_lag_2,subcategory_block_turnover_lag_2,subcategory_mean_price_block_lag_2,category_block_units_lag_3,category_block_turnover_lag_3,category_mean_price_block_lag_3,subcategory_block_units_lag_3,subcategory_block_turnover_lag_3,subcategory_mean_price_block_lag_3,shop_block_units_lag_1,shop_block_turnover_lag_1,shop_mean_price_block_lag_1,area_block_units_lag_1,area_block_turnover_lag_1,area_mean_price_block_lag_1,shop_block_units_lag_2,shop_block_turnover_lag_2,shop_mean_price_block_lag_2,area_block_units_lag_2,area_block_turnover_lag_2,area_mean_price_block_lag_2,shop_block_units_lag_3,shop_block_turnover_lag_3,shop_mean_price_block_lag_3,area_block_units_lag_3,area_block_turnover_lag_3,area_mean_price_block_lag_3,1,2,3,4,5,6,7,8,9,10,11,12
3760773,14,28,False,False,30.0,3.206897,1.034483,6,1,2,1,3270,384.344818,112.758621,815,28,196,28,29,14,77,0.566138,83,2,10,4,428,14,62,490,False,109.379311,28,149,5,432.142853,-58.630501,6.0,2.0,-66.666664,149.0,28.0,-81.208054,0.001439,0.00015,303281,15002.385742,556.335815,22065,6779,1472,128,82941753,4084407.25,152254.96875,5848777,1927394,391497,34996,264.395996,11,1399,96528.0,23415,-75.74279,317.183868,257.053314,-18.95764,False,False,False,False,False,True,False,False,False,False,False,False,False,303281,15002.385742,556.335815,22065,6779.0,1472,128,82941753,4084407.25,152254.96875,5848777,1927394,391497,34996,14.542552,1.8334,14.542552,1.8334,96528,23415,-75.74279,317.183868,257.053314,-18.95764,38923,1875.521484,70.542175,2836,1243,227,10,43717169,2097699.5,78692.929688,4506068,1326990,335970,9110,1101.736816,9173,2919,-68.178352,1209.355957,1392.696045,15.160137,1.866387,0.035185,False,False,False,True,False,30,20,38923,0.0,70.542175,0.0,0.0,227,10,43717169,0.0,78692.93,0.0,0.0,335970,9110,1101.736816,9173,2919,-68.178352,1209.355957,1392.696045,15.160137,5212.0,278.192108,12.65541,448,98,49,1,1491537,78902.992188,3579.239258,131547,26723,14528,79,280.640045,3,447,149.0,3,447,149.0,6,815,135.833328,15005,3939418,254.357773,15005,3939418,254.357773,18320,5192105,268.40567,18320,5192105,268.40567,21266,5848777,262.434174,21266,5848777,262.434174,1704,1537879,804.28125,1704,1537879,804.28125,1671,1399028,813.183899,1671,1399028,813.183899,2144,1844511,839.920959,2144,1844511,839.920959,False,False,False,False,False,True,False,False,False,False,False,False
5701104,12,33,False,False,180.0,10.965117,1.366279,18,1,4,1,183206,11078.041016,1065.151123,18683,1099,4396,521,147,21,21,0.039745,48,2,16,52,636,21,1,637,False,1014.616272,521,1099,7,110.940498,18.459469,23.0,10.0,-56.52174,968.913025,1054.400024,8.822975,0.008631,0.008399,41748,2217.055908,95.289406,5064,1186,708,13,35328866,1880533.125,81327.390625,4358673,986724,729265,9036,856.509216,0,3799,10278.0,4278,-58.377117,837.763733,834.670776,-0.369192,False,False,False,False,False,False,False,False,False,False,False,True,False,383999,19259.488281,767.912781,41289,11648.0,4086,206,226562464,11668349.0,479620.96875,28598762,6677513,2885223,107343,2.001848,1.619619,18.41304,0.541606,77525,32212,-58.449532,893.948547,961.26062,7.529747,5872,2953.773438,828.4375,3261,2611,1150,-1,6599606,3310663.0,926186.375,3498386,3101220,1288510,-5999,1333.252808,1293,2761,-4954.988281,1130.224121,1491.505493,31.965464,0.281567,0.302553,False,False,False,False,True,61,61,716036,34716.898438,1265.770996,59487.0,19179.0,5428,411,713475134,34876700.0,1272760.0,78798072.0,17100444.0,6572848,350612,972.611328,168461,59211,-64.851807,1082.714233,1001.846069,-7.469019,1139.0,622.417969,170.277344,864,275,341,16,1228718,669239.0,182511.625,919778,308940,368959,20694,1070.40625,7,6647,949.571411,18,16944,941.333313,0,0,0.0,1281,993812,780.411316,17956,7863184,660.848328,1295,1005460,784.34021,19148,7932869,638.632446,0,0,0.0,0,0,0.0,0,0,0.0,0,0,0.0,0,0,0.0,0,0,0.0,0,0,0.0,0,0,0.0,False,False,True,False,False,False,False,False,False,False,False,False
6722975,16,24,False,False,65.0,29.698412,1.936508,42,1,4,-1,35142,16109.063477,557.809509,22877,549,2196,-549,43,7,153,0.137874,102,3,5,26,249,8,124,373,False,540.380981,368,549,2,49.184784,26.144161,54.0,1.0,-98.148148,538.944458,549.0,1.865787,0.003117,0.001611,116758,5696.091797,205.597595,7704,2989,496,51,51140895,2461388.25,89192.367188,3554760,1381295,221548,22736,428.383667,23,3599,27043.0,10380,-61.616684,535.168945,402.608398,-24.769852,False,False,False,False,False,True,False,False,False,False,False,False,False,145606,7092.105957,255.632874,10219,3993.0,636,66,73308105,3559088.0,128960.757812,5784690,1845127,341120,32170,5.598634,0.375519,6.981917,1.391754,32847,12851,-60.876183,622.182678,460.82605,-25.933966,31462,1513.709106,61.82373,2516,963,223,8,35640733,1726934.625,69330.1875,3778172,1066802,311681,6352,1132.035522,6909,2348,-66.015343,1292.466553,1396.882812,8.078844,1.508627,1.633916,True,False,False,False,False,30,20,31462,0.0,61.82373,0.0,0.0,223,8,35640733,0.0,69330.19,0.0,0.0,311681,6352,1132.035522,6909,2348,-66.015343,1292.466553,1396.882812,8.078844,1160.0,61.929688,5.012153,98,17,19,0,490551,25400.53125,1933.950562,34871,9177,8438,-51,417.572906,0,0,0.0,0,0,0.0,0,0,0.0,3288,1722002,515.592957,4214,2347853,548.12146,4670,2645291,531.98761,5819,3532453,579.634827,5098,2225711,422.952057,6720,3389050,479.322601,1187,1419472,1102.192627,1187,1419472,1102.192627,1110,1444088,1207.524048,1110,1444088,1207.524048,1187,1401401,1173.637695,1187,1401401,1173.637695,False,False,False,False,False,True,False,False,False,False,False,False
3690965,12,17,False,False,23.0,9.521739,1.434783,14,1,3,1,3303,1413.347778,143.608688,2086,76,447,76,19,6,6,0.551471,73,1,5,5,150,5,2,152,False,143.608688,76,149,3,96.052635,-45.684242,5.0,2.0,-60.0,149.0,87.0,-41.610737,0.001103,0.000151,303281,15002.385742,556.335815,22065,6779,1472,128,82941753,4084407.25,152254.96875,5848777,1927394,391497,34996,264.395996,11,1399,96528.0,23415,-75.74279,317.183868,257.053314,-18.95764,False,False,False,False,False,True,False,False,False,False,False,False,False,303281,15002.385742,556.335815,22065,6779.0,1472,128,82941753,4084407.25,152254.96875,5848777,1927394,391497,34996,14.542552,1.8334,14.542552,1.8334,96528,23415,-75.74279,317.183868,257.053314,-18.95764,35677,1700.149414,65.064308,2511,1061,197,11,35746224,1721317.5,66259.101562,3408786,978169,298310,5012,999.811462,7839,2660,-66.067101,1168.453491,986.214294,-15.596612,1.710739,1.638752,False,False,False,True,False,30,20,63336,0.0,112.622551,0.0,0.0,397,27,64265957,0.0,116927.4,0.0,0.0,617426,15672,995.653931,14026,4943,-64.758308,1171.715576,1000.935242,-14.575236,3634.0,178.891937,8.972284,312,93,32,-1,929395,45580.578125,2287.397461,80984,25976,8419,-349,254.015396,1,149,149.0,4,596,149.0,14,2086,149.0,21266,5848777,262.434174,21266,5848777,262.434174,18467,5024651,261.254333,18467,5024651,261.254333,22065,5787678,252.908356,22065,5787678,252.908356,2059,1879741,923.913513,3800,3368108,889.414368,1866,1807101,942.598267,3389,3373334,919.830322,2144,2249131,1054.058838,3769,3921487,1022.478027,False,False,False,True,False,False,False,False,False,False,False,False
8127155,33,33,False,True,2.0,2.0,1.0,2,2,1,1,1548,1548.0,774.0,1548,1548,899,649,2,1,609,3.0,6,0,0,0,6,0,609,615,False,774.0,649,899,2,38.520802,192.742706,0.0,2.0,0.0,0.0,774.0,0.0,9.6e-05,7.1e-05,303281,15002.385742,556.335815,22065,6779,1472,128,82941753,4084407.25,152254.96875,5848777,1927394,391497,34996,264.395996,11,1399,96528.0,23415,-75.74279,317.183868,257.053314,-18.95764,False,False,False,False,False,True,False,False,False,False,False,False,False,303281,15002.385742,556.335815,22065,6779.0,1472,128,82941753,4084407.25,152254.96875,5848777,1927394,391497,34996,14.542552,1.8334,14.542552,1.8334,96528,23415,-75.74279,317.183868,257.053314,-18.95764,91202,4260.20459,151.584,7256,3501,740,47,103664958,4934829.0,176722.015625,11189135,2957495,2301462,31040,1026.907715,19545,10025,-48.708111,1200.999756,1111.887695,-7.419827,4.373205,0.814448,True,False,False,False,False,40,20,115611,0.0,199.92717,0.0,0.0,740,52,138027658,0.0,247166.2,0.0,0.0,2301462,48581,1078.478638,26358,10279,-61.002354,1263.122803,1109.352661,-12.173812,12451.0,580.310608,22.790649,829,430,60,1,3615872,168347.671875,6540.329102,233046,125308,15808,58,282.296082,0,0,0.0,0,0,0.0,0,0,0.0,21266,5848777,262.434174,21266,5848777,262.434174,18467,5024651,261.254333,18467,5024651,261.254333,22065,5787678,252.908356,22065,5787678,252.908356,4210,4531937,917.51062,6288,7627257,1030.383667,3541,3860463,869.661255,5511,6840026,924.503113,3994,3162102,843.994507,5619,4808555,888.970032,False,False,False,True,False,False,False,False,False,False,False,False
2208109,21,33,False,False,110.0,20.605505,1.642202,34,1,4,1,32721,6127.64209,300.192657,10166,299,1196,299,82,13,245,0.09375,60,1,12,39,378,12,244,622,False,297.449554,190,299,3,57.36842,0.320058,63.0,5.0,-92.063492,297.241943,299.0,0.591459,0.005275,0.0015,193656,9346.445312,334.23877,13786,4913,967,100,57557430,2760072.25,98877.375,4159781,1554553,301825,30525,296.50058,22,3299,44983.0,13053,-70.982368,303.582916,310.186859,2.175334,False,False,False,False,False,False,False,True,True,True,False,False,False,198574,9588.948242,342.913208,14646,5225.0,1026,111,64181960,3080086.25,110357.804688,5454410,2042046,397437,41157,9.285951,0.669679,9.521772,0.973374,46302,13780,-70.238869,335.102661,357.344696,6.637381,35948,1765.162231,70.346764,2765,882,538,14,42652562,2099742.0,83518.375,4648931,1152143,521342,5444,1197.254883,8674,3132,-63.89209,1317.474365,1350.170044,2.48169,1.723734,1.955367,True,False,False,False,True,30,20,716036,0.0,1265.770996,0.0,0.0,5428,411,713475134,0.0,1272760.0,0.0,0.0,6572848,350612,972.611328,168461,59211,-64.851807,1082.714233,1001.846069,-7.469019,3328.0,159.417831,8.586707,244,103,27,1,1015541,48587.6875,2622.515137,73056,31426,8273,199,304.836548,10,2990,299.0,29,8562,295.241394,34,10166,299.0,9291,2726585,293.449493,9544,3123707,328.011322,13786,4159781,300.91391,14646,5454410,365.177704,9809,2929444,297.179535,10293,3629723,342.5271,1750,2401629,1409.24585,38014,40755083,1134.502686,2765,4648931,1647.920532,59487,78798074,1294.215576,2091,3128583,1528.060547,39963,49005476,1152.425171,False,True,False,False,False,False,False,False,False,False,False,False
5943582,26,31,False,False,354.0,105.227425,7.913043,162,3,22,1,139970,41730.777344,468.127075,64189,1068,8698,235,95,6,445,0.02998,32,1,5,57,149,5,427,576,False,394.866211,235,399,6,69.787231,49.346516,141.0,20.0,-85.815605,396.728668,384.473694,-3.089012,0.016975,0.006417,303281,15002.385742,556.335815,22065,6779,1472,128,82941753,4084407.25,152254.96875,5848777,1927394,391497,34996,264.395996,11,1399,96528.0,23415,-75.74279,317.183868,257.053314,-18.95764,False,False,False,False,False,True,False,False,False,False,False,False,False,303281,15002.385742,556.335815,22065,6779.0,1472,128,82941753,4084407.25,152254.96875,5848777,1927394,391497,34996,14.542552,1.8334,14.542552,1.8334,96528,23415,-75.74279,317.183868,257.053314,-18.95764,40949,1963.041504,76.95507,3499,1227,262,11,46923921,2262117.5,87294.335938,4792668,1394783,400836,11621,1123.340576,9721,3388,-65.147621,1286.284302,1181.555054,-8.141995,1.963535,0.182196,False,False,False,False,False,55,20,47400,0.0,87.836449,0.0,0.0,308,14,55506391,0.0,103100.5,0.0,0.0,517282,12991,1153.808228,10558,4283,-59.433605,1347.492188,1265.44043,-6.089221,4508.0,229.507568,12.187195,347,103,38,1,1358280,68040.140625,3564.424805,97579,31758,11133,28,296.873108,31,12111,390.399994,40,15667,391.487183,101,40299,399.0,9304,2668836,279.140717,9304,2668836,279.140717,9208,2712062,286.50354,9208,2712062,286.50354,10683,3138552,281.62674,10683,3138552,281.62674,1227,1394783,1125.034302,1612,1901249,1172.807373,1537,2124630,1267.559814,1946,2776388,1313.939087,1570,1950895,1149.921753,1994,2501687,1153.427124,False,False,False,False,False,False,True,False,False,False,False,False
455586,0,0,0,0,0.0,0.0,0.0,0,0,0,0,0,0.0,0.0,0,0,0,0,0,0,0,0.0,0,0,0,0,0,0,0,0,0,0.0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,11779,845.716797,58.436024,1828,36,1084,0,39118821,2615691.25,205950.328125,6655078,67872,3899423,-1954,2796.927734,46,11999,1941.0,544,-71.973213,3204.47583,3774.841797,17.799042,True,False,False,True,False,False,False,False,False,False,False,False,False,361094,17778.880859,646.900879,23170,8155.0,4503,133,278607495,13237918.0,479773.84375,24968266,5944002,8050903,97529,0.564812,1.793366,17.31473,0.958575,77354,24513,-68.310623,773.027954,759.93457,-1.693776,38872,1890.282227,73.997772,3318,1212,294,10,43105092,2114059.5,81840.515625,4680572,1267314,386489,9603,1094.160522,8745,2840,-67.5243,1249.937012,1195.948364,-4.319308,1.863942,0.007125,False,False,True,False,False,30,20,92762,1327.778687,165.286789,2127.0,954.0,617,42,103314526,1309045.0,183702.1,2642482.0,825369.0,1227026,35038,1064.623779,21464,7209,-66.413528,1205.663574,1173.176636,-2.694532,125.0,13.265306,1.561224,35,1,7,1,271966,30150.082031,3533.040771,78761,399,21393,399,2066.183594,0,0,0.0,0,0,0.0,0,0,0.0,611,1323053,2205.281982,21610,16778569,697.459717,1533,3457270,2171.534912,19428,17807521,693.24823,264,427004,1584.049438,18859,9094154,477.98941,1651,1897579,1155.056641,4206,4749966,1109.765015,1718,2127427,1181.810303,4472,5607732,1118.041992,2001,1767146,869.962463,4564,3924986,846.007446,False,False,False,False,False,False,False,False,False,False,True,False
2418906,0,0,0,0,0.0,0.0,0.0,0,0,0,0,0,0.0,0.0,0,0,0,0,0,0,0,0.0,0,0,0,0,0,0,0,0,0,0.0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,221327,10926.091797,402.012482,14883,4150,4286,72,136797559,6496837.5,239353.171875,17530861,2722889,7722472,47572,542.386414,0,2399,48673.0,11674,-76.01545,646.213196,608.40979,-5.849987,True,False,False,True,False,False,False,False,False,False,False,False,False,361094,17778.880859,646.900879,23170,8155.0,4503,133,278607495,13237918.0,479773.84375,24968266,5944002,8050903,97529,10.612796,0.364393,17.31473,0.958575,77354,24513,-68.310623,773.027954,759.93457,-1.693776,24629,1233.831421,49.618576,2055,680,165,5,25260678,1240105.625,49353.527344,2130967,619174,250972,1564,1011.962646,5983,1721,-71.235168,1110.404907,1262.365112,13.685107,1.180979,1.158052,False,False,False,True,False,30,20,46189,0.0,89.517883,0.0,0.0,285,12,47646316,0.0,91833.98,0.0,0.0,475689,5541,1042.349487,10091,3961,-60.7472,1169.053711,1254.86853,7.340535,4645.0,235.897461,10.916411,424,99,60,-1,2792941,135055.546875,5972.700684,274207,63669,106528,-2400,556.814453,0,0,0.0,0,0,0.0,0,0,0.0,11645,5380328,451.798523,19684,10945276,546.077148,11604,5674204,473.885529,21061,16372730,582.206604,10755,5943825,517.859558,18784,14312020,625.100037,1143,1111658,988.459412,2209,2157222,1028.656738,1277,1337972,1002.03125,2442,2399941,971.630615,1222,1066144,874.143921,2251,1938394,880.625732,False,False,False,False,False,False,True,False,False,False,False,False
674732,20,33,False,False,30.0,8.75,1.875,20,1,6,1,10470,3053.75,654.375,6980,349,2094,349,16,7,257,1.641667,115,5,2,5,394,13,245,639,False,349.0,349,349,1,0.0,-35.402809,3.0,2.0,-33.333332,349.0,349.0,-0.0,0.001439,0.00048,30207,1637.410034,56.983677,4824,506,1100,6,21888709,1174862.25,39938.835938,3609885,200173,1058260,995,540.271179,4,3599,4171.0,7403,77.487411,697.763977,696.479858,-0.184038,True,False,False,True,False,False,False,False,False,False,False,False,False,361094,17778.880859,646.900879,23170,8155.0,4503,133,278607495,13237918.0,479773.84375,24968266,5944002,8050903,97529,1.448448,1.003468,17.31473,0.958575,77354,24513,-68.310623,773.027954,759.93457,-1.693776,49562,2469.983887,96.063042,5081,1319,495,18,47036676,2399077.5,94000.03125,6674785,1250170,887317,6093,920.429199,11956,3253,-72.791901,984.343689,1067.300171,8.427596,2.376535,0.187365,True,False,False,False,False,40,23,130506,0.0,240.32988,0.0,0.0,1129,49,119258667,0.0,226286.2,0.0,0.0,1770871,30648,889.948792,33383,9239,-72.324234,969.262573,939.476379,-3.073079,0.0,0.0,0.0,0,0,0,0,0,0.0,0.0,0,0,0,0,0.0,0,0,0.0,0,0,0.0,0,0,0.0,1394,917512,557.452026,12754,8073499,612.478577,4824,2020075,483.838593,19318,12555760,673.721497,2101,1927900,539.246704,22729,19994788,802.234314,2360,2152019,884.499023,5900,5193391,863.94397,2519,2499132,957.61792,6662,6836495,980.664673,5081,6674785,1186.043701,11920,15821211,1246.025146,False,False,True,False,False,False,False,False,False,False,False,False


In [None]:
tes