In [1]:
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

In [2]:
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
%matplotlib inline 
from sklearn.ensemble import RandomForestRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.tree import export_graphviz
import matplotlib.pyplot as plt

import lightgbm as lgbm
import gc
import xgboost as xgb
import pickle as pickle


from catboost import CatBoostRegressor
import dask.dataframe as dd
from sklearn.model_selection import KFold
from itertools import product

In [3]:
items           = pd.read_csv('items.csv',usecols=["item_id", "item_category_id"])
item_categories = pd.read_csv('item_categories.csv')
shops           = pd.read_csv('shops.csv')
sales_train     = pd.read_csv('sales_train.csv.gz')
test            = pd.read_csv('test.csv.gz')

In [4]:
sales_train[['day','month', 'year']] = sales_train['date'].str.split('.', expand=True).astype(int)
sales_train = sales_train[sales_train['year'] != 2013]
sales_train = sales_train.set_index('item_id').join(items.set_index('item_id'))
sales_train.reset_index(inplace=True)

In [323]:
# Якутск Орджоникидзе, 56
sales_train.loc[sales_train.shop_id == 0, 'shop_id'] = 57
test.loc[test.shop_id == 0, 'shop_id'] = 57
# Якутск ТЦ "Центральный"
sales_train.loc[sales_train.shop_id == 1, 'shop_id'] = 58
test.loc[test.shop_id == 1, 'shop_id'] = 58
# Жуковский ул. Чкалова 39м²
sales_train.loc[sales_train.shop_id == 10, 'shop_id'] = 11
test.loc[test.shop_id == 10, 'shop_id'] = 11

In [13]:
sums = sales_train.groupby('item_id')['item_cnt_day'].sum().reset_index().rename(columns={"item_cnt_day":"item_total_sales"}).sort_values(by='item_total_sales')

ids_reject = sums[(sums['item_total_sales'] > 0) & (sums['item_total_sales'] < 1000)]['item_id'].unique()

#sums.groupby(pd.cut(sums["item_total_sales"], np.arange(0, 16000, 1000))).count()#.cumsum()


In [170]:
def get_number_of_days_since_start(day,month, year):
    days = 0
    if year == 2015:
        days = 365
    def is_even(num):
        return num % 2 == 0
    half_of_month = int(month/2)
    even = (30*half_of_month) + (31*half_of_month)
    if is_even(month):
        days = days + even - 30 - day
    else:
        days = days + even + day
    return days

sales_train['item_days_since_start'] = pd.to_numeric(sales_train.apply(lambda row: get_number_of_days_since_start(row['day'],row['month'], row['year']),axis=1), downcast='unsigned') 

In [52]:
train_item_ids = sales_train['item_id'].unique()
train_item_ids = np.setdiff1d(train_item_ids, ids_reject)
train_shop_ids = sales_train['shop_id'].unique()
test_item_ids = test['item_id'].unique()
test_shop_ids = test['shop_id'].unique()
train_blocks = sales_train['date_block_num'].unique()

all_item_ids = np.unique(np.append(test_item_ids,train_item_ids))
all_shop_ids = np.unique(np.append(train_shop_ids,test_shop_ids))

In [53]:
combinations = []

for dbn in range(np.min(train_blocks), np.max(train_blocks)+1):
    sales = sales_train[sales_train.date_block_num==dbn]
    dbn_combos = list(product(sales.shop_id.unique(), sales.item_id.unique(), [dbn]))
    for combo in dbn_combos:
        combinations.append(combo)
        
all_combos = pd.DataFrame(np.unique(np.vstack([combinations]), axis=0), columns=['shop_id','item_id','date_block_num'])

In [300]:
ys = sales_train.groupby(['shop_id', 'item_id', 'date_block_num'], as_index=False)['item_cnt_day']\
                .sum().rename(columns={"item_cnt_day":"item_cnt_block"})

training = all_combos.merge(ys, on=['shop_id', 'item_id', 'date_block_num'], how='left').fillna(0)


training['item_cnt_block'] = training['item_cnt_block'].clip(0,20).astype('int8')

training = training.set_index('item_id').join(items.set_index('item_id'))
training.reset_index(inplace=True)

for col in ['item_id', 'shop_id', 'item_category_id']:
    training[col] = pd.to_numeric(training[col], downcast='unsigned')

In [301]:
dates = sales_train[['date_block_num', 'month', 'year']].drop_duplicates(['date_block_num', 'month', 'year'])

dates_dict = {}

for index,row in dates.iterrows():
    dates_dict[row['date_block_num']] = {"month": row['month'], "year": row['year']}
    
training['month'] = pd.to_numeric(training['date_block_num'].apply(lambda block: dates_dict[block]['month']), downcast='unsigned')


In [205]:
training["shop_cat"] = training["shop_id"].astype(str) + "_" + training["item_category_id"].astype(str)

In [208]:
training["shop_item"] = training["shop_id"].astype(str) + "_" + training["item_id"].astype(str)

In [302]:
#https://maxhalford.github.io/blog/target-encoding-done-the-right-way/
#https://www.kaggle.com/vprokopev/mean-likelihood-encodings-a-comprehensive-study

from sklearn.model_selection import StratifiedKFold
#columns = ["item_id", "shop_id", "item_category_id", "month", "shop_cat", "shop_item", "date_block_num"]
columns = ["item_id", "shop_id", "item_category_id", "month",  "date_block_num"]



y_train = training["item_cnt_block"].values
folds = KFold(n_splits = 5, shuffle=True).split(training)

i=1
for in_fold_index, out_of_fold_index in folds:
    print("fold", i)
    #print(np.intersect1d(training.loc[in_fold_index]["shop_id"].unique(), training.loc[out_of_fold_index]["shop_id"].unique()))
    #print(len(in_fold_index))
    for column in columns:
        means = training.iloc[in_fold_index].groupby(column)['item_cnt_block'].mean()
            #x_validation[column + "_mean_target"] = means\
        name = column + '_mean_encoding'
        training.loc[out_of_fold_index,name] = training.loc[out_of_fold_index][column].map(means)
    i+=1

fold 1
fold 2
fold 3
fold 4
fold 5


In [325]:
def add_block_units_mean(df, cols, name):
    print(name)
    name_units = name + '_units'
    name_mean = name + '_mean'
    
    try:
        df.drop(columns=[name_units, name_mean],inplace=True)
    except:
        pass

    
    block_units = df.groupby(cols,as_index=False)['item_cnt_block'].sum()\
                        .drop_duplicates(cols)\
                        .rename(columns={'item_cnt_block':name_units})
    df = df.merge(block_units, on=cols, how='left')
    df[name_units].fillna(0,inplace=True)
    df[name_units] = pd.to_numeric(df[name_units].astype(int),downcast='unsigned')
    del block_units
    
    block_means = df.groupby(cols,as_index=False)['item_cnt_block'].mean()\
                        .drop_duplicates(cols)\
                        .rename(columns={'item_cnt_block':name_mean})
    df = df.merge(block_means, on=cols, how='left')
    df[name_mean].fillna(0,inplace=True)
    df[name_mean] = pd.to_numeric(df[name_mean],downcast='float')
    del block_means
    
    gc.collect()
    return df


training = add_block_units_mean(training, ['item_id','date_block_num'], 'item_block')
training = add_block_units_mean(training, ['shop_id','date_block_num'], 'shop_block')
training = add_block_units_mean(training, ['item_category_id','date_block_num'], 'cat_block')
training = add_block_units_mean(training, ['shop_id', 'item_category_id','date_block_num'], 'shop_cat_block')
training = add_block_units_mean(training, ['shop_id', 'item_id','date_block_num'], 'shop_item_block')

item_block
shop_block
cat_block
shop_cat_block
shop_item_block


In [218]:
number_of_items = sales_train['item_id'].nunique()
print("number_of_items:", number_of_items)
number_of_categories = sales_train['item_category_id'].nunique()
print("number_of_categories:", number_of_categories)
number_of_shops = sales_train['shop_id'].nunique()
print("number_of_shops:", number_of_shops)
number_of_days = 365 + 365 - 30 - 31
print("number_of_days:", number_of_days)
number_of_blocks = sales_train['date_block_num'].nunique()
print("number_of_blocks:", number_of_blocks)
total_sales = sales_train['item_cnt_day'].sum()
print("total_sales:", total_sales)
average_price = sales_train['item_price'].mean()
print("average_price:", average_price)

training['item_units'] = pd.to_numeric(training.groupby(['date_block_num'])['item_block_units'].transform(np.sum),downcast='unsigned')
training['cat_units'] = pd.to_numeric(training.groupby(['date_block_num'])['cat_block_units'].transform(np.sum),downcast='unsigned')
training['shop_units'] = pd.to_numeric(training.groupby(['date_block_num'])['shop_block_units'].transform(np.sum),downcast='unsigned')

training['item_share_of_total_units'] = pd.to_numeric(training['item_units'] * 100 / total_sales,downcast='float')
training['category_share_of_total_units'] = pd.to_numeric(training['cat_units'] * 100 / total_sales,downcast='float')
training['shop_share_of_units'] = pd.to_numeric(training['shop_units'] * 100 / total_sales,downcast='float')
training['shop_item_units'] = pd.to_numeric(training.groupby(['date_block_num'])\
                                            ['shop_item_block_units'].transform(np.sum),downcast='unsigned')

training['shop_item_share_of_total_units'] = pd.to_numeric(training['shop_item_units'] * 100\
                        / total_sales,downcast='float')
training['shop_item_share_of_shop_units'] = pd.to_numeric(training['shop_item_units'] * 100\
                        / training['shop_units'],downcast='float')


training['item_share_of_shop_units'] = pd.to_numeric(training['shop_item_units'] * 100 / training['shop_units'],downcast='float')

training['shop_item_share_of_shop_units_mean'] = training.groupby('item_id')['shop_item_share_of_shop_units'].transform(np.mean)


number_of_items: 17054
number_of_categories: 79
number_of_shops: 55
number_of_days: 669
number_of_blocks: 22
total_sales: 2085473.0
average_price: 1015.5023073772021


In [24]:
def add_min_max_quantiles(df, cols, name):
    print(name)

    block_name = name+'_block_units'
    units_name = name+'_units'
    max_name = name+'_max_units_block'
    min_name = name+'_min_units_block'
    
    try:
        df.drop(columns=[units_name, max_name, min_name, min_max_name],inplace=True)
    except:
        pass


    df[units_name] = pd.to_numeric(df.groupby(['date_block_num'])[block_name].transform(np.sum), downcast='unsigned')
    df[max_name] = pd.to_numeric(df.groupby(cols)[block_name].transform(np.max), downcast='unsigned')
    df[min_name] = pd.to_numeric(df.groupby(cols)[block_name].transform(np.min), downcast='unsigned')
    


    for q in [0.25,0.50,0.75]:
        qname = name+'_minmax_q' + str(q)
        try:
            df.drop(columns=[qname],inplace=True)
        except:
            pass
        df[qname] =  pd.to_numeric(df[[min_name,max_name]].quantile(q,axis=1), downcast='unsigned')
        
    return df

training = add_min_max_quantiles(training, ['item_id'], 'item')
training = add_min_max_quantiles(training, ['shop_id'], 'shop')
training = add_min_max_quantiles(training, ['item_category_id'], 'cat')
training = add_min_max_quantiles(training, ['shop_id','item_category_id'], 'shop_cat')
training = add_min_max_quantiles(training, ['shop_id','item_id'], 'shop_item')

item
shop
cat
shop_cat
shop_item


In [326]:
def add_rolls(df, cols, name, rolls = [2,3,4,6]):
    for roll in rolls:
        print(name, roll)
        roll_name = name+"_rolling_" + str(roll)
        roll_name_tmp = roll_name + "_tmp"
        
        try:
            df.drop(columns=[roll_name],inplace=True)
        except:
            pass       

    
        block_units_rolling_temp = df\
            .drop_duplicates(cols)\
            .sort_values(cols)\
            .set_index(cols)\
            .groupby(cols[0:len(cols)-1],as_index=False)\
            [name].rolling(roll,min_periods=2).mean().reset_index()\
            .rename(columns={name:roll_name_tmp})\
            [cols+[roll_name_tmp]]
        
    
        df = df.merge(block_units_rolling_temp, on=cols, how='left')
        #print(df.columns.values)
        del block_units_rolling_temp
        gc.collect()
        

        block_units_rolling = df\
            .drop_duplicates(cols)\
            .sort_values(cols)\
            .set_index(cols)\
            .groupby(cols[0:len(cols)-1],as_index=False)\
            [roll_name_tmp].shift(1)\
            .rename(columns={roll_name_tmp:roll_name}).reset_index()

        df = df.merge(block_units_rolling, on=cols, how='left')
        df[roll_name].fillna(0,inplace=True)
        df[roll_name] = pd.to_numeric(df[roll_name], downcast='float')
        df.drop(columns=[roll_name_tmp], inplace=True)
        del block_units_rolling
        gc.collect()
    
    return df
    

#training = add_rolls(training, ['item_id','date_block_num'], 'item_block_units')
training = add_rolls(training, ['item_id','date_block_num'], 'item_block_mean')
#training = add_rolls(training, ['shop_id','date_block_num'], 'shop_block_units')
training = add_rolls(training, ['shop_id','date_block_num'], 'shop_block_mean')
#training = add_rolls(training, ['item_category_id','date_block_num'], 'cat_block_units')
training = add_rolls(training, ['item_category_id','date_block_num'], 'cat_block_mean')
#training = add_rolls(training, ['shop_id','item_category_id','date_block_num'], 'shop_cat_block_units')
#training = add_rolls(training, ['shop_id','item_category_id','date_block_num'], 'shop_cat_block_mean')
#training = add_rolls(training, ['shop_id','item_id','date_block_num'], 'shop_item')

item_block_mean 2
item_block_mean 3
item_block_mean 4
item_block_mean 6
shop_block_mean 2
shop_block_mean 3
shop_block_mean 4
shop_block_mean 6
cat_block_mean 2
cat_block_mean 3
cat_block_mean 4
cat_block_mean 6


In [26]:
training = add_rolls(training, ['shop_id','item_id','date_block_num'], 'shop_item_block_mean')

shop_item_block_mean 3


In [357]:
def add_lags(df, cols, name, lags = [1]):
    
    for lag in lags:
        print(name, lag)
        lag_name = name + "_lag_" + str(lag)
        
        try:
            df.drop(columns=[lag_name],inplace=True)
        except:
            pass       

        result = df\
            .drop_duplicates(cols)\
            .sort_values(cols)\
            .set_index(cols)\
            .groupby(cols[0:len(cols)-1],as_index=False)\
            [name].shift(lag)\
            .rename(columns={name:lag_name}).reset_index()

        df = df.merge(result, on=cols, how='left')
        df[lag_name].fillna(0,inplace=True)
        if "mean" in name:
            df[lag_name] = pd.to_numeric(df[lag_name], downcast='float')
        else:
            df[lag_name] = pd.to_numeric(df[lag_name].astype(int), downcast='unsigned')
        del result
        gc.collect()
    
    return df
                                         

                                        
#training = add_lags(training, ['item_id','date_block_num'], 'item_block_units')
training = add_lags(training, ['item_id','date_block_num'], 'item_block_mean')
#training = add_lags(training, ['shop_id','date_block_num'], 'shop_block_units')
training = add_lags(training, ['shop_id','date_block_num'], 'shop_block_mean')
#training = add_lags(training, ['item_category_id','date_block_num'], 'cat_block_units')
#training = add_lags(training, ['item_category_id','date_block_num'], 'cat_block_mean')
#training = add_lags(training, ['shop_id','item_category_id','date_block_num'], 'shop_cat_block_units')
#training = add_lags(training, ['shop_id','item_category_id','date_block_num'], 'shop_cat_block_mean')
#training = add_lags(training, ['shop_id','item_id','date_block_num'], 'shop_item_block_units')
#training = add_lags(training, ['shop_id','item_id','date_block_num'], 'shop_item_block_mean')

item_block_mean 1
shop_block_mean 1


In [28]:
training['shop_block_units_lag_comp1'] = pd.to_numeric(training['shop_block_units_lag_1'] * training['item_share_of_shop_units'],downcast='unsigned')

#training['shop_share_item_units_comp'] = training['item_units'] * training['shop_share_of_units']
training['item_block_units_lag_comp1'] = pd.to_numeric(training['item_block_units_lag_1'] * training['item_share_of_shop_units'],downcast='unsigned')

In [303]:
first_day = sales_train.groupby('item_id')['item_days_since_start'].min()
training['first_day'] = training['item_id'].map(first_day)

In [174]:
training.columns.values

array(['item_id', 'shop_id', 'date_block_num', 'item_cnt_block',
       'item_category_id', 'month', 'shop_cat', 'shop_item',
       'item_id_mean_encoding', 'shop_id_mean_encoding',
       'item_category_id_mean_encoding', 'month_mean_encoding',
       'shop_cat_mean_encoding', 'shop_item_mean_encoding',
       'date_block_num_mean_encoding', 'first_day'], dtype=object)

In [328]:

pd.set_option('display.max_columns', None)  
pd.set_option('display.expand_frame_repr', False)
pd.set_option('max_colwidth', -1)
training.sample(10)

Unnamed: 0,item_id,shop_id,date_block_num,item_cnt_block,item_category_id,month,item_id_mean_encoding,shop_id_mean_encoding,item_category_id_mean_encoding,month_mean_encoding,date_block_num_mean_encoding,first_day,item_block_units,item_block_mean,shop_block_units,shop_block_mean,cat_block_units,cat_block_mean,shop_cat_block_units,shop_cat_block_mean,shop_item_block_units,shop_item_block_mean,item_block_mean_rolling_2,item_block_mean_rolling_3,item_block_mean_rolling_4,item_block_mean_rolling_6,shop_block_mean_rolling_2,shop_block_mean_rolling_3,shop_block_mean_rolling_4,shop_block_mean_rolling_6,cat_block_mean_rolling_2,cat_block_mean_rolling_3,cat_block_mean_rolling_4,cat_block_mean_rolling_6
1511669,5401,35,20,0,19,9,0.082437,0.282103,0.594824,0.269049,0.273536,194,2,0.04,1707,0.281173,5625,0.618132,136,0.747253,0,0,0.158824,0.158824,0.158824,0.158824,0.274903,0.275371,0.273357,0.27354,0.642548,0.615256,0.599499,0.583036
2220197,8055,39,19,0,62,8,0.065719,0.12161,0.151676,0.287595,0.288298,123,7,0.137255,728,0.114411,562,0.131186,10,0.119048,0,0,0.100408,0.100408,0.100408,0.100408,0.098321,0.103792,0.100283,0.102209,0.144027,0.141493,0.146756,0.170704
6290389,21725,28,20,0,40,9,0.093023,0.690118,0.239173,0.268372,0.272338,10,3,0.06,3612,0.59496,12609,0.224559,634,0.564559,0,0,0.089412,0.120832,0.111032,0.108531,0.704643,0.714935,0.697719,0.681123,0.215287,0.216133,0.210858,0.220141
2430101,8751,10,14,0,40,3,0.034483,0.095804,0.239173,0.288146,0.299587,17,2,0.041667,849,0.118874,20143,0.253104,118,0.07117,0,0,0.032609,0.032609,0.032609,0.032609,0.105657,0.105657,0.105657,0.105657,0.248271,0.248271,0.248271,0.248271
2851325,10329,47,27,0,38,4,0.03125,0.303018,0.170653,0.248292,0.241046,15,2,0.042553,1561,0.285062,1149,0.166305,13,0.088435,0,0,0.04087,0.033913,0.030435,0.030159,0.320888,0.333706,0.37087,0.353557,0.177202,0.182998,0.210249,0.201389
6381428,22017,25,33,0,40,10,0.231214,0.888969,0.238764,0.271164,0.259028,488,3,0.068182,5263,0.972289,7656,0.220812,767,0.97335,0,0,0.20072,0.250092,0.239895,0.239895,0.8703,0.845276,0.847626,0.856034,0.221569,0.230362,0.235241,0.232489
6159601,21335,6,25,1,40,2,0.094241,0.360109,0.238764,0.293426,0.277533,3,1,0.021277,2197,0.362961,11959,0.25143,308,0.304348,1,1,0.061862,0.070227,0.107018,0.107018,0.463423,0.447565,0.426849,0.411024,0.314305,0.29702,0.280207,0.263077
2489847,8951,51,22,0,38,11,0.039474,0.170477,0.171955,0.311972,0.311972,10,2,0.04,1062,0.167985,1586,0.182299,15,0.086207,0,0,0.039231,0.032821,0.029717,0.026685,0.17656,0.187956,0.189939,0.187726,0.175252,0.170686,0.162577,0.152739
2298993,8330,39,24,0,37,1,0.175758,0.12161,0.16418,0.313503,0.317277,4,8,0.16,808,0.131618,5940,0.183901,42,0.065015,0,0,0.13,0.105897,0.149423,0.191968,0.16265,0.139219,0.130481,0.122165,0.195702,0.175555,0.164013,0.15386
2682966,9800,47,13,0,40,2,0.056338,0.302849,0.239558,0.294932,0.308955,3,2,0.043478,1894,0.265489,18061,0.238972,238,0.144857,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [260]:
from sklearn.preprocessing import StandardScaler

cols =  ['item_id_mean_encoding', 'shop_id_mean_encoding',
       'item_category_id_mean_encoding', 'month_mean_encoding',
       'date_block_num_mean_encoding', 'first_day']


training[cols] = StandardScaler().fit_transform(training[cols])

In [358]:
training.fillna(0, inplace=True)

In [359]:
training.columns.values

array(['item_id', 'shop_id', 'date_block_num', 'item_cnt_block',
       'item_category_id', 'month', 'item_id_mean_encoding',
       'shop_id_mean_encoding', 'item_category_id_mean_encoding',
       'month_mean_encoding', 'date_block_num_mean_encoding', 'first_day',
       'item_block_units', 'item_block_mean', 'shop_block_units',
       'shop_block_mean', 'cat_block_units', 'cat_block_mean',
       'shop_cat_block_units', 'shop_cat_block_mean',
       'shop_item_block_units', 'shop_item_block_mean',
       'item_block_mean_rolling_2', 'item_block_mean_rolling_3',
       'item_block_mean_rolling_4', 'item_block_mean_rolling_6',
       'shop_block_mean_rolling_2', 'shop_block_mean_rolling_3',
       'shop_block_mean_rolling_4', 'shop_block_mean_rolling_6',
       'cat_block_mean_rolling_2', 'cat_block_mean_rolling_3',
       'cat_block_mean_rolling_4', 'cat_block_mean_rolling_6',
       'item_block_mean_lag_1', 'shop_block_mean_lag_1'], dtype=object)

In [361]:
gc.collect()

ZEROS_KEEP=0.25


#x_train = training[(training['date_block_num'] < 33) & (training['val_ignore'] == False)]
x_train = training[(training['date_block_num'] < 33)]
y_train = x_train['item_cnt_block']





x_val = training[training['date_block_num'] == 33]
y_val = x_val['item_cnt_block']

pos_val_len = len(y_val[y_val != 0])
print("pos_val_len", pos_val_len)

zeros_keep_indices_val = y_val[y_val == 0].sample(int(pos_val_len/ZEROS_KEEP)).index
print("zeros_keep_indices_val", len(zeros_keep_indices_val))
non_zeros_val_indices = y_val[y_val != 0].index
print("non_zeros_val_indices", len(non_zeros_val_indices))

val_indices = np.append(np.array(zeros_keep_indices_val), np.array(non_zeros_val_indices))

y_val = y_val.loc[val_indices]
x_val = x_val.loc[val_indices]



pos_val_len 31471
zeros_keep_indices_val 125884
non_zeros_val_indices 31471


In [264]:

features = [
    
    
        'item_category_id',
       'item_block_mean_rolling_3',
       'shop_block_mean_rolling_3',
           'shop_cat_block_mean_rolling_3',



      'item_block_mean_lag_1',
        'shop_block_mean_lag_1',
            'shop_cat_block_mean_lag_1',
    
    'shop_item_share_of_shop_units_mean',
    'shop_item_block_mean_rolling_3',
    'shop_item_block_mean_lag_1',
    
    'cat_me_real'

]




In [236]:
training.sample(20)

Unnamed: 0,item_id,shop_id,date_block_num,item_cnt_block,item_category_id,month,shop_cat,shop_item,item_id_mean_encoding,shop_id_mean_encoding,item_category_id_mean_encoding,month_mean_encoding,shop_cat_mean_encoding,shop_item_mean_encoding,date_block_num_mean_encoding,first_day
540666,-1.446625,-0.746499,18,0,0.642309,0.314602,-0.704933,-0.911123,-0.398289,-0.007475,-0.168134,-0.756216,-0.03217,-0.099579,-0.790865,-0.536748
2014892,-0.647405,-1.731624,26,0,-0.949663,-0.889072,-1.690745,-0.995961,-0.029854,-0.698855,1.890515,0.009616,0.769069,-0.099579,-0.338357,0.728126
2049976,-0.625404,1.22375,13,0,0.642309,-1.189991,1.238117,-0.733642,-0.3832,-0.214906,-0.168317,0.148258,-0.012423,-0.336342,0.463882,-0.594903
3879210,0.391437,-1.268036,23,2,1.533813,1.819195,-1.211269,-0.488682,0.17076,-0.927359,-0.079125,3.800325,-0.423562,0.255566,3.738588,0.728126
748720,-1.336618,0.876059,13,0,-1.268058,-1.189991,0.878081,-0.766788,0.275855,0.069582,-0.191857,0.169267,0.034145,0.847475,0.483791,-0.587633
5201605,1.063432,0.06478,20,0,-0.312874,0.916439,0.086574,0.696175,-0.167498,-0.939821,-0.128835,-0.412532,-0.381794,-0.336342,-0.31677,-0.580364
2722407,-0.21567,-0.630602,13,0,-0.503911,-1.189991,-0.600923,-0.896868,-0.43146,0.008805,-0.329834,0.148258,-0.036848,-0.336342,0.463882,-0.427707
1305255,-1.040718,0.354523,22,0,-0.885984,1.518277,0.367174,-0.812113,-0.414876,-0.279498,-0.597977,0.534986,-0.554445,-0.336342,0.526283,0.473697
6337261,1.68983,-0.282911,21,0,-0.312874,1.217358,-0.256317,0.389668,-0.395031,1.354315,-0.129463,-0.395617,0.060107,-0.099579,-0.15131,-0.580364
3429482,0.136509,0.702214,23,0,1.597492,1.819195,0.732353,1.25882,-0.344934,-0.561742,0.267743,3.750847,-0.247344,-0.217961,3.689914,-0.587633


In [135]:
training.fillna(0,inplace=True)

In [362]:
features = [
    
   'item_id_mean_encoding',
       'shop_id_mean_encoding',
    'item_category_id_mean_encoding', 
     # 'month_mean_encoding',
     #'shop_cat_mean_encoding',
       #'shop_item_mean_encoding',
    #'date_block_num_mean_encoding', 
    #'first_day',
      'item_block_mean_rolling_2',# 'item_block_mean_rolling_3',
       #'item_block_mean_rolling_6',#, 'item_block_mean_rolling_6',
       #'shop_block_mean_rolling_2', #'shop_block_mean_rolling_3',
       #'shop_block_mean_rolling_6',# 'shop_block_mean_rolling_6',
       #'cat_block_mean_rolling_2', 'cat_block_mean_rolling_3',
       #'cat_block_mean_rolling_4', 'cat_block_mean_rolling_6',
    'item_block_mean_lag_1', 
    'shop_block_mean_lag_1'
]


gc.collect()
lgtrain = lgbm.Dataset(x_train[features], label=y_train)
lgval = lgbm.Dataset(x_val[features], label=y_val)



#[0.00542047893814942, 29, 24, 0.39949465609514856, 1, 0.67943500, 10]
params = {
        "num_threads": 16,
        #"device": "gpu",
        "verbosity": -1,
        #"zero_as_missing": "true",
        "boosting":'gbdt',
        "objective" : "regression",
        "metric" : "rmse",
        "seed": 42,
        #"max_bin": 10,#default 255
        #"num_leaves": 10, #default 31
        #"bagging_fraction": 0.7,
        #"bagging_freq": 1,
        #"min_data_in_leaf": 50000,
        #"feature_fraction": 0.7,
        #"lambda_l2": 3,
        #"max_depth": 2,
        #"min_gain_to_split": 10,
        "learning_rate" : 0.01,
        #"histogram_pool_size": 1000,
        #"categorical_column": [0,1,2,3,4]
}

evals_result = {}
lg_model = lgbm.train(params, lgtrain, 20000, 
                      valid_sets=[lgval], 
                      early_stopping_rounds=1, 
                      verbose_eval=10, 
                      evals_result=evals_result)

scores = {}
for i,score in enumerate(lg_model.feature_importance()):
    scores[features[i]] = score

sorted(scores.items(), key=lambda x: x[1])[::-1]

Training until validation scores don't improve for 1 rounds.
[10]	valid_0's rmse: 1.32552
[20]	valid_0's rmse: 1.27841
[30]	valid_0's rmse: 1.23943
[40]	valid_0's rmse: 1.20676
[50]	valid_0's rmse: 1.1796
[60]	valid_0's rmse: 1.15726
[70]	valid_0's rmse: 1.13912
[80]	valid_0's rmse: 1.12474
[90]	valid_0's rmse: 1.11306
[100]	valid_0's rmse: 1.10353
[110]	valid_0's rmse: 1.09635
[120]	valid_0's rmse: 1.09073
[130]	valid_0's rmse: 1.0861
[140]	valid_0's rmse: 1.08279
[150]	valid_0's rmse: 1.08048
[160]	valid_0's rmse: 1.07888
[170]	valid_0's rmse: 1.07779
Early stopping, best iteration is:
[174]	valid_0's rmse: 1.07749


[('shop_id_mean_encoding', 1416),
 ('item_block_mean_lag_1', 1218),
 ('item_id_mean_encoding', 943),
 ('item_category_id_mean_encoding', 760),
 ('item_block_mean_rolling_2', 471),
 ('shop_block_mean_lag_1', 412)]

In [265]:
cb_model = CatBoostRegressor(iterations=1000,
                             #learning_rate=0.05,
                             eval_metric='RMSE',
                             task_type = "GPU",
                             use_best_model=True,
                             od_type = "Iter",
                             od_wait = 1,
                             bagging_temperature = 30,
                             cat_features=[0],
                             random_seed = 42)

#drops = ['subcategory','area']
#x_train = x_train.drop(columns=drops)
#x_val = x_val.drop(columns=drops)


cb_model.fit(x_train[features], y_train, #cat_features=categorical_features_indices,
             eval_set=(x_val[features],y_val),
             #cat_features=categorical_features_pos,         
             verbose=True)

scores = {}
for i,score in enumerate(cb_model.get_feature_importance()):
    scores[features[i]] = score

sorted(scores.items(), key=lambda x: x[1])[::-1]

0:	learn: 1.2142134	test: 1.3007728	best: 1.3007728 (0)	total: 208ms	remaining: 3m 27s
1:	learn: 1.2048024	test: 1.2913237	best: 1.2913237 (1)	total: 422ms	remaining: 3m 30s
2:	learn: 1.1935868	test: 1.2809283	best: 1.2809283 (2)	total: 612ms	remaining: 3m 23s
3:	learn: 1.1839645	test: 1.2724598	best: 1.2724598 (3)	total: 836ms	remaining: 3m 28s
4:	learn: 1.1767478	test: 1.2645890	best: 1.2645890 (4)	total: 1.01s	remaining: 3m 21s
5:	learn: 1.1705086	test: 1.2577772	best: 1.2577772 (5)	total: 1.22s	remaining: 3m 21s
6:	learn: 1.1622471	test: 1.2508170	best: 1.2508170 (6)	total: 1.4s	remaining: 3m 18s
7:	learn: 1.1528430	test: 1.2422691	best: 1.2422691 (7)	total: 1.62s	remaining: 3m 21s
8:	learn: 1.1469227	test: 1.2371112	best: 1.2371112 (8)	total: 1.83s	remaining: 3m 21s
9:	learn: 1.1379664	test: 1.2292195	best: 1.2292195 (9)	total: 2.03s	remaining: 3m 20s
10:	learn: 1.1309602	test: 1.2228338	best: 1.2228338 (10)	total: 2.22s	remaining: 3m 19s
11:	learn: 1.1239430	test: 1.2168717	best:

93:	learn: 0.9618758	test: 1.0653203	best: 1.0653203 (93)	total: 20.4s	remaining: 3m 16s
94:	learn: 0.9603815	test: 1.0641604	best: 1.0641604 (94)	total: 20.6s	remaining: 3m 15s
95:	learn: 0.9599992	test: 1.0639171	best: 1.0639171 (95)	total: 20.8s	remaining: 3m 15s
96:	learn: 0.9598076	test: 1.0637045	best: 1.0637045 (96)	total: 21s	remaining: 3m 15s
97:	learn: 0.9596032	test: 1.0636136	best: 1.0636136 (97)	total: 21.3s	remaining: 3m 16s
98:	learn: 0.9588431	test: 1.0626051	best: 1.0626051 (98)	total: 21.5s	remaining: 3m 16s
99:	learn: 0.9584068	test: 1.0621874	best: 1.0621874 (99)	total: 21.7s	remaining: 3m 15s
100:	learn: 0.9581137	test: 1.0617128	best: 1.0617128 (100)	total: 21.9s	remaining: 3m 15s
101:	learn: 0.9575106	test: 1.0612601	best: 1.0612601 (101)	total: 22.2s	remaining: 3m 15s
102:	learn: 0.9569574	test: 1.0608835	best: 1.0608835 (102)	total: 22.5s	remaining: 3m 15s
103:	learn: 0.9559245	test: 1.0600643	best: 1.0600643 (103)	total: 22.7s	remaining: 3m 15s
104:	learn: 0.9

185:	learn: 0.9294323	test: 1.0330034	best: 1.0330034 (185)	total: 40.8s	remaining: 2m 58s
186:	learn: 0.9286348	test: 1.0321979	best: 1.0321979 (186)	total: 41s	remaining: 2m 58s
187:	learn: 0.9282590	test: 1.0316546	best: 1.0316546 (187)	total: 41.2s	remaining: 2m 57s
188:	learn: 0.9281398	test: 1.0316287	best: 1.0316287 (188)	total: 41.4s	remaining: 2m 57s
189:	learn: 0.9281088	test: 1.0315856	best: 1.0315856 (189)	total: 41.6s	remaining: 2m 57s
190:	learn: 0.9279867	test: 1.0315170	best: 1.0315170 (190)	total: 41.9s	remaining: 2m 57s
191:	learn: 0.9279078	test: 1.0314832	best: 1.0314832 (191)	total: 42.1s	remaining: 2m 57s
192:	learn: 0.9271230	test: 1.0306199	best: 1.0306199 (192)	total: 42.3s	remaining: 2m 56s
193:	learn: 0.9269463	test: 1.0304205	best: 1.0304205 (193)	total: 42.5s	remaining: 2m 56s
194:	learn: 0.9268561	test: 1.0302560	best: 1.0302560 (194)	total: 42.9s	remaining: 2m 56s
195:	learn: 0.9266286	test: 1.0300495	best: 1.0300495 (195)	total: 43.1s	remaining: 2m 56s
1

[('item_block_mean_lag_1', 24.26178950360488),
 ('shop_item_block_mean_lag_1', 20.302278737207473),
 ('shop_item_block_mean_rolling_3', 11.060282556272488),
 ('shop_cat_block_mean_lag_1', 9.487496404150535),
 ('item_block_mean_rolling_3', 7.479725163588699),
 ('shop_cat_block_mean_rolling_3', 6.068712510620433),
 ('shop_item_share_of_shop_units_mean', 5.536159204862697),
 ('item_category_id', 5.138615825079335),
 ('cat_me_real', 4.480760033674159),
 ('shop_block_mean_lag_1', 3.542334724861135),
 ('shop_block_mean_rolling_3', 2.641845336078146)]

In [48]:
features = [item[0] for item in scores.items() if item[1] > 2000]

In [307]:
test            = pd.read_csv('test.csv.gz')
test = test.set_index('item_id').join(items.set_index('item_id'))
test.reset_index(inplace=True)
test['month'] = 11

In [308]:
item_features = [ 
    #'shop_item_share_of_shop_units_mean'
       'item_id_mean_encoding','first_day'
                ]

merge_col = ['item_id']
cols=item_features+merge_col

test = test.merge(training.drop_duplicates('item_id')[cols], on=merge_col, how='left')

In [309]:
shop_features = [
        #'shop_me'
           'shop_id_mean_encoding'

]

merge_col = ['shop_id']
cols=shop_features+merge_col


test = test.merge(training.drop_duplicates(merge_col)[cols], on=merge_col, how='left')

In [310]:
cat_features = [
        'item_category_id_mean_encoding'
]

merge_col = ['item_category_id']
cols=cat_features+merge_col


test = test.merge(training.drop_duplicates(merge_col)[cols], on=merge_col, how='left')

In [311]:
month_features = [
        'month_mean_encoding'
]

merge_col = ['month']
cols=month_features+merge_col


test = test.merge(training.drop_duplicates(merge_col)[cols], on=merge_col, how='left')

In [312]:
shop_cat_features = [
        'shop_cat_mean_encoding'
]

merge_col = ['shop_id', 'item_category_id']
cols=shop_cat_features+merge_col


test = test.merge(training.drop_duplicates(merge_col)[cols], on=merge_col, how='left')

KeyError: "['shop_cat_mean_encoding'] not in index"

In [246]:
shop_item_features = [
        'shop_item_mean_encoding'
]

merge_col = ['shop_id', 'item_category_id']
cols=shop_item_features+merge_col


test = test.merge(training.drop_duplicates(merge_col)[cols], on=merge_col, how='left')

In [315]:
test["date_block_num_mean_encoding"] = training[training["date_block_num"] == 33]["date_block_num_mean_encoding"].mean()

In [344]:
def add_rolls_test(df, cols, name, rolls = [2]):
    for roll in rolls:
        print(name, roll)
        roll_name = name+"_rolling_" + str(roll)
        roll_name_tmp = roll_name + "_tmp"
        
        try:
            df.drop(columns=[roll_name],inplace=True)
        except:
            pass       

    
        block_units_rolling_temp = training\
            .drop_duplicates(cols)\
            .sort_values(cols)\
            .set_index(cols)\
            .groupby(cols[0:len(cols)-1],as_index=False)\
            [name].rolling(roll,min_periods=2).mean().reset_index()\
            .rename(columns={name:roll_name})\
            [cols+[roll_name]]
        
        print([cols[0:len(cols)-1]+[roll_name]])
        thirty_three = block_units_rolling_temp[block_units_rolling_temp['date_block_num'] == 33].drop_duplicates(cols)\
                [cols[0:len(cols)-1]+[roll_name]]
        df = df.merge(thirty_three, on=cols[0:len(cols)-1], how='left')
    

        del block_units_rolling_temp
        gc.collect()
        

    
    return df
    

test = add_rolls_test(test, ['item_id','date_block_num'], 'item_block_mean')
#test = add_rolls_test(test, ['shop_id','date_block_num'], 'shop_block_mean')
#test = add_rolls_test(test, ['shop_id','item_category_id','date_block_num'], 'shop_cat_block_mean')


item_block_mean 2
[['item_id', 'item_block_mean_rolling_2']]


In [225]:
test = add_rolls_test(test, ['shop_id','item_id','date_block_num'], 'shop_item_block_mean')

shop_item_block_mean 3
[['shop_id', 'item_id', 'shop_item_block_mean_rolling_3']]


In [191]:
def add_lags_test(df, cols, name, lags = [1]):
    
    for lag in lags:
        print(name, lag)
        lag_name = name + "_lag_" + str(lag)
        
        try:
            df.drop(columns=[lag_name],inplace=True)
        except:
            pass       

        result = training\
            .drop_duplicates(cols)\
            .sort_values(cols)\
            .set_index(cols)\
            .groupby(cols[0:len(cols)-1],as_index=False)\
            [name].shift(lag)\
            .rename(columns={name:lag_name}).reset_index()
        
        thirty_three = result[result['date_block_num'] == 33].drop_duplicates(cols)\
                [cols[0:len(cols)-1] + [lag_name]]
        df = df.merge(thirty_three, on=cols[0:len(cols)-1], how='left')

        gc.collect()
    
    return df
                                         

                                        
test = add_lags_test(test, ['item_id','date_block_num'], 'item_block_mean')
test = add_lags_test(test, ['shop_id','date_block_num'], 'shop_block_mean')
test = add_lags_test(test, ['shop_id','item_category_id','date_block_num'], 'shop_cat_block_mean')


item_block_mean 1
shop_block_mean 1
shop_cat_block_mean 1


In [236]:
test = add_lags_test(test, ['shop_id','item_id','date_block_num'], 'shop_item_block_mean')

shop_item_block_mean 1


In [316]:
test.fillna(0, inplace=True)

In [317]:
test.sample(10)

Unnamed: 0,item_id,ID,shop_id,item_category_id,month,item_id_mean_encoding,first_day,shop_id_mean_encoding,item_category_id_mean_encoding,month_mean_encoding,date_block_num_mean_encoding
140870,14335,11732,6,37,11,0.080357,2.0,0.362123,0.163505,0.311019,0.258566
70832,7202,103212,42,30,11,1.279855,1.0,0.561663,0.995123,0.311019,0.258566
140700,14313,1306,5,55,11,0.204489,2.0,0.196823,0.224447,0.311019,0.258566
198532,20413,206817,39,72,11,0.049822,434.0,0.12161,0.23643,0.311019,0.258566
135103,13704,161375,56,69,11,0.130159,371.0,0.301666,0.262351,0.311019,0.258566
19811,2327,148309,59,20,11,0.0,0.0,0.188085,1.506454,0.311019,0.258566
181881,18479,107425,50,55,11,1.738372,488.0,0.242695,0.224447,0.311019,0.258566
28832,3182,106016,42,20,11,0.301471,489.0,0.561663,1.506454,0.311019,0.258566
12647,1555,27568,7,28,11,3.803097,305.0,0.268001,0.98189,0.311019,0.258566
153388,15478,23330,2,63,11,0.294821,429.0,0.140911,0.329658,0.311019,0.258566


In [116]:
cb_preds = cb_model.predict(test[features])
cb_preds.clip(0,20,out=cb_preds)

NameError: name 'cb_model' is not defined

In [271]:
print(np.mean(cb_preds))
print(np.max(cb_preds))

0.31713491408940697
11.488168775656959


In [347]:
lg_preds = lg_model.predict(test[features])
lg_preds.clip(0,20,out=lg_preds)

array([ 0.20668834,  0.2051827 ,  0.24606054, ...,  0.37272144,
        0.36750827,  0.3712158 ])

In [348]:
print(np.mean(lg_preds))
print(np.max(lg_preds))

0.326655314543
13.3867865603


In [294]:
print(np.mean(cb_preds))
print(np.max(cb_preds))

NameError: name 'cb_preds' is not defined

In [349]:
lg_preds[0:100]

array([ 0.20668834,  0.2051827 ,  0.24606054,  0.2051827 ,  0.2051827 ,
        0.22011749,  0.2051827 ,  0.20722462,  0.41040999,  0.62507763,
        0.2073078 ,  0.61880275,  0.20722462,  0.2073078 ,  0.2227    ,
        0.22011749,  0.20722462,  0.20722462,  0.20668834,  0.2227    ,
        0.37363883,  0.2073078 ,  0.2051827 ,  0.2073078 ,  0.20668834,
        0.22557139,  0.20664073,  0.37363883,  0.23515105,  0.20668834,
        0.2073078 ,  0.2227    ,  0.2051827 ,  0.2051827 ,  0.22289051,
        0.20722462,  0.2051827 ,  0.2227    ,  0.20668834,  0.20668834,
        0.2051827 ,  0.2051827 ,  0.2233398 ,  0.21933927,  0.32370546,
        0.21933927,  0.21933927,  0.2510445 ,  0.21933927,  0.2311878 ,
        0.57648811,  0.77795007,  0.23796471,  0.72951646,  0.2311878 ,
        0.23796471,  0.26295036,  0.2510445 ,  0.2311878 ,  0.2311878 ,
        0.22084491,  0.25991693,  0.4840437 ,  0.23796471,  0.21933927,
        0.23796471,  0.2233398 ,  0.26582175,  0.22329218,  0.48

In [350]:
submission = test.loc[:,['ID']]
submission['item_cnt_month'] = lg_preds

submission.to_csv('submission.csv', index=False)

In [351]:
training.sample(10)

Unnamed: 0,item_id,shop_id,date_block_num,item_cnt_block,item_category_id,month,item_id_mean_encoding,shop_id_mean_encoding,item_category_id_mean_encoding,month_mean_encoding,date_block_num_mean_encoding,first_day,item_block_units,item_block_mean,shop_block_units,shop_block_mean,cat_block_units,cat_block_mean,shop_cat_block_units,shop_cat_block_mean,shop_item_block_units,shop_item_block_mean,item_block_mean_rolling_2,item_block_mean_rolling_3,item_block_mean_rolling_4,item_block_mean_rolling_6,shop_block_mean_rolling_2,shop_block_mean_rolling_3,shop_block_mean_rolling_4,shop_block_mean_rolling_6,cat_block_mean_rolling_2,cat_block_mean_rolling_3,cat_block_mean_rolling_4,cat_block_mean_rolling_6
1664544,5983,58,27,0,30,4,0.039655,0.347876,0.991726,0.250437,0.242989,14,1,0.021277,1629,0.29748,7826,0.867243,209,1.088542,0,0,0.061739,0.061159,0.05087,0.043782,0.377824,0.382034,0.457419,0.414623,0.862122,0.904061,0.977607,1.024919
2885211,10424,5,13,0,37,2,0.084084,0.19446,0.163523,0.293575,0.307729,8,1,0.021739,1191,0.166947,6026,0.15127,59,0.068129,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5083670,17482,48,30,0,37,7,0.304575,0.206393,0.163523,0.256729,0.260998,1,5,0.116279,1094,0.205523,3291,0.156834,40,0.081967,0,0,0.264006,0.296572,0.30395,0.329158,0.191532,0.186673,0.187645,0.192634,0.157556,0.16818,0.167541,0.167832
2419986,8708,22,25,0,37,2,0.04,0.214979,0.163523,0.293575,0.277288,68,1,0.021277,1221,0.201718,4830,0.152926,72,0.107143,0,0,0.059412,0.046411,0.045012,0.043755,0.260791,0.255428,0.247499,0.236772,0.209181,0.191768,0.177641,0.163198
3798403,13436,16,22,1,11,11,0.061404,0.218308,0.260647,0.311852,0.311852,1,2,0.04,1562,0.247074,92,0.184,3,0.3,1,1,0.069231,0.072298,0.059223,0.056489,0.211654,0.218686,0.213326,0.208407,0.14014,0.159974,0.162481,0.191473
2276070,8248,31,15,1,40,4,0.054913,1.119358,0.239173,0.250437,0.256209,6,2,0.040816,7464,1.10186,17333,0.224309,1990,1.26189,1,1,0.063859,0.078804,0.078804,0.078804,1.145991,1.147343,1.147343,1.147343,0.246038,0.249882,0.249882,0.249882
1073356,3970,39,30,0,55,7,0.253191,0.122355,0.224271,0.257048,0.258174,244,7,0.162791,598,0.112343,6475,0.216664,63,0.090647,0,0,0.206395,0.222703,0.270288,0.266504,0.123662,0.120304,0.123071,0.12301,0.207113,0.209156,0.221755,0.225911
4027367,14220,10,22,0,57,11,0.085906,0.098287,0.09437,0.311019,0.311019,7,4,0.08,717,0.113413,697,0.085521,0,0.0,0,0,0.07902,0.079346,0.090122,0.1077,0.10184,0.100006,0.095387,0.094615,0.084507,0.092603,0.097591,0.100657
4113557,14434,7,14,0,40,3,0.103718,0.265655,0.238764,0.288169,0.299465,2,11,0.229167,1905,0.266732,20143,0.253104,289,0.174306,0,0,0.26087,0.26087,0.26087,0.26087,0.301063,0.301063,0.301063,0.301063,0.248271,0.248271,0.248271,0.248271
2458455,8820,55,15,0,65,4,0.178462,0.244115,0.653119,0.250581,0.255012,2,7,0.142857,998,0.147328,1838,0.72135,0,0.0,0,0,0.338768,0.349034,0.349034,0.349034,0.13302,0.13331,0.13331,0.13331,0.849364,0.914532,0.914532,0.914532


In [169]:
sales_train.sample(10)

Unnamed: 0,item_id,date,date_block_num,shop_id,item_price,item_cnt_day,day,month,year,item_category_id
635476,6501,16.02.2015,25,42,390.64,1.0,16,2,2015,28
1271389,16056,21.01.2014,12,25,699.0,1.0,21,1,2014,64
1123093,14124,03.07.2015,30,41,599.0,1.0,3,7,2015,41
982533,11797,28.12.2014,23,31,149.0,1.0,28,12,2014,41
307649,3566,03.02.2014,13,27,407.36,1.0,3,2,2014,23
1363132,17164,14.08.2014,19,49,399.0,1.0,14,8,2014,40
1081679,13491,19.08.2014,19,52,14990.0,1.0,19,8,2014,11
1537875,20243,13.06.2015,29,16,199.0,1.0,13,6,2015,40
884686,10207,06.08.2015,31,49,1199.0,1.0,6,8,2015,30
542943,5673,03.09.2014,20,28,799.0,1.0,3,9,2014,3
