In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import seaborn as sns

from sklearn.preprocessing import LabelEncoder
import os
print(os.listdir("../input"))
import gc
import time

In [None]:
# Load the competition tables from the Kaggle-style ../input directory.
train = pd.read_csv("../input/sales_train.csv")          # sales rows (item_price, item_cnt_day, date_block_num, ...)
test = pd.read_csv("../input/test.csv")                  # (ID, shop_id, item_id) rows to predict
items = pd.read_csv("../input/items.csv")                # item_id -> item_name, item_category_id
item_cats = pd.read_csv("../input/item_categories.csv")  # item_category_id -> item_category_name
shops = pd.read_csv("../input/shops.csv")                # shop_id -> shop_name

In [None]:
shops.head()

In [None]:
test.head()

In [None]:
train.head()

In [None]:
#train = train[train["item_id"] < 20000]

In [None]:
sns.pairplot(train.iloc[0:4000 , :])

In [None]:
# Box plot of item prices to spot outliers. The column is named via x= because
# a bare positional string collides with data=train on seaborn releases where
# `data` is the only positional parameter.
sns.boxplot(x="item_price" , data = train)

### Remove Outliers

In [None]:
train = train[train["item_price"] < 100000]

In [None]:
# Box plot of daily counts to spot outliers. The column is named via x= because
# a bare positional string collides with data=train on seaborn releases where
# `data` is the only positional parameter.
sns.boxplot(x="item_cnt_day" , data = train)

In [None]:
train = train[train["item_cnt_day"] < 1001]

In [None]:
train[train["item_price"] < 0]

In [None]:
train[(train["date_block_num"] == 4) & (train["shop_id"] == 32) & (train["item_id"] == 2973) & (train["item_price"] > 0)]["item_price"].median()

In [None]:
# Replace negative prices with the median positive price of the same
# month / shop / item combination (block 4, shop 32, item 2973).
train.loc[train["item_price"] < 0, "item_price"] = train.loc[
    (train["date_block_num"] == 4)
    & (train["shop_id"] == 32)
    & (train["item_id"] == 2973)
    & (train["item_price"] > 0),
    "item_price",
].median()

In [None]:
train.loc[train["item_price"] < 0]

In [None]:
train.describe()

### Fix duplicate shop IDs in train and test

In [None]:
# Fold each duplicated shop id into its counterpart (0 -> 57, 1 -> 58, 10 -> 11)
# in both train and test. None of the targets is itself a source id, so a single
# mapping gives the same result as the sequential assignments.
train["shop_id"] = train["shop_id"].replace({0: 57, 1: 58, 10: 11})
test["shop_id"] = test["shop_id"].replace({0: 57, 1: 58, 10: 11})

### Shops preprocessing.

### 1. Extract city name

In [None]:
shops.head()

In [None]:
shops["shop_name"].apply(lambda x : x.split(" ")[0]).value_counts()

### Some cities appear twice, which means some shop names are duplicated too. Let's fix that first.

In [None]:
shops.loc[shops.shop_name == 'Сергиев Посад ТЦ "7Я"', 'shop_name'] = 'СергиевПосад ТЦ "7Я"'

In [None]:
# The city is the first space-separated token of the shop name; the one
# misspelled entry is normalised before label-encoding.
shops["city"] = shops["shop_name"].str.split(" ").str[0].replace("!Якутск", "Якутск")
shops["city_code"] = LabelEncoder().fit_transform(shops["city"])
# Only the numeric code is kept; the text columns are no longer needed.
shops = shops.drop(columns=["city", "shop_name"])

In [None]:
shops.head()

### Preprocess categories

In [None]:
item_cats.head()

In [None]:
# Split each category name on "-" once; the first piece is the category type.
split = item_cats["item_category_name"].str.split("-")
item_cats["type"] = split.str[0]
item_cats["type_code"] = LabelEncoder().fit_transform(item_cats["type"])

In [None]:
item_cats.head(20)

In [None]:
# The subtype is the second "-"-separated piece when there is one, otherwise
# the whole name; surrounding whitespace is stripped in both cases.
item_cats['subtype'] = split.map(lambda parts: (parts[1] if len(parts) > 1 else parts[0]).strip())
item_cats['subtype_code'] = LabelEncoder().fit_transform(item_cats['subtype'])

In [None]:
item_cats = item_cats[["item_category_id" , "type_code" , "subtype_code"]]

### Preprocess items

In [None]:
items.head()

In [None]:
items = items.drop("item_name" , axis = 1)

### Watch item id and shop id pairs in train and test

### 1. Item IDs present in test but not in train -> their target can be set to 0 in test

In [None]:
train.head()

In [None]:
len(train["item_id"].unique())

In [None]:
len(test["item_id"].unique())

In [None]:
train_arr = train["item_id"].unique()
test_arr = test["item_id"].unique()

In [None]:
print(len(train_arr))
print(len(test_arr))

In [None]:
# Number of test item ids that never occur in train. test_arr comes from
# .unique(), so one vectorised membership test counts each id exactly once and
# replaces the per-item Python loop over the train array.
count = int((~np.isin(test_arr, train_arr)).sum())

In [None]:
count

### Create matrix

In [None]:
from itertools import product

In [None]:
# For every month block 0..33, build the full cartesian grid of
# (month, item sold that month, shop active that month).
matrix = []
for i in range(0,34):
    sales_month = train[train["date_block_num"] == i]
    # The columns are cast to int8/int16 right after the grid is assembled, so
    # the arrays are built as int16 up front instead of the default int64.
    matrix.append(np.array(list(product([i] , sales_month["item_id"].unique() , sales_month["shop_id"].unique())) , dtype = np.int16))

### Create all possible combinations of item id and shop id from train.

In [None]:
mat_df = pd.DataFrame(np.vstack(matrix) , columns = ["date_block_num" , "item_id" , "shop_id"])

In [None]:
# Downcast the three key columns in one pass to keep the grid small in memory.
mat_df = mat_df.astype({"date_block_num": "int8", "item_id": "int16", "shop_id": "int16"})

In [None]:
mat_df.head()

In [None]:
mat_df.describe()

### Compute monthly sales from train, then merge them into the matrix on date_block_num, item_id, shop_id

In [None]:
train_temp = train.groupby(["date_block_num" , "shop_id" , "item_id"] , as_index = False)["item_cnt_day"].sum()

In [None]:
train_temp.describe()

### Merge monthly sales on matrix

In [None]:
cols = ["date_block_num" , "shop_id" , "item_id"]

In [None]:
mat_df = pd.merge(mat_df , train_temp , on = cols , how = "left")

In [None]:
mat_df.info()

In [None]:
# The summed daily counts are the monthly target. Only the column is renamed;
# the row index is left alone rather than converting every label to a string.
mat_df = mat_df.rename(columns = {"item_cnt_day" : "item_cnt_month"})

In [None]:
# Clip the monthly target to [0, 20], treat grid rows with no sales as 0,
# and store it compactly as float16.
mat_df["item_cnt_month"] = (
    mat_df["item_cnt_month"]
    .clip(0., 20.)
    .fillna(0.)
    .astype("float16")
)

### Now append rows of test to matrix and fill its monthly sales with 0

In [None]:
mat_df.head()

In [None]:
test.head()

In [None]:
# Tag the test rows as month block 34 and align key dtypes with the grid.
test['date_block_num'] = np.full(len(test), 34, dtype=np.int8)
test = test.astype({'shop_id': np.int16, 'item_id': np.int16})

In [None]:
cols

In [None]:
test.head()

In [None]:
# Stack the test rows under the training grid with a fresh 0..n-1 index.
# `keys` is not passed: it labels the concatenated pieces (it is not a list of
# join columns) and has no effect once ignore_index=True discards the labels.
mat_df = pd.concat([mat_df , test] , ignore_index = True , sort = False)

In [None]:
mat_df = mat_df.drop("ID" , axis = 1)

In [None]:
mat_df["item_cnt_month"] = mat_df["item_cnt_month"].fillna(0.)

In [None]:
mat_df.info()

In [None]:
mat_df[mat_df["shop_id"].isnull()]

### Concat shops , items , categories

In [None]:
mat_df = pd.merge(mat_df , shops , on = "shop_id" , how = "left")

In [None]:
mat_df = pd.merge(mat_df , items , on = "item_id" , how = "left")

In [None]:
mat_df = pd.merge(mat_df , item_cats , on = "item_category_id" , how = "left")

In [None]:
mat_df.info()

In [None]:
mat_df["city_code"] = mat_df["city_code"].astype("int8")
mat_df["item_category_id"] = mat_df["item_category_id"].astype("int8")
mat_df["type_code"] = mat_df["type_code"].astype("int8")
mat_df["subtype_code"] = mat_df["subtype_code"].astype("int8")

### Compute lag features and merge them into the matrix

In [None]:
train.head()

In [None]:
def findLag(df , months , lagcol):
    """Append lagged copies of ``lagcol`` to ``df``.

    For every ``i`` in ``months`` a column ``<lagcol>_lag_<i>`` is added that
    holds, for each (date_block_num, shop_id, item_id) row, the value
    ``lagcol`` had for the same shop/item ``i`` month blocks earlier. Rows
    without a matching earlier row get NaN.

    Parameters
    ----------
    df : DataFrame with columns date_block_num, shop_id, item_id and ``lagcol``.
    months : iterable of int lag distances, in month blocks.
    lagcol : name of the column to lag.

    Returns
    -------
    A new DataFrame (result of successive left merges) with the lag columns
    appended; the input frame itself is not modified.
    """
    keys = ["date_block_num" , "shop_id" , "item_id"]
    # Snapshot the source columns once so every lag is taken from the original
    # values, not from a frame that already carries earlier lag columns.
    temp = df[keys + [lagcol]]
    for i in months:
        shifted = temp.copy()
        # Moving the source rows forward by i blocks makes them line up with
        # the rows they are a lag for.
        shifted["date_block_num"] += i
        # Only the column is renamed; the merge ignores the row index, so there
        # is no need to rewrite it.
        shifted = shifted.rename(columns = {lagcol : lagcol + "_lag_" + str(i)})
        df = pd.merge(df , shifted , on = keys , how = "left")
    return df

In [None]:
del matrix

In [None]:
mat_df[mat_df["date_block_num"] > 11].head()

In [None]:
mat_df.info()

In [None]:
mat_df = findLag(mat_df , [1,2,3,6] , "item_cnt_month")

### Mean Encoded Features

In [None]:
# Mean target per month. It is merged in, lagged by one month, and the
# unlagged column is dropped so only date_block_avg_lag_1 remains.
date_block_avg = mat_df.groupby(["date_block_num"], as_index=False)["item_cnt_month"].mean()
date_block_avg = date_block_avg.rename(index=str, columns={"item_cnt_month": "date_block_avg"})
mat_df = mat_df.merge(date_block_avg, on="date_block_num", how="left")
mat_df = findLag(mat_df, [1], "date_block_avg")
mat_df = mat_df.drop(columns=["date_block_avg"])

In [None]:
#mat_df[(mat_df["date_block_num"] > 11)]

In [None]:
del date_block_avg

In [None]:
# Mean target per (month, item). It is merged in, lagged by one month, and the
# unlagged column is dropped so only the lag remains.
date_block_item_avg = mat_df.groupby(["date_block_num", "item_id"], as_index=False)["item_cnt_month"].mean()
date_block_item_avg = date_block_item_avg.rename(index=str, columns={"item_cnt_month": "date_block_item_avg"})
mat_df = mat_df.merge(date_block_item_avg, on=["date_block_num", "item_id"], how="left")
mat_df = findLag(mat_df, [1], "date_block_item_avg")
mat_df = mat_df.drop(columns=["date_block_item_avg"])

In [None]:
del date_block_item_avg

In [None]:
#sdasd

In [None]:
# Mean target per (month, shop). It is merged in, lagged by one month, and the
# unlagged column is dropped so only the lag remains.
date_block_shop_avg = mat_df.groupby(["date_block_num", "shop_id"], as_index=False)["item_cnt_month"].mean()
date_block_shop_avg = date_block_shop_avg.rename(index=str, columns={"item_cnt_month": "date_block_shop_avg"})
mat_df = mat_df.merge(date_block_shop_avg, on=["date_block_num", "shop_id"], how="left")
mat_df = findLag(mat_df, [1], "date_block_shop_avg")
mat_df = mat_df.drop(columns=["date_block_shop_avg"])

In [None]:
del date_block_shop_avg

In [None]:
gc.collect()

In [None]:
# Mean target per (month, item category). It is merged in, lagged by one month,
# and the unlagged column is dropped so only the lag remains.
date_block_item_cat_avg = mat_df.groupby(["date_block_num", "item_category_id"], as_index=False)["item_cnt_month"].mean()
date_block_item_cat_avg = date_block_item_cat_avg.rename(index=str, columns={"item_cnt_month": "date_block_item_cat_avg"})
mat_df = mat_df.merge(date_block_item_cat_avg, on=["date_block_num", "item_category_id"], how="left")
mat_df = findLag(mat_df, [1], "date_block_item_cat_avg")
mat_df = mat_df.drop(columns=["date_block_item_cat_avg"])

In [None]:
del date_block_item_cat_avg

In [None]:
# Mean target per (month, shop, item category). It is merged in, lagged by one
# month, and the unlagged column is dropped so only the lag remains.
date_block_shop_item_cat_avg = mat_df.groupby(["date_block_num", "shop_id", "item_category_id"], as_index=False)["item_cnt_month"].mean()
date_block_shop_item_cat_avg = date_block_shop_item_cat_avg.rename(index=str, columns={"item_cnt_month": "date_block_shop_item_cat_avg"})
mat_df = mat_df.merge(date_block_shop_item_cat_avg, on=["date_block_num", "shop_id", "item_category_id"], how="left")
mat_df = findLag(mat_df, [1], "date_block_shop_item_cat_avg")
mat_df = mat_df.drop(columns=["date_block_shop_item_cat_avg"])

In [None]:
del date_block_shop_item_cat_avg

In [None]:
# Mean target per (month, shop, category type). It is merged in, lagged by one
# month, and the unlagged column is dropped so only the lag remains.
date_block_shop_type_code_avg = mat_df.groupby(["date_block_num", "shop_id", "type_code"], as_index=False)["item_cnt_month"].mean()
date_block_shop_type_code_avg = date_block_shop_type_code_avg.rename(index=str, columns={"item_cnt_month": "date_block_shop_type_code_avg"})
mat_df = mat_df.merge(date_block_shop_type_code_avg, on=["date_block_num", "shop_id", "type_code"], how="left")
mat_df = findLag(mat_df, [1], "date_block_shop_type_code_avg")
mat_df = mat_df.drop(columns=["date_block_shop_type_code_avg"])

In [None]:
del date_block_shop_type_code_avg

In [None]:
# Mean target per (month, shop, category subtype). It is merged in, lagged by
# one month, and the unlagged column is dropped so only the lag remains.
date_block_shop_subtype_code_avg = mat_df.groupby(["date_block_num", "shop_id", "subtype_code"], as_index=False)["item_cnt_month"].mean()
date_block_shop_subtype_code_avg = date_block_shop_subtype_code_avg.rename(index=str, columns={"item_cnt_month": "date_block_shop_subtype_code_avg"})
mat_df = mat_df.merge(date_block_shop_subtype_code_avg, on=["date_block_num", "shop_id", "subtype_code"], how="left")
mat_df = findLag(mat_df, [1], "date_block_shop_subtype_code_avg")
mat_df = mat_df.drop(columns=["date_block_shop_subtype_code_avg"])

In [None]:
del date_block_shop_subtype_code_avg

In [None]:
gc.collect()

In [None]:
# Mean target per (month, city). It is merged in, lagged by one month, and the
# unlagged column is dropped so only the lag remains.
date_block_city_code_avg = mat_df.groupby(["date_block_num", "city_code"], as_index=False)["item_cnt_month"].mean()
date_block_city_code_avg = date_block_city_code_avg.rename(index=str, columns={"item_cnt_month": "date_block_city_code_avg"})
mat_df = mat_df.merge(date_block_city_code_avg, on=["date_block_num", "city_code"], how="left")
mat_df = findLag(mat_df, [1], "date_block_city_code_avg")
mat_df = mat_df.drop(columns=["date_block_city_code_avg"])

In [None]:
del date_block_city_code_avg

In [None]:
# Mean target per (month, item, city). It is merged in, lagged by one month,
# and the unlagged column is dropped so only the lag remains.
date_block_item_city_code_avg = mat_df.groupby(["date_block_num", "item_id", "city_code"], as_index=False)["item_cnt_month"].mean()
date_block_item_city_code_avg = date_block_item_city_code_avg.rename(index=str, columns={"item_cnt_month": "date_block_item_city_code_avg"})
mat_df = mat_df.merge(date_block_item_city_code_avg, on=["date_block_num", "item_id", "city_code"], how="left")
mat_df = findLag(mat_df, [1], "date_block_item_city_code_avg")
mat_df = mat_df.drop(columns=["date_block_item_city_code_avg"])

In [None]:
del date_block_item_city_code_avg

In [None]:
gc.collect()

In [None]:
# Mean target per (month, category type). It is merged in, lagged by one month,
# and the unlagged column is dropped so only the lag remains.
date_block_type_code_avg = mat_df.groupby(["date_block_num", "type_code"], as_index=False)["item_cnt_month"].mean()
date_block_type_code_avg = date_block_type_code_avg.rename(index=str, columns={"item_cnt_month": "date_block_type_code_avg"})
mat_df = mat_df.merge(date_block_type_code_avg, on=["date_block_num", "type_code"], how="left")
mat_df = findLag(mat_df, [1], "date_block_type_code_avg")
mat_df = mat_df.drop(columns=["date_block_type_code_avg"])

In [None]:
del date_block_type_code_avg

In [None]:
gc.collect()

In [None]:
# Mean target per (month, category subtype). It is merged in, lagged by one
# month, and the unlagged column is dropped so only the lag remains.
date_block_subtype_code_avg = mat_df.groupby(["date_block_num", "subtype_code"], as_index=False)["item_cnt_month"].mean()
date_block_subtype_code_avg = date_block_subtype_code_avg.rename(index=str, columns={"item_cnt_month": "date_block_subtype_code_avg"})
mat_df = mat_df.merge(date_block_subtype_code_avg, on=["date_block_num", "subtype_code"], how="left")
mat_df = findLag(mat_df, [1], "date_block_subtype_code_avg")
mat_df = mat_df.drop(columns=["date_block_subtype_code_avg"])

In [None]:
del date_block_subtype_code_avg
gc.collect()

### Trend Features

### 1.Revenue trend

#### Per shop: (revenue in the month - average monthly revenue over all months) / average monthly revenue over all months

In [None]:
mat_df[mat_df["date_block_num"] > 11].head()

In [None]:
train.head()

In [None]:
train["revenue"] = train["item_price"] * train["item_cnt_day"]

In [None]:
# Total revenue of each shop in each month block
total_in_month = train.groupby(["date_block_num", "shop_id"])["revenue"].sum()

In [None]:
total_in_month = total_in_month.reset_index()

In [None]:
total_in_month.head()

In [None]:
total_in_month = total_in_month.rename(index = str , columns = {"revenue" : "total_in_month_revenue"})

In [None]:
# Average monthly revenue of each shop over the whole period
all_months_avg = total_in_month.groupby(["shop_id"], as_index=False)["total_in_month_revenue"].mean()

In [None]:
all_months_avg.head()

In [None]:
all_months_avg = all_months_avg.rename(index = str , columns = {"total_in_month_revenue" : "all_months_avg"})

In [None]:
mat_df = pd.merge(mat_df , total_in_month , on = ["date_block_num" , "shop_id"] , how = "left")

In [None]:
mat_df[(mat_df["date_block_num"] > 11)].head()

In [None]:
#mat_df.head()

In [None]:
mat_df = pd.merge(mat_df , all_months_avg , on = ["shop_id"] , how = "left")

In [None]:
#mat_df = mat_df.drop(["total_in_month_revenue" , "all_months_avg"] , axis = 1)

In [None]:
mat_df["total_in_month_revenue"] = mat_df["total_in_month_revenue"].astype("float32")
mat_df["all_months_avg"] = mat_df["all_months_avg"].astype("float32")

In [None]:
mat_df["delta_revenue"] = (mat_df["total_in_month_revenue"] - mat_df["all_months_avg"]) / mat_df["all_months_avg"]

In [None]:
mat_df = findLag(mat_df , [1] , "delta_revenue")

In [None]:
mat_df = mat_df.drop(["total_in_month_revenue" , "all_months_avg" , "delta_revenue"] , axis = 1)

In [None]:
gc.collect()
del total_in_month , all_months_avg

In [None]:
#mat_df[(mat_df["date_block_num"] > 11)].head(30)

### Add special features

In [None]:
mat_df["month"] = mat_df["date_block_num"] % 12

In [None]:
# Days per calendar month, indexed 0..11 to match month = date_block_num % 12.
# Index 1 is February with 28 days (the original list had 30).
# NOTE(review): assumes block 0 is a January and that no leap-year February is
# covered by the data - confirm against the date column.
days = pd.Series([31,28,31,30,31,30,31,31,30,31,30,31])
mat_df["total_days_in_month"] = mat_df["month"].map(days)
mat_df["total_days_in_month"] = mat_df["total_days_in_month"].astype("int8")

In [None]:
#mat_df.info()

### Months from last sale for item/shop pair and only for item.

In [None]:
gc.collect()

In [None]:
#mat_df[(mat_df["date_block_num"] > 11)]

In [None]:
#For item/shop pair

In [None]:
# Row-by-row pass that fills item_shop_last_sale with the gap, in month blocks,
# between the current row and the block last stored for the same item/shop pair.
ts = time.time()
# cache maps an "item_id shop_id" string key to the last date_block_num stored for that pair.
cache = {}
# -1 marks rows for which nothing was stored for the pair yet.
mat_df['item_shop_last_sale'] = -1
mat_df['item_shop_last_sale'] = mat_df['item_shop_last_sale'].astype(np.int8)
# NOTE(review): the gaps are only meaningful if rows are visited in ascending
# date_block_num order - presumably guaranteed by how the grid was built; confirm.
for idx, row in mat_df.iterrows():    
    key = str(row.item_id)+' '+str(row.shop_id)
    if key not in cache:
        # A pair enters the cache only on a row with a non-zero monthly count.
        if row.item_cnt_month!=0:
            cache[key] = row.date_block_num
    else:
        last_date_block_num = cache[key]
        mat_df.at[idx, 'item_shop_last_sale'] = row.date_block_num - last_date_block_num
        # NOTE(review): the cache is refreshed on every later row of the pair,
        # including rows where item_cnt_month is 0, so the stored block is the
        # previous row seen rather than the last month with sales - confirm intended.
        cache[key] = row.date_block_num         
# Elapsed seconds, shown as the cell output.
time.time() - ts

In [None]:
# Same row-by-row pass as for item/shop pairs, but keyed on item_id alone:
# fills item_last_sale with the gap, in month blocks, to the block last stored
# for the item.
ts = time.time()
# cache maps item_id to the last date_block_num stored for that item.
cache = {}
# -1 marks rows for which no gap was written.
mat_df['item_last_sale'] = -1
mat_df['item_last_sale'] = mat_df['item_last_sale'].astype(np.int8)
for idx, row in mat_df.iterrows():    
    key = row.item_id
    if key not in cache:
        # An item enters the cache only on a row with a non-zero monthly count.
        if row.item_cnt_month!=0:
            cache[key] = row.date_block_num
    else:
        last_date_block_num = cache[key]
        # Rows from the same block as the cached one (the item in other shops)
        # are skipped, so they keep the -1 sentinel.
        if row.date_block_num>last_date_block_num:
            mat_df.at[idx, 'item_last_sale'] = row.date_block_num - last_date_block_num
            # NOTE(review): the cache advances even when this row's
            # item_cnt_month is 0 - confirm that is intended.
            cache[key] = row.date_block_num         
# Elapsed seconds, shown as the cell output.
time.time() - ts

### Months from first sale for item/shop and for item only

In [None]:
#mat_df.describe()

In [None]:
ts = time.time()
mat_df['item_shop_first_sale'] = mat_df['date_block_num'] - mat_df.groupby(['item_id','shop_id'])['date_block_num'].transform('min')
mat_df['item_first_sale'] = mat_df['date_block_num'] - mat_df.groupby('item_id')['date_block_num'].transform('min')
time.time() - ts

In [None]:
gc.collect()

### Remove first 12 months

In [None]:
mat_df = mat_df[mat_df.date_block_num > 11]

In [None]:
#mat_df.head()

### Fill nulls

### Find out which columns contain nulls

In [None]:
gc.collect()

In [None]:
for col in mat_df.columns:
    if(mat_df[col].isnull().any()):
        print(col)

In [None]:
gc.collect()

In [None]:
#Null containing columns
"""item_cnt_month_lag_1
item_cnt_month_lag_2
item_cnt_month_lag_3
item_cnt_month_lag_6
date_block_avg_lag_1
date_block_item_avg_lag_1
date_block_shop_avg_lag_1
date_block_item_cat_avg_lag_1
date_block_shop_item_cat_avg_lag_1
date_block_shop_type_code_avg_lag_1
date_block_shop_subtype_code_avg_lag_1
date_block_city_code_avg_lag_1
date_block_item_city_code_avg_lag_1
date_block_type_code_avg_lag_1
date_block_subtype_code_avg_lag_1
delta_revenue_lag_1"""

In [None]:
"""temp =mat_df[["item_cnt_month_lag_1",
"item_cnt_month_lag_2",
"item_cnt_month_lag_3",
"item_cnt_month_lag_6",
"date_block_avg_lag_1",
"date_block_item_avg_lag_1",
"date_block_shop_avg_lag_1",
"date_block_item_cat_avg_lag_1",
"date_block_shop_item_cat_avg_lag_1",
"date_block_shop_type_code_avg_lag_1",
"date_block_shop_subtype_code_avg_lag_1",
"date_block_city_code_avg_lag_1",
"date_block_item_city_code_avg_lag_1",
"date_block_type_code_avg_lag_1",
"date_block_subtype_code_avg_lag_1",
"delta_revenue_lag_1"]]"""

In [None]:
#mat_df.head(50)

In [None]:
len(mat_df[mat_df["item_cnt_month_lag_1"].isnull()]) , len(mat_df)

In [None]:
def fill_na(df):
    """Replace missing values with 0 in every lag feature column of ``df``.

    A column is treated as a lag feature when its name contains ``"_lag_"``.
    ``delta_revenue_lag_1`` is deliberately skipped and keeps its NaNs.

    The frame is modified in place and also returned, so both
    ``fill_na(df)`` and ``df = fill_na(df)`` work.
    """
    for col in df.columns:
        # Cheap name checks first, so the null scan only runs on lag columns.
        if '_lag_' not in col or col == "delta_revenue_lag_1":
            continue
        if df[col].isnull().any():
            # Assign the result back rather than calling fillna(inplace=True)
            # on df[col]: that form acts on a temporary Series and is not
            # guaranteed to write through to the frame.
            df[col] = df[col].fillna(0)
    return df

mat_df = fill_na(mat_df)

In [None]:
mat_df.to_pickle('data.pkl')

In [None]:
del mat_df
del items
del shops
del item_cats
del train

In [None]:
gc.collect()

In [None]:
data = pd.read_pickle('data.pkl')

In [None]:
#data = mat_df.copy()

In [None]:
# Time-based split: blocks below 33 train the model, block 33 validates it,
# and block 34 (the appended test rows) is what gets predicted.
X_train = data.loc[data["date_block_num"] < 33].drop(columns=["item_cnt_month"])
Y_train = data.loc[data["date_block_num"] < 33, "item_cnt_month"]
X_valid = data.loc[data["date_block_num"] == 33].drop(columns=["item_cnt_month"])
Y_valid = data.loc[data["date_block_num"] == 33, "item_cnt_month"]
X_test = data.loc[data["date_block_num"] == 34].drop(columns=["item_cnt_month"])

In [None]:
del data
gc.collect()

In [None]:
#X_train.head()

In [None]:
del train_arr , test_arr , LabelEncoder , col,cols,count,days 

In [None]:
del i, sales_month , train_temp , ts

In [None]:
del split,product,time,sns,os,findLag , last_date_block_num , cache

In [None]:
gc.collect()

In [None]:
whos

In [None]:
gc.collect()

In [None]:
import time
import xgboost

ts = time.time()

# Gradient-boosted regressor for the monthly item count.
model = xgboost.XGBRegressor(
    max_depth = 8,
    n_estimators=1000,      
    min_child_weight=300, 
    colsample_bytree=0.8, 
    subsample=0.8, 
    eta=0.3,    
    seed=42)

# Train on blocks < 33 while reporting RMSE on both the training set and the
# block-33 validation set.
# NOTE(review): early_stopping_rounds = 1 halts at the first round without an
# improvement, well before n_estimators=1000 - confirm that is intended.
# NOTE(review): newer XGBoost releases expect eval_metric / early_stopping_rounds
# on the estimator constructor rather than fit() - TODO confirm against the
# installed version.
model.fit(
    X_train, 
    Y_train, 
    eval_metric="rmse", 
    eval_set=[(X_train, Y_train), (X_valid, Y_valid)], 
    verbose=True, 
    early_stopping_rounds = 1)

time.time() - ts

In [None]:
#data.head()

In [None]:
#model.best_iteration

In [None]:
#X_train.columns[12]

In [None]:
#xgboost.plot_importance(model)

In [None]:
dot_file = xgboost.to_graphviz(model)

In [None]:
model.save_model(fname="xgb_model_v7")

In [None]:
dot_file.save(filename="dot_file")

In [None]:
"""import pydot

(graph,) = pydot.graph_from_dot_file('somefile.dot')
graph.write_png('somefile.png')"""

In [None]:
#xgboost.plot_tree(model , num_trees = 2)

In [None]:
#pd.Series(model.feature_importances_).plot.bar()

In [None]:
submission = model.predict(X_test)

In [None]:
#submission

In [None]:
# Build the submission frame. IDs come from the test file's own ID column
# rather than the positional index (they only coincide while ID is 0..n-1),
# and predictions are clipped to [0, 20], the range the training target was
# clipped to.
submission = pd.DataFrame({"ID" : test["ID"] , "item_cnt_month" : np.clip(submission , 0 , 20)})

In [None]:
submission.to_csv("final_v7.csv" , index = False)