In [2]:
import os
import itertools
import time
import json
import tqdm

import xgboost as xgb
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
from pathlib import Path
%matplotlib inline

from sklearn.ensemble import (RandomForestClassifier, AdaBoostClassifier, 
                              GradientBoostingClassifier, ExtraTreesClassifier)
from sklearn.model_selection import KFold, train_test_split

# Feature scaling, required for non-tree-based models
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from scipy.stats.mstats import winsorize

# Encoding categorical data for non-tree-based models
from sklearn.preprocessing import OneHotEncoder


In [5]:
mean_2to3 = pd.read_csv('../result/mean_submission_2to3.csv', low_memory = True, thousands = ',')
mean_30 = pd.read_csv('../result/mean_submission_30.csv', low_memory = True, thousands = ',')
mean_40 = pd.read_csv('../result/mean_submission_40.csv', low_memory = True, thousands = ',')
mean_mul = pd.read_csv('../result/mean_submission_mul.csv', low_memory = True, thousands = ',')
mean_52 = pd.read_csv('../result/mean_submission_52.csv', low_memory = True, thousands = ',')
mean_60 = pd.read_csv('../result/mean_submission_60.csv', low_memory = True, thousands = ',')
mean_all = pd.read_csv('../result/mean_submission_all.csv', low_memory = True, thousands = ',')
mean_may = pd.read_csv('../result/mean_submission_may.csv', low_memory = True, thousands = ',')

In [13]:
mean_52_div_mul = pd.concat((mean_52.rename(columns={"week1": "52_week1", "week2": "52_week2","week3": "52_week3",
                                                               "week4": "52_week4","week5": "52_week5"}), mean_mul), axis = 1)

In [15]:
mean_52_div_mul

Unnamed: 0,sku_id,52_week1,52_week2,52_week3,52_week4,52_week5,sku_id.1,week1,week2,week3,week4,week5
0,SKDtK67I,1.076923,1.076923,1.076923,1.076923,1.076923,SKDtK67I,1.766154,1.703077,1.829231,1.892308,1.923846
1,SKpLKkIS,0.673077,0.673077,0.673077,0.673077,0.673077,SKpLKkIS,1.313846,1.266923,1.360769,1.407692,1.431154
2,SK2vQMpX,9.423077,9.423077,9.423077,9.423077,9.423077,SK2vQMpX,11.113846,10.716923,11.510769,11.907692,12.106154
3,SKY0RuBE,1.884615,1.884615,1.884615,1.884615,1.884615,SKY0RuBE,2.670769,2.575385,2.766154,2.861538,2.909231
4,SKvr2o3y,0.403846,0.403846,0.403846,0.403846,0.403846,SKvr2o3y,1.012308,0.976154,1.048462,1.084615,1.102692
5,SK4VKk3b,0.807692,0.807692,0.807692,0.807692,0.807692,SK4VKk3b,1.464615,1.412308,1.516923,1.569231,1.595385
6,SKDbZjmX,1.211538,1.211538,1.211538,1.211538,1.211538,SKDbZjmX,1.916923,1.848462,1.985385,2.053846,2.088077
7,SK9wkGUJ,0.403846,0.403846,0.403846,0.403846,0.403846,SK9wkGUJ,1.012308,0.976154,1.048462,1.084615,1.102692
8,SKjQriLy,0.403846,0.403846,0.403846,0.403846,0.403846,SKjQriLy,1.012308,0.976154,1.048462,1.084615,1.102692
9,SKGy1eQD,1.884615,1.884615,1.884615,1.884615,1.884615,SKGy1eQD,2.670769,2.575385,2.766154,2.861538,2.909231


In [19]:
mean_52_div_2to3 = pd.concat((mean_52.rename(columns={"week1": "52_week1", "week2": "52_week2","week3": "52_week3",
                                                               "week4": "52_week4","week5": "52_week5"}), mean_2to3), axis = 1)
mean_52_div_2to3

Unnamed: 0,sku_id,52_week1,52_week2,52_week3,52_week4,52_week5,sku_id.1,week1,week2,week3,week4,week5
0,SKDtK67I,1.076923,1.076923,1.076923,1.076923,1.076923,SK013i5Y,0.636364,0.636364,0.636364,0.636364,0.636364
1,SKpLKkIS,0.673077,0.673077,0.673077,0.673077,0.673077,SK013PAq,0.477273,0.477273,0.477273,0.477273,0.477273
2,SK2vQMpX,9.423077,9.423077,9.423077,9.423077,9.423077,SK014m67,1.272727,1.272727,1.272727,1.272727,1.272727
3,SKY0RuBE,1.884615,1.884615,1.884615,1.884615,1.884615,SK016iX3,0.159091,0.159091,0.159091,0.159091,0.159091
4,SKvr2o3y,0.403846,0.403846,0.403846,0.403846,0.403846,SK019vAh,0.954545,0.954545,0.954545,0.954545,0.954545
5,SK4VKk3b,0.807692,0.807692,0.807692,0.807692,0.807692,SK01aOV9,0.954545,0.954545,0.954545,0.954545,0.954545
6,SKDbZjmX,1.211538,1.211538,1.211538,1.211538,1.211538,SK01GCgB,0.954545,0.954545,0.954545,0.954545,0.954545
7,SK9wkGUJ,0.403846,0.403846,0.403846,0.403846,0.403846,SK01giMk,2.068182,2.068182,2.068182,2.068182,2.068182
8,SKjQriLy,0.403846,0.403846,0.403846,0.403846,0.403846,SK01GLtA,2.704545,2.704545,2.704545,2.704545,2.704545
9,SKGy1eQD,1.884615,1.884615,1.884615,1.884615,1.884615,SK01hQVy,0.318182,0.318182,0.318182,0.318182,0.318182


In [11]:
row_number = 104510
# (mean_52_div_2to3['week1'] / mean_52_div_2to3['52_week1']).sum()

In [23]:
(mean_52_div_2to3['week2'] / mean_52_div_2to3['52_week2']).sum()

704874.72397879534

In [24]:
(mean_52_div_2to3['week3'] / mean_52_div_2to3['52_week3']).sum()

704874.72397879534

In [None]:
(mean_52_div_2to3['week1'] / mean_52_div_2to3['52_week1']).sum()

In [None]:
(mean_52_div_2to3['week1'] / mean_52_div_2to3['52_week1']).sum()

In [3]:
mean_WL = pd.read_csv('../result/mean_submission_WL.csv', low_memory = True, thousands = ',')

In [18]:
mean_52_div_WL = pd.concat((mean_52.rename(columns={"week1": "52_week1", "week2": "52_week2","week3": "52_week3",
                                                               "week4": "52_week4","week5": "52_week5"}), mean_WL), axis = 1)
(mean_52_div_WL['week5'] / (mean_52_div_WL['52_week5'] - 0.5) ).sum() / row_number

0.96863820686519586

In [25]:
# mean_52_div_2to3 = pd.concat((mean_52.rename(columns={"week1": "52_week1", "week2": "52_week2","week3": "52_week3",
#                                                                "week4": "52_week4","week5": "52_week5"}), mean_2to3), axis = 1)
# (mean_52_div_2to3['week1'] / mean_52_div_2to3['52_week1']).sum()

704874.72397879534

In [27]:
(mean_52_div_2to3['week1']).sum() / (mean_52_div_2to3['52_week1']).sum()

0.9944557550867571

In [30]:
(mean_52['week1']).sum() / (mean_30['week1']).sum()

1.0053743368726615

In [33]:
(mean_52['week1']).sum() / (mean_40['week1']).sum()

1.0052302735634981

In [35]:
(mean_52['week1']).sum() / (mean_60['week1']).sum()

1.0085465049222755

In [37]:
(mean_52['week1']).sum() / (mean_mul['week1']).sum()

0.71871075002474683

In [39]:
(mean_52['week1']).sum() / (mean_all['week1']).sum()

0.96994504977131379

In [40]:
(mean_52['week1']).sum() / (mean_may['week1']).sum()

1.3530115904102211

In [45]:
mean_52_div_mul = pd.concat((mean_52.rename(columns={"week1": "52_week1", "week2": "52_week2","week3": "52_week3",
(mean_52_div_mul['week1'] / (mean_52_div_mul['52_week1'] - 0.5)).sum() / row_number

1.1106416804917596