# Librerías

In [1]:
import warnings
warnings.filterwarnings("ignore")

import pandas as pd
import numpy as np
import os
import seaborn as sns

%matplotlib inline

# Predicciones en csv

In [2]:
# Load the week-3 predictions of each model (LightGBM, CatBoost, XGBoost),
# one DataFrame per CSV file, in that order.
preds_lightgbm, preds_catboost, preds_xgboost = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../predictions/week3/preds_lightgbm_pw.csv',
        '../predictions/week3/preds_catboost_pw.csv',
        '../predictions/week3/preds_xgboost_pw.csv',
    )
)

In [3]:
# Put the three models' predictions side by side. 'Unnamed: 0' is the row-id
# column present in every predictions CSV; it is used as the join key, with
# the LightGBM frame as the left-hand side.
full_preds = (
    preds_lightgbm
    .join(preds_catboost.set_index('Unnamed: 0'), on='Unnamed: 0')
    .join(preds_xgboost.set_index('Unnamed: 0'), on='Unnamed: 0')
)
full_preds.head(10)

Unnamed: 0.1,Unnamed: 0,predicciones_lightgbm_pw,predicciones_catboost_pw,predicciones_xgboost_pw
0,0,50,102,34
1,1,120,242,105
2,2,18,20,15
3,3,9,13,9
4,4,224,400,155
5,5,115,300,87
6,6,89,234,76
7,7,1,1,2
8,8,5,6,11
9,9,47,148,50


In [4]:
# Simple ensemble: unweighted arithmetic mean of the three model predictions,
# stored in a new 'media' column.
prediction_total = (
    full_preds['predicciones_lightgbm_pw']
    + full_preds['predicciones_catboost_pw']
    + full_preds['predicciones_xgboost_pw']
)
full_preds['media'] = prediction_total / 3
full_preds.head(10)

Unnamed: 0.1,Unnamed: 0,predicciones_lightgbm_pw,predicciones_catboost_pw,predicciones_xgboost_pw,media
0,0,50,102,34,62.0
1,1,120,242,105,155.666667
2,2,18,20,15,17.666667
3,3,9,13,9,10.333333
4,4,224,400,155,259.666667
5,5,115,300,87,167.333333
6,6,89,234,76,133.0
7,7,1,1,2,1.333333
8,8,5,6,11,7.333333
9,9,47,148,50,81.666667


In [5]:
# Round the averaged prediction to the nearest integer and keep it as a plain
# Python list. Vectorised replacement for the per-element int(round(x)) loop;
# both use round-half-to-even, so the values are unchanged.
y_preds = full_preds['media'].round().astype(int).tolist()

In [6]:
# Peek at the first 20 rounded ensemble predictions.
y_preds[:20]

[62,
 156,
 18,
 10,
 260,
 167,
 133,
 1,
 7,
 82,
 0,
 52,
 4,
 1,
 244,
 391,
 167,
 42,
 15,
 238]

# Apuesta de bloque

In [7]:
# Load the product -> block mapping (the preview shows the columns
# product_id and block_id).
product_blocks = pd.read_csv('../data/product_blocks.csv')
product_blocks.head(10)

Unnamed: 0,product_id,block_id
0,612967398,0
1,296892108,0
2,139541214,0
3,963923934,0
4,938230141,0
5,172045154,0
6,663552768,0
7,160621689,1
8,948976891,1
9,556017319,1


In [8]:
# Number of products in each block, as a Series indexed by block_id and
# named 'n_products' so it can be joined back onto the mapping.
productos_por_bloque = (
    product_blocks
    .groupby('block_id')['product_id']
    .count()
    .rename('n_products')
)
productos_por_bloque.head()

block_id
0     7
1     7
2     7
3     6
4    10
Name: n_products, dtype: int64

In [9]:
# Attach the block size (n_products) to every product row by matching the
# row's block_id against the index of the per-block counts.
product_blocks_n = product_blocks.merge(
    productos_por_bloque,
    left_on='block_id',
    right_index=True,
    how='left',
)
product_blocks_n.head()

Unnamed: 0,product_id,block_id,n_products
0,612967398,0,7
1,296892108,0,7
2,139541214,0,7
3,963923934,0,7
4,938230141,0,7


In [11]:
# Load only the product id and price columns of the week-3 test set.
# NOTE(review): the cells below pair these rows positionally with y_preds,
# presumably because the predictions were generated from this same file in
# the same row order — confirm.
test = pd.read_csv('../data/week3/final_test_pw.csv', usecols=['product_id', 'price'])

In [12]:
# Preview the first rows of the test set.
test.head()

Unnamed: 0,product_id,price
0,151926,25.95
1,213413,19.95
2,310130,12.95
3,455200,29.95
4,571044,15.95


In [13]:
# Per-product table: predicted value (preds), price, and gain = preds * price.
# y_preds is attached positionally to the rows of `test`. The result is
# ordered from highest to lowest gain.
predicciones = (
    test[['product_id', 'price']]
    .assign(preds=y_preds)
    .assign(gain=lambda frame: frame['preds'] * frame['price'])
    [['product_id', 'preds', 'price', 'gain']]
    .sort_values('gain', ascending=False)
)
predicciones.head()

Unnamed: 0,product_id,preds,price,gain
4094,450385036,6058,19.95,120857.1
328,37119458,5252,22.95,120533.4
1020,112446414,3163,25.95,82079.85
7186,795471256,3042,22.95,69813.9
4226,466243134,3165,19.95,63141.75


In [14]:
# Sanity check: compare the shapes of the predictions table and the test set
# (the row counts are expected to match).
predicciones.shape, test.shape

((8996, 4), (8996, 2))

In [15]:
# Add block_id and n_products to every predicted product. A left merge keeps
# all predictions (in their current gain order) and yields a fresh 0..n-1 index.
predicciones_final = predicciones.merge(
    product_blocks_n,
    on='product_id',
    how='left',
)
predicciones_final.head()

Unnamed: 0,product_id,preds,price,gain,block_id,n_products
0,450385036,6058,19.95,120857.1,2233,8
1,37119458,5252,22.95,120533.4,812,8
2,112446414,3163,25.95,82079.85,2306,3
3,795471256,3042,22.95,69813.9,1580,8
4,466243134,3165,19.95,63141.75,2000,5


In [16]:
# Total gain per block, highest first, as a Series named 'gain_per_block'
# indexed by block_id.
# Only the 'gain' column is aggregated: summing the whole frame also added up
# product_id, price, etc. just to throw the result away, and would break as
# soon as a non-numeric column is present.
group_block_gain = (
    predicciones_final
    .groupby('block_id')['gain']
    .sum()
    .sort_values(ascending=False)
    .rename('gain_per_block')
)

In [17]:
# Tag every product row with its block's total gain, order the rows by that
# block total (best blocks first) and renumber the index from 0.
preds_final = (
    predicciones_final
    .join(group_block_gain, on='block_id')
    .sort_values('gain_per_block', ascending=False)
    .reset_index(drop=True)
)

In [18]:
# Preview: product rows ordered by their block's total gain.
preds_final.head()

Unnamed: 0,product_id,preds,price,gain,block_id,n_products,gain_per_block
0,185022705,5,22.95,114.75,1580,8,129010.45
1,323423015,1443,29.95,43217.85,1580,8,129010.45
2,795471256,3042,22.95,69813.9,1580,8,129010.45
3,725672670,666,7.95,5294.7,1580,8,129010.45
4,842106369,55,29.95,1647.25,1580,8,129010.45


In [19]:
# Keep a single row per block (the first one encountered), ordered by block gain.
# After this step only block_id, n_products and gain_per_block are meaningful;
# product_id/preds/price/gain belong to whichever product happened to be first.
# drop_duplicates replaces the previous `.iloc[<index labels>]` lookup, which
# mixed labels with positions: it was only correct right after reset_index and
# selected wrong rows (or raised IndexError) when the cell was run a second time.
preds_final = (
    preds_final
    .drop_duplicates(subset='block_id')
    .sort_values('gain_per_block', ascending=False)
)

In [20]:
# Preview: one row per block, best blocks first.
preds_final.head()

Unnamed: 0,product_id,preds,price,gain,block_id,n_products,gain_per_block
0,185022705,5,22.95,114.75,1580,8,129010.45
8,19599326,57,15.95,909.15,812,8,127505.05
12,921904870,36,22.95,826.2,2233,8,122435.85
15,146820915,470,25.95,12196.5,388,8,84092.85
20,112446414,3163,25.95,82079.85,2306,3,82079.85


In [21]:
# Spot check of a single block (block_id 560).
# NOTE(review): why this particular block is inspected is not stated here.
preds_final[preds_final.block_id==560]

Unnamed: 0,product_id,preds,price,gain,block_id,n_products,gain_per_block
259,640144857,122,29.95,3653.9,560,8,37066.9


In [22]:
# Running total of products when blocks are taken in their current order
# (descending block gain).
preds_final = preds_final.assign(
    product_cumsum=preds_final['n_products'].cumsum()
)
preds_final.head()

Unnamed: 0,product_id,preds,price,gain,block_id,n_products,gain_per_block,product_cumsum
0,185022705,5,22.95,114.75,1580,8,129010.45,8
8,19599326,57,15.95,909.15,812,8,127505.05,16
12,921904870,36,22.95,826.2,2233,8,122435.85,24
15,146820915,470,25.95,12196.5,388,8,84092.85,32
20,112446414,3163,25.95,82079.85,2306,3,82079.85,35


In [23]:
# Keep the leading blocks whose cumulative product count stays at or below 100.
# NOTE(review): 100 is presumably the maximum number of products allowed in
# the bet — confirm and consider naming the constant.
within_limit = preds_final['product_cumsum'].le(100)
bet_blocks = preds_final.loc[within_limit]
bet_blocks

Unnamed: 0,product_id,preds,price,gain,block_id,n_products,gain_per_block,product_cumsum
0,185022705,5,22.95,114.75,1580,8,129010.45,8
8,19599326,57,15.95,909.15,812,8,127505.05,16
12,921904870,36,22.95,826.2,2233,8,122435.85,24
15,146820915,470,25.95,12196.5,388,8,84092.85,32
20,112446414,3163,25.95,82079.85,2306,3,82079.85,35
21,582384770,1,39.95,39.95,2574,10,70875.1,45
30,699251225,11,45.95,505.45,1143,10,67033.65,55
39,195407591,8,29.95,239.6,2666,10,66941.25,65
47,519589876,67,25.95,1738.65,2000,5,65928.65,70
50,850667059,2519,19.95,50254.05,387,8,64019.3,78


In [24]:
# Persist the block ranking (block id, total gain, size and running product
# count) without the DataFrame index. `index=False` is the documented way to
# omit it; the previous `index=None` only worked because None is falsy.
# NOTE(review): this writes every block in preds_final, not just bet_blocks —
# confirm that is intended for a file named "bets".
preds_final[
    ['block_id', 'gain_per_block', 'n_products', 'product_cumsum']
].to_csv('../bets/week3_bets_pw.csv', index=False)

### Posibles bloques para la apuesta final:

In [25]:
# Block ids selected for the bet.
# NOTE(review): the saved output of this cell lists different block ids (and
# index labels) than the bet_blocks table displayed earlier in the notebook,
# so it looks like it came from a different run — restart the kernel and run
# all cells to confirm which selection is current.
bet_blocks.block_id

0      812
4     1580
12    1143
21     388
26     442
32    2735
37     487
44    2574
52     530
59    1747
Name: block_id, dtype: int64