## **Predicting purchase probabilities**

### **Loading the data**

In [1]:
import os

import numpy as np
import pandas as pd

from classes import Helper, Purchase_Probabilities

In [2]:
# Input/output directories. They can be overridden per machine through the
# MLIM_DATA_PATH / MLIM_EXPORT_PATH environment variables; the original
# absolute locations remain the defaults, so existing setups keep working.
# os.path.join(path, '') guarantees the trailing separator that the plain
# string concatenation used to build the file list (DATA_PATH + '<file>')
# relies on, even when an override is given without one.
DATA_PATH = os.path.join(
    os.environ.get(
        'MLIM_DATA_PATH',
        '/Users/leonhagel/Documents/coding/github/mlim-g2/data/',
    ),
    '',
)
EXPORT_PATH = os.path.join(
    os.environ.get(
        'MLIM_EXPORT_PATH',
        '/Users/leonhagel/Documents/coding/github/mlim-g2/export/',
    ),
    '',
)

In [3]:
# Parquet inputs passed to purchase.load(); each file sits directly in DATA_PATH.
files = {
    'data': [
        DATA_PATH + filename
        for filename in (
            'baskets.parquet',
            'coupon_index.parquet',
            'coupons.parquet',
            'merged.parquet',
        )
    ]
}

In [4]:
purchase = Purchase_Probabilities()
purchase.load(files)

# Swap every loaded frame for the result of reduce_data_size().
# NOTE(review): presumably this downcasts dtypes to cut memory use — confirm in classes.py.
for name, df in purchase.data.items():
    purchase.data[name] = purchase.reduce_data_size(df)

<br>

### **Mappings**

**Price map and product histories**

In [5]:
# Store the output of purchase.clean() under the 'clean' key; the purchase
# filter further down reads its 'purchased' column from this frame.
purchase.data['clean'] = purchase.clean()

In [6]:
# Rows of the merged frame that carry an observed price; .copy() hands back an
# independent frame so the stored 'merged' data is never touched downstream.
df_prices = (
    purchase.data['merged']
    .loc[lambda frame: frame['price'].notna(), :]
    .copy()
)
df_prices.describe().round(2)

Unnamed: 0,week,shopper,product,price,discount
count,68841598.0,68841598.0,68841598.0,68841598.0,2708043.0
mean,44.5,50006.96,124.96,584.9,27.29
std,25.98,28881.66,69.84,97.75,9.78
min,0.0,0.0,0.0,234.0,10.0
25%,22.0,24981.0,66.0,506.0,20.0
50%,44.0,50019.0,123.0,579.0,30.0
75%,67.0,75041.0,189.0,654.0,35.0
max,89.0,99999.0,249.0,837.0,40.0


In [7]:
# Rows of the cleaned frame that are flagged as actual purchases (purchased == 1);
# .copy() hands back an independent frame so the stored 'clean' data stays untouched.
df_purchased = (
    purchase.data['clean']
    .loc[lambda frame: frame['purchased'] == 1, :]
    .copy()
)
df_purchased.describe().round(2)

Unnamed: 0,week,shopper,product,price,discount,purchased
count,1378720.0,1378720.0,1378720.0,1378720.0,1378720.0,1378720.0
mean,44.48,997.21,125.07,590.73,0.01,1.0
std,25.98,576.25,69.7,92.7,0.06,0.0
min,0.0,0.0,0.0,389.23,0.0,1.0
25%,22.0,499.0,66.0,510.0,0.0,1.0
50%,44.0,993.0,123.0,582.0,0.0,1.0
75%,67.0,1496.0,189.0,657.0,0.0,1.0
max,89.0,1999.0,249.0,837.65,0.4,1.0


In [8]:
# One entry per mapping to build. Each entry names the source frame, the
# columns used as row / column labels, the column whose values are collected
# per cell, and the list every cell starts from.
map_config = {
    # shopper x product -> weeks of purchase; every cell starts with -inf.
    'product_histories': {
        'df': df_purchased,
        'row_name': 'shopper',
        'column_name': 'product',
        'value_name': 'week',
        'initial_array': [-np.inf],
    },
    # week x product -> observed prices, taken from all priced rows.
    # Disabled alternative: use df_purchased instead of df_prices as 'df'.
    'prices': {
        'df': df_prices,
        'row_name': 'week',
        'column_name': 'product',
        'value_name': 'price',
        'initial_array': [],
    },
}

# NOTE: slow on the full data — the recorded run needed roughly 38 minutes
# for the ~68.8M priced rows (see the progress bars in the cell output).
purchase.get_mappings(map_config)

100%|██████████| 1378720/1378720 [00:42<00:00, 32370.43it/s]
100%|██████████| 68841598/68841598 [37:49<00:00, 30331.53it/s]  


In [9]:
# Preview the product-history mapping: rows are shoppers, columns are products,
# and each cell lists purchase weeks after the initial -inf entry.
purchase.mappings['product_histories'].head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,240,241,242,243,244,245,246,247,248,249
0,[-inf],[-inf],[-inf],[-inf],"[-inf, 24.0, 40.0, 45.0, 51.0, 54.0, 58.0, 60....",[-inf],[-inf],[-inf],[-inf],[-inf],...,"[-inf, 27.0]",[-inf],[-inf],"[-inf, 44.0]","[-inf, 5.0, 8.0, 20.0, 37.0, 38.0, 56.0, 59.0]",[-inf],[-inf],[-inf],[-inf],"[-inf, 10.0, 26.0, 28.0, 32.0, 35.0, 40.0, 65...."
1,"[-inf, 7.0, 11.0, 48.0, 61.0, 66.0, 81.0]","[-inf, 2.0, 6.0, 18.0, 20.0, 27.0, 29.0, 49.0,...",[-inf],[-inf],"[-inf, 5.0, 13.0, 21.0, 23.0, 30.0, 47.0, 69.0...",[-inf],[-inf],[-inf],[-inf],[-inf],...,[-inf],[-inf],[-inf],[-inf],[-inf],[-inf],[-inf],[-inf],[-inf],"[-inf, 4.0, 19.0, 22.0, 35.0, 79.0, 88.0]"
2,[-inf],"[-inf, 2.0, 3.0, 6.0, 7.0, 8.0, 9.0, 11.0, 13....",[-inf],[-inf],[-inf],[-inf],"[-inf, 5.0, 10.0, 15.0, 32.0, 54.0]",[-inf],[-inf],"[-inf, 1.0, 17.0]",...,"[-inf, 4.0, 5.0, 16.0, 18.0, 21.0, 26.0, 33.0,...",[-inf],[-inf],[-inf],[-inf],"[-inf, 75.0]",[-inf],[-inf],"[-inf, 3.0, 14.0, 20.0, 42.0, 45.0, 50.0, 51.0...","[-inf, 0.0, 23.0, 30.0, 31.0, 39.0]"
3,[-inf],[-inf],[-inf],[-inf],"[-inf, 7.0, 19.0, 27.0]",[-inf],"[-inf, 0.0, 13.0, 16.0, 18.0, 20.0, 23.0, 30.0...","[-inf, 11.0, 29.0, 38.0, 47.0, 69.0, 85.0]",[-inf],[-inf],...,[-inf],[-inf],[-inf],"[-inf, 57.0]",[-inf],"[-inf, 14.0, 87.0]",[-inf],[-inf],"[-inf, 2.0, 5.0, 10.0, 23.0, 38.0, 44.0, 47.0,...","[-inf, 16.0, 51.0, 71.0]"
4,[-inf],[-inf],"[-inf, 6.0, 15.0, 21.0, 22.0, 27.0, 28.0, 58.0...",[-inf],[-inf],[-inf],[-inf],[-inf],[-inf],[-inf],...,[-inf],[-inf],[-inf],[-inf],[-inf],[-inf],[-inf],[-inf],[-inf],[-inf]


In [10]:
# Preview the price mapping: rows are weeks, columns are products,
# and each cell lists the prices collected for that week/product pair.
purchase.mappings['prices'].head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,240,241,242,243,244,245,246,247,248,249
0,"[688.0, 688.0, 688.0, 688.0, 688.0, 688.0, 688...","[560.0, 560.0, 560.0, 560.0, 560.0, 560.0, 560...","[773.0, 773.0, 773.0, 773.0, 773.0, 773.0, 773...","[722.0, 722.0, 541.0, 722.0, 722.0, 722.0, 722...","[620.0, 620.0, 620.0, 620.0, 620.0, 620.0, 620...","[721.0, 721.0, 721.0, 721.0, 721.0, 721.0, 721...","[568.0, 568.0, 568.0, 568.0, 568.0, 568.0, 426...","[772.0, 579.0, 772.0, 772.0, 772.0, 772.0, 772...","[669.0, 669.0, 669.0, 669.0, 669.0, 669.0, 669...","[759.0, 759.0, 759.0, 759.0, 759.0, 759.0, 759...",...,"[528.0, 528.0, 528.0, 528.0, 528.0, 528.0, 528...","[699.0, 699.0, 699.0, 699.0, 699.0, 454.0, 699...","[511.0, 511.0, 511.0, 511.0, 511.0, 511.0, 511...","[490.0, 490.0, 490.0, 490.0, 490.0, 490.0, 490...","[694.0, 694.0, 694.0, 694.0, 694.0, 694.0, 694...","[439.0, 549.0, 549.0, 549.0, 549.0, 549.0, 549...","[702.0, 702.0, 702.0, 702.0, 702.0, 702.0, 702...","[670.0, 670.0, 670.0, 670.0, 670.0, 670.0, 670...","[490.0, 490.0, 490.0, 490.0, 490.0, 490.0, 490...","[499.0, 499.0, 499.0, 499.0, 499.0, 499.0, 499..."
1,"[688.0, 688.0, 688.0, 688.0, 688.0, 688.0, 688...","[560.0, 560.0, 560.0, 560.0, 560.0, 560.0, 560...","[503.0, 773.0, 773.0, 773.0, 773.0, 773.0, 773...","[722.0, 722.0, 722.0, 722.0, 722.0, 722.0, 722...","[620.0, 620.0, 372.0, 620.0, 620.0, 620.0, 620...","[721.0, 721.0, 721.0, 721.0, 433.0, 721.0, 721...","[568.0, 568.0, 568.0, 568.0, 568.0, 568.0, 568...","[772.0, 772.0, 772.0, 772.0, 772.0, 772.0, 772...","[669.0, 669.0, 434.0, 669.0, 669.0, 669.0, 669...","[759.0, 759.0, 759.0, 759.0, 759.0, 759.0, 759...",...,"[528.0, 528.0, 528.0, 528.0, 528.0, 528.0, 528...","[699.0, 699.0, 699.0, 699.0, 699.0, 699.0, 699...","[511.0, 511.0, 511.0, 511.0, 511.0, 511.0, 511...","[490.0, 490.0, 490.0, 490.0, 490.0, 490.0, 490...","[694.0, 694.0, 694.0, 694.0, 694.0, 694.0, 694...","[549.0, 549.0, 549.0, 549.0, 549.0, 549.0, 549...","[702.0, 702.0, 702.0, 702.0, 702.0, 702.0, 702...","[670.0, 670.0, 670.0, 670.0, 670.0, 670.0, 670...","[490.0, 490.0, 490.0, 490.0, 490.0, 490.0, 490...","[499.0, 499.0, 499.0, 499.0, 499.0, 499.0, 499..."
2,"[688.0, 688.0, 688.0, 688.0, 688.0, 688.0, 688...","[560.0, 560.0, 560.0, 560.0, 560.0, 560.0, 560...","[773.0, 773.0, 773.0, 773.0, 773.0, 541.0, 773...","[722.0, 722.0, 722.0, 722.0, 722.0, 722.0, 722...","[620.0, 620.0, 620.0, 620.0, 620.0, 620.0, 620...","[721.0, 505.0, 721.0, 721.0, 721.0, 721.0, 721...","[568.0, 568.0, 568.0, 568.0, 568.0, 568.0, 568...","[772.0, 502.0, 772.0, 772.0, 772.0, 772.0, 772...","[468.0, 669.0, 669.0, 669.0, 669.0, 669.0, 669...","[759.0, 759.0, 569.0, 759.0, 759.0, 759.0, 759...",...,"[528.0, 528.0, 528.0, 528.0, 369.0, 528.0, 528...","[699.0, 699.0, 699.0, 699.0, 699.0, 699.0, 699...","[511.0, 511.0, 511.0, 511.0, 511.0, 511.0, 511...","[490.0, 490.0, 490.0, 490.0, 490.0, 490.0, 490...","[694.0, 694.0, 694.0, 694.0, 694.0, 694.0, 694...","[549.0, 549.0, 549.0, 549.0, 549.0, 549.0, 549...","[702.0, 702.0, 702.0, 702.0, 702.0, 702.0, 702...","[670.0, 670.0, 670.0, 670.0, 670.0, 670.0, 670...","[490.0, 294.0, 490.0, 490.0, 490.0, 490.0, 490...","[499.0, 499.0, 349.0, 449.0, 499.0, 374.0, 499..."
3,"[688.0, 688.0, 688.0, 688.0, 688.0, 447.0, 688...","[560.0, 560.0, 560.0, 560.0, 560.0, 560.0, 560...","[773.0, 773.0, 773.0, 773.0, 773.0, 773.0, 773...","[722.0, 722.0, 722.0, 722.0, 650.0, 722.0, 722...","[620.0, 620.0, 620.0, 620.0, 620.0, 620.0, 620...","[721.0, 721.0, 721.0, 721.0, 721.0, 721.0, 721...","[568.0, 568.0, 568.0, 568.0, 568.0, 568.0, 568...","[772.0, 772.0, 772.0, 656.0, 772.0, 772.0, 772...","[669.0, 669.0, 669.0, 669.0, 669.0, 669.0, 669...","[759.0, 759.0, 759.0, 759.0, 759.0, 759.0, 759...",...,"[528.0, 475.0, 528.0, 449.0, 528.0, 528.0, 528...","[699.0, 699.0, 699.0, 699.0, 699.0, 699.0, 699...","[511.0, 511.0, 511.0, 511.0, 511.0, 511.0, 511...","[490.0, 490.0, 490.0, 490.0, 490.0, 490.0, 490...","[694.0, 694.0, 694.0, 694.0, 694.0, 694.0, 694...","[549.0, 549.0, 549.0, 549.0, 549.0, 549.0, 549...","[702.0, 702.0, 702.0, 702.0, 702.0, 702.0, 702...","[670.0, 670.0, 670.0, 670.0, 670.0, 670.0, 670...","[490.0, 490.0, 490.0, 490.0, 490.0, 490.0, 490...","[499.0, 499.0, 499.0, 499.0, 499.0, 499.0, 499..."
4,"[688.0, 688.0, 688.0, 688.0, 688.0, 688.0, 688...","[560.0, 560.0, 560.0, 560.0, 560.0, 560.0, 560...","[773.0, 773.0, 773.0, 773.0, 773.0, 773.0, 773...","[722.0, 722.0, 722.0, 722.0, 722.0, 722.0, 722...","[620.0, 620.0, 620.0, 620.0, 620.0, 620.0, 620...","[613.0, 721.0, 721.0, 721.0, 721.0, 721.0, 721...","[568.0, 568.0, 568.0, 568.0, 568.0, 568.0, 568...","[772.0, 772.0, 772.0, 772.0, 772.0, 772.0, 772...","[669.0, 669.0, 669.0, 669.0, 669.0, 669.0, 669...","[759.0, 759.0, 759.0, 759.0, 759.0, 759.0, 759...",...,"[528.0, 528.0, 449.0, 528.0, 528.0, 528.0, 528...","[699.0, 699.0, 699.0, 699.0, 699.0, 699.0, 419...","[511.0, 511.0, 358.0, 511.0, 511.0, 511.0, 511...","[490.0, 490.0, 490.0, 490.0, 490.0, 490.0, 490...","[694.0, 694.0, 694.0, 694.0, 694.0, 694.0, 694...","[549.0, 549.0, 549.0, 549.0, 549.0, 549.0, 549...","[702.0, 702.0, 702.0, 702.0, 702.0, 702.0, 702...","[603.0, 670.0, 402.0, 670.0, 670.0, 670.0, 670...","[490.0, 490.0, 490.0, 490.0, 490.0, 490.0, 490...","[499.0, 499.0, 499.0, 499.0, 499.0, 499.0, 499..."


In [11]:
# Write results to EXPORT_PATH via purchase.dump().
# NOTE(review): which='mappings' presumably restricts the export to purchase.mappings — confirm in classes.py.
purchase.dump(EXPORT_PATH, which='mappings')