In [2]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
import surprise
from surprise import Dataset, Reader
from surprise.model_selection import cross_validate
from surprise.prediction_algorithms.knns import KNNBasic

# Ignore warnings
import warnings
warnings.filterwarnings("ignore")

In [3]:
limit_rows = 7000000
train = pd.read_csv("train_ver2.csv", nrows=limit_rows)
# test = pd.read_csv("test_ver2.csv", nrows=limit_rows)
data_desc = pd.read_csv("data_desc.csv")

In [4]:
train.head()

Unnamed: 0,fecha_dato,ncodpers,ind_empleado,pais_residencia,sexo,age,fecha_alta,ind_nuevo,antiguedad,indrel,...,ind_hip_fin_ult1,ind_plan_fin_ult1,ind_pres_fin_ult1,ind_reca_fin_ult1,ind_tjcr_fin_ult1,ind_valo_fin_ult1,ind_viv_fin_ult1,ind_nomina_ult1,ind_nom_pens_ult1,ind_recibo_ult1
0,2015-01-28,1375586,N,ES,H,35,2015-01-12,0.0,6,1.0,...,0,0,0,0,0,0,0,0.0,0.0,0
1,2015-01-28,1050611,N,ES,V,23,2012-08-10,0.0,35,1.0,...,0,0,0,0,0,0,0,0.0,0.0,0
2,2015-01-28,1050612,N,ES,V,23,2012-08-10,0.0,35,1.0,...,0,0,0,0,0,0,0,0.0,0.0,0
3,2015-01-28,1050613,N,ES,H,22,2012-08-10,0.0,35,1.0,...,0,0,0,0,0,0,0,0.0,0.0,0
4,2015-01-28,1050614,N,ES,V,23,2012-08-10,0.0,35,1.0,...,0,0,0,0,0,0,0,0.0,0.0,0


In [5]:
train.isnull().mean() * 100

fecha_dato                0.000000
ncodpers                  0.000000
ind_empleado              0.396200
pais_residencia           0.396200
sexo                      0.396686
age                       0.000000
fecha_alta                0.396200
ind_nuevo                 0.396200
antiguedad                0.000000
indrel                    0.396200
ult_fec_cli_1t           99.843557
indrel_1mes               1.497200
tiprel_1mes               1.497200
indresi                   0.396200
indext                    0.396200
conyuemp                 99.985743
canal_entrada             1.703600
indfall                   0.396200
tipodom                   0.396200
cod_prov                  0.915571
nomprov                   0.915571
ind_actividad_cliente     0.396200
renta                    17.889371
segmento                  1.718914
ind_ahor_fin_ult1         0.000000
ind_aval_fin_ult1         0.000000
ind_cco_fin_ult1          0.000000
ind_cder_fin_ult1         0.000000
ind_cno_fin_ult1    

In [6]:
train.drop(columns=['ult_fec_cli_1t','conyuemp'], inplace=True)

In [7]:
pd.set_option("display.max_columns",1000)

ytrain_tmp = train.iloc[:, 22:]
xtrain_temp = train.iloc[:, :22]

X_train, X_test, y_train, y_test = train_test_split(xtrain_temp, ytrain_tmp, test_size=0.2, random_state=42)
train = X_train.join(y_train)
val = X_test.join(y_test)
# print(train.head)
print(train.shape)
print(val.shape)

(5600000, 46)
(1400000, 46)


In [8]:
# Convert one-hot encoded vectors to a single column

vec = train.iloc[:, 22:].idxmax(1)
encoder = LabelEncoder()
tar = encoder.fit_transform(vec)

train['service_opted'] = tar

# save memory
train['service_opted'] = train['service_opted'].astype('uint8')

train.head(10)

Unnamed: 0,fecha_dato,ncodpers,ind_empleado,pais_residencia,sexo,age,fecha_alta,ind_nuevo,antiguedad,indrel,indrel_1mes,tiprel_1mes,indresi,indext,canal_entrada,indfall,tipodom,cod_prov,nomprov,ind_actividad_cliente,renta,segmento,ind_ahor_fin_ult1,ind_aval_fin_ult1,ind_cco_fin_ult1,ind_cder_fin_ult1,ind_cno_fin_ult1,ind_ctju_fin_ult1,ind_ctma_fin_ult1,ind_ctop_fin_ult1,ind_ctpp_fin_ult1,ind_deco_fin_ult1,ind_deme_fin_ult1,ind_dela_fin_ult1,ind_ecue_fin_ult1,ind_fond_fin_ult1,ind_hip_fin_ult1,ind_plan_fin_ult1,ind_pres_fin_ult1,ind_reca_fin_ult1,ind_tjcr_fin_ult1,ind_valo_fin_ult1,ind_viv_fin_ult1,ind_nomina_ult1,ind_nom_pens_ult1,ind_recibo_ult1,service_opted
2283322,2015-04-28,1352329,N,ES,H,22,2014-11-11,0.0,8,1.0,1.0,I,S,S,KHE,N,1.0,18.0,GRANADA,1.0,115591.44,03 - UNIVERSITARIO,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,1,2
3973547,2015-07-28,747260,N,ES,H,82,2008-02-11,0.0,89,1.0,1.0,I,S,N,KFC,N,1.0,24.0,LEON,0.0,50868.06,02 - PARTICULARES,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0,2
3593144,2015-06-28,211080,N,ES,V,47,2000-12-26,0.0,175,1.0,1.0,A,S,N,KAT,N,1.0,28.0,MADRID,0.0,171506.19,02 - PARTICULARES,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0,0
6029823,2015-09-28,1022312,N,ES,V,24,2012-07-06,0.0,38,1.0,1.0,I,S,N,KHE,N,1.0,46.0,VALENCIA,0.0,165057.84,03 - UNIVERSITARIO,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0,2
1116625,2015-02-28,967122,N,ES,V,25,2011-10-21,0.0,45,1.0,1.0,I,S,N,KHE,N,1.0,45.0,TOLEDO,0.0,37616.01,03 - UNIVERSITARIO,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0,2
5822449,2015-09-28,265163,N,ES,V,42,2001-09-21,0.0,168,1.0,1.0,I,S,N,KAT,N,1.0,41.0,SEVILLA,0.0,36088.47,02 - PARTICULARES,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0,0
3317721,2015-06-28,1317359,N,ES,V,55,2014-09-25,0.0,10,1.0,1.0,A,S,S,KFC,N,1.0,12.0,CASTELLON,1.0,133955.07,01 - TOP,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0.0,0.0,0,12
2573810,2015-05-28,1086399,N,ES,H,32,2012-10-23,0.0,33,1.0,1.0,I,S,N,KHE,N,1.0,41.0,SEVILLA,0.0,87818.7,03 - UNIVERSITARIO,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0,2
4836217,2015-08-28,486562,N,ES,V,56,2004-08-09,0.0,132,1.0,1.0,A,S,N,KAT,N,1.0,28.0,MADRID,1.0,193589.88,02 - PARTICULARES,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0,7
2040007,2015-04-28,149346,N,ES,H,43,1999-11-15,0.0,188,1.0,1.0,A,S,N,KAT,N,1.0,28.0,MADRID,1.0,127356.0,02 - PARTICULARES,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,1,2


In [9]:
# user-item matrix, each entry indicates the number of times service opted by that user
user_item_matrix = pd.crosstab(index=train["ncodpers"], columns = encoder.transform(vec), values=1, aggfunc='sum')
user_item_matrix.fillna(0, inplace=True)
user_item_matrix

col_0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23
ncodpers,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1
15889,0.0,0.0,6.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
15890,0.0,0.0,0.0,0.0,8.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
15891,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
15892,0.0,0.0,5.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
15893,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,8.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1495481,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1495482,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1495483,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1495484,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [10]:
from tqdm import tqdm

u_arr = np.array(user_item_matrix)

for row, item in tqdm(enumerate(u_arr)):
    for column, item_value in enumerate(item):
        # Change the count to ratio
        u_arr[row, column] = u_arr[row, column] / sum(item)

user_item_ratio_matrix = pd.DataFrame(u_arr, columns = user_item_matrix.columns, index = user_item_matrix.index)

# user_item_ratio_matrix

895674it [00:56, 15809.78it/s]


In [11]:
# Stack and get all values in single column
uir_stack = user_item_ratio_matrix.stack().to_frame()

uir_stack['ncodpers'] = [idx[0] for idx in uir_stack.index]
uir_stack['product_opted'] = [idx[1] for idx in uir_stack.index]

uir_stack.reset_index(drop=True, inplace=True)

uir_stack.rename(columns={0:"product_selection_ratio"}, inplace=True)


uir_stack = uir_stack[['ncodpers','product_opted', 'product_selection_ratio']]

# Drop all the rows with 0 entries(the user has never bought any product)
uir_stack.drop(uir_stack[uir_stack['product_selection_ratio']==0].index, inplace=True)

uir_stack.reset_index(drop=True, inplace=True)

uir_stack

Unnamed: 0,ncodpers,product_opted,product_selection_ratio
0,15889,2,1.000000
1,15890,4,1.000000
2,15891,0,0.500000
3,15891,2,0.666667
4,15892,2,0.555556
...,...,...,...
965517,1495481,0,1.000000
965518,1495482,0,1.000000
965519,1495483,0,1.000000
965520,1495484,0,1.000000


In [12]:
def generate_recommendation(uid, model):    
    rec = [(uid, sid, data_desc[data_desc['Column Name'] == encoder.inverse_transform([sid])[0]]['Description'].values[0], model.predict(uid,sid).est) for sid in range(24)]
    rec = pd.DataFrame(rec, columns=['uid', 'sid', 'product', 'pred'])
    
    rec.sort_values("pred", ascending=False, inplace=True)
    rec.reset_index(drop=True, inplace=True)
    
    return rec

In [13]:
user_to_remove = []

for index, row in tqdm(enumerate(user_item_matrix.values)):
    non_zeroes = np.count_nonzero(row)
    if non_zeroes < 3:
        user_to_remove.append(user_item_matrix.index[index])

895674it [00:01, 652841.66it/s]


In [14]:
# Fetch the index from the uir_stack where the user_to_remove exists
user_to_del = uir_stack[uir_stack['ncodpers'].isin(user_to_remove)].index

uir_stack_rem = uir_stack.drop(user_to_del, axis=0, inplace=False)

uir_stack_rem.head()

Unnamed: 0,ncodpers,product_opted,product_selection_ratio
34,15921,2,0.333333
35,15921,4,0.789474
36,15921,7,0.471074
245,16196,2,0.125
246,16196,4,0.842105


In [15]:
r = Reader(line_format='user item rating', sep=',', rating_scale=(0,1), skip_lines=1)

data_reduced = Dataset.load_from_df(uir_stack_rem, reader = r)
train_reduced = data_reduced.build_full_trainset()

In [16]:
#similarity options.
so = {'name': 'cosine', 'user_based': True}

# use KNN to find similar items
sim = KNNBasic(sim_options = so, verbose = True, random_state = 42)
sim_results = cross_validate(algo = sim, data = data_reduced, cv = 4)


Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.


In [17]:
# Train the algorithm on the training data set
sim_user = KNNBasic(sim_options = so, verbose = False, random_state = 33)
sim_user.fit(train_reduced)

<surprise.prediction_algorithms.knns.KNNBasic at 0x7fece05860a0>

In [18]:
#checking validation data set, the provided test data set doesn't contain the labels
#therefore use validation as test data set

pd.set_option("display.max_columns",1000)
val.head(10)

Unnamed: 0,fecha_dato,ncodpers,ind_empleado,pais_residencia,sexo,age,fecha_alta,ind_nuevo,antiguedad,indrel,indrel_1mes,tiprel_1mes,indresi,indext,canal_entrada,indfall,tipodom,cod_prov,nomprov,ind_actividad_cliente,renta,segmento,ind_ahor_fin_ult1,ind_aval_fin_ult1,ind_cco_fin_ult1,ind_cder_fin_ult1,ind_cno_fin_ult1,ind_ctju_fin_ult1,ind_ctma_fin_ult1,ind_ctop_fin_ult1,ind_ctpp_fin_ult1,ind_deco_fin_ult1,ind_deme_fin_ult1,ind_dela_fin_ult1,ind_ecue_fin_ult1,ind_fond_fin_ult1,ind_hip_fin_ult1,ind_plan_fin_ult1,ind_pres_fin_ult1,ind_reca_fin_ult1,ind_tjcr_fin_ult1,ind_valo_fin_ult1,ind_viv_fin_ult1,ind_nomina_ult1,ind_nom_pens_ult1,ind_recibo_ult1
949791,2015-02-28,1212414,N,ES,H,23,2013-10-31,0.0,20,1.0,1.0,I,S,N,KHE,N,1.0,41.0,SEVILLA,0.0,82737.18,03 - UNIVERSITARIO,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0
6286578,2015-09-28,1427647,N,ES,H,21,2015-08-04,1.0,1,1.0,1.0,A,S,N,KHQ,N,1.0,8.0,BARCELONA,0.0,272585.82,03 - UNIVERSITARIO,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0
1611109,2015-03-28,1161393,N,ES,H,22,2013-08-09,0.0,23,1.0,1.0,I,S,N,KHE,N,1.0,8.0,BARCELONA,0.0,118543.65,03 - UNIVERSITARIO,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0
5864025,2015-09-28,1302249,N,ES,H,21,2014-09-04,0.0,12,1.0,1.0,I,S,N,KHE,N,1.0,30.0,MURCIA,0.0,,03 - UNIVERSITARIO,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0
6815174,2015-10-28,277860,N,ES,H,55,2001-10-02,0.0,169,1.0,1.0,I,S,N,KAT,N,1.0,45.0,TOLEDO,0.0,60783.39,02 - PARTICULARES,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0
4095329,2015-07-28,105889,N,ES,V,47,1998-09-23,0.0,202,1.0,1.0,A,S,N,KAT,N,1.0,28.0,MADRID,1.0,337141.32,02 - PARTICULARES,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0.0,0.0,0
4858378,2015-08-28,710387,N,ES,V,41,2007-08-09,0.0,96,1.0,1.0,I,S,N,KFC,N,1.0,20.0,GIPUZKOA,0.0,,02 - PARTICULARES,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0
6248087,2015-09-28,1363014,N,ES,H,33,2014-11-28,0.0,10,1.0,1.0,A,S,N,KHE,N,1.0,41.0,SEVILLA,1.0,114808.29,03 - UNIVERSITARIO,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0
3476620,2015-06-28,297098,N,ES,V,72,2001-11-16,0.0,164,1.0,1.0,I,S,N,KAT,N,1.0,28.0,MADRID,0.0,184076.58,02 - PARTICULARES,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0
3999058,2015-07-28,824661,N,ES,V,68,2012-11-19,0.0,32,1.0,1.0,A,S,N,KAT,N,1.0,48.0,BIZKAIA,1.0,,02 - PARTICULARES,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0


In [19]:
#convert to english names for better undersanding
eng_names = {'ind_ahor_fin_ult1': 'Saving Account', 'ind_aval_fin_ult1': 'Guarantees','ind_cco_fin_ult1':'Current Accounts', 
             'ind_cder_fin_ult1':'Derivada Account','ind_cno_fin_ult1':'Payroll Account', 'ind_ctju_fin_ult1':'	Junior Account',
            'ind_ctma_fin_ult1':'Más particular Account','ind_ctop_fin_ult1':'particular Account','ind_ctpp_fin_ult1':'particular Plus Account',
            'ind_deco_fin_ult1':'Short-term deposits','ind_deme_fin_ult1':'Medium-term deposits','ind_dela_fin_ult1':'Long-term deposits',
            'ind_ecue_fin_ult1':'e-account','ind_fond_fin_ult1':'Funds','ind_hip_fin_ult1':'Mortgage',
            'ind_plan_fin_ult1':'Pensions-fin','ind_pres_fin_ult1':'Loans','ind_reca_fin_ult1':'Taxes',
            'ind_tjcr_fin_ult1':'Credit Card','ind_valo_fin_ult1':'Securities','ind_viv_fin_ult1':'Home Account',
            'ind_nomina_ult1':'Payroll','ind_nom_pens_ult1':'Pensions-ult1','ind_recibo_ult1':'Direct Debit'}

y_test_eng = y_test.rename(columns = eng_names)

In [20]:
val_eng = X_test.join(y_test_eng)

In [21]:
#process test dataset, limiting to a subset of 100000 for faster processing
y_test_eng_sub = y_test_eng[:100000]
new_df = pd.DataFrame(columns=['uid', 'true_product'])

# Iterate over the index of ytest and add products to new_df where their value is 1
for i in y_test_eng_sub.index:
    product_names = []
    for col in y_test_eng_sub.columns:
        cell = y_test_eng_sub.loc[i, col]
        if isinstance(cell, (list, tuple, set, pd.core.series.Series)):
            if not np.array_equal(cell, np.array([0, 0])):
                product_names.append(col)
        else:
            if cell == 1:
                product_names.append(col)
#     print(product_names)
    if product_names:
        new_row = {'uid': i, 'true_product': ','.join(product_names)}
        new_df = new_df.append(new_row, ignore_index=True)

print(new_df)

           uid                                       true_product
0       949791                                   Current Accounts
1      6286578                                   Current Accounts
2      1611109                                   Current Accounts
3      5864025                                   Current Accounts
4      6815174                                 particular Account
...        ...                                                ...
87508  5875611                                   Current Accounts
87509  5874173                                   Current Accounts
87510  2773863                                   Current Accounts
87511  6165216                                   Current Accounts
87512  6790715  Current Accounts,particular Account,particular...

[87513 rows x 2 columns]


In [22]:
#process prediction dataset
prediction = pd.DataFrame(columns=['uid', 'predicted_product'])

for i in y_test_eng_sub.index:
    predicted = generate_recommendation(uid = i, model = sim_user)
    product_names = []
    if predicted['pred'].nunique() == 1:
        product_names = predicted['product'].tolist()
    else:
        product_names = predicted.head(7)['product'].tolist()

    if product_names:
        new_row = {'uid': i, 'predicted_product': ','.join(product_names)}
        prediction = prediction.append(new_row, ignore_index=True)

print(prediction.head())

       uid                                  predicted_product
0   949791  Saving Account,Guarantees,Securities,Credit Ca...
1  6286578  Saving Account,Guarantees,Securities,Credit Ca...
2  1611109  Saving Account,Guarantees,Securities,Credit Ca...
3  5864025  Saving Account,Guarantees,Securities,Credit Ca...
4  6815174  Saving Account,Guarantees,Securities,Credit Ca...


In [23]:
merged_df = pd.merge(prediction, new_df, on='uid')
merged_df['precision'] = merged_df.apply(lambda row: len(set(row['true_product']).intersection(set(row['predicted_product'][:7]))) / min(len(row['true_product']), 7), axis=1)
mean_avg_precision_at_7 = merged_df.groupby('uid')['precision'].mean().mean()


In [24]:
merged_df.tail(10)

Unnamed: 0,uid,predicted_product,true_product,precision
87503,1728196,"Saving Account,Guarantees,Securities,Credit Ca...",Current Accounts,0.285714
87504,941078,"Saving Account,Guarantees,Securities,Credit Ca...",particular Account,0.571429
87505,6015044,"Saving Account,Guarantees,Securities,Credit Ca...",Current Accounts,0.285714
87506,1726880,"Saving Account,Guarantees,Securities,Credit Ca...",Current Accounts,0.285714
87507,4198118,"Saving Account,Guarantees,Securities,Credit Ca...","Payroll Account,particular Plus Account,Long-t...",0.714286
87508,5875611,"Saving Account,Guarantees,Securities,Credit Ca...",Current Accounts,0.285714
87509,5874173,"Saving Account,Guarantees,Securities,Credit Ca...",Current Accounts,0.285714
87510,2773863,"Saving Account,Guarantees,Securities,Credit Ca...",Current Accounts,0.285714
87511,6165216,"Saving Account,Guarantees,Securities,Credit Ca...",Current Accounts,0.285714
87512,6790715,"Saving Account,Guarantees,Securities,Credit Ca...","Current Accounts,particular Account,particular...",0.571429


In [25]:
#MAP@7 on 100000
mean_avg_precision_at_7

0.40641178208614526