### Imports

In [62]:
import pandas as pd
import gensim
import gensim.downloader
import numpy as np
import tensorflow as tf
import random

### Load data

In [63]:
train_customers = pd.read_csv("../data/train_customers.csv")
test_customers = pd.read_csv("../data/test_customers.csv")

In [64]:
train_customers = list(train_customers['akeed_customer_id'])

In [65]:
test_customers = list(test_customers['akeed_customer_id'])

In [66]:
orders = pd.read_csv("../data/orders.csv")
vendors = pd.read_csv("../data/vendors.csv")

  interactivity=interactivity, compiler=compiler, result=result)


### Data Filtering

In [67]:

orders = orders[["customer_id", "vendor_id"]]

In [68]:
orders

Unnamed: 0,customer_id,vendor_id
0,92PEE24,105
1,QS68UD8,294
2,MB7VY5F,83
3,KDJ951Y,90
4,BAL0RVT,83
...,...,...
135298,L6LFY6H,67
135299,2GMMVGI,79
135300,LYTAAV0,28
135301,NEV9A5D,841


In [8]:
orders_cnt = pd.DataFrame(orders.groupby('customer_id').size(), columns=['count'])

In [69]:
orders_cnt.sort_values("count", ascending=False)

Unnamed: 0,customer_id,count
16463,XW90EAP,262
14407,TL7Z2DM,151
15259,VDEJEMP,146
8525,HJFTTGW,128
5420,B28LJKM,119
...,...,...
9263,J3EAF1I,2
9260,J3BX5SX,2
9259,J3742VA,2
9250,J2LYLXT,2


In [70]:
orders_cnt = orders_cnt[orders_cnt['count']!=1].reset_index()

In [71]:
customer_ids = list(orders_cnt['customer_id'])

In [72]:
orders_filtered = orders[orders['customer_id'].isin(customer_ids)]

In [73]:
orders_filtered

Unnamed: 0,customer_id,vendor_id
0,92PEE24,105
1,QS68UD8,294
2,MB7VY5F,83
4,BAL0RVT,83
5,U263OCD,83
...,...,...
135298,L6LFY6H,67
135299,2GMMVGI,79
135300,LYTAAV0,28
135301,NEV9A5D,841


In [74]:
vendors = vendors[["id","vendor_tag_name"]]

In [75]:
vendors

Unnamed: 0,id,vendor_tag_name
0,4,"Arabic,Breakfast,Burgers,Desserts,Free Deliver..."
1,13,"Breakfast,Cakes,Crepes,Italian,Pasta,Pizzas,Sa..."
2,20,"Breakfast,Desserts,Free Delivery,Indian"
3,23,"Burgers,Desserts,Fries,Salads"
4,28,Burgers
...,...,...
95,849,"American,Breakfast,Burgers,Cafe,Desserts,Free ..."
96,855,"American,Burgers,Desserts,Free Delivery,Fries,..."
97,856,"American,Breakfast,Burgers,Cafe,Desserts,Free ..."
98,858,"American,Breakfast,Burgers,Cafe,Desserts,Free ..."


In [76]:
dataset = pd.merge(orders_filtered, vendors, left_on='vendor_id', right_on='id')
dataset

Unnamed: 0,customer_id,vendor_id,id,vendor_tag_name
0,92PEE24,105,105,"American,Burgers,Free Delivery,Hot Dogs,Pasta"
1,I9DNSMJ,105,105,"American,Burgers,Free Delivery,Hot Dogs,Pasta"
2,I9DNSMJ,105,105,"American,Burgers,Free Delivery,Hot Dogs,Pasta"
3,4BQI76G,105,105,"American,Burgers,Free Delivery,Hot Dogs,Pasta"
4,V6JC4VF,105,105,"American,Burgers,Free Delivery,Hot Dogs,Pasta"
...,...,...,...,...
125379,PGE4TTF,907,907,"Fatayers,Mojitos ,Sandwiches"
125380,NNG2RIE,907,907,"Fatayers,Mojitos ,Sandwiches"
125381,1UMBP8X,907,907,"Fatayers,Mojitos ,Sandwiches"
125382,3EABH9S,907,907,"Fatayers,Mojitos ,Sandwiches"


In [77]:
# helper dict
customer_reataurants = {}
for i, row in dataset.iterrows():
    if row['customer_id'] not in customer_reataurants.keys():
        customer_reataurants[row['customer_id']]=[row['vendor_id']]
    else:
        if row['vendor_id'] not in customer_reataurants[row['customer_id']]:
            customer_reataurants[row['customer_id']].append(row['vendor_id'])

In [78]:
customer_reataurants

{'92PEE24': [105],
 'I9DNSMJ': [105, 83, 193, 180, 90, 43, 84, 44, 537, 681],
 '4BQI76G': [105, 289, 160, 846],
 'V6JC4VF': [105, 193, 84, 419, 386, 681],
 'IJHNJ90': [105, 85, 113, 148, 537],
 'IB555AJ': [105, 294, 83, 104, 304, 299, 145, 84, 44, 356, 537, 459, 386],
 'NVDVNOO': [105, 199],
 'VLXLNZM': [105, 83, 90, 160, 148, 459],
 'OD336VP': [105, 154, 193, 106, 189, 86],
 'TVSPG2H': [105, 193, 85, 189, 43, 44, 356, 391, 86, 681],
 'CF80YUJ': [105, 195, 299, 243, 84, 459, 583, 841, 846, 843],
 'DFIDHD7': [105],
 '0ZU8WW6': [105, 154, 193, 195, 145, 843],
 'NGXUX69': [105, 83, 195, 459],
 'YU90KWF': [105, 90, 243, 237, 386],
 '95DOM9X': [105, 145],
 'CLXMEXI': [105, 83, 4, 221, 145, 84, 583],
 'SMB4DEV': [105, 83, 154, 160],
 'LYIHJ4Y': [105, 83, 386],
 'CVYGY00': [105, 90],
 'KE5DMQI': [105, 43, 199, 356],
 'X25Q7CG': [105, 83, 193, 231, 304, 197, 145, 84, 459, 386, 846],
 '6Y7WPB1': [105, 294, 225, 199, 459],
 'MICNNLD': [105, 83, 84],
 'HNAPPT9': [105, 145, 148],
 'QFVI0O3': [105,

In [79]:
restaurant_ids = list(dataset['vendor_id'].unique())

In [80]:
restaurant_ids

[105,
 294,
 83,
 78,
 154,
 193,
 4,
 288,
 85,
 195,
 161,
 196,
 75,
 20,
 221,
 300,
 81,
 180,
 231,
 303,
 225,
 110,
 176,
 33,
 23,
 115,
 113,
 13,
 157,
 159,
 289,
 106,
 134,
 76,
 104,
 310,
 55,
 201,
 274,
 265,
 90,
 304,
 259,
 160,
 28,
 250,
 191,
 189,
 92,
 192,
 271,
 299,
 203,
 298,
 207,
 43,
 197,
 243,
 66,
 145,
 148,
 199,
 84,
 82,
 149,
 188,
 44,
 79,
 237,
 356,
 419,
 391,
 67,
 86,
 537,
 459,
 386,
 295,
 401,
 398,
 216,
 573,
 575,
 577,
 578,
 582,
 583,
 547,
 679,
 681,
 676,
 841,
 846,
 858,
 849,
 845,
 843,
 856,
 855,
 907]

### Build attributes

In [81]:
def split_data_to_history_and_samples(df):
    df = df.groupby('customer_id')
    to_build_attributes = df.apply(lambda x: x.iloc[:x.customer_id.size//2])
    to_train = df.apply(lambda x: x.iloc[x.customer_id.size//2:x.customer_id.size])
    return to_build_attributes, to_train

In [82]:
to_build_attributes, to_train = split_data_to_history_and_samples(dataset)

In [83]:
to_build_attributes = to_build_attributes.reset_index(drop=True)

In [84]:
to_build_attributes

Unnamed: 0,customer_id,vendor_id,id,vendor_tag_name
0,005ECL6,237,237,"American,Burgers,Desserts,Donuts,Fries,Pasta,S..."
1,009UFS1,83,83,"Breakfast,Desserts,Salads,Arabic"
2,009UFS1,193,193,"Arabic,Burgers,Sandwiches,Shawarma,Grills,Pizz..."
3,00GV4J4,259,259,"American,Burgers,Kids meal,Pasta,Sandwiches,Sm..."
4,00HRRAK,113,113,"Arabic,Desserts,Free Delivery,Indian"
...,...,...,...,...
59118,ZZVLIB5,110,110,"Asian,Dimsum,Grills,Japanese,Rice,Soups"
59119,ZZY3N0D,85,85,"Desserts,Omani"
59120,ZZY3N0D,225,225,"Breakfast,Free Delivery,Fresh Juices,Fries,Kar..."
59121,ZZY3N0D,189,189,"Sandwiches,Shawarma,Fresh Juices,Mojitos,Milks..."


In [85]:
vendor_tag = to_build_attributes

In [86]:
vendor_tag = to_build_attributes.groupby(['customer_id'])['vendor_tag_name'].apply(lambda x: list(x))

In [87]:
vendor_tag = vendor_tag.groupby('customer_id').agg(sum)

In [88]:
vendor_tag = pd.DataFrame(vendor_tag).reset_index()

In [89]:
vendor_tag

Unnamed: 0,customer_id,vendor_tag_name
0,005ECL6,"[American,Burgers,Desserts,Donuts,Fries,Pasta,..."
1,009UFS1,"[Breakfast,Desserts,Salads,Arabic, Arabic,Burg..."
2,00GV4J4,"[American,Burgers,Kids meal,Pasta,Sandwiches,S..."
3,00HRRAK,"[Arabic,Desserts,Free Delivery,Indian, Free De..."
4,00HWUU3,"[Desserts,Omani, American,Burgers,Desserts,Moj..."
...,...,...
17521,ZZRJABJ,"[Burgers,Fries,Grills,Mishkak,Mojitos ,Sandwic..."
17522,ZZV76GY,"[Asian,Desserts,Healthy Food,Japanese,Salads,S..."
17523,ZZVGIVK,"[Pizzas,Italian,Breakfast,Soups,Pasta,Salads,D..."
17524,ZZVLIB5,"[Fresh Juices,Healthy Food,Smoothies, Breakfas..."


In [30]:
word2vec_google_news = gensim.downloader.load('word2vec-google-news-300')

In [90]:
def vectorize(x_train, pretrained_model):
    embeddings = []
    for i, text in x_train.iterrows():
        text_vectors = []
        tags = str(text['vendor_tag_name']).replace('Healthy Food', 'healthy').replace('Fresh Juices','juices')\
        .replace('Kids meal','kids').replace('Hot Chocolate', 'chocolate').replace('Free Delivery','delivery')\
        .replace('Family Meal','family').replace('Frozen yoghurt','yoghurt').replace('Spanish Latte','latte')
        tags = tags.replace('\'','').replace('[','').replace(']','').replace(" ","")
        for word in tags.split(","):
            word = word.lower()
            if word in pretrained_model:
                text_vectors.append(pretrained_model[word])
            else:
                text_vectors.append(np.array([0]*pretrained_model.vector_size))
        x_train.at[i, 'vendor_tag_name'] = np.mean(text_vectors, axis=0)
        #text['vendor_tag_name'] = np.mean(text_vectors, axis=0)
        #print(text['vendor_tag_name'])
    return x_train

In [91]:
user_vectors =  vectorize(vendor_tag, word2vec_google_news)

In [92]:
user_vectors = user_vectors.rename(columns = {'vendor_tag_name': 'user_vector'})

In [93]:
to_train = to_train.reset_index(drop=True)

In [94]:
to_train

Unnamed: 0,customer_id,vendor_id,id,vendor_tag_name
0,005ECL6,237,237,"American,Burgers,Desserts,Donuts,Fries,Pasta,S..."
1,009UFS1,304,304,"American,Breakfast,Burgers,Fries,Sandwiches"
2,009UFS1,84,84,"Burgers,Fries,Kids meal,Shawarma"
3,00GV4J4,189,189,"Sandwiches,Shawarma,Fresh Juices,Mojitos,Milks..."
4,00GV4J4,843,843,"American,Breakfast,Burgers,Cafe,Desserts,Free ..."
...,...,...,...,...
66256,ZZY3N0D,356,356,"American,Kids meal,Pasta,Pizzas,Salads"
66257,ZZY3N0D,419,419,"Arabic,Kushari,Sandwiches"
66258,ZZY3N0D,86,86,"Cakes,Crepes,Desserts,Donuts,Fresh Juices,Ice ..."
66259,ZZY3N0D,537,537,"American,Burgers,Desserts,Free Delivery,Fries,..."


In [95]:
restaurant_vectors =  vectorize(to_train, word2vec_google_news)

In [96]:
restaurant_vectors

Unnamed: 0,customer_id,vendor_id,id,vendor_tag_name
0,005ECL6,237,237,"[-0.15133667, 0.05897522, 0.09327698, 0.575439..."
1,009UFS1,304,304,"[-0.14379883, 0.023950195, 0.059326172, 0.5886..."
2,009UFS1,84,84,"[-0.0473938, 0.030151367, 0.08917236, 0.558105..."
3,00GV4J4,189,189,"[-0.17141113, -0.10946045, 0.0920166, 0.392871..."
4,00GV4J4,843,843,"[-0.08470459, 0.014013672, -0.01048584, 0.4401..."
...,...,...,...,...
66256,ZZY3N0D,356,356,"[-0.118896484, 0.1519043, 0.121679686, 0.50390..."
66257,ZZY3N0D,419,419,"[0.023763020833333332, 0.0233154296875, 0.0254..."
66258,ZZY3N0D,86,86,"[-0.1900906, -0.1623603, 0.0057271323, 0.36105..."
66259,ZZY3N0D,537,537,"[-0.11635045, -0.029715402, 0.042567663, 0.475..."


In [97]:
restaurant_vectors = restaurant_vectors.rename(columns = {'vendor_tag_name': 'restaurant_vector'})

In [98]:
restaurant_vectors.drop(['id'], axis='columns', inplace=True)

In [99]:
restaurant_vectors

Unnamed: 0,customer_id,vendor_id,restaurant_vector
0,005ECL6,237,"[-0.15133667, 0.05897522, 0.09327698, 0.575439..."
1,009UFS1,304,"[-0.14379883, 0.023950195, 0.059326172, 0.5886..."
2,009UFS1,84,"[-0.0473938, 0.030151367, 0.08917236, 0.558105..."
3,00GV4J4,189,"[-0.17141113, -0.10946045, 0.0920166, 0.392871..."
4,00GV4J4,843,"[-0.08470459, 0.014013672, -0.01048584, 0.4401..."
...,...,...,...
66256,ZZY3N0D,356,"[-0.118896484, 0.1519043, 0.121679686, 0.50390..."
66257,ZZY3N0D,419,"[0.023763020833333332, 0.0233154296875, 0.0254..."
66258,ZZY3N0D,86,"[-0.1900906, -0.1623603, 0.0057271323, 0.36105..."
66259,ZZY3N0D,537,"[-0.11635045, -0.029715402, 0.042567663, 0.475..."


In [100]:
merged_vectors = pd.merge(restaurant_vectors, user_vectors, on = 'customer_id')

In [101]:
merged_vectors

Unnamed: 0,customer_id,vendor_id,restaurant_vector,user_vector
0,005ECL6,237,"[-0.15133667, 0.05897522, 0.09327698, 0.575439...","[-0.15133667, 0.05897522, 0.09327698, 0.575439..."
1,009UFS1,304,"[-0.14379883, 0.023950195, 0.059326172, 0.5886...","[-0.07784757, 0.050015535, 0.0772816, 0.424360..."
2,009UFS1,84,"[-0.0473938, 0.030151367, 0.08917236, 0.558105...","[-0.07784757, 0.050015535, 0.0772816, 0.424360..."
3,00GV4J4,189,"[-0.17141113, -0.10946045, 0.0920166, 0.392871...","[-0.15634155, 0.0940094, 0.062149048, 0.487304..."
4,00GV4J4,843,"[-0.08470459, 0.014013672, -0.01048584, 0.4401...","[-0.15634155, 0.0940094, 0.062149048, 0.487304..."
...,...,...,...,...
66256,ZZY3N0D,356,"[-0.118896484, 0.1519043, 0.121679686, 0.50390...","[-0.13615337171052633, -0.05586644222861842, 0..."
66257,ZZY3N0D,419,"[0.023763020833333332, 0.0233154296875, 0.0254...","[-0.13615337171052633, -0.05586644222861842, 0..."
66258,ZZY3N0D,86,"[-0.1900906, -0.1623603, 0.0057271323, 0.36105...","[-0.13615337171052633, -0.05586644222861842, 0..."
66259,ZZY3N0D,537,"[-0.11635045, -0.029715402, 0.042567663, 0.475...","[-0.13615337171052633, -0.05586644222861842, 0..."


### Build dataset

In [109]:
merged_vectors[merged_vectors['vendor_id']==419]['restaurant_vector'].iloc[0]

array([ 0.02376302,  0.02331543,  0.02547201,  0.28027344, -0.00211589,
        0.20507812,  0.05094401, -0.0365804 , -0.0255483 ,  0.21223958,
       -0.06445312,  0.0008138 , -0.09960938,  0.07063802, -0.12548828,
        0.01985677, -0.12353516, -0.00777181,  0.06576029, -0.18432617,
        0.02351888,  0.00406901,  0.09554036, -0.02644857, -0.11214193,
       -0.00325521, -0.20638021,  0.1266276 ,  0.04907227, -0.09871419,
       -0.10416667, -0.05314128,  0.01204427,  0.05472819, -0.03694661,
        0.02945964,  0.07069906,  0.15657552,  0.02937826,  0.01078288,
       -0.02799479,  0.04459635,  0.0641276 ,  0.00842285,  0.05777995,
       -0.1344401 , -0.17936198,  0.0016276 , -0.12369792, -0.08081055,
       -0.07633464, -0.05216471,  0.06966146,  0.03011068,  0.04549154,
       -0.00455729,  0.02539062, -0.05155436, -0.1180013 , -0.1258138 ,
       -0.07405599,  0.10514323,  0.0834554 , -0.05777995,  0.07145182,
       -0.14713542, -0.20279948, -0.0456543 , -0.02207438,  0.09

In [305]:
data = pd.DataFrame()
for i, row in merged_vectors.iterrows():
    vectors = []
    vectors.append(row['restaurant_vector'])
    vectors.append(row['user_vector'])
    data = data.append({"x":np.mean(vectors, axis=0),"y":1,"user":row['customer_id'],"rest":row['vendor_id']}, ignore_index=True)
    random_restaurant = random.choice(restaurant_ids)
    if random_restaurant not in customer_reataurants[row['customer_id']]:
        vectors = []
        rest_vector = merged_vectors[merged_vectors['vendor_id']==random_restaurant]['restaurant_vector'].iloc[0]
        vectors.append(rest_vector)
        vectors.append(row['user_vector'])
        data = data.append({"x":np.mean(vectors, axis=0),"y":0,"user":row['customer_id'],"rest":random_restaurant}, ignore_index=True)

In [306]:
data = data.reset_index(drop=True)

In [307]:
data

Unnamed: 0,rest,user,x,y
0,237.0,005ECL6,"[-0.15133667, 0.05897522, 0.09327698, 0.575439...",1.0
1,459.0,005ECL6,"[-0.14848328, 0.007575989, 0.03740692, 0.50146...",0.0
2,304.0,009UFS1,"[-0.1108232, 0.036982864, 0.06830388, 0.506516...",1.0
3,858.0,009UFS1,"[-0.08127608, 0.032014605, 0.03339788, 0.43224...",0.0
4,84.0,009UFS1,"[-0.062620685, 0.040083453, 0.08322698, 0.4912...",1.0
...,...,...,...,...
128573,203.0,ZZY3N0D,"[-0.12120778937088816, -0.07003222013774671, 0...",0.0
128574,537.0,ZZY3N0D,"[-0.12625191066610186, -0.042790922140212434, ...",1.0
128575,180.0,ZZY3N0D,"[-0.12572439093338816, 0.04021253083881579, 0....",0.0
128576,459.0,ZZY3N0D,"[-0.14089162726151316, -0.04984484220805921, 0...",1.0



### Train test split

In [327]:
from sklearn.model_selection import train_test_split

In [328]:
x=data['x'].values

In [329]:
X_train, X_test, y_train, y_test = train_test_split(x, data['y'].values, test_size=0.33, random_state=42)

In [330]:
X_train = X_train.tolist()

In [331]:
X_train = np.asarray(X_train)

In [332]:
X_train

array([[-0.14803493, -0.03718801, -0.00194821, ...,  0.07028588,
         0.14200121,  0.09123743],
       [-0.1420323 , -0.0144819 ,  0.06928013, ..., -0.02369559,
         0.12799279,  0.11520647],
       [-0.088643  ,  0.04243821,  0.07144869, ..., -0.02176608,
         0.09581881,  0.07267526],
       ...,
       [-0.12720427, -0.00847288,  0.01707358, ..., -0.02591343,
         0.09017519,  0.07359514],
       [-0.15431365, -0.07282476,  0.0129954 , ...,  0.03295113,
         0.11384288,  0.07191994],
       [-0.14203709,  0.11062916,  0.10174847, ...,  0.03615662,
         0.09253684,  0.02190876]])

In [333]:
X_test = X_test.tolist()
X_test = np.asarray(X_test)
X_test

array([[-0.16440523,  0.01024733,  0.00526475, ...,  0.00316538,
         0.06206947,  0.05966398],
       [-0.13932478, -0.08630259, -0.02775644, ...,  0.00928357,
         0.13491184,  0.1273241 ],
       [-0.07552083, -0.00341797,  0.07580566, ..., -0.02353923,
         0.06401571,  0.1155599 ],
       ...,
       [-0.17593798,  0.05458768,  0.03088064, ...,  0.03323226,
         0.04041492,  0.04886514],
       [-0.11265346,  0.08049665,  0.04654367, ..., -0.02153669,
         0.16797747,  0.04933384],
       [-0.17034094, -0.00422014, -0.00497437, ...,  0.00637272,
         0.10951669,  0.06781878]])

In [334]:
from keras.models import Sequential
from keras.layers import Dense

In [343]:
model = Sequential([
    Dense(100, activation='relu', input_shape=(300,)),
    Dense(100, activation='relu'),
    Dense(2, activation='softmax'),
])

In [346]:
model.compile(optimizer='rmsprop',
              loss='binary_crossentropy',
              metrics=['accuracy'])

In [337]:
X_train.shape

(86147, 300)

In [338]:
y_train = tf.keras.utils.to_categorical(y_train, 2)

In [344]:
y_test2 = tf.keras.utils.to_categorical(y_test, 2)

In [340]:
y_train

array([[0., 1.],
       [0., 1.],
       [1., 0.],
       ...,
       [0., 1.],
       [0., 1.],
       [1., 0.]], dtype=float32)

In [347]:
hist = model.fit(X_train, y_train,
          batch_size=32, epochs=20,
                validation_data=(X_test,y_test2))

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [348]:
y_pred = np.argmax(model.predict(X_test), axis=-1)

In [349]:
y_pred

array([1, 1, 0, ..., 1, 1, 1], dtype=int64)

In [350]:
from sklearn.metrics import accuracy_score
accuracy_score(y_test, y_pred)

0.7381867031180034

### Recomenndation for user

In [406]:
#LCY5W4A
user_vector = merged_vectors[merged_vectors['customer_id']=='LCY5W4A']['user_vector'].iloc[0]
restaurant_vectors = merged_vectors[['restaurant_vector','vendor_id']]

In [407]:
restaurant_vectors = restaurant_vectors.drop_duplicates(subset="vendor_id")

In [408]:
data_user = pd.DataFrame()
for i, row in restaurant_vectors.iterrows():
    vectors = []
    vectors.append(row['restaurant_vector'])
    vectors.append(user_vector)
    data_user = data_user.append({"x":np.mean(vectors, axis=0),"rest":row['vendor_id']}, ignore_index=True)

In [409]:
X_user = data_user['x'].tolist()
X_user = np.asarray(X_user)
X_user

array([[-0.15443048,  0.03591565,  0.07200083, ...,  0.02943848,
         0.07281206,  0.03113072],
       [-0.15066156,  0.01840314,  0.05502543, ...,  0.04019746,
         0.06072328,  0.0378278 ],
       [-0.10245905,  0.02150373,  0.06994852, ..., -0.01796294,
         0.06955526,  0.04647343],
       ...,
       [-0.13256464,  0.01918439,  0.00795206, ...,  0.07441758,
         0.06697939, -0.00655086],
       [-0.1920434 , -0.04507953,  0.02638773, ...,  0.06193208,
         0.08791444,  0.02385075],
       [-0.1920434 , -0.04507953,  0.02638773, ...,  0.06193208,
         0.08791444,  0.02385075]])

In [410]:
preds = model.predict(X_user)

In [411]:
preds

array([[4.1493851e-01, 5.8506155e-01],
       [9.0786988e-01, 9.2130087e-02],
       [1.2339167e-02, 9.8766077e-01],
       [7.6440328e-01, 2.3559675e-01],
       [6.1150998e-01, 3.8849002e-01],
       [9.2197835e-01, 7.8021616e-02],
       [3.4702173e-01, 6.5297824e-01],
       [1.4924532e-01, 8.5075468e-01],
       [1.8113431e-01, 8.1886572e-01],
       [1.9920136e-01, 8.0079859e-01],
       [3.4702173e-01, 6.5297824e-01],
       [9.7197545e-01, 2.8024517e-02],
       [8.7414694e-01, 1.2585306e-01],
       [7.7492699e-02, 9.2250729e-01],
       [9.1785526e-01, 8.2144707e-02],
       [9.8662692e-01, 1.3373080e-02],
       [7.6440328e-01, 2.3559675e-01],
       [8.7944186e-01, 1.2055817e-01],
       [8.4601110e-01, 1.5398885e-01],
       [3.8137734e-01, 6.1862260e-01],
       [1.9920136e-01, 8.0079859e-01],
       [6.1150998e-01, 3.8849002e-01],
       [6.1150998e-01, 3.8849002e-01],
       [9.4455904e-01, 5.5441007e-02],
       [6.1150998e-01, 3.8849002e-01],
       [7.7492699e-02, 9.

In [412]:
preds_1 = [pred[1] for pred in preds]

In [413]:
len(preds_1)

100

In [414]:
data_user["pred_1"] = 0.0

In [415]:
def append_buy_prob_column(data_user,preds_1):
    for i, row in data_user.iterrows():
        data_user.at[i, 'pred_1'] = preds_1[i]
    return data_user

In [416]:
data_user = append_buy_prob_column(data_user,preds_1)

In [417]:
data_user

Unnamed: 0,rest,x,pred_1
0,237.0,"[-0.1544304824456936, 0.03591565388004954, 0.0...",0.585062
1,304.0,"[-0.1506615615472561, 0.018403141486753778, 0....",0.092130
2,84.0,"[-0.1024590468988186, 0.021503727610518292, 0....",0.987661
3,189.0,"[-0.16446771091077386, -0.04830218133766477, 0...",0.235597
4,843.0,"[-0.12111444389674722, 0.013434880140532807, 0...",0.388490
...,...,...,...
95,196.0,"[-0.2262230849847561, 0.04426984089176829, 0.1...",0.221912
96,154.0,"[-0.14033441310975608, 0.059492008860518295, 0...",0.132000
97,115.0,"[-0.1325646377191311, 0.019184391673018292, 0....",0.048740
98,300.0,"[-0.1920433974847561, -0.04507952583272283, 0....",0.012930


In [428]:
data_user = data_user.sort_values('pred_1', ascending=False)[:10]

In [429]:
data_user

Unnamed: 0,rest,x,pred_1
2,84.0,"[-0.1024590468988186, 0.021503727610518292, 0....",0.987661
25,106.0,"[-0.15791254108998834, 0.014484684641768292, 0...",0.922507
54,105.0,"[-0.15791254108998834, 0.014484684641768292, 0...",0.922507
13,104.0,"[-0.15791254108998834, 0.014484684641768292, 0...",0.922507
37,386.0,"[-0.3199730849847561, -0.1722828934832317, 0.1...",0.905954
26,356.0,"[-0.1382103896722561, 0.08238019394438441, 0.0...",0.888728
36,86.0,"[-0.1738074458226925, -0.07475210380990331, 0....",0.877349
31,537.0,"[-0.1369373722955948, -0.008429657009134933, 0...",0.869753
7,145.0,"[-0.08201735574047922, 0.03664044636051829, 0....",0.850755
8,459.0,"[-0.1515770888910061, -0.015483577076981708, 0...",0.818866


In [430]:
customer_reataurants['LCY5W4A'] ## 3/10

[105, 83, 85, 90, 197, 243, 148, 84, 237, 419, 391, 537]

In [431]:
vendors_user = [vendors[vendors['id'].isin(customer_reataurants['LCY5W4A'])]]
vendors_recommended = [vendors[vendors['id'].isin(list(data_user['rest']))]]

In [432]:
vendors_user

[     id                                    vendor_tag_name
 17   83                   Breakfast,Desserts,Salads,Arabic
 18   84                   Burgers,Fries,Kids meal,Shawarma
 19   85                                     Desserts,Omani
 21   90           American,Burgers,Desserts,Mojitos ,Pasta
 24  105      American,Burgers,Free Delivery,Hot Dogs,Pasta
 31  148  Breakfast,Burgers,Grills,Pizza,Sandwiches,Shaw...
 47  197                       Free Delivery,Mexican,Salads
 56  237  American,Burgers,Desserts,Donuts,Fries,Pasta,S...
 57  243                                                NaN
 75  391  American,Breakfast,Burgers,Desserts,Italian,Sa...
 78  419                          Arabic,Kushari,Sandwiches
 80  537  American,Burgers,Desserts,Free Delivery,Fries,...]

In [433]:
vendors_recommended

[     id                                    vendor_tag_name
 18   84                   Burgers,Fries,Kids meal,Shawarma
 20   86  Cakes,Crepes,Desserts,Donuts,Fresh Juices,Ice ...
 23  104      American,Burgers,Free Delivery,Hot Dogs,Pasta
 24  105      American,Burgers,Free Delivery,Hot Dogs,Pasta
 25  106      American,Burgers,Free Delivery,Hot Dogs,Pasta
 30  145                           Arabic,Pizzas,Sandwiches
 73  356             American,Kids meal,Pasta,Pizzas,Salads
 74  386                                            Churros
 79  459          Burgers,Free Delivery,Milkshakes,Mojitos 
 80  537  American,Burgers,Desserts,Free Delivery,Fries,...]

In [17]:
#customer_ids = list(dataset['customer_id'].unique())
#idx = int((4/5)*len(customer_ids))
#customer_ids_train = customer_ids[:idx]
#customer_ids_test = customer_ids[idx+1:len(customer_ids)]

In [18]:
# = dataset[dataset['customer_id'].isin(customer_ids_train)]
#test_df = dataset[dataset['customer_id'].isin(customer_ids_test)]