In [84]:
import os
import numpy as np 
import pandas as pd
import datetime as dt

In [2]:
os.chdir('D:\לימודים\Thesis\Data')

In [17]:
os.getcwd()

'D:\\לימודים\\Thesis\\Data'

## Get and Save the Data

In [18]:
# dataloc = 'Weeplaces\weeplace_checkins.csv'
dataloc = 'weeplaces_samp.csv'
data = pd.read_csv(dataloc)

In [5]:
data.head()

Unnamed: 0,userid,placeid,datetime,lat,lon,city,category
0,fred-wilson,tocqueville-new-york,2010-10-22T23:44:29,40.7363,-73.9922,New York,Food:American
1,fred-wilson,wesleyan-university-usdan-university-center-mi...,2010-10-23T17:00:24,41.556974,-72.657571,Middletown,College & Education:Student Center
2,fred-wilson,javapalooza-middletown,2010-10-23T20:30:45,41.558853,-72.648618,Middletown,Food:Coffee Shop
3,fred-wilson,giorgione-new-york,2010-10-24T00:22:49,40.726144,-74.008348,New York,Food:Italian
4,fred-wilson,the-shala-new-york,2010-10-24T16:01:38,40.733075,-73.991478,New York,Home / Work / Other:Gym / Fitness:Yoga Studio


In [6]:
data.shape

(7658368, 7)

In [19]:
data = data.sample(frac=0.5)

In [22]:
data.shape

(38292, 8)

In [21]:
data.to_csv('weeplaces_samp.csv', index=False)

## Data Summary

In [57]:
# Calculate the number of interactions
interaction_count = len(data)

# Calculate the number of unique users and items
user_count = data['userid'].nunique()
item_count = data['placeid'].nunique()

# Calculate sparsity
total_interactions = user_count * item_count
missing_interactions = total_interactions - interaction_count
sparsity = (missing_interactions / total_interactions) * 100


In [73]:
print(f'Total interactios: {total_interactions}')
print(f'Missing interactios: {missing_interactions}')
print(f'Sparsity: {sparsity:.4f}')

Total interactios: 334951578
Missing interactios: 334913286
Sparsity: 99.9886


In [None]:
# Create a new DataFrame with the extracted information
dataset_name = 'weeplaces'

summary_df = pd.DataFrame({
    'Interaction': [interaction_count],
    'User': [user_count],
    'Item': [item_count],
    'Sparsity': [sparsity]
})

# Save the summary DataFrame to a CSV file
summary_df.to_csv(f'{dataset_name}_summary.csv', index=False)

## Feature Engineering

In [27]:
data.head()

Unnamed: 0.1,Unnamed: 0,userid,placeid,datetime,lat,lon,city,category
2582,2589511,guillaume-sagnes,hotel-de-hollande,2010-05-20T16:54:59,48.874248,2.342293,,
56080,5156645,ami-greko,cinema-the-brasserie-new-york,2009-09-14T15:53:33,40.7554,-73.9794,New York,Food:French
73539,309590,christopher-poage,taco-bell-tempe,2010-10-22T01:16:20,33.377728,-111.951483,Tempe,Food:Fast Food
4920,2073162,ralph-m,hamburger-meile-ekz-hamburger-strae-hamburg,2010-02-13T13:30:18,53.572658,10.030046,Hamburg,Shops:Apparel
19154,2980170,alvaro-garnero,russia-ukraine-border,2010-10-25T01:26:53,51.874901,34.332447,,Travel:Other - Travel


In [None]:
data=data.drop(['Unnamed: 0'], axis=1)

In [31]:
data.nunique()

userid      11271
placeid     29718
datetime    38268
lat         31398
lon         31165
city         4813
category      593
dtype: int64

### Replace user and place names with IDs

In [32]:
# Get ID number for each userid name
data['userid'] = data['userid'].astype('category').cat.rename_categories(range(1, data['userid'].nunique()+1))

In [None]:
# Get place number for each placeid name
data['placeid'] = data['placeid'].astype('category').cat.rename_categories(range(1, data['placeid'].nunique()+1))

In [40]:
data.sort_values('user', ascending=False).head()

Unnamed: 0,userid,placeid,datetime,lat,lon,city,category,ID
26958,zwilling,target-burwood-burwood,2010-07-17T05:00:45,-33.874296,151.104012,Burwood,Shops:Department Store,11271
74300,zwilling,brisbane-domestic-terminal-brisbane-airport,2010-10-10T09:32:09,-27.383581,153.12139,Brisbane Airport,Travel:Airport,11271
75501,zwilling,m4-st-clair-st-clair,2010-09-11T13:19:49,-33.791096,150.807567,St Clair,Travel:Highway / Traffic,11271
54902,zvi-band,istrategylabs-washington,2010-02-20T12:36:23,38.910756,-77.043639,Washington,Home / Work / Other:Corporate / Office,11270
60881,zuntsuku,,2010-04-13T15:23:11,35.825431,139.921148,松戸市,Travel:Train Station,11269


In [34]:
# Check unique values of ID == userid
data.nunique()

userid      11271
placeid     29718
datetime    38268
lat         31398
lon         31165
city         4813
category      593
ID          11271
dtype: int64

In [48]:
# Drop userid column 
data = data.drop('place', axis=1)

In [46]:
# Get place number for each placeid name
data['placeid'] = data['placeid'].astype('category').cat.rename_categories(range(1, data['placeid'].nunique()+1))

In [53]:
data.rename(columns={'ID':'userid'}, inplace=True)

In [74]:
data.head()

Unnamed: 0,placeid,datetime,lat,lon,city,category,userid
2582,11829,2010-05-20T16:54:59,48.874248,2.342293,,,4018
56080,5728,2009-09-14T15:53:33,40.7554,-73.9794,New York,Food:French,544
73539,25018,2010-10-22T01:16:20,33.377728,-111.951483,Tempe,Food:Fast Food,2170
4920,10865,2010-02-13T13:30:18,53.572658,10.030046,Hamburg,Shops:Apparel,8820
19154,21823,2010-10-25T01:26:53,51.874901,34.332447,,Travel:Other - Travel,501


### Extract contextual features 

#### Time based contextual features

In [76]:
df = data.copy()

In [81]:
# Convert the timestamp to a pandas datetime object
df['timestamp'] = pd.to_datetime(df['datetime'])

In [99]:
df['datetime'].iloc[0]
# type(df['datetime'][0])

'2010-05-20T16:54:59'

In [87]:
# Extract hour and day of the week information from the timestamp
df['hour'] = df['timestamp'].dt.hour
df['day_of_week'] = df['timestamp'].dt.dayofweek

In [93]:
df.head()

Unnamed: 0,placeid,datetime,lat,lon,city,category,userid,timestamp,hour,day_of_week
2582,11829,2010-05-20T16:54:59,48.874248,2.342293,,,4018,2010-05-20 16:54:59,16,3
56080,5728,2009-09-14T15:53:33,40.7554,-73.9794,New York,Food:French,544,2009-09-14 15:53:33,15,0
73539,25018,2010-10-22T01:16:20,33.377728,-111.951483,Tempe,Food:Fast Food,2170,2010-10-22 01:16:20,1,4
4920,10865,2010-02-13T13:30:18,53.572658,10.030046,Hamburg,Shops:Apparel,8820,2010-02-13 13:30:18,13,5
19154,21823,2010-10-25T01:26:53,51.874901,34.332447,,Travel:Other - Travel,501,2010-10-25 01:26:53,1,0


In [89]:
df.nunique()

placeid        29718
datetime       38268
lat            31398
lon            31165
city            4813
category         593
userid         11271
timestamp      38268
hour              24
day_of_week        7
dtype: int64

In [101]:
# Define the time of day categories
morning_hours = [6, 7, 8, 9, 10, 11]
noon_hours = [12, 13]
afternoon_hours = [14, 15, 16, 17]
evening_hours = [18, 19, 20]
night_hours = [21, 22, 23, 0, 1, 2, 3, 4, 5]

In [102]:
# Create binary variables for each time of day category
df['is_morning'] = np.where(df['hour'].isin(morning_hours), 1, 0)
df['is_noon'] = np.where(df['hour'].isin(noon_hours), 1, 0)
df['is_afternoon'] = np.where(df['hour'].isin(afternoon_hours), 1, 0)
df['is_evening'] = np.where(df['hour'].isin(evening_hours), 1, 0)
df['is_night'] = np.where(df['hour'].isin(night_hours), 1, 0)

In [103]:
# Create binary variables for weekday vs weekend
df['is_weekday'] = np.where(df['day_of_week'] < 5, 1, 0)
df['is_weekend'] = np.where(df['day_of_week'] >= 5, 1, 0)

In [104]:
df.head()

Unnamed: 0,placeid,datetime,lat,lon,city,category,userid,timestamp,hour,day_of_week,is_morning,is_noon,is_afternoon,is_evening,is_night,is_weekday,is_weekend
2582,11829,2010-05-20T16:54:59,48.874248,2.342293,,,4018,2010-05-20 16:54:59,16,3,0,0,1,0,0,1,0
56080,5728,2009-09-14T15:53:33,40.7554,-73.9794,New York,Food:French,544,2009-09-14 15:53:33,15,0,0,0,1,0,0,1,0
73539,25018,2010-10-22T01:16:20,33.377728,-111.951483,Tempe,Food:Fast Food,2170,2010-10-22 01:16:20,1,4,0,0,0,0,1,1,0
4920,10865,2010-02-13T13:30:18,53.572658,10.030046,Hamburg,Shops:Apparel,8820,2010-02-13 13:30:18,13,5,0,1,0,0,0,0,1
19154,21823,2010-10-25T01:26:53,51.874901,34.332447,,Travel:Other - Travel,501,2010-10-25 01:26:53,1,0,0,0,0,0,1,1,0


#### Distance based contextual features

In [106]:
pip install geopy

Collecting geopyNote: you may need to restart the kernel to use updated packages.

  Downloading geopy-2.3.0-py3-none-any.whl (119 kB)
Collecting geographiclib<3,>=1.52
  Downloading geographiclib-2.0-py3-none-any.whl (40 kB)
Installing collected packages: geographiclib, geopy
Successfully installed geographiclib-2.0 geopy-2.3.0


In [116]:
import pandas as pd
from geopy.distance import geodesic
from datetime import timedelta

# Sort the DataFrame by 'userid' and 'datetime'
df = df.sort_values(by=['userid', 'datetime'])

# Initialize the 'distance' column with NaN values
df['distance'] = float('NaN')

# Iterate over each user
for user_id, group in df.groupby('userid'):
    if len(group) > 1:  # Check if the user has more than 1 check-in
        for i in range(len(group) - 1):
            curr_time = group.iloc[i]['timestamp']
            next_time = group.iloc[i + 1]['timestamp']
            time_diff = next_time - curr_time
            if time_diff <= timedelta(hours=5):  # Check if time difference is less than 5 hours
                curr_coords = (group.iloc[i]['lat'], group.iloc[i]['lon'])
                next_coords = (group.iloc[i + 1]['lat'], group.iloc[i + 1]['lon'])
                distance = geodesic(curr_coords, next_coords).kilometers
                df.loc[group.iloc[i].name, 'distance'] = distance
                df.loc[group.iloc[i + 1].name, 'distance'] = distance

# Print the updated DataFrame with the 'distance' column

In [121]:
df.sort_values(['distance','userid'], ascending=False).head(20)

Unnamed: 0,placeid,datetime,lat,lon,city,category,userid,timestamp,hour,day_of_week,is_morning,is_noon,is_afternoon,is_evening,is_night,is_weekday,is_weekend,distance
3246,16702,2010-05-15T13:25:19,25.794385,-80.27828,Miami,Travel:Airport,10101,2010-05-15 13:25:19,13,5,0,1,0,0,0,0,1,1926.574971
61899,5448,2010-05-15T18:18:10,41.976593,-87.905045,Chicago,Travel:Airport,10101,2010-05-15 18:18:10,18,5,0,0,0,1,0,0,1,1926.574971
6945,5519,2010-12-26T23:31:47,29.985568,-95.349634,Houston,Food:American Restaurants,6500,2010-12-26 23:31:47,23,6,0,0,0,0,1,0,1,1862.111357
44799,9991,2010-12-27T03:45:18,37.509133,-77.330473,Richmond,Travel Spots:Airports:Airport Gates,6500,2010-12-27 03:45:18,3,0,0,0,0,0,1,1,0,1862.111357
4104,22464,2010-12-12T12:17:50,40.818908,-96.699364,Lincoln,College & Education:Cafeteria,503,2010-12-12 12:17:50,12,6,0,1,0,0,0,0,1,1706.648865
13203,8993,2010-12-12T15:49:18,38.907729,-76.864433,Landover,Arts & Entertainment:Stadium:Football,503,2010-12-12 15:49:18,15,6,0,0,1,0,0,0,1,1706.648865
29669,5701,2010-08-27T11:28:06,-3.745556,-38.484249,Fortaleza,Travel:Embassy,3249,2010-08-27 11:28:06,11,4,1,0,0,0,0,1,0,1141.59308
32181,786,2010-08-27T14:55:39,-1.391736,-48.479962,Bélem,Travel:Airport:Terminal,3249,2010-08-27 14:55:39,14,4,0,0,1,0,0,1,0,1141.59308
11635,23933,2011-05-19T15:49:32,51.278857,6.765432,Düsseldorf,Food:Coffee Shop,7040,2011-05-19 15:49:32,15,3,0,0,1,0,0,1,0,594.829721
73049,3537,2011-05-19T19:29:21,47.836144,13.063679,,Nightlife Spots:Breweries,7040,2011-05-19 19:29:21,19,3,0,0,0,1,0,1,0,594.829721


In [115]:
df.sort_values('distance', ascending=False).head()

Unnamed: 0,placeid,datetime,lat,lon,city,category,userid,timestamp,hour,day_of_week,is_morning,is_noon,is_afternoon,is_evening,is_night,is_weekday,is_weekend,distance
38390,16114,2010-07-20T17:56:49,38.315876,-88.953018,,Food:American,569,2010-07-20 17:56:49,17,1,0,0,1,0,0,1,0,90.069984
12760,24059,2010-07-20T19:34:39,38.573544,-89.931358,O'Fallon,Food:Coffee Shop,569,2010-07-20 19:34:39,19,1,0,0,0,1,0,1,0,90.069984
34329,24506,2010-06-30T10:31:32,51.594616,4.779191,Breda,Food:Sandwiches,4247,2010-06-30 10:31:32,10,2,1,0,0,0,0,1,0,51.454756
37647,24226,2010-06-30T11:29:55,51.442988,5.479517,Eindhoven,Travel:Train Station,4247,2010-06-30 11:29:55,11,2,1,0,0,0,0,1,0,51.454756
13964,22293,2010-05-02T17:57:24,52.311175,4.749513,,,6731,2010-05-02 17:57:24,17,6,0,0,1,0,0,0,1,41.157358


In [123]:
df.isna().sum()

placeid          1449
datetime            0
lat                 0
lon                 0
city             4461
category         3749
userid              0
timestamp           0
hour                0
day_of_week         0
is_morning          0
is_noon             0
is_afternoon        0
is_evening          0
is_night            0
is_weekday          0
is_weekend          0
distance        37641
dtype: int64

In [124]:
df.shape

(38292, 18)

In [125]:
df.to_csv('weeplaces_eng.csv')

### Create user embeddings with contextual features

In [204]:
# Combine the binary variables into a numpy array for each row
context_values = df[
    ['is_morning', 'is_noon', 'is_afternoon', 'is_evening', 'is_night', 'is_weekday', 'is_weekend']].to_numpy()


In [231]:
type(context_values)

numpy.ndarray

In [229]:
context_values.shape

(38292, 7)

In [223]:
print(np.array(context_values[0]))

[1 0 0 0 0 1 0]


In [225]:
df.head()

Unnamed: 0,placeid,datetime,lat,lon,city,category,userid,timestamp,hour,day_of_week,is_morning,is_noon,is_afternoon,is_evening,is_night,is_weekday,is_weekend,distance,context_values
20670,,2010-04-19T06:40:49,35.715805,139.51287,小金井市,Home / Work / Other:Government:Monument / Land...,1,2010-04-19 06:40:49,6,0,1,0,0,0,0,1,0,,1
63796,21881.0,2010-04-30T17:51:52,36.000972,139.667065,蓮田市,Travel:Highway / Traffic,1,2010-04-30 17:51:52,17,4,0,0,1,0,0,1,0,,0
36752,,2010-05-08T04:31:20,35.699214,139.771116,千代田区,Arts & Entertainment:Arcade,1,2010-05-08 04:31:20,4,5,0,0,0,0,1,0,1,,0
25009,,2010-05-10T16:07:53,35.653251,139.708956,渋谷区,Home / Work / Other:Religious:Temple,1,2010-05-10 16:07:53,16,0,0,0,1,0,0,1,0,,0
58564,,2010-10-18T15:00:17,35.657254,139.714632,,,1,2010-10-18 15:00:17,15,0,0,0,1,0,0,1,0,,0


In [224]:
# Add the context_values as a new column in the original dataframe
df['context_values'] = np.array(df['context_values'])

()

In [253]:
df1=df.dropna()

In [254]:
from keras.models import Sequential
from keras.layers import Dense, Concatenate
from sklearn.preprocessing import LabelEncoder, StandardScaler
import numpy as np


# Convert the data to numpy arrays
user_item_data1 = np.array(df1[['userid', 'placeid']])
contextual_data1 = context_values


In [255]:
contextual_data1 = contextual_data1[0:10,:]
user_item_data1 = user_item_data1[0:10,:]

In [189]:
contextual_data.shape
user_item_data.shape

(4, 4)

In [236]:
print(user_item_data[0], user_item_data.shape)
print(contextual_data[0], contextual_data.shape)

[1 1 0 0] (4, 4)
[ 1.         -1.          0.57735027] (4, 3)


In [256]:
print(user_item_data1[0], user_item_data1.shape)
print(contextual_data1[0], contextual_data1.shape)

[    6 23818] (10, 2)
[1 0 0 0 0 1 0] (10, 7)


In [238]:
contextual_data1.shape[1]

7

#### Model 1 - dense layers

In [264]:
# Normalize the contextual data
scaler = StandardScaler()
contextual_data1 = scaler.fit_transform(contextual_data1)

# Define the model architecture
model = Sequential()
model.add(Dense(16, input_shape=(user_item_data1.shape[1],), activation='relu'))
model.add(Dense(8, activation='relu'))
model.add(Dense(8, activation='relu'))

context_input = Input(shape=(contextual_data1.shape[1],))
context_output = Dense(16, activation='relu')(context_input)

merged = Concatenate()([model.output, context_output])
output = Dense(2, activation='relu')(merged)

model = Model(inputs=[model.input, context_input], outputs=output)

# Compile and train the model
model.compile(optimizer='adam', loss='mse')
model.fit([user_item_data1, contextual_data1], user_item_data1, epochs=10, batch_size=2)

# Obtain the contextual embeddings
contextual_embeddings = model.predict([user_item_data1, contextual_data1])

# Print the generated contextual embeddings
for i, embedding in enumerate(contextual_embeddings):
    print(f"User-Item Pair {i+1}: {embedding}")


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
User-Item Pair 1: [   0.    4988.061]
User-Item Pair 2: [   0.     3914.7664]
User-Item Pair 3: [   0.    1122.481]
User-Item Pair 4: [   0.    5267.724]
User-Item Pair 5: [   0.    5859.103]
User-Item Pair 6: [   0.    5005.933]
User-Item Pair 7: [   0.    4641.614]
User-Item Pair 8: [  0.     988.4642]
User-Item Pair 9: [   0.     4922.9033]
User-Item Pair 10: [   0.    5971.657]


In [244]:
from keras.models import Sequential, Model
from keras.layers import Dense, Concatenate, Input
from sklearn.preprocessing import LabelEncoder, StandardScaler
import numpy as np

# Sample data representing user-item interactions and contextual data
user_item_data = [
    [1, 1, 0, 0],
    [1, 0, 1, 0],
    [0, 1, 1, 1],
    [1, 0, 0, 1]
]

contextual_data = [
    [1, 0, 1],
    [0, 1, 0],
    [1, 1, 1],
    [0, 0, 1]
]

# Convert the data to numpy arrays
user_item_data = np.array(user_item_data)
contextual_data = np.array(contextual_data)

# Normalize the contextual data
scaler = StandardScaler()
contextual_data = scaler.fit_transform(contextual_data)

# Define the model architecture
model = Sequential()
model.add(Dense(16, input_shape=(user_item_data.shape[1],), activation='relu'))
model.add(Dense(8, activation='relu'))
model.add(Dense(4, activation='relu'))

context_input = Input(shape=(contextual_data.shape[1],))
context_output = Dense(4, activation='relu')(context_input)

merged = Concatenate()([model.output, context_output])
output = Dense(4, activation='relu')(merged)

model = Model(inputs=[model.input, context_input], outputs=output)

# Compile and train the model
model.compile(optimizer='adam', loss='mse')
model.fit([user_item_data, contextual_data], user_item_data, epochs=10, batch_size=2)

# Obtain the contextual embeddings
contextual_embeddings = model.predict([user_item_data, contextual_data])

# Print the generated contextual embeddings
for i, embedding in enumerate(contextual_embeddings):
    print(f"User-Item Pair {i+1}: {embedding}")


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
User-Item Pair 1: [0.         0.1607272  0.11795041 0.        ]
User-Item Pair 2: [0.         0.         0.         0.11733117]
User-Item Pair 3: [0. 0. 0. 0.]
User-Item Pair 4: [0.         0.12348227 0.36427382 0.        ]


In [245]:
model.summary()

Model: "model_13"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 dense_73_input (InputLayer)    [(None, 4)]          0           []                               
                                                                                                  
 dense_73 (Dense)               (None, 16)           80          ['dense_73_input[0][0]']         
                                                                                                  
 dense_74 (Dense)               (None, 8)            136         ['dense_73[0][0]']               
                                                                                                  
 input_15 (InputLayer)          [(None, 3)]          0           []                               
                                                                                           

In [243]:
model.summary()

Model: "model_12"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 dense_68_input (InputLayer)    [(None, 2)]          0           []                               
                                                                                                  
 dense_68 (Dense)               (None, 16)           48          ['dense_68_input[0][0]']         
                                                                                                  
 dense_69 (Dense)               (None, 8)            136         ['dense_68[0][0]']               
                                                                                                  
 input_14 (InputLayer)          [(None, 7)]          0           []                               
                                                                                           

#### Model 2 - Embedding layer

In [268]:
from keras.models import Sequential
from keras.layers import Embedding, Flatten, Dense


# Convert inputs to numpy arrays

user_item_input = user_item_data1
context_input = contextual_data1

# Define the model architecture
model = Sequential()
model.add(Embedding(input_dim=10, output_dim=8, input_length=2))  # Assuming the vocabulary size is 24000 and output dimension is 8
model.add(Flatten())
model.add(Dense(16, activation='relu'))
model.add(Dense(1, activation='sigmoid'))

# Compile and train the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model.fit(user_item_input, context_input[:, 0], epochs=10, batch_size=2)

# Obtain the learned embeddings
embeddings = model.layers[0].get_weights()[0]

# Print the learned embeddings
for i, embedding in enumerate(embeddings):
    print(f"Embedding for User-Item Pair {i+1}: {embedding}")

Epoch 1/10


InvalidArgumentError: Graph execution error:

Detected at node 'sequential_31/embedding_3/embedding_lookup' defined at (most recent call last):
    File "C:\Users\nirco\anaconda3\lib\runpy.py", line 197, in _run_module_as_main
      return _run_code(code, main_globals, None,
    File "C:\Users\nirco\anaconda3\lib\runpy.py", line 87, in _run_code
      exec(code, run_globals)
    File "C:\Users\nirco\anaconda3\lib\site-packages\ipykernel_launcher.py", line 16, in <module>
      app.launch_new_instance()
    File "C:\Users\nirco\anaconda3\lib\site-packages\traitlets\config\application.py", line 846, in launch_instance
      app.start()
    File "C:\Users\nirco\anaconda3\lib\site-packages\ipykernel\kernelapp.py", line 677, in start
      self.io_loop.start()
    File "C:\Users\nirco\anaconda3\lib\site-packages\tornado\platform\asyncio.py", line 199, in start
      self.asyncio_loop.run_forever()
    File "C:\Users\nirco\anaconda3\lib\asyncio\base_events.py", line 596, in run_forever
      self._run_once()
    File "C:\Users\nirco\anaconda3\lib\asyncio\base_events.py", line 1890, in _run_once
      handle._run()
    File "C:\Users\nirco\anaconda3\lib\asyncio\events.py", line 80, in _run
      self._context.run(self._callback, *self._args)
    File "C:\Users\nirco\anaconda3\lib\site-packages\ipykernel\kernelbase.py", line 457, in dispatch_queue
      await self.process_one()
    File "C:\Users\nirco\anaconda3\lib\site-packages\ipykernel\kernelbase.py", line 446, in process_one
      await dispatch(*args)
    File "C:\Users\nirco\anaconda3\lib\site-packages\ipykernel\kernelbase.py", line 353, in dispatch_shell
      await result
    File "C:\Users\nirco\anaconda3\lib\site-packages\ipykernel\kernelbase.py", line 648, in execute_request
      reply_content = await reply_content
    File "C:\Users\nirco\anaconda3\lib\site-packages\ipykernel\ipkernel.py", line 353, in do_execute
      res = shell.run_cell(code, store_history=store_history, silent=silent)
    File "C:\Users\nirco\anaconda3\lib\site-packages\ipykernel\zmqshell.py", line 533, in run_cell
      return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
    File "C:\Users\nirco\anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 2901, in run_cell
      result = self._run_cell(
    File "C:\Users\nirco\anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 2947, in _run_cell
      return runner(coro)
    File "C:\Users\nirco\anaconda3\lib\site-packages\IPython\core\async_helpers.py", line 68, in _pseudo_sync_runner
      coro.send(None)
    File "C:\Users\nirco\anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 3172, in run_cell_async
      has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
    File "C:\Users\nirco\anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 3364, in run_ast_nodes
      if (await self.run_code(code, result,  async_=asy)):
    File "C:\Users\nirco\anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 3444, in run_code
      exec(code_obj, self.user_global_ns, self.user_ns)
    File "C:\Users\nirco\AppData\Local\Temp/ipykernel_14676/1793886438.py", line 19, in <module>
      model.fit(user_item_input, context_input[:, 0], epochs=10, batch_size=2)
    File "C:\Users\nirco\anaconda3\lib\site-packages\keras\utils\traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "C:\Users\nirco\anaconda3\lib\site-packages\keras\engine\training.py", line 1650, in fit
      tmp_logs = self.train_function(iterator)
    File "C:\Users\nirco\anaconda3\lib\site-packages\keras\engine\training.py", line 1249, in train_function
      return step_function(self, iterator)
    File "C:\Users\nirco\anaconda3\lib\site-packages\keras\engine\training.py", line 1233, in step_function
      outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "C:\Users\nirco\anaconda3\lib\site-packages\keras\engine\training.py", line 1222, in run_step
      outputs = model.train_step(data)
    File "C:\Users\nirco\anaconda3\lib\site-packages\keras\engine\training.py", line 1023, in train_step
      y_pred = self(x, training=True)
    File "C:\Users\nirco\anaconda3\lib\site-packages\keras\utils\traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "C:\Users\nirco\anaconda3\lib\site-packages\keras\engine\training.py", line 561, in __call__
      return super().__call__(*args, **kwargs)
    File "C:\Users\nirco\anaconda3\lib\site-packages\keras\utils\traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "C:\Users\nirco\anaconda3\lib\site-packages\keras\engine\base_layer.py", line 1132, in __call__
      outputs = call_fn(inputs, *args, **kwargs)
    File "C:\Users\nirco\anaconda3\lib\site-packages\keras\utils\traceback_utils.py", line 96, in error_handler
      return fn(*args, **kwargs)
    File "C:\Users\nirco\anaconda3\lib\site-packages\keras\engine\sequential.py", line 413, in call
      return super().call(inputs, training=training, mask=mask)
    File "C:\Users\nirco\anaconda3\lib\site-packages\keras\engine\functional.py", line 511, in call
      return self._run_internal_graph(inputs, training=training, mask=mask)
    File "C:\Users\nirco\anaconda3\lib\site-packages\keras\engine\functional.py", line 668, in _run_internal_graph
      outputs = node.layer(*args, **kwargs)
    File "C:\Users\nirco\anaconda3\lib\site-packages\keras\utils\traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "C:\Users\nirco\anaconda3\lib\site-packages\keras\engine\base_layer.py", line 1132, in __call__
      outputs = call_fn(inputs, *args, **kwargs)
    File "C:\Users\nirco\anaconda3\lib\site-packages\keras\utils\traceback_utils.py", line 96, in error_handler
      return fn(*args, **kwargs)
    File "C:\Users\nirco\anaconda3\lib\site-packages\keras\layers\core\embedding.py", line 208, in call
      out = tf.nn.embedding_lookup(self.embeddings, inputs)
Node: 'sequential_31/embedding_3/embedding_lookup'
indices[0,0] = 298 is not in [0, 10)
	 [[{{node sequential_31/embedding_3/embedding_lookup}}]] [Op:__inference_train_function_20188]

### Export dataframe with contextual features column (one-hot vector of contexts)

In [None]:
# Combine the binary variables into a numpy array for each row
context_values = df[
    ['is_morning', 'is_noon', 'is_afternoon', 'is_evening', 'is_night', 'is_weekday', 'is_weekend', 'distance']].to_numpy()

# Add the context_values as a new column in the original dataframe
df['context_values'] = list(context_values)

# Remove the intermediate binary columns
df.drop(['hour', 'day_of_week', 'is_morning', 'is_noon', 'is_afternoon', 'is_evening', 'is_night', 'is_weekday',
         'is_weekend', 'distance'], axis=1, inplace=True)

df = df.reindex(columns=["userid", "placeid", "context_values", "rating", "timestamp"])