# Board Games
__________________________

In [1]:
# Import libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import pickle
import sqlite3
import os
import pprint
import tempfile

from typing import Dict, Text

# import math
# import random
# import sklearn
# import scipy
import cv2

# Recommender
import tensorflow as tf
import tensorflow_recommenders as tfrs
from tensorflow.keras.layers.experimental.preprocessing import StringLookup, TextVectorization, Normalization, Discretization, Hashing
from tensorflow.keras.layers import Embedding, Dense, Layer, GlobalAveragePooling1D, Flatten
from tensorflow.keras import Sequential
from tensorflow.keras.optimizers import Adagrad
from tensorflow.keras.losses import MeanSquaredError
from tensorflow.keras.metrics import RootMeanSquaredError

from nltk.corpus import stopwords
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from scipy.sparse.linalg import svds


### Import data  

Import the cleaned dataframe, reference dictionaries, and user ratings.

In [2]:
# Open dataframe
infile = open('../datasets/boardgames/clean_bgg_GameItem.pkl', 'rb')
df = pickle.load(infile)
infile.close()

In [3]:
df.shape

(7929, 20)

In [4]:
# Open dictionaries
infile = open('../datasets/boardgames/ref_dictionaries.pkl', 'rb')
ref_dicts = pickle.load(infile)
infile.close()

In [5]:
# Extract ratings from sqlite database
conn = sqlite3.connect("../datasets/boardgames/bgg_5yrs_RatingItem.db")
cur = conn.cursor()

user_df = pd.read_sql_query("""
SELECT *,
    COUNT(bgg_user_name) OVER
         (PARTITION BY bgg_user_name) AS user_count
FROM bgg_ratings

""", conn)

user_df.head()

Unnamed: 0,bgg_user_name,bgg_id,bgg_user_rating,year,month,user_count
0,fu_koios,223033,9.0,2017,10,1
1,-=yod@=-,7,7.5,2015,3,173
2,-=yod@=-,42,6.5,2016,10,173
3,-=yod@=-,217,6.75,2016,10,173
4,-=yod@=-,432,7.5,2017,5,173


In [6]:
cur.close()
conn.close()

In [7]:
user_df.shape

(12278237, 6)

## Preprocessing

A common problem in recommender systems is known as ***user cold-start***, where it is difficult to recommend items for users with very few number of consumed items (in this case rated board games), due to lack of information to model their preferences. As such, we choose to only keep the users with at least 30 rated board games.

In [8]:
# Filtering dataframe to contain users with at least 30 rates
user_df = user_df[user_df['user_count']>=30]
user_df.shape

(10667845, 6)

We also want to extract the user ratings for the board games that we are left with after extensive EDA and cleaning.

In [9]:
# Filtering dataframe to user ratings of the board games we are concerned with
user_df = user_df[user_df['bgg_id'].isin(df['bgg_id'])]
user_df.shape

(9182849, 6)

In [10]:
user_df['user_count'].describe()

count    9.182849e+06
mean     2.552930e+02
std      3.462861e+02
min      3.000000e+01
25%      7.900000e+01
50%      1.560000e+02
75%      3.160000e+02
max      6.717000e+03
Name: user_count, dtype: float64

In [11]:
# Save df as .pkl
outfile = open('../datasets/boardgames/bgg_users_2015.pkl', 'wb')
pickle.dump(user_df, outfile)
outfile.close()

#### Board Game Mapper

We require a mapper for board game id to the board game name since our predictions would be done on the board game ids. This mapper will be user at the end after an actual prediction has been made.

In [12]:
# Mapper (bgg_id -> name)
bg_mapper = {}
for i, name in zip(df['bgg_id'], df['name']):
    bg_mapper[str(i)] = name

#### Unique id  

We require to map the board game ids to embedding vectors in the models later. Hence, we need lists of the unique board game ids and unique user ids.

In [13]:
# Extract unique users and unique board game ids
# Need to keep it as numpy.ndarray
unique_user = user_df['bgg_user_name'].unique()
unique_bgg_id = df['bgg_id'].unique().astype(str)

In [14]:
unique_bgg_id[:10]

array(['3', '9', '10', '11', '12', '13', '14', '16', '17', '25'],
      dtype='<U21')

## Retrieval Model

This is a two-tower retrieval model, we will build each tower separately and then combine them in the final model.

#### Split into train and test sets

We want to split the user dataframe into train and test sets, by time. The data up to time $T$ would be used to predict user rating after $T$.

In [15]:
# Sort user dataframe by date
user_df = user_df.sort_values(by=['year', 'month']).reset_index(drop=True)

In [16]:
# Split dataset with shuffle False
# We only need the user name and bgg_id
user_train, user_test = train_test_split(user_df[['bgg_id', 'bgg_user_name']], shuffle=False, test_size=0.2)

In [17]:
user_train.tail(2)

Unnamed: 0,bgg_id,bgg_user_name
7346277,267814,nickri1890
7346278,251412,nickrice


In [18]:
user_test.head(2)

Unnamed: 0,bgg_id,bgg_user_name
7346279,113294,nickster1970
7346280,146021,nickster1970


In [None]:
# Convert train and test into Tensor Datasets
# user_train = user_train.to_dict('records')
# user_train = tf.data.Dataset.from_tensor_slices(user_train)

In [None]:
# user_test = user_test.to_dict('records')
# user_test = tf.data.Dataset.from_tensor_slices(user_test)

In [19]:
# Convert train and test into Tensor Datasets
user_train['bgg_id'] = user_train['bgg_id'].astype(str)
user_test['bgg_id'] = user_test['bgg_id'].astype(str)
user_train = tf.data.Dataset.from_tensor_slices(user_train).map(lambda x: {'bgg_id': x[0], 'bgg_user_name': x[1]})
user_test = tf.data.Dataset.from_tensor_slices(user_test).map(lambda x: {'bgg_id': x[0], 'bgg_user_name': x[1]})

#### Query tower

In [21]:
# Dimensionality of the query
embedding_dimension = 32

# Define the model
user_model = Sequential([
    StringLookup(vocabulary=unique_user, mask_token=None),
    # Additional embedding to account for unknown tokens
    Embedding(len(unique_user) + 1, embedding_dimension)
])

#### Candidate tower

In [None]:
# model = tf.keras.Sequential()
# model.add(tf.keras.layers.Embedding(1000, 64))
# # The model will take as input an integer matrix of size (batch,
# # input_length), and the largest integer (i.e. word index) in the input
# # should be no larger than 999 (vocabulary size).
# # Now model.output_shape is (None, 10, 64), where `None` is the batch
# # dimension.
# input_array = np.random.randint(1000, size=(32, 10))
# model.compile('rmsprop', 'mse')
# output_array = model.predict(input_array)
# print(output_array.shape)


In [22]:
# Define the model for board game names
bg_model = tf.keras.Sequential([
    StringLookup(vocabulary=unique_bgg_id, mask_token=None),
    Embedding(len(unique_bgg_id) + 1, embedding_dimension)
])

#### Metrics

In the training data, there are positive (bgg_id, bgg_user_name) pairs. To gauge on how good the model is, we need to compare the affinity score that the model calculates for a particular pair to the scores of all the other possible candidates. In other words, the higher the score for the positive pair as compared to other candidates, the more accurate the model is.

We use `FactorizedTopK` metric which requires the dataset of candidates that are used as implicit negatives for evaluation. We are implicitly assuming that if a user did not rate a board game, he/she do not like that board game as much.

In [23]:
type(df['bgg_id'].values.astype(str))

numpy.ndarray

In [24]:
type(tf.data.Dataset.from_tensor_slices(df['bgg_id'].values.astype(str)))

tensorflow.python.data.ops.dataset_ops.TensorSliceDataset

In [25]:
# Convert to Tensor Dataset object
bgg_ids = tf.data.Dataset.from_tensor_slices(df['bgg_id'].values.astype(str))

# The metrics
metrics = tfrs.metrics.FactorizedTopK(
    candidates=bgg_ids.batch(128).map(bg_model)
)

#### Loss

We use the `Retrieval` task object to bundle together the loss function and metric computation. This becomes a Keras layer that takes the embeddings from the two towers as arguments, and returning the computed loss.

In [26]:
# Setting up the task
task = tfrs.tasks.Retrieval(
    metrics=metrics
)

#### Full model

We want to combine all of the above together into a model. We use `tfrs.Model` as the base model which take care of creating the appropriate training loop to fit the model.

In [27]:
# Check the format of train and test datasets
count = 0

for element in user_test.as_numpy_iterator():
    if count <=10:
        print(element)
        count += 1
    else:
        break

{'bgg_id': b'113294', 'bgg_user_name': b'nickster1970'}
{'bgg_id': b'146021', 'bgg_user_name': b'nickster1970'}
{'bgg_id': b'214880', 'bgg_user_name': b'nicktaruffi'}
{'bgg_id': b'39856', 'bgg_user_name': b'nickwatt'}
{'bgg_id': b'92828', 'bgg_user_name': b'nickwatt'}
{'bgg_id': b'218603', 'bgg_user_name': b'nickwatt'}
{'bgg_id': b'246784', 'bgg_user_name': b'nickwatt'}
{'bgg_id': b'247160', 'bgg_user_name': b'nickwatt'}
{'bgg_id': b'24181', 'bgg_user_name': b'nicnied'}
{'bgg_id': b'35497', 'bgg_user_name': b'nicnied'}
{'bgg_id': b'132372', 'bgg_user_name': b'nicnied'}


In [28]:
# Full model
class BGRetrievalModel(tfrs.Model):
    
    def __init__(self, user_model, bg_model):
        super().__init__()
        self.bg_model: tf.keras.Model = bg_model
        self.user_model: tf.keras.Model = user_model
        self.task: tf.keras.layers.Layer = task
    
    def compute_loss(self, features, training=False) -> tf.Tensor:
        # Picking out the user features and passing them into the user model
        # Format of each entry is ['bgg_id', 'bgg_user_name']
        user_embeddings = self.user_model(features['bgg_user_name'])
        
        # Picking out the board games features, passing into bg model
        positive_bg_embeddings = self.bg_model(features['bgg_id'])
        
        # Task computes the loss and the metrics
        return self.task(user_embeddings, positive_bg_embeddings)

### Fitting and Evaluation  

In [29]:
# Instantiate and compile the model
retrieval_model = BGRetrievalModel(user_model, bg_model)
retrieval_model.compile(optimizer=Adagrad(learning_rate=0.1))

In [30]:
# Batch and cache the datasets, did not shuffle to keep time order
cached_user_train = user_train.batch(8192).cache()
cached_user_test = user_test.batch(4096).cache()

In [32]:
# Training the model
tf.random.set_seed(42)
retrieval_model.fit(cached_user_train, epochs=3)

Epoch 1/3
Epoch 2/3
Epoch 3/3


<tensorflow.python.keras.callbacks.History at 0x13c04e727c0>

In [33]:
# Evaluate the model
retrieval_model.evaluate(cached_user_test, return_dict=True)



{'factorized_top_k/top_1_categorical_accuracy': 0.0009713759645819664,
 'factorized_top_k/top_5_categorical_accuracy': 0.005677431356161833,
 'factorized_top_k/top_10_categorical_accuracy': 0.011128352954983711,
 'factorized_top_k/top_50_categorical_accuracy': 0.04705238714814186,
 'factorized_top_k/top_100_categorical_accuracy': 0.08205077797174454,
 'loss': 11230.853515625,
 'regularization_loss': 0,
 'total_loss': 11230.853515625}

These values tell us whether the true positive is in the top-k retrieved items from the entire candidate set. For example, a top-50 categorical accuracy metric of 0.3 means that 30% of the top 50 retrieved items are true positives.

Comparing the metrics, there is a considerable difference between the train and test accuracies, suggesting that the model has been overfitted. It is common since the model has many parameters. A low top-k accuracy would also suggest that the model is recommending board games to users who already rated those board games.

(Maybe can try regularization to generalize better to unseen data.)

### Predictions

In [34]:
# 
df.head(2)

Unnamed: 0,bgg_id,name,year,game_type,designer,artist,publisher,min_players,max_players,min_age,min_time,max_time,category,mechanic,rank,num_votes,avg_rating,stddev_rating,bayes_rating,complexity
0,3,Samurai,1998,5497,2,11883,"17,133,267,29,7340,7335,41,2973,4617,1391,8291...",2,4,10.0,30.0,60.0,10091035,208020402026284620042002,207.0,14648.0,7.45046,1.18569,7.24774,2.4885
1,9,El Caballero,1998,5497,78,74,2671333,2,4,13.0,90.0,90.0,1020,20802002,2679.0,1374.0,6.46354,1.43462,5.94897,3.1824


In [35]:
# Board game dataset needs to be Tensorflow object
bgg_ids

<TensorSliceDataset shapes: (), types: tf.string>

In [36]:
# Create a model that takes in raw query features
index = tfrs.layers.factorized_top_k.BruteForce(retrieval_model.user_model)

# Recommends a board game out of the entire boardgame dataset
index.index(bgg_ids.batch(128).map(retrieval_model.bg_model), bgg_ids)

<tensorflow_recommenders.layers.factorized_top_k.BruteForce at 0x13c7fb745e0>

In [37]:
# Get recommendation
_, board_games = index(tf.constant(['-johnny-']))
print(f'Recommendations for -johnny-: {board_games[0, :3]}')

Recommendations for -johnny-: [b'255675' b'164265' b'123096']


We are able to successfully recommend top 3 games (number of games arbituarily decided) to a user with the username '-johnny-' based on the trained embeddings for both the query tower and candidate tower. However, we are recommending the board game id right now, and we want to map that to the board game name for it to be more meaningful. 

In [38]:
# Map the predicted bgg_id to the board game name
named_games = []
for bgg_id in board_games[0, :3]:
    named_games.append(bg_mapper[bgg_id.numpy().decode("utf-8")])

print(f'Recommendations for -johnny-: {named_games}')

Recommendations for -johnny-: ['Exit: The Game – The Catacombs of Horror', 'Witness', 'Space Cadets']


In [39]:
# Check if our model is re-recommending the user a game he or she has already played
user_df[user_df['bgg_user_name']=='-johnny-']

Unnamed: 0,bgg_user_name,bgg_id,bgg_user_rating,year,month,user_count
1,-johnny-,59946,6.0,2015,1,45
2,-johnny-,166384,7.0,2015,1,45
73899,-johnny-,150376,6.0,2015,2,45
132254,-johnny-,150658,6.0,2015,3,45
191533,-johnny-,478,6.0,2015,4,45
191534,-johnny-,103885,5.0,2015,4,45
354534,-johnny-,18833,9.0,2015,7,45
354535,-johnny-,54307,8.0,2015,7,45
1056490,-johnny-,94,8.0,2016,4,45
1056491,-johnny-,26566,4.0,2016,4,45


Observing the board games which the user '-johnny-' has rated, we see that the top 3 recommended games are not within them. This is still a good sign, but it may be just so happened that these 45 entries have no false positives. 

The retrieval model is useful for getting quick recommendations, but it is just based on the board game ids and user ids. This model is usually built to be more computationally efficient to filter out all candidates that the user is not interested in.

## Ranking Model

The ranking model is built to be used in tandem with the retrieval model, taking the outputs from the retrieval model and finetuning them to select the best possible recommendations.

#### Split into train and test sets

The train and test data will now include the user ratings to give a sense of ranking to the recommended board games.

In [40]:
rating_train, rating_test = train_test_split(user_df[['bgg_id', 'bgg_user_name', 'bgg_user_rating']], shuffle=False, test_size=0.2)

In [41]:
# Convert train and test into Tensor Datasets
# tensor_user_train = tf.data.Dataset.from_tensor_slices(rating_train[['bgg_id', 'bgg_user_name']].astype(str))
# tensor_user_test = tf.data.Dataset.from_tensor_slices(rating_test[['bgg_id', 'bgg_user_name']].astype(str))
# tensor_rating_train = tf.data.Dataset.from_tensor_slices(rating_train['bgg_user_rating'].astype('float32'))
# tensor_rating_test = tf.data.Dataset.from_tensor_slices(rating_test['bgg_user_rating'].astype('float32'))

In [56]:
# Convert train and test into Tensor Datasets
rating_train = tf.data.Dataset.from_tensor_slices({'bgg_id': rating_train['bgg_id'].astype(str),
                                                  'bgg_user_name': rating_train['bgg_user_name'],
                                                  'bgg_user_rating': rating_train['bgg_user_rating'].astype('float32')})
rating_test = tf.data.Dataset.from_tensor_slices({'bgg_id': rating_test['bgg_id'].astype(str),
                                                  'bgg_user_name': rating_test['bgg_user_name'],
                                                  'bgg_user_rating': rating_test['bgg_user_rating'].astype('float32')})

In [58]:
rating_train

<TensorSliceDataset shapes: {bgg_id: (), bgg_user_name: (), bgg_user_rating: ()}, types: {bgg_id: tf.string, bgg_user_name: tf.string, bgg_user_rating: tf.float32}>

In [39]:
# Combine
# rating_train = tf.data.Dataset.zip((tensor_user_train, tensor_rating_train))
# rating_test = tf.data.Dataset.zip((tensor_user_test, tensor_rating_test))

In [149]:
# Convert train and test into Tensor Datasets
# rating_train['bgg_id'] = rating_train['bgg_id'].astype(str)
# rating_test['bgg_id'] = rating_test['bgg_id'].astype(str)
# # rating_train['bgg_user_rating'] = rating_train['bgg_user_rating'].astype('float32')
# # rating_test['bgg_user_rating'] = rating_test['bgg_user_rating'].astype('float32')


# rating_train = tf.data.Dataset.from_tensor_slices(rating_train[['bgg_id', 'bgg_user_name']], rating_train['bgg_user_rating'])
# rating_test = tf.data.Dataset.from_tensor_slices(rating_test[['bgg_id', 'bgg_user_name']], rating_test['bgg_user_rating'])

#### Ranking layers

The ranking model is composed of multiple layers for ranking tasks.

In [59]:
# Ranking tasks
class RankingModel(tf.keras.Model):

    def __init__(self):
        super().__init__()
        embedding_dimension = 32

        # User embeddings
        self.user_embeddings = Sequential([
            StringLookup(vocabulary=unique_user, mask_token=None),
            Embedding(len(unique_user) + 1, embedding_dimension)
        ])

        # Board game embeddings
        self.bg_embeddings = Sequential([
            StringLookup(vocabulary=unique_bgg_id, mask_token=None),
            Embedding(len(unique_bgg_id) + 1, embedding_dimension)
        ])

        # Predictions
        self.ratings = Sequential([
            Dense(256, activation="relu"),
            Dense(64, activation="relu"),
          # Rating predictions in the final layer.
            Dense(1)
        ])
    
    def call(self, inputs):
        bgg_user_name, bgg_id = inputs
        user_embedding = self.user_embeddings(bgg_user_name)
        bg_embedding = self.bg_embeddings(bgg_id)
        return self.ratings(tf.concat([user_embedding, bg_embedding], axis=1))

In [60]:
# This model takes user names and bgg ids, and outputs a predicted rating
RankingModel()((['-johnny-'],['20545']))

Consider rewriting this model with the Functional API.
Consider rewriting this model with the Functional API.


<tf.Tensor: shape=(1, 1), dtype=float32, numpy=array([[0.02334006]], dtype=float32)>

In [269]:
# This model takes user names and bgg ids, and outputs a predicted rating
# RankingModel()(tf.convert_to_tensor((['-johnny-'],['20545'])))

<tf.Tensor: shape=(1, 1), dtype=float32, numpy=array([[0.01110805]], dtype=float32)>

#### Loss and metrics

This time, we use the `Ranking` task object to put together the loss function and metric computation. The metrics used is `RootMeanSquaredError`.

In [61]:
# The loss + metrics task
task = tfrs.tasks.Ranking(
    loss = MeanSquaredError(),
    metrics = [RootMeanSquaredError()]
)

#### Full model

We want to combine all of the above together into a model. We use `tfrs.Model` as the base model which take care of creating the appropriate training loop to fit the model.

In [62]:
# Check the format of train and test datasets
count = 0
for element in rating_test.as_numpy_iterator():
    if count <=10:
        print(element)
        count += 1
    else:
        break

{'bgg_id': b'113294', 'bgg_user_name': b'nickster1970', 'bgg_user_rating': 7.0}
{'bgg_id': b'146021', 'bgg_user_name': b'nickster1970', 'bgg_user_rating': 8.0}
{'bgg_id': b'214880', 'bgg_user_name': b'nicktaruffi', 'bgg_user_rating': 8.0}
{'bgg_id': b'39856', 'bgg_user_name': b'nickwatt', 'bgg_user_rating': 7.0}
{'bgg_id': b'92828', 'bgg_user_name': b'nickwatt', 'bgg_user_rating': 7.0}
{'bgg_id': b'218603', 'bgg_user_name': b'nickwatt', 'bgg_user_rating': 8.0}
{'bgg_id': b'246784', 'bgg_user_name': b'nickwatt', 'bgg_user_rating': 8.5}
{'bgg_id': b'247160', 'bgg_user_name': b'nickwatt', 'bgg_user_rating': 7.0}
{'bgg_id': b'24181', 'bgg_user_name': b'nicnied', 'bgg_user_rating': 9.5}
{'bgg_id': b'35497', 'bgg_user_name': b'nicnied', 'bgg_user_rating': 4.0}
{'bgg_id': b'132372', 'bgg_user_name': b'nicnied', 'bgg_user_rating': 7.0}


In [63]:
# Full model
class BGRankingModel(tfrs.Model):
    
    def __init__(self):
        super().__init__()
        self.ranking_model: tf.keras.Model = RankingModel()
        # The loss + metrics task
        self.task: Layer = tfrs.tasks.Ranking(
            loss = MeanSquaredError(),
            metrics = [RootMeanSquaredError()]
        )
    
    def compute_loss(self, features, training=False) -> tf.Tensor:
        rating_predictions = self.ranking_model(
            (features['bgg_user_name'], features['bgg_id']))
        
        # Task computes the loss and the metrics
        return self.task(labels=features['bgg_user_rating'], predictions=rating_predictions)

### Fitting and Evaluation

In [64]:
# Instantiate and compile the model
ranking_model = BGRankingModel()
ranking_model.compile(optimizer=Adagrad(learning_rate=0.1))

In [65]:
# Batch and cache the datasets, did not shuffle to keep time order
cached_rating_train = rating_train.batch(81920).cache()
cached_rating_test = rating_test.batch(40960).cache()

In [66]:
# Check the format of train and test datasets
count = 0
for element in rating_test.as_numpy_iterator():
    if count <=2:
        print(element)
        count += 1
    else:
        break

{'bgg_id': b'113294', 'bgg_user_name': b'nickster1970', 'bgg_user_rating': 7.0}
{'bgg_id': b'146021', 'bgg_user_name': b'nickster1970', 'bgg_user_rating': 8.0}
{'bgg_id': b'214880', 'bgg_user_name': b'nicktaruffi', 'bgg_user_rating': 8.0}


In [67]:
# Training the model
tf.random.set_seed(42)
ranking_model.fit(cached_rating_train, epochs=3)

Epoch 1/3
Epoch 2/3
Epoch 3/3


<tensorflow.python.keras.callbacks.History at 0x13c48911b20>

In [68]:
# Evaluate the model
ranking_model.evaluate(cached_rating_test, return_dict=True)



{'root_mean_squared_error': 1.3503472805023193,
 'loss': 1.8450074195861816,
 'regularization_loss': 0,
 'total_loss': 1.8450074195861816}

We observe that the rmse and loss are both lower than the train data, there may be some underfitting.

### Predictions

## Deep Deep Model

We want to utilize the vast features we have for the board games.

### Features Preprocessing

#### Lookup vocabulary

We want to define a vocabulary for both board games and users.

In [53]:
# Large number of bins to reduce chance of hash collisions
# num_hashing_bins = 200_000
# bgg_id_hashing = Hashing(num_bins=num_hashing_bins)

In [54]:
# Testing the layer out with arbituary bgg ids
# bgg_id_hashing(['34324', '32432'])

<tf.Tensor: shape=(2,), dtype=int64, numpy=array([124101,  90064], dtype=int64)>

In [71]:
bgg_ids

<TensorSliceDataset shapes: (), types: tf.string>

In [69]:
# Vocabulary for bgg_id
bgg_id_lookup = StringLookup()
bgg_id_lookup.adapt(bgg_ids)

In [70]:
bgg_id_lookup.vocab_size()

7931

In [72]:
# Test the layer out with bgg ids
# Note that bgg_id = 2 is not in the list
bgg_id_lookup(['3', '9', '2'])

<tf.Tensor: shape=(3,), dtype=int64, numpy=array([1814,  227,    1], dtype=int64)>

In [73]:
# Vocabulary for user names
user_lookup = StringLookup()
user_lookup.adapt(tf.data.Dataset.from_tensor_slices(unique_user))

In [74]:
user_lookup.vocab_size()

95598

#### Continuous features

Complexity is an important continuous feature to use in the model. However, the reason it is continuous is that the value reflects the average complexity rating as contributed by the BGG community. We can discretize the complexity to a pre-defined number of buckets, since it is reasonable to assume that board games with very similar complexity will fall within the same range of complexity.

In [75]:
# Standardize the feature
complexity_normalization = Normalization()
complexity_normalization.adapt(tf.data.Dataset.from_tensor_slices(df['complexity']).batch(512))

In [76]:
# Test normalization
complexity_normalization(df['complexity'].values[:5])

<tf.Tensor: shape=(5, 1), dtype=float32, numpy=
array([[ 0.39823163],
       [ 1.3151934 ],
       [-0.03692577],
       [-0.6782308 ],
       [ 0.2137556 ]], dtype=float32)>

In [77]:
# Set the number of discretized buckets
# Min complexity is 1, Max is 5
complexity_buckets = np.linspace(1, 5, num=50)

#### Categorical features

Try with game type and category first.

In [78]:
# Custom splitting function
def split_on_comma(input_data):
    return tf.strings.split(input_data, sep=',')

max_tokens = 10_000
# Vectorization of game types
vec_game_types = TextVectorization(max_tokens=max_tokens, standardize=None, split=split_on_comma)
vec_game_types.adapt(tf.data.Dataset.from_tensor_slices(df['game_type']).batch(512))

# Vectorization of categories
vec_categories = TextVectorization(max_tokens=max_tokens, standardize=None, split=split_on_comma)
vec_categories.adapt(tf.data.Dataset.from_tensor_slices(df['category']).batch(512))

# Vectorization of mechanics
vec_mechanics = TextVectorization(max_tokens=max_tokens, standardize=None, split=split_on_comma)
vec_mechanics.adapt(tf.data.Dataset.from_tensor_slices(df['mechanic']).batch(512))

In [79]:
# Testing out the vectorization on game types
for value in tf.constant(df['game_type'].sample(20, random_state=42)):
    tf.print("Game type:", value)
    tf.print("Vectorized game types:", vec_game_types(value))
    print('------------------------------')

Game type: "99999"
Vectorized game types: [2]
------------------------------
Game type: "4665"
Vectorized game types: [9]
------------------------------
Game type: "5496"
Vectorized game types: [6]
------------------------------
Game type: "5496"
Vectorized game types: [6]
------------------------------
Game type: "4664"
Vectorized game types: [5]
------------------------------
Game type: "99999"
Vectorized game types: [2]
------------------------------
Game type: "99999"
Vectorized game types: [2]
------------------------------
Game type: "99999"
Vectorized game types: [2]
------------------------------
Game type: "5496,5498"
Vectorized game types: [6 7]
------------------------------
Game type: "5496"
Vectorized game types: [6]
------------------------------
Game type: "5498"
Vectorized game types: [7]
------------------------------
Game type: "5497"
Vectorized game types: [3]
------------------------------
Game type: "5497"
Vectorized game types: [3]
------------------------------
G

In [80]:
# Testing out the vectorization on categories
for value in tf.constant(df['category'].sample(20, random_state=42)):
    tf.print("Categories:", value)
    tf.print("Vectorized categories:", vec_categories(value))
    print('------------------------------')

Categories: "1002,1010"
Vectorized categories: [2 3]
------------------------------
Categories: "1032,1041,1017,1031"
Vectorized categories: [21 22 6 32]
------------------------------
Categories: "1022,1010,1046,1047"
Vectorized categories: [12 3 5 10]
------------------------------
Categories: "1064,1016,1019"
Vectorized categories: [28 7 4]
------------------------------
Categories: "1023,1102,1019"
Vectorized categories: [13 73 4]
------------------------------
Categories: "1021,1010,1008"
Vectorized categories: [8 3 29]
------------------------------
Categories: "1017,1031"
Vectorized categories: [6 32]
------------------------------
Categories: "1118,1030"
Vectorized categories: [77 9]
------------------------------
Categories: "1002,1028,1037"
Vectorized categories: [2 27 23]
------------------------------
Categories: "1022,1002,1020,1016"
Vectorized categories: [12 2 17 7]
------------------------------
Categories: "1024,1093,1037"
Vectorized categories: [24 35 23]
------------

In [81]:
# Testing out the vectorization on mechanics
for value in tf.constant(df['mechanic'].sample(20, random_state=42)):
    tf.print("Mechanics:", value)
    tf.print("Vectorized mechanics:", vec_mechanics(value))
    print('------------------------------')

Mechanics: "2080,2040"
Vectorized mechanics: [7 2]
------------------------------
Mechanics: "2072"
Vectorized mechanics: [3]
------------------------------
Mechanics: "2001,2046,2023,2072,2011,2028,2819,2015"
Vectorized mechanics: [11 15 10 ... 32 20 4]
------------------------------
Mechanics: "2080,2046,2072,2016,2015"
Vectorized mechanics: [7 15 3 33 4]
------------------------------
Mechanics: "2080,2046,2018,2857,2070"
Vectorized mechanics: [7 15 29 79 18]
------------------------------
Mechanics: "2013,2072,2813,2002,2663,2015,2082"
Vectorized mechanics: [44 3 65 ... 51 4 13]
------------------------------
Mechanics: "99999"
Vectorized mechanics: [31]
------------------------------
Mechanics: "2040"
Vectorized mechanics: [2]
------------------------------
Mechanics: "2023"
Vectorized mechanics: [10]
------------------------------
Mechanics: "2023,2676,2040,2959,2011,2822,2819,2027,2015"
Vectorized mechanics: [10 12 2 ... 20 37 4]
------------------------------
Mechanics: "2893,2

In [82]:
# Use to get the vocabulary for a vectorizer
vec_mechanics.get_vocabulary()[30:35]

['2685', '99999', '2028', '2016', '2689']

#### Split into train and test sets

In [180]:
# Merge dataframes
combined_df = user_df.merge(df, how='left', on='bgg_id')
combined_df.head(10)

Unnamed: 0,bgg_user_name,bgg_id,bgg_user_rating,year_x,month,user_count,name,year_y,game_type,designer,...,min_time,max_time,category,mechanic,rank,num_votes,avg_rating,stddev_rating,bayes_rating,complexity
0,-=yod@=-,160495,7.5,2015,1,173,ZhanGuo,2014,5497,1229312294,...,60.0,120.0,1050,20802040,509.0,2850.0,7.59899,1.25726,6.86182,3.8333
1,-johnny-,59946,6.0,2015,1,45,Dungeons & Dragons: Castle Ravenloft Board Game,2010,5496,274615913131083944,...,60.0,60.0,102210201010104610241047,2023207226762011202828192015,756.0,8363.0,6.97652,1.43756,6.66732,2.5111
2,-johnny-,166384,7.0,2015,1,45,Spyfall,2014,5498,78199,...,15.0,15.0,10231039107910301081,2073204720282892286628142017,631.0,17970.0,6.89022,1.40803,6.76383,1.2434
3,-mide-,20545,6.0,2015,1,130,Rory's Story Cubes,2005,54985499,6409,...,20.0,20.0,1017,2023207220602027,2375.0,3145.0,6.30261,1.55489,6.00793,1.1329
4,-mide-,145639,7.0,2015,1,130,Coconuts,2013,46655499,69564,...,20.0,20.0,103210891041,26612686,919.0,3630.0,7.03525,1.28321,6.55872,1.0429
5,-toni-,1927,5.0,2015,1,123,Munchkin,2001,5496,22,...,60.0,120.0,1002101010461079,204026862015,4371.0,40956.0,5.90224,1.83025,5.73034,1.8011
6,-toni-,4095,5.0,2015,1,123,Star Munchkin,2002,5496,22,...,90.0,90.0,1002104610791016,20412072268620082015,3103.0,4603.0,6.11641,1.65982,5.86991,1.7437
7,-toni-,12194,5.0,2015,1,123,Munchkin Bites!,2004,54965498,22,...,90.0,90.0,1002104610241079,207220082015,4149.0,2889.0,5.99423,1.6853,5.74758,1.7563
8,-toni-,25071,5.0,2015,1,123,Munchkin Cthulhu,2007,5496,1255222,...,90.0,90.0,1002104610241079,207220082015,2736.0,4914.0,6.2068,1.68922,5.93801,1.7082
9,0b1_ita,4491,7.0,2015,1,250,Cave Troll,2002,54965497,222,...,20.0,60.0,1010,20012080204020782015,1975.0,3590.0,6.40988,1.2544,6.10913,1.9784


In [183]:
# Train/test split
combined_train, combined_test = train_test_split(combined_df[['bgg_id', 'bgg_user_name', 'bgg_user_rating', 'complexity', 'game_type', 'category', 'mechanic']], shuffle=False, test_size=0.2)

In [184]:
# Convert train and test into Tensor Datasets
combined_train = tf.data.Dataset.from_tensor_slices({'bgg_id': combined_train['bgg_id'].astype(str),
                                                     'bgg_user_name': combined_train['bgg_user_name'],
                                                     'bgg_user_rating': combined_train['bgg_user_rating'].astype('float32'),
                                                     'complexity': combined_train['complexity'].astype('float32'),
                                                     'game_type': combined_train['game_type'],
                                                     'category': combined_train['category'],
                                                     'mechanic': combined_train['mechanic']
                                                    })
combined_test = tf.data.Dataset.from_tensor_slices({'bgg_id': combined_test['bgg_id'].astype(str),
                                                    'bgg_user_name': combined_test['bgg_user_name'],
                                                    'bgg_user_rating': combined_test['bgg_user_rating'].astype('float32'),
                                                    'complexity': combined_test['complexity'].astype('float32'),
                                                    'game_type': combined_test['game_type'],
                                                    'category': combined_test['category'],
                                                    'mechanic': combined_test['mechanic']
                                                   })

### Model Assembly

#### Embeddings

Define the embedding layers.

In [83]:
embedding_dimension = 32
# Embedding for bgg_id
bgg_id_embedding = Embedding(input_dim=bgg_id_lookup.vocab_size(),
                            output_dim=embedding_dimension)

# Embedding for user names
user_embedding = Embedding(input_dim=user_lookup.vocab_size(),
                          output_dim=embedding_dimension)

# Embedding for complexity
complexity_embedding = Embedding(input_dim=len(complexity_buckets) + 1,
                                output_dim=embedding_dimension)

# Embeddings for categorial features
game_type_embedding = Embedding(max_tokens, embedding_dimension, mask_zero=True)
category_embedding = Embedding(max_tokens, embedding_dimension, mask_zero=True)
mechanic_embedding = Embedding(max_tokens, embedding_dimension, mask_zero=True)

#### Sub-models

In [232]:
# Model for bgg_id
bgg_id_model = Sequential([bgg_id_lookup, bgg_id_embedding])

# Model for user names
user_name_model = Sequential([user_lookup, user_embedding])

# Model for complexity
complexity_model = Sequential([complexity_embedding, complexity_normalization])

# Model for categorical features
game_type_model = Sequential([vec_game_types, game_type_embedding, GlobalAveragePooling1D()])
category_model = Sequential([vec_categories, category_embedding, GlobalAveragePooling1D()])
mechanic_model = Sequential([vec_mechanics, mechanic_embedding, GlobalAveragePooling1D()])

In [237]:
complexity_model(tf.constant([3]))

<tf.Tensor: shape=(1, 32), dtype=float32, numpy=
array([[-2.9122534, -2.9208996, -2.9456112, -2.9236424, -2.8832736,
        -2.8858867, -2.9428413, -2.8871658, -2.9294155, -2.8896465,
        -2.8897088, -2.8914137, -2.839352 , -2.9458134, -2.8446016,
        -2.9036124, -2.9117532, -2.843088 , -2.9012895, -2.9452565,
        -2.8421667, -2.9218328, -2.8857453, -2.9174237, -2.8760943,
        -2.9517388, -2.8599532, -2.833736 , -2.9268801, -2.9508862,
        -2.9073458, -2.9303496]], dtype=float32)>

In [233]:
game_type_model(tf.constant(['5499']))

<tf.Tensor: shape=(1, 32), dtype=float32, numpy=
array([[ 0.02803346,  0.04313293,  0.04163221,  0.0241234 , -0.01672184,
         0.03980862, -0.01575249,  0.00760558,  0.04948891,  0.01349804,
         0.04079599,  0.01835562, -0.01581039, -0.01299304, -0.0116506 ,
         0.0461771 ,  0.0028049 , -0.02966702, -0.02194635,  0.01861787,
         0.03305216, -0.03270494, -0.01168804, -0.01395692, -0.00685375,
         0.04201272,  0.04385069,  0.00355615, -0.02102757,  0.01313055,
         0.042712  ,  0.00594108]], dtype=float32)>

In [227]:
# Check the format of train and test datasets
count = 0
for element in combined_test:
    if count <=3:
        print(element['game_type'])
        count += 1
    else:
        break

tf.Tensor(b'5499', shape=(), dtype=string)
tf.Tensor(b'5496', shape=(), dtype=string)
tf.Tensor(b'5497', shape=(), dtype=string)
tf.Tensor(b'5498', shape=(), dtype=string)


#### Tasks

We are building a multitask recommender, consisting of a retrieval task and rating task. The rating task predicts the ratings as accurately as possible, the retrieval task predicts which board games the user will play.

In [87]:
# Rating task
rating_task = tfrs.tasks.Ranking(loss = MeanSquaredError(), metrics = [RootMeanSquaredError()])

# Retrieval task
retrieval_task = tfrs.tasks.Retrieval(metrics = tfrs.metrics.FactorizedTopK(
                                        candidates = bgg_ids.batch(256).map(bgg_id_model)))

#### Full model

In [179]:
rating_test

<TensorSliceDataset shapes: {bgg_id: (), bgg_user_name: (), bgg_user_rating: ()}, types: {bgg_id: tf.string, bgg_user_name: tf.string, bgg_user_rating: tf.float32}>

In [177]:
tensor_df.take(1)

<TakeDataset shapes: {3: (), 9: (), 10: (), 11: (), 12: (), 13: (), 14: (), 16: (), 17: (), 25: (), 26: (), 42: (), 44: (), 46: (), 47: (), 48: (), 49: (), 50: (), 51: (), 54: (), 59: (), 60: (), 64: (), 66: (), 68: (), 70: (), 72: (), 73: (), 74: (), 76: (), 83: (), 87: (), 88: (), 89: (), 90: (), 93: (), 94: (), 101: (), 102: (), 105: (), 106: (), 108: (), 111: (), 112: (), 113: (), 116: (), 117: (), 124: (), 125: (), 132: (), 134: (), 135: (), 141: (), 142: (), 146: (), 150: (), 152: (), 153: (), 172: (), 176: (), 178: (), 182: (), 189: (), 190: (), 192: (), 194: (), 200: (), 204: (), 207: (), 210: (), 219: (), 220: (), 222: (), 223: (), 229: (), 232: (), 234: (), 247: (), 256: (), 257: (), 258: (), 259: (), 263: (), 267: (), 268: (), 270: (), 271: (), 272: (), 278: (), 279: (), 285: (), 293: (), 294: (), 295: (), 296: (), 301: (), 321: (), 322: (), 323: (), 337: (), 338: (), 339: (), 340: (), 343: (), 344: (), 348: (), 350: (), 357: (), 359: (), 363: (), 368: (), 371: (), 372: (), 

In [168]:
{k: v for (k, v) in zip(df['bgg_id'].astype(str), tf.constant(df[['complexity', 'game_type', 'category', 'mechanic']].astype(str).values))}['3'][1]

<tf.Tensor: shape=(), dtype=string, numpy=b'5497'>

In [159]:
# Convert board game dataframe to tensor dataset
tensor_df = tf.data.Dataset.from_tensor_slices({k: v for (k, v) in zip(df['bgg_id'].astype(str), tf.constant(df[['complexity', 'game_type', 'category', 'mechanic']].astype(str).values))})

In [195]:
# Check the format of train and test datasets
count = 0
for element in combined_test.as_numpy_iterator():
    if count <=5:
        print(element)
        count += 1
    else:
        break

{'bgg_id': b'113294', 'bgg_user_name': b'nickster1970', 'bgg_user_rating': 7.0, 'complexity': 1.4813, 'game_type': b'5499', 'category': b'1022,1017,1072,1020,1037', 'mechanic': b'2023,2072,2882,2676,2011,2661,2870,2831,2035,2819,2002'}
{'bgg_id': b'146021', 'bgg_user_name': b'nickster1970', 'bgg_user_rating': 8.0, 'complexity': 3.3192, 'game_type': b'5496', 'category': b'1022,1010,1046,1024,1093,1097', 'mechanic': b'2001,2023,2072,2078,2028,2819,2853,2015'}
{'bgg_id': b'214880', 'bgg_user_name': b'nicktaruffi', 'bgg_user_rating': 8.0, 'complexity': 4.042, 'game_type': b'5497', 'category': b'1021,1094,1088', 'mechanic': b'2013,2911,2005,2002,2015,2082'}
{'bgg_id': b'39856', 'bgg_user_name': b'nickwatt', 'bgg_user_rating': 7.0, 'complexity': 1.2203, 'game_type': b'5498', 'category': b'1002,1079,1030', 'mechanic': b'2020,2027,2866,2017'}
{'bgg_id': b'92828', 'bgg_user_name': b'nickwatt', 'bgg_user_rating': 7.0, 'complexity': 1.1943, 'game_type': b'5498', 'category': b'1002,1079,1030', 'me

In [191]:
# Full model
class BGDeepModel(tfrs.models.Model):
    
    def __init__(self, rating_weight: float, retrieval_weight: float) -> None:
        super().__init__()
        embedding_dimension = 32
        
        # User and board game models
        self.user_model: tf.keras.Model = user_name_model
        self.bgg_id_model: tf.keras.Model = bgg_id_model
        self.complexity_model: tf.keras.Model = complexity_model
        self.game_type_model: tf.keras.Model = game_type_model
        self.category_model: tf.keras.Model = category_model
        self.mechanic_model: tf.keras.Model = mechanic_model
            
        # Model to take in user and board game embeddings and predict ratings
        self.rating_model = Sequential([
            Dense(256, activation="relu"),
            Dense(128, activation="relu"),
            Dense(1)
        ])
        
        # Tasks
        self.rating_task: Layer = rating_task
        self.retrieval_task: Layer = retrieval_task
        
        # Loss weights
        self.rating_weight = rating_weight
        self.retrieval_weight = retrieval_weight
        
    def call(self, features) -> tf.Tensor:
        user_embeddings = self.user_model(features['bgg_user_name'])
        bgg_id_embeddings = self.bgg_id_model(features['bgg_id'])
#         complexity_embeddings = self.complexity_model(features['complexity'])
#         game_type_embeddings = self.game_type_model(features['game_type'])
#         category_embeddings = self.category_model(features['category'])
#         mechanic_embeddings = self.mechanic_model(features['mechanic'])
        
        return (
            user_embeddings,
            bgg_id_embeddings,
#             complexity_embeddings,
#             game_type_embeddings,
#             category_embeddings,
#             mechanic_embeddings,
            self.rating_model(tf.concat([user_embeddings, bgg_id_embeddings], axis=1)) )
#                                          complexity_embeddings, 
#                                         game_type_embeddings, category_embeddings, mechanic_embeddings], axis=1))
#         )
    
    def compute_loss(self, features, training=False) -> tf.Tensor:
        ratings = features['bgg_user_rating']
        user_embeddings, bgg_id_embeddings, rating_predictions = self(features)
        # , complexity_embeddings, game_type_embeddings, category_embeddings, mechanic_embeddings,
        
        # Compute loss for each task
        rating_loss = self.rating_task(labels=ratings, predictions=rating_predictions)
        retrieval_loss = self.retrieval_task(user_embeddings, bgg_id_embeddings)
#                                              , complexity_embeddings, 
#                                              game_type_embeddings, category_embeddings, mechanic_embeddings)
        
        #Combine them using the loss weights
        return (self.rating_weight * rating_loss + self.retrieval_weight * retrieval_loss)

In [186]:
rating_model = Sequential([
            Dense(256, activation="relu"),
            Dense(128, activation="relu"),
            Dense(1)
        ])

In [340]:
bgg_id_model(tf.constant('113294'))

<tf.Tensor: shape=(32,), dtype=float32, numpy=
array([-0.00334172,  0.02192419, -0.01051096,  0.02832507,  0.03233638,
       -0.0034107 ,  0.00763838,  0.03502926,  0.03369998,  0.04669107,
       -0.00776196, -0.01835135, -0.02277973,  0.02331457,  0.00459262,
       -0.02693508,  0.01650825,  0.03039703, -0.0297819 , -0.02987233,
        0.01327718, -0.04601475,  0.00446068,  0.00320767,  0.0198052 ,
       -0.04767374, -0.03472478,  0.03473499, -0.02685974,  0.00478349,
        0.01015041, -0.01284589], dtype=float32)>

In [349]:
complexity_model(df.loc[df['bgg_id'].isin([tf.strings.to_number(tf.constant('113294'), 'int64')]), 'complexity'].values)[0]

<tf.Tensor: shape=(32,), dtype=float32, numpy=
array([-2.9008753, -2.9267635, -2.8363547, -2.9385195, -2.907334 ,
       -2.8450298, -2.8425698, -2.9483438, -2.9238315, -2.8313367,
       -2.8765688, -2.8690014, -2.899426 , -2.8613377, -2.891081 ,
       -2.9402444, -2.858494 , -2.943524 , -2.8726249, -2.9515543,
       -2.841623 , -2.9038723, -2.9121706, -2.8895187, -2.9417942,
       -2.8508327, -2.9225035, -2.8357642, -2.8828928, -2.8638933,
       -2.887265 , -2.83921  ], dtype=float32)>

### Fitting and Evaluation

In [196]:
# Instantiate and compile model
deep_model = BGDeepModel(rating_weight=1, retrieval_weight=1)
deep_model.compile(optimizer=Adagrad(0.1))

In [200]:
# Batch and cache the datasets, did not shuffle to keep time order
cached_combined_train = combined_train.batch(16_384).cache()
cached_combined_test = combined_test.batch(8192).cache()

In [201]:
# Training the model
tf.random.set_seed(42)
deep_model.fit(cached_combined_train, epochs=3)

Epoch 1/3
Epoch 2/3
Epoch 3/3


<tensorflow.python.keras.callbacks.History at 0x13c50a43c70>

In [None]:
# Evaluate

## TESTING

In [242]:
# User model
class UserModel(tf.keras.Model):
    
    def __init__(self):
        super().__init__()
        self.user_embedding: tf.keras.Model = user_name_model
        
    def call(self, inputs):
        
        return tf.concat([
            self.user_embedding(inputs['bgg_user_name'])
        ], axis=1)

In [243]:
# Query model
class QueryModel(tf.keras.Model):
    
    def __init__(self):
        super().__init__()
        self.embedding_model = UserModel()
        self.dense_layers = Sequential([
            Dense(64, activation="relu"),
            Dense(32)
        ])
    
    def call(self, inputs):
        feature_embedding = self.embedding_model(inputs)
        return self.dense_layers(feature_embedding)

In [270]:
# Board game model
class BoardGameModel(tf.keras.Model):
    
    def __init__(self):
        super().__init__()
        
        self.bgg_id_embedding: tf.keras.Model = bgg_id_model
        self.complexity_embedding: tf.keras.Model = complexity_model
            
    def call(self, inputs):
        return tf.concat([
            self.bgg_id_embedding(inputs['bgg_id']),
            self.complexity_embedding(inputs['complexity'])
        ], axis=1)

In [276]:
BoardGameModel({'bgg_id': '3', 'complexity': 4.32})

TypeError: __init__() takes 1 positional argument but 2 were given

In [271]:
# Candidate model
class CandidateModel(tf.keras.Model):
    
    def __init__(self):
        super().__init__()
        self.embedding_model = BoardGameModel()
        self.dense_layers = Sequential([
            Dense(64, activation="relu"),
            Dense(32)
        ])
    
    def call(self, inputs):
        feature_embedding = self.embedding_model(inputs)
        return self.dense_layers(feature_embedding)

In [258]:
tensor_candidates = tf.data.Dataset.from_tensor_slices({'bgg_id': df['bgg_id'].astype(str),
                                                        'complexity': df['complexity'].astype('float32')
                                                       })

In [269]:
BoardGameModel(('3', 2.4))

TypeError: __init__() takes 1 positional argument but 2 were given

In [272]:
tensor_candidates.map(CandidateModel())



ValueError: in user code:

    <ipython-input-271-9f1b902dd0ad>:13 call  *
        feature_embedding = self.embedding_model(inputs)
    <ipython-input-270-fbacc1807aae>:13 call  *
        self.complexity_embedding(inputs['complexity'])
    C:\Users\riche\anaconda3\lib\site-packages\tensorflow\python\util\dispatch.py:201 wrapper  **
        return target(*args, **kwargs)
    C:\Users\riche\anaconda3\lib\site-packages\tensorflow\python\ops\array_ops.py:1654 concat
        return gen_array_ops.concat_v2(values=values, axis=axis, name=name)
    C:\Users\riche\anaconda3\lib\site-packages\tensorflow\python\ops\gen_array_ops.py:1220 concat_v2
        _, _, _op, _outputs = _op_def_library._apply_op_helper(
    C:\Users\riche\anaconda3\lib\site-packages\tensorflow\python\framework\op_def_library.py:742 _apply_op_helper
        op = g._create_op_internal(op_type_name, inputs, dtypes=None,
    C:\Users\riche\anaconda3\lib\site-packages\tensorflow\python\framework\func_graph.py:591 _create_op_internal
        return super(FuncGraph, self)._create_op_internal(  # pylint: disable=protected-access
    C:\Users\riche\anaconda3\lib\site-packages\tensorflow\python\framework\ops.py:3477 _create_op_internal
        ret = Operation(
    C:\Users\riche\anaconda3\lib\site-packages\tensorflow\python\framework\ops.py:1974 __init__
        self._c_op = _create_c_op(self._graph, node_def, inputs,
    C:\Users\riche\anaconda3\lib\site-packages\tensorflow\python\framework\ops.py:1815 _create_c_op
        raise ValueError(str(e))

    ValueError: Shape must be rank 1 but is rank 2 for '{{node candidate_model_1/board_game_model_1/concat}} = ConcatV2[N=2, T=DT_FLOAT, Tidx=DT_INT32](candidate_model_1/board_game_model_1/sequential_28/embedding_6/embedding_lookup/Identity_1, candidate_model_1/board_game_model_1/sequential_30/normalization/truediv, candidate_model_1/board_game_model_1/concat/axis)' with input shapes: [32], [32,1], [].


In [262]:
for x in tensor_candidates:
    print(x)

{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'3'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=2.4885>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'9'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=3.1824>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'10'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=2.1592>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'11'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=1.6739>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'12'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=2.3489>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'13'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=2.3235>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'14'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=1.8619>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'16'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, n

{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'2521'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=1.6957>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'2533'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=1.6667>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'2536'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=1.7021>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'2539'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=2.5694>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'2541'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=1.0597>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'2542'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=1.7463>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'2543'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=1.5385>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'2555'>, 'complexity': <tf.Tensor: shape=()

{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'7950'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=3.3333>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'7976'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=1.6154>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'7989'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=1.5>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'8003'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=3.52>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'8017'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=2.0>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'8045'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=3.5105>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'8049'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=3.5>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'8051'>, 'complexity': <tf.Tensor: shape=(), dtype=flo

{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'16986'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=1.5353>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'16991'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=2.5342>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'16992'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=1.2368>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'17022'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=4.3667>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'17025'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=1.1964>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'17027'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=1.1078>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'17030'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=1.5294>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'17053'>, 'complexity': <tf.Tensor: 

{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'26235'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=1.8841>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'26250'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=2.7333>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'26305'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=3.4>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'26315'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=1.2222>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'26372'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=2.7857>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'26472'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=1.2254>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'26474'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=3.2222>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'26566'>, 'complexity': <tf.Tensor: sha

{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'38707'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=3.6>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'38735'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=2.5833>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'38749'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=2.375>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'38765'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=3.7225>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'38778'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=2.9398>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'38786'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=2.9747>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'38797'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=2.5>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'38821'>, 'complexity': <tf.Tensor: shape=(

{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'55250'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=1.0>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'55253'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=1.8411>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'55315'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=1.3043>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'55427'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=2.5425>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'55600'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=3.5>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'55601'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=1.4189>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'55670'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=3.1366>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'55679'>, 'complexity': <tf.Tensor: shape=

{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'73369'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=3.1144>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'73439'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=3.4571>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'73472'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=2.1429>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'73538'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=1.418>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'73543'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=1.2308>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'73650'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=3.1034>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'73655'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=2.4286>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'73664'>, 'complexity': <tf.Tensor: s

{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'98527'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=3.0435>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'98529'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=1.0909>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'98739'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=2.2105>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'98762'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=1.8723>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'98778'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=1.6999>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'98918'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=4.3643>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'99078'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=2.8846>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'99081'>, 'complexity': <tf.Tensor: 

{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'118063'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=2.0987>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'118174'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=2.4359>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'118177'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=1.8182>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'118215'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=1.0>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'118247'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=1.0769>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'118293'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=2.0>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'118337'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=2.8723>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'118385'>, 'complexity': <tf.Tensor

{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'131904'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=2.5588>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'132018'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=3.2553>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'132028'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=3.2647>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'132229'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=1.6667>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'132251'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=1.4222>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'132326'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=3.2>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'132372'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=2.0685>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'132416'>, 'complexity': <tf.Ten

{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'147505'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=1.6154>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'147563'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=1.1429>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'147568'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=1.9048>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'147614'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=1.6154>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'147623'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=1.7727>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'147624'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=2.6875>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'147707'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=1.9259>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'147716'>, 'complexity': <tf.

{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'160432'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=2.3333>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'160434'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=1.2727>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'160436'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=1.375>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'160477'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=1.7054>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'160495'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=3.8333>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'160499'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=1.8679>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'160524'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=2.2857>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'160559'>, 'complexity': <tf.T

{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'170477'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=1.5>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'170537'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=2.2222>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'170561'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=1.9798>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'170568'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=1.0>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'170587'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=1.129>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'170604'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=3.6854>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'170624'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=2.4257>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'170756'>, 'complexity': <tf.Tensor:

{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'178570'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=1.1296>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'178591'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=2.6429>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'178613'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=1.7632>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'178655'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=2.7143>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'178688'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=1.8333>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'178754'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=2.6667>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'178835'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=1.7647>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'178839'>, 'complexity': <tf.

{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'189453'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=2.2083>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'189504'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=1.0>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'189506'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=3.4167>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'189628'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=2.0>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'189660'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=2.5909>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'189664'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=2.6667>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'189829'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=1.75>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'189848'>, 'complexity': <tf.Tensor: 

{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'197070'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=2.5469>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'197071'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=1.0>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'197072'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=2.7333>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'197101'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=2.75>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'197178'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=1.2667>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'197269'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=2.5>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'197285'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=2.6667>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'197320'>, 'complexity': <tf.Tensor: 

{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'206091'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=2.1429>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'206150'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=2.3793>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'206156'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=2.2812>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'206169'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=1.2857>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'206175'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=1.087>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'206206'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=2.75>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'206266'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=1.7692>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'206327'>, 'complexity': <tf.Ten

{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'219832'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=3.2609>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'220133'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=2.5>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'220141'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=2.0>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'220155'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=2.3158>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'220193'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=1.4286>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'220203'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=1.6>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'220224'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=2.5455>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'220258'>, 'complexity': <tf.Tensor: s

{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'230251'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=1.5>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'230253'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=1.8654>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'230262'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=1.0>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'230265'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=2.25>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'230266'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=1.0>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'230267'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=2.9153>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'230273'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=2.0>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'230275'>, 'complexity': <tf.Tensor: shape=

{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'241491'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=1.0>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'241590'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=1.4>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'241724'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=2.2121>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'241760'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=1.6>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'241796'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=1.0>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'241829'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=2.5556>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'241831'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=2.1698>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'241987'>, 'complexity': <tf.Tensor: shap

{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'253284'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=1.2903>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'253344'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=2.3968>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'253368'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=1.4286>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'253372'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=1.9091>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'253379'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=1.4>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'253396'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=2.5>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'253398'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=1.9545>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'253470'>, 'complexity': <tf.Tensor

{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'265256'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=1.0667>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'265260'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=1.875>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'265285'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=2.2>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'265381'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=1.0>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'265399'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=2.0>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'265402'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=2.7273>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'265430'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=2.5>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'265524'>, 'complexity': <tf.Tensor: shape

{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'284639'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=2.0>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'284665'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=1.5>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'284742'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=2.6>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'284751'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=2.2857>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'284760'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=2.2>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'284775'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=2.2143>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'284777'>, 'complexity': <tf.Tensor: shape=(), dtype=float32, numpy=1.8571>}
{'bgg_id': <tf.Tensor: shape=(), dtype=string, numpy=b'284818'>, 'complexity': <tf.Tensor: shap

In [None]:
# Full model
class BGDeepModel3(tfrs.models.Model):
    
    def __init__(self):
        super().__init__()
        self.query_model = QueryModel()
        self.candidate_model = CandidateModel()
        self.retrieval_task = tfrs.tasks.Retrieval(
            metrics=tfrs.metrics.FactorizedTopK(
                candidates=
            )
        )
        
        
        movies.batch(128).map(self.candidate_model)

In [257]:
combined_df.head()

Unnamed: 0,bgg_user_name,bgg_id,bgg_user_rating,year_x,month,user_count,name,year_y,game_type,designer,...,min_time,max_time,category,mechanic,rank,num_votes,avg_rating,stddev_rating,bayes_rating,complexity
0,-=yod@=-,160495,7.5,2015,1,173,ZhanGuo,2014,5497,1229312294,...,60.0,120.0,1050,20802040,509.0,2850.0,7.59899,1.25726,6.86182,3.8333
1,-johnny-,59946,6.0,2015,1,45,Dungeons & Dragons: Castle Ravenloft Board Game,2010,5496,274615913131083944,...,60.0,60.0,102210201010104610241047,2023207226762011202828192015,756.0,8363.0,6.97652,1.43756,6.66732,2.5111
2,-johnny-,166384,7.0,2015,1,45,Spyfall,2014,5498,78199,...,15.0,15.0,10231039107910301081,2073204720282892286628142017,631.0,17970.0,6.89022,1.40803,6.76383,1.2434
3,-mide-,20545,6.0,2015,1,130,Rory's Story Cubes,2005,54985499,6409,...,20.0,20.0,1017,2023207220602027,2375.0,3145.0,6.30261,1.55489,6.00793,1.1329
4,-mide-,145639,7.0,2015,1,130,Coconuts,2013,46655499,69564,...,20.0,20.0,103210891041,26612686,919.0,3630.0,7.03525,1.28321,6.55872,1.0429


In [246]:
df.head()

Unnamed: 0,bgg_id,name,year,game_type,designer,artist,publisher,min_players,max_players,min_age,min_time,max_time,category,mechanic,rank,num_votes,avg_rating,stddev_rating,bayes_rating,complexity
0,3,Samurai,1998,5497,2,11883,"17,133,267,29,7340,7335,41,2973,4617,1391,8291...",2,4,10.0,30.0,60.0,10091035,208020402026284620042002,207.0,14648.0,7.45046,1.18569,7.24774,2.4885
1,9,El Caballero,1998,5497,78,74,2671333,2,4,13.0,90.0,90.0,1020,20802002,2679.0,1374.0,6.46354,1.43462,5.94897,3.1824
2,10,Elfenland,1998,5499,9,74,826768181885233953,2,6,10.0,60.0,60.0,10101097,2041204020812078,1016.0,7989.0,6.71382,1.25368,6.50146,2.1592
3,11,Bohnanza,1997,5499,10,"28004,44242,12035,11507,11901,65041,308,12123,...","8,267,46980,7162,2378,6818,8845,155,5530,6214,...",2,7,13.0,45.0,45.0,100210131026,20402981291520042008,446.0,37752.0,7.03672,1.29093,6.93425,1.6739
4,12,Ra,1999,5497,2,2078911883,"9,34,28620,267,29,23205,2973,8291,9881,42294,3...",2,5,12.0,45.0,60.0,10501082,201229232928292226612004,177.0,18975.0,7.47044,1.33831,7.31246,2.3489


In [254]:
combined_train

<TensorSliceDataset shapes: {bgg_id: (), bgg_user_name: (), bgg_user_rating: (), complexity: (), game_type: (), category: (), mechanic: ()}, types: {bgg_id: tf.string, bgg_user_name: tf.string, bgg_user_rating: tf.float32, complexity: tf.float32, game_type: tf.string, category: tf.string, mechanic: tf.string}>

In [238]:
# Full model
class BGDeepModel2(tfrs.models.Model):
    
    def __init__(self, rating_weight: float, retrieval_weight: float) -> None:
        super().__init__()
        embedding_dimension = 32
        
        # User and board game models
        self.user_model: tf.keras.Model = user_name_model
        self.bgg_id_model: tf.keras.Model = bgg_id_model
        self.complexity_model: tf.keras.Model = complexity_model
        self.game_type_model: tf.keras.Model = game_type_model
        self.category_model: tf.keras.Model = category_model
        self.mechanic_model: tf.keras.Model = mechanic_model
            
        # Model to take in user and board game embeddings and predict ratings
        self.rating_model = Sequential([
            Dense(256, activation="relu"),
            Dense(128, activation="relu"),
            Dense(1)
        ])
        
        # Tasks
        self.rating_task: Layer = rating_task
        self.retrieval_task: Layer = retrieval_task
        
        # Loss weights
        self.rating_weight = rating_weight
        self.retrieval_weight = retrieval_weight
        
    def call(self, features) -> tf.Tensor:
        user_embeddings = self.user_model(features['bgg_user_name'])
        bgg_id_embeddings = self.bgg_id_model(features['bgg_id'])
#         complexity_embeddings = self.complexity_model(features['complexity'])
        game_type_embeddings = self.game_type_model(features['game_type'])
        category_embeddings = self.category_model(features['category'])
        mechanic_embeddings = self.mechanic_model(features['mechanic'])
        
        return (
            user_embeddings,
            bgg_id_embeddings,
#             complexity_embeddings,
            game_type_embeddings,
            category_embeddings,
            mechanic_embeddings,
            self.rating_model(tf.concat([user_embeddings, bgg_id_embeddings, #complexity_embeddings, 
                                        game_type_embeddings, category_embeddings, mechanic_embeddings], axis=1))
        )
    
    def compute_loss(self, features, training=False) -> tf.Tensor:
        ratings = features['bgg_user_rating']
        user_embeddings, bgg_id_embeddings, game_type_embeddings, category_embeddings, mechanic_embeddings, rating_predictions = self(features)
        #  complexity_embeddings,
        
        # Compute loss for each task
        rating_loss = self.rating_task(labels=ratings, predictions=rating_predictions)
        retrieval_loss = self.retrieval_task(user_embeddings, bgg_id_embeddings, #complexity_embeddings, 
                                             game_type_embeddings, category_embeddings, mechanic_embeddings)
        
        #Combine them using the loss weights
        return (self.rating_weight * rating_loss + self.retrieval_weight * retrieval_loss)

In [239]:
# Instantiate and compile model
deep_model_test = BGDeepModel2(rating_weight=1, retrieval_weight=1)
deep_model_test.compile(optimizer=Adagrad(0.1))

In [240]:
# Training the model
tf.random.set_seed(42)
deep_model_test.fit(cached_combined_train, epochs=3)

Epoch 1/3


ValueError: in user code:

    C:\Users\riche\anaconda3\lib\site-packages\tensorflow\python\keras\engine\training.py:806 train_function  *
        return step_function(self, iterator)
    C:\Users\riche\anaconda3\lib\site-packages\tensorflow_recommenders\tasks\retrieval.py:149 call  *
        loss = self._loss(y_true=labels, y_pred=scores, sample_weight=sample_weight)
    C:\Users\riche\anaconda3\lib\site-packages\tensorflow\python\keras\losses.py:149 __call__  **
        losses = ag_call(y_true, y_pred)
    C:\Users\riche\anaconda3\lib\site-packages\tensorflow\python\keras\losses.py:253 call  **
        return ag_fn(y_true, y_pred, **self._fn_kwargs)
    C:\Users\riche\anaconda3\lib\site-packages\tensorflow\python\util\dispatch.py:201 wrapper
        return target(*args, **kwargs)
    C:\Users\riche\anaconda3\lib\site-packages\tensorflow\python\keras\losses.py:1535 categorical_crossentropy
        return K.categorical_crossentropy(y_true, y_pred, from_logits=from_logits)
    C:\Users\riche\anaconda3\lib\site-packages\tensorflow\python\util\dispatch.py:201 wrapper
        return target(*args, **kwargs)
    C:\Users\riche\anaconda3\lib\site-packages\tensorflow\python\keras\backend.py:4687 categorical_crossentropy
        target.shape.assert_is_compatible_with(output.shape)
    C:\Users\riche\anaconda3\lib\site-packages\tensorflow\python\framework\tensor_shape.py:1134 assert_is_compatible_with
        raise ValueError("Shapes %s and %s are incompatible" % (self, other))

    ValueError: Shapes (None, None) and (32, None, 32) are incompatible


In [323]:
cached_rating_test

<CacheDataset shapes: ((None, 2), (None,)), types: (tf.string, tf.float32)>

In [55]:
df.head()

Unnamed: 0,bgg_id,name,year,game_type,designer,artist,publisher,min_players,max_players,min_age,min_time,max_time,category,mechanic,rank,num_votes,avg_rating,stddev_rating,bayes_rating,complexity
0,3,Samurai,1998,5497,2,11883,"17,133,267,29,7340,7335,41,2973,4617,1391,8291...",2,4,10.0,30.0,60.0,10091035,208020402026284620042002,207.0,14648.0,7.45046,1.18569,7.24774,2.4885
1,9,El Caballero,1998,5497,78,74,2671333,2,4,13.0,90.0,90.0,1020,20802002,2679.0,1374.0,6.46354,1.43462,5.94897,3.1824
2,10,Elfenland,1998,5499,9,74,826768181885233953,2,6,10.0,60.0,60.0,10101097,2041204020812078,1016.0,7989.0,6.71382,1.25368,6.50146,2.1592
3,11,Bohnanza,1997,5499,10,"28004,44242,12035,11507,11901,65041,308,12123,...","8,267,46980,7162,2378,6818,8845,155,5530,6214,...",2,7,13.0,45.0,45.0,100210131026,20402981291520042008,446.0,37752.0,7.03672,1.29093,6.93425,1.6739
4,12,Ra,1999,5497,2,2078911883,"9,34,28620,267,29,23205,2973,8291,9881,42294,3...",2,5,12.0,45.0,60.0,10501082,201229232928292226612004,177.0,18975.0,7.47044,1.33831,7.31246,2.3489


In [181]:
df.loc[df['bgg_id'].isin(np.array([3,9])), 'complexity'].values

array([2.4885, 3.1824])

In [179]:
np.array(['42','43']).astype(int)

array([42, 43])

In [162]:
test = tf.constant('42').numpy().decode("utf-8")

In [163]:
type(test)

str

In [257]:
def numpy_arr_int(x):
    return np.array(x)

In [240]:
np.array(tf.reshape(tf.constant(['3','4','4','5']), [-1]))

array([b'3', b'4', b'4', b'5'], dtype=object)

In [268]:
df.loc[df['bgg_id'].isin([113294]), 'complexity']

2867    1.4813
Name: complexity, dtype: float64

In [308]:
tf.print(rating_test)

<ZipDataset shapes: ((2,), ()), types: (tf.string, tf.float32)>


In [354]:
count = 0
for element in cached_rating_test:
    if count <=10:
        test = BGDeepModel(rating_weight=1, retrieval_weight=1)(element)
        print(type(test))
        print(test)
#         print(df.loc[df['bgg_id'].isin([test]), 'complexity'].values)
        print('----------')
        count += 1
    else:
        break

InvalidArgumentError: ConcatOp : Expected concatenating dimensions in the range [-1, 1), but got 1 [Op:ConcatV2] name: concat

In [328]:
count = 0
for element in cached_rating_test:
    if count <=10:
        test = user_model(element[0][0][1])
        print(type(test))
        print(test)
#         print(df.loc[df['bgg_id'].isin([test]), 'complexity'].values)
        print('----------')
        count += 1
    else:
        break

<class 'tensorflow.python.framework.ops.EagerTensor'>
tf.Tensor(
[-0.00052718  0.0358969  -0.04152752  0.03461525 -0.03203517  0.00822916
 -0.04941098  0.00573819  0.01563492  0.00611695 -0.00887588 -0.01840105
 -0.00715303  0.00944015  0.02458178  0.04004807 -0.02326866 -0.03996634
 -0.0395707  -0.04318609  0.03783612  0.03712589  0.01163585 -0.03573905
  0.02580333 -0.04680852 -0.00112073 -0.00693706 -0.00046236  0.02940014
 -0.01455249 -0.0289237 ], shape=(32,), dtype=float32)
----------
<class 'tensorflow.python.framework.ops.EagerTensor'>
tf.Tensor(
[-0.00154072 -0.00616825  0.0185701  -0.04434942 -0.04417172  0.04234744
 -0.00449486 -0.03945597 -0.03224485  0.03075543  0.012307   -0.01006582
  0.00221311 -0.02897896  0.01617697 -0.04119324  0.00550137 -0.01469414
  0.03515703 -0.01047357 -0.00142251  0.02032924 -0.02711805  0.03532335
 -0.00902539  0.02516544 -0.00799586 -0.00352271  0.00202223  0.01303159
 -0.04056958  0.01918464], shape=(32,), dtype=float32)
----------
<class '

In [210]:
# Check the format of train and test datasets
# df.loc[df['bgg_id'].isin(features[0][0].numpy().decode("utf-8").astype(int)), 'complexity'].values
count = 0
for element in rating_test.as_numpy_iterator():
    if count <=10:
        test = int(element[0][0].decode("utf-8"))
        print(type(test))
        print(test)
        print(df.loc[df['bgg_id'].isin([test]), 'complexity'].values)
        print('----------')
        count += 1
    else:
        break

<class 'int'>
113294
[1.4813]
----------
<class 'int'>
146021
[3.3192]
----------
<class 'int'>
214880
[4.042]
----------
<class 'int'>
39856
[1.2203]
----------
<class 'int'>
92828
[1.1943]
----------
<class 'int'>
218603
[2.2785]
----------
<class 'int'>
246784
[2.2429]
----------
<class 'int'>
247160
[1.]
----------
<class 'int'>
24181
[3.571]
----------
<class 'int'>
35497
[1.4795]
----------
<class 'int'>
132372
[2.0685]
----------


We want to explore more models which are able to utilize the rich features which our datasets possess, also to give better recommendations.

## Content-based Recommender

In content-based filtering, the features of the dataframe are broken down into "feature baskets". These are the characteristics that represent a board game. The main idea is that if the user likes certain categories, mechanics, or types of a certain board game, then it is likely the user likes another board game that has similar characteristics. 

In [None]:
df.shape

In [None]:
df.head()

In [None]:
test = tuple(df['bgg_id'])[:3]