In [None]:
import pprint

import numpy as np
import tensorflow as tf
import tensorflow_datasets as tfds

import os
import tempfile

from typing import Dict, Text



In [None]:
!pip install -q tensorflow-recommenders
!pip install -q --upgrade tensorflow-datasets
!pip install -q tensorflow-ranking

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m96.2/96.2 kB[0m [31m8.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m150.4/150.4 kB[0m [31m11.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m17.0/17.0 MB[0m [31m51.8 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
import tensorflow_recommenders as tfrs

In [None]:
#create business data set
import csv
import os
import json
import pandas as pd

bcols = ['business_id']
bfile_name = './yelpdata/business.csv'

# Parse with the csv module instead of str.split(","): quoted fields that
# contain commas or line breaks can no longer shift or corrupt the id column.
# newline='' is what the csv module expects when it is given a file object.
with open(bfile_name, encoding='latin-1', newline='') as f:
    reader = csv.reader(f)
    next(reader, None)  # skip the header row (no error if the file is empty)
    # Column 1 is the business id; blank lines are skipped rather than
    # raising IndexError.
    bdata = [[row[1]] for row in reader if row]

bdf = pd.DataFrame(data=bdata, columns=bcols)
bdf.dtypes

business_id    object
dtype: object

In [None]:
# Build the business-id vocabulary.
# The DataFrame is wrapped in a tf.data pipeline whose elements are dicts
# carrying only the "business_id" feature.
bds = tf.data.Dataset.from_tensor_slices(dict(bdf))
business = bds.map(lambda row: {"business_id": row["business_id"]})

# Read the ids back in batches of 1,000, join the batches and de-duplicate.
business_id_batches = business.batch(1_000).map(lambda batch: batch["business_id"])
unique_business_ids = np.unique(np.concatenate(list(business_id_batches)))

In [None]:
#create review dataset
import csv
import os
import json
from itertools import islice

cols = ['user_id', 'business_id', 'stars']
file_name = './yelpdata/review.csv'
# Number of reviews to keep; the split cell's `total` is expected to match it.
max_reviews = 200000

# Parse with the csv module instead of str.split(","): quoted fields that
# contain commas or line breaks can no longer shift or corrupt the columns.
# newline='' is what the csv module expects when it is given a file object.
with open(file_name, encoding='latin-1', newline='') as f:
    reader = csv.reader(f)
    next(reader, None)  # skip the header row (no error if the file is empty)
    non_blank_rows = (row for row in reader if row)
    # Stop after the first `max_reviews` records instead of loading the whole
    # file and trimming afterwards. Columns 1-3 are user id, business id, stars.
    data = [[row[1], row[2], row[3]] for row in islice(non_blank_rows, max_reviews)]

df = pd.DataFrame(data=data, columns=cols)
# The ratings are read as text; store them as float32 for TensorFlow.
df['stars'] = df['stars'].astype(np.float32)
df.head ()

Unnamed: 0,user_id,business_id,stars
0,XQfwVwDr-v0ZS3_CbbE5Xw,mh_-eMZ6K5RLWhZyISBhwA,3.0
1,7ATYjTIgM3jUlt4UM3IypQ,OyoGAe7OKpv6SyGZT5g77Q,5.0
2,YjUWPpI6HXG530lwP-fb2A,8g_iMtfSiwikVnbP2etR0A,3.0
3,kxX2SOes4o-D3ZQBkiMRfA,_7bHUi9Uuf5__HHc_Q8guQ,5.0
4,e4Vwtrqf-wpJfwesgvdgxQ,bcjbaE6dDog4jkNY91ncLQ,4.0


In [None]:
# Preview the first rows of the review frame (repeats the preview shown by the previous cell).
df.head()

Unnamed: 0,user_id,business_id,stars
0,XQfwVwDr-v0ZS3_CbbE5Xw,mh_-eMZ6K5RLWhZyISBhwA,3.0
1,7ATYjTIgM3jUlt4UM3IypQ,OyoGAe7OKpv6SyGZT5g77Q,5.0
2,YjUWPpI6HXG530lwP-fb2A,8g_iMtfSiwikVnbP2etR0A,3.0
3,kxX2SOes4o-D3ZQBkiMRfA,_7bHUi9Uuf5__HHc_Q8guQ,5.0
4,e4Vwtrqf-wpJfwesgvdgxQ,bcjbaE6dDog4jkNY91ncLQ,4.0


In [None]:
# Convert the review DataFrame into a tf.data.Dataset of per-row feature dicts.
ds = tf.data.Dataset.from_tensor_slices(dict(df))

# Keep the two id features and expose the star rating as "user_rating" (float32).
ratings = ds.map(lambda row: dict(
    business_id=row["business_id"],
    user_id=row["user_id"],
    user_rating=tf.cast(row['stars'], tf.float32),
))


In [None]:
# Build the user-id vocabulary: read the ids in batches of 1,000,
# join the batches and de-duplicate.
user_id_batches = ratings.batch(1_000).map(lambda batch: batch["user_id"])
unique_user_ids = np.unique(np.concatenate(list(user_id_batches)))

In [None]:
tf.random.set_seed(42)

# Split between train and test sets (80% / 20%).
# Earlier hard-coded settings, kept for reference:
#total=6990280
#traincount=5560000
#testcount=1430280

# Derive the sizes from the DataFrame that backs `ratings`, so the split stays
# correct if a different number of reviews is loaded. With 200000 rows this
# gives 160000 training and 40000 test examples.
total = len(df)
traincount = total * 4 // 5  # integer arithmetic avoids float rounding
testcount = total - traincount

# A buffer as large as the dataset gives a full shuffle; the fixed seed and
# reshuffle_each_iteration=False keep the order identical on every pass, so
# take/skip below always produce the same, non-overlapping partitions.
shuffled = ratings.shuffle(total, seed=42, reshuffle_each_iteration=False)

train = shuffled.take(traincount)
test = shuffled.skip(traincount).take(testcount)


In [None]:
# Pretty-print one element of the test split to inspect its features.
for sample in test.take(1):
  pprint.pprint(sample)

{'business_id': <tf.Tensor: shape=(), dtype=string, numpy=b'zLdJ7ByekqMA2_ApDcOwVQ'>,
 'user_id': <tf.Tensor: shape=(), dtype=string, numpy=b'plobBsTtVUODb353xxFT_g'>,
 'user_rating': <tf.Tensor: shape=(), dtype=float32, numpy=5.0>}


In [None]:
class RankingModel(tf.keras.Model):
  """Predicts a rating for a (user, business) pair.

  Each id is looked up in its own embedding table, the two embeddings are
  concatenated, and a stack of dense layers reduces them to one value per
  pair. The vocabularies are the notebook-level ``unique_user_ids`` and
  ``unique_business_ids`` arrays.
  """

  def __init__(self):
    super().__init__()
    embed_dim = 32

    def id_tower(vocabulary):
      # String id -> integer index -> embedding vector.
      # The table has one row more than the vocabulary (presumably for
      # StringLookup's out-of-vocabulary index -- TODO confirm).
      return tf.keras.Sequential([
        tf.keras.layers.StringLookup(vocabulary=vocabulary, mask_token=None),
        tf.keras.layers.Embedding(len(vocabulary) + 1, embed_dim),
      ])

    # Embedding towers, one for users and one for businesses.
    self.user_embeddings = id_tower(unique_user_ids)
    self.business_embeddings = id_tower(unique_business_ids)

    # Prediction head: two ReLU layers followed by a single linear output unit.
    prediction_layers = [
      tf.keras.layers.Dense(256, activation="relu"),
      tf.keras.layers.Dense(64, activation="relu"),
      tf.keras.layers.Dense(1),
    ]
    self.ratings = tf.keras.Sequential(prediction_layers)

  def call(self, inputs):
    """Return the predicted rating for each pair.

    Args:
      inputs: a ``(user_id, business_id)`` pair of id tensors.

    Returns:
      The output of the dense prediction head applied to the concatenated
      user and business embeddings.
    """
    user_id, business_id = inputs

    pair_embedding = tf.concat(
        [self.user_embeddings(user_id), self.business_embeddings(business_id)],
        axis=1)

    return self.ratings(pair_embedding)

In [None]:
# Stand-alone ranking task: mean squared error as the loss, RMSE as the metric.
# NOTE(review): YelpBModel constructs its own Ranking task in __init__, so this
# module-level `task` does not appear to be used by the model -- confirm.
rating_loss = tf.keras.losses.MeanSquaredError()
rating_metrics = [tf.keras.metrics.RootMeanSquaredError()]
task = tfrs.tasks.Ranking(loss=rating_loss, metrics=rating_metrics)

In [None]:
class YelpBModel(tfrs.models.Model):
  """TFRS model that wraps ``RankingModel`` with a ranking task.

  The ranking model produces rating predictions; the task turns predictions
  and labels into an MSE loss and tracks RMSE as a metric.
  """

  def __init__(self):
    super().__init__()
    self.ranking_model: tf.keras.Model = RankingModel()
    self.task: tf.keras.layers.Layer = tfrs.tasks.Ranking(
      loss = tf.keras.losses.MeanSquaredError(),
      metrics=[tf.keras.metrics.RootMeanSquaredError()]
    )

  def call(self, features: Dict[Text, tf.Tensor]) -> tf.Tensor:
    """Return predicted ratings.

    Args:
      features: dict holding at least the "user_id" and "business_id" tensors.
    """
    return self.ranking_model(
        (features["user_id"], features["business_id"]))

  def compute_loss(self, features: Dict[Text, tf.Tensor], training=False) -> tf.Tensor:
    """Compute the task loss for one batch.

    Args:
      features: dict with "user_id", "business_id" and the "user_rating" label.
      training: accepted for interface compatibility; not used here.

    Returns:
      The loss produced by the ranking task.

    Raises:
      KeyError: if "user_rating" is missing from ``features``.
    """
    labels = features["user_rating"]

    # Build a label-free copy instead of pop()-ing the label, so the caller's
    # dict is left intact while the model still receives exactly the same keys
    # as before.
    model_inputs = {
        name: value for name, value in features.items() if name != "user_rating"
    }

    rating_predictions = self(model_inputs)

    # The task computes the loss and the metrics.
    return self.task(labels=labels, predictions=rating_predictions)

In [None]:
# Instantiate the model and attach an Adagrad optimizer (learning rate 0.1).
model = YelpBModel()
adagrad = tf.keras.optimizers.Adagrad(learning_rate=0.1)
model.compile(optimizer=adagrad)

In [None]:
# Shuffle and batch the training split, batch the test split, and cache both.
train_batches = train.shuffle(100_000).batch(8192)
cached_train = train_batches.cache()
cached_test = test.batch(4096).cache()

# Train for six epochs, then report the metrics on the held-out split.
model.fit(cached_train, epochs=6)
model.evaluate(cached_test, return_dict=True)

Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6


{'root_mean_squared_error': 1.3369395732879639,
 'loss': 1.8422843217849731,
 'regularization_loss': 0,
 'total_loss': 1.8422843217849731}

In [None]:
# Score one known user against a list of candidate businesses.
test_business_id = [b'zLdJ7ByekqMA2_ApDcOwVQ']
query_user = np.array([b'plobBsTtVUODb353xxFT_g'])

test_ratings = {
    candidate: model({
        "user_id": query_user,
        "business_id": np.array([candidate]),
    })
    for candidate in test_business_id
}

# Print the candidates from highest to lowest predicted score, rounded.
print("Ratings:")
ranked = sorted(test_ratings.items(), key=lambda pair: pair[1], reverse=True)
for business_id, score in ranked:
  print(f"{business_id}: {tf.round(score)}")

Ratings:
b'zLdJ7ByekqMA2_ApDcOwVQ': [[4.]]


In [None]:

# Write the trained model to disk as a SavedModel so it can be reloaded later.
export_path = "drive/MyDrive/aimldatafiles/businessexport"
tf.saved_model.save(model, export_path)



In [None]:
import tensorflow as tf
import numpy as np

# Reload the exported SavedModel and score one (user, business) pair with it.
loaded = tf.saved_model.load("drive/MyDrive/aimldatafiles/businessexport")

query = {
    "user_id": np.array([b'plobBsTtVUODb353xxFT_g']),
    "business_id": [b'zLdJ7ByekqMA2_ApDcOwVQ'],
}
loaded(query).numpy()

array([[3.8969119]], dtype=float32)