In [927]:
from supabase import create_client, Client
import pandas as pd
import tensorflow as tf

from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.preprocessing import StandardScaler
import numpy as np
import h3
import datetime

# Initialize Supabase client
url = "SUPABSE_URL"
key = "SUPABASE_KEY"

supabase: Client = create_client(url, key)

# Query user data
user_data = supabase.table("users").select("*").execute()
users_df = pd.DataFrame(user_data.data)

# Query post data
post_data = supabase.table("posts").select("*").execute()
posts_df = pd.DataFrame(post_data.data)

# Query post data
collab_data = supabase.table("active_collabs").select("title, owner_uid, team_uids").execute()
collabs_df = pd.DataFrame(collab_data.data)

#need to assign titles of collabs to all of their corresponding users' profiles in users_df
# Ensure 'team_uids' is a list (if it's not already in that format)
collabs_df['team_uids'] = collabs_df['team_uids'].apply(lambda x: x if isinstance(x, list) else [x])

# Create a dictionary to store all the collabs titles for each user
user_collabs = {}

# Iterate through each row in collabs_df to map collab titles to users
for _, row in collabs_df.iterrows():
    collab_title = row['title']
    owner_uid = row['owner_uid']
    team_uids = row['team_uids']

    # Add the collab title to the owner's list of collaborations
    if owner_uid not in user_collabs:
        user_collabs[owner_uid] = []
    user_collabs[owner_uid].append(collab_title)
    
    # Add the collab title to each team member's list of collaborations
    for uid in team_uids:
        if uid not in user_collabs:
            user_collabs[uid] = []
        user_collabs[uid].append(collab_title)

# Create a new column in users_df for the collab titles
users_df['project_titles'] = users_df['uid'].map(user_collabs)

# Fill any NaN values (users without collaborations) with an empty list
users_df['project_titles'] = users_df['project_titles'].apply(lambda x: x if isinstance(x, list) else [])

# Query interaction data
interaction_data = supabase.table("user_post_interactions").select("*").execute()
interactions_df = pd.DataFrame(interaction_data.data)

# Query post comments data
comments_data = supabase.table("posts_comments").select("*").execute()
comments_df = pd.DataFrame(comments_data.data)

# need to assign posts_df['comments'] with lists of all of the comments that have posts_df['po_id'] in comments_df['po_id']
# Group comments by 'po_id' and aggregate them into lists
comments_grouped = comments_df.groupby('po_id')['content'].apply(list).to_dict()

# Map the comments to the corresponding posts in posts_df based on 'po_id'
posts_df['comments'] = posts_df['po_id'].map(comments_grouped)

# Fill any NaN values (posts without comments) with an empty list
posts_df['comments'] = posts_df['comments'].apply(lambda x: x if isinstance(x, list) else [])

# Convert created_at timestamp to hour of day and day of week
interactions_df['hour_of_day'] = pd.to_datetime(interactions_df['interaction_time']).dt.hour 
interactions_df['day_of_week'] = pd.to_datetime(interactions_df['interaction_time']).dt.weekday # 0 = Monday, 6 = Sunday

In [928]:
# Replace None values in passions, project titles, and bio with empty strings
users_df['passions'] = users_df['passions'].apply(lambda x: ' '.join(x) if isinstance(x, list) else '')
users_df['project_titles'] = users_df['project_titles'].apply(lambda x: ' '.join(x) if isinstance(x, list) else '')
users_df['bio'] = users_df['bio'].apply(lambda x: str(x) if x else '')

# Combine all text fields into a single list for fitting the tokenizer
all_text = users_df['passions'].tolist() + users_df['project_titles'].tolist() + users_df['bio'].tolist()

# Fit tokenizer on all text once
tokenizer = Tokenizer(num_words=1000)  # Adjust num_words as needed
tokenizer.fit_on_texts(all_text)

# Convert passions to sequences
passions_sequences = tokenizer.texts_to_sequences(users_df['passions'])
passions_padded = pad_sequences(passions_sequences, maxlen=10)  # Adjust maxlen based on your data

# Convert project titles to sequences
project_titles_sequences = tokenizer.texts_to_sequences(users_df['project_titles'])
project_titles_padded = pad_sequences(project_titles_sequences, maxlen=10)

# Convert bio to sequences
bio_sequences = tokenizer.texts_to_sequences(users_df['bio'])
bio_padded = pad_sequences(bio_sequences, maxlen=50)

# You can now repeat this process for your posts data if needed
all_text = posts_df['title'].tolist() + posts_df['description'].tolist()

# Fit tokenizer on all text once
tokenizer = Tokenizer(num_words=1000)  # Adjust num_words as needed
tokenizer.fit_on_texts(all_text)

# Process title and description as text (posts)
title_sequences = tokenizer.texts_to_sequences(posts_df['title'])
title_padded = pad_sequences(title_sequences, maxlen=10)  # Adjust maxlen for title

description_sequences = tokenizer.texts_to_sequences(posts_df['description'])
description_padded = pad_sequences(description_sequences, maxlen=100)  # Adjust maxlen for description

comments_sequences = tokenizer.texts_to_sequences(posts_df['comments'])
comments_padded = pad_sequences(comments_sequences, maxlen=100)  # Adjust maxlen for description

In [929]:
from shapely.geometry import Point
from shapely.wkt import loads as load_wkt
from shapely import wkb

# Ensure that lat_long is either Point object or default Point
users_df['lat_long'] = users_df['lat_long'].apply(lambda wkb_hex: wkb.loads(bytes.fromhex(wkb_hex)) if pd.notnull(wkb_hex) else Point(1, 1))

# Now extract latitude and longitude from the Point objects
users_df['latitude'] = users_df['lat_long'].apply(lambda loc: loc.y if loc else None)
users_df['longitude'] = users_df['lat_long'].apply(lambda loc: loc.x if loc else None)

# Finally, convert latitude and longitude to H3 index
users_df['h3_location'] = users_df.apply(
    lambda row: h3.latlng_to_cell(row['latitude'], row['longitude'], 8) if pd.notnull(row['latitude']) and pd.notnull(row['longitude']) else None,
    axis=1
)

In [930]:
# Calculate post length (number of characters in the description)
posts_df['post_length'] = posts_df['description'].apply(len)
posts_df['vote_count'] = posts_df['votes'].apply(len)
posts_df['upvote_count'] = posts_df['upvotes']-posts_df['downvotes']
posts_df['view_count'] = posts_df['views'].apply(len)
posts_df['comment_count'] = posts_df['comments'].apply(len)
posts_df['avg_time_viewed'] = posts_df['view_lengths'].apply(lambda x: sum(x)/len(x) if len(x) > 0 else 0)


# Normalize numerical features (vote count, upvote count, view count, post length, comment count, avg_time_viewed)
scaler = StandardScaler()

posts_df[['post_length', 'vote_count', 'upvote_count', 'view_count', 'comment_count', 'avg_time_viewed']] = scaler.fit_transform(
    posts_df[['post_length', 'vote_count', 'upvote_count', 'view_count', 'comment_count', 'avg_time_viewed']]
)

In [931]:
# Count behavioral features (viewed, upvoted, and commented posts) for each user
viewed_posts_count = interactions_df[interactions_df['interaction_type'] == 'view'].groupby('user_id')['post_id'].count()
upvoted_posts_count = interactions_df[interactions_df['interaction_type'] == 'upvote'].groupby('user_id')['post_id'].count()
commented_posts_count = interactions_df[interactions_df['interaction_type'] == 'comment'].groupby('user_id')['post_id'].count()

# Map the interaction counts to the corresponding users in users_df
users_df['viewed_posts'] = users_df['uid'].map(viewed_posts_count)
users_df['upvoted_posts'] = users_df['uid'].map(upvoted_posts_count)
users_df['commented_posts'] = users_df['uid'].map(commented_posts_count)

# Fill NaN values with 0 for users with no interactions
users_df[['viewed_posts', 'upvoted_posts', 'commented_posts']] = users_df[['viewed_posts', 'upvoted_posts', 'commented_posts']].fillna(0)

In [932]:
import random

def generate_negative_samples(user_id, all_post_ids, positive_post_ids, num_negatives=5):
    # Get the set of post IDs the user has NOT interacted with
    negative_post_ids = list(set(all_post_ids) - set(positive_post_ids))
    
    # Randomly sample negative examples
    sampled_negatives = random.sample(negative_post_ids, min(num_negatives, len(negative_post_ids)))
    return sampled_negatives

# Generate the dataset with positive and negative samples
all_post_ids = posts_df['po_id'].tolist()
interaction_dataset_with_negatives = []

for user_id, group in interactions_df.groupby('user_id'):
    # Collect all post IDs the user has interacted with
    positive_post_ids = group['post_id'].tolist()
    
    # Generate negative samples for the user
    negative_samples = generate_negative_samples(user_id, all_post_ids, positive_post_ids)
    
    # Add positive interactions to the dataset
    for _, row in group.iterrows():
        interaction_dataset_with_negatives.append({
            'user_id': row['user_id'],
            'post_id': row['post_id'],
            'interaction_type': row['interaction_type'],  # "view", "comment", "upvote"
            'view_duration_secs': row['view_duration_secs'],
            'comment_length': row['comment_length'],
            'hour_of_day': row['hour_of_day'], 
            'day_of_week': row['day_of_week']
        })
    
    # Add negative samples to the dataset
    for post_id in negative_samples:
        interaction_dataset_with_negatives.append({
            'user_id': user_id,
            'post_id': post_id,
            'interaction_type': 'negative',  # Label as "negative"
            'view_duration_secs': 0,  # No view duration for negative samples
            'comment_length': 0,  # No comment length for negative samples
            'hour_of_day': None, # No hour_of_day for negative samples 
            'day_of_week': None # No day_of_week for negative samples
        })

In [933]:
# Extract additional interaction features such as view duration and comment length
interaction_features = interactions_df.copy()

# Calculate comment length (assuming 'comment_text' contains the comment content)
interaction_features['comment_length'] = interaction_features['comment_length'].apply(lambda x: x if pd.notnull(x) else 0)

# Fill NaN values with 0 for interaction-specific features
interaction_features[['view_duration_secs', 'comment_length']] = interaction_features[['view_duration_secs', 'comment_length']].fillna(0)

In [935]:
# Convert DataFrames to TensorFlow Datasets for users
user_dataset = tf.data.Dataset.from_tensor_slices({
    "user_id": users_df["uid"].values,
    "passions": passions_padded,  
    "project_titles": project_titles_padded,  # Add project titles
    "bio": bio_padded,  # Add bio
    "h3_location": users_df["h3_location"].values,  
    "created_at": users_df["created_at"].values,  
    "viewed_posts": users_df["viewed_posts"].values,  # Behavioral data
    "upvoted_posts": users_df["upvoted_posts"].values,
    "commented_posts": users_df["commented_posts"].values,
})

# Convert DataFrames to TensorFlow Datasets for posts
post_dataset = tf.data.Dataset.from_tensor_slices({
    "post_id": posts_df["po_id"].values,
    #"title": posts_df["title"].values,
    #"description": posts_df["description"].values,
    "title": title_padded,
    "description": description_padded,
    "post_length": posts_df["post_length"].values,  # Add post length
    "vote_count": posts_df["vote_count"].values,
    "upvote_count": posts_df["upvote_count"].values,
    "view_count": posts_df["view_count"].values,
    "comment_count": posts_df["comment_count"].values,  # Add comment count
    "comments": comments_padded,  # Add comments
    "avg_time_viewed": posts_df["avg_time_viewed"].values,  # Add avg time viewed
})

# Convert the interaction dataset (with negative samples) into TensorFlow Dataset
interaction_df = pd.DataFrame(interaction_dataset_with_negatives)
interaction_dataset = tf.data.Dataset.from_tensor_slices({
    "user_id": interaction_df["user_id"].values,
    "post_id": interaction_df["post_id"].values,
    "interaction_type": interaction_df["interaction_type"].values,#view,comment,or upvote
    "view_duration_secs": interaction_df["view_duration_secs"].values, #error is caused by this line even though there are no nonetypes in it
    "comment_length": interaction_df["comment_length"].values,
    "hour_of_day": interaction_df["hour_of_day"].values, # Add hour of day 
    "day_of_week": interaction_df["day_of_week"].values, # Add day of week
})
#list(interaction_dataset.as_numpy_iterator())

In [936]:
class UserTower(tf.keras.Model):
    def __init__(self):
        super().__init__()
        self.passion_embedding = tf.keras.layers.Embedding(input_dim=5000, output_dim=64)
        self.project_title_embedding = tf.keras.layers.Embedding(input_dim=5000, output_dim=64)
        self.bio_embedding = tf.keras.layers.Embedding(input_dim=5000, output_dim=64)

        # Embeddings for hour_of_day and day_of_week
        self.hour_of_day_embedding = tf.keras.layers.Embedding(input_dim=24, output_dim=8)  # 24 hours in a day
        self.day_of_week_embedding = tf.keras.layers.Embedding(input_dim=7, output_dim=8)   # 7 days in a week
        self.created_at_embedding = tf.keras.layers.Embedding(input_dim=5000, output_dim=64)   # 7 days in a week

        # Behavioral data layers
        self.behavioral_dense = tf.keras.Sequential([
            tf.keras.layers.Dense(32, activation="relu"),
            tf.keras.layers.Dense(16)
        ])
        
        self.location_dense = tf.keras.Sequential([
            tf.keras.layers.Dense(32, activation="relu"),
            tf.keras.layers.Dense(16)
        ])
        
        self.final_dense = tf.keras.Sequential([
            tf.keras.layers.Dense(64, activation="relu"),
            tf.keras.layers.Dense(32)
        ])

    def build(self, input_shape):
        # Build embeddings and numerical_dense based on individual input features
        self.passion_embedding.build(input_shape['passions'])
        self.project_title_embedding.build(input_shape['project_titles'])
        self.bio_embedding.build(input_shape['bio'])

        # Create an input shape for the concatenated numerical features (since these are individually expanded)
        behavioral_input_shape = (input_shape['viewed_posts'][0], 3)  # Assuming 6 numerical features
        self.behavioral_dense.build(behavioral_input_shape)

        location_input_shape = (input_shape['h3_location'][0], 1)  # Assuming 6 numerical features
        self.location_dense.build(location_input_shape)

        super(UserTower, self).build(input_shape)

    def call(self, inputs):
        passion_embedding = self.passion_embedding(inputs["passions"])
        project_title_embedding = self.project_title_embedding(inputs["project_titles"])
        bio_embedding = self.bio_embedding(inputs["bio"])
    
	    # Embed hour_of_day and day_of_week 
        #hour_of_day_embedding = self.hour_of_day_embedding(inputs["hour_of_day"]) 
        #day_of_week_embedding = self.day_of_week_embedding(inputs["day_of_week"])
        created_at_embedding = self.created_at_embedding(inputs["created_at"]) 
        created_at_embedding = tf.expand_dims(created_at_embedding, axis=1)  # Expand to Shape: [?, 1, 64]
        
        # Combine embeddings for behavioral features
        behavioral_features = tf.concat([
            tf.expand_dims(inputs["viewed_posts"], axis=1), 
            tf.expand_dims(inputs["upvoted_posts"], axis=1), 
            tf.expand_dims(inputs["commented_posts"], axis=1)
        ], axis=1)
        behavioral_embedding = self.behavioral_dense(behavioral_features)  # Shape: [?, 16]
        behavioral_embedding = tf.keras.layers.Dense(64)(behavioral_embedding)  # Project to Shape: [?, 64]
        behavioral_embedding = tf.expand_dims(behavioral_embedding, axis=1)  # Expand to Shape: [?, 1, 64]
    
        # Embed location
        location_embedding = self.location_dense(tf.expand_dims(inputs["h3_location"], -1))  # Shape: [?, 16]
        location_embedding = tf.keras.layers.Dense(64)(location_embedding)  # Project to Shape: [?, 64]
        location_embedding = tf.expand_dims(location_embedding, axis=1)  # Expand to Shape: [?, 1, 64]

        combined = tf.concat([passion_embedding, project_title_embedding, bio_embedding, behavioral_embedding, location_embedding, created_at_embedding], axis=1)
        
        # A dense layer after concatenation to map features to a unified space
        combined_dense = tf.keras.layers.Dense(128, activation="relu")(combined)
        
        output = self.final_dense(combined_dense)

In [937]:
class PostTower(tf.keras.Model):
    def __init__(self):
        super().__init__()
        self.title_embedding = tf.keras.layers.Embedding(input_dim=5000, output_dim=64)
        self.description_embedding = tf.keras.layers.Embedding(input_dim=5000, output_dim=64)
        self.comment_embedding = tf.keras.layers.Embedding(input_dim=5000, output_dim=64)  # Embedding for comments

        # Process numerical features (including new post length, comment count, etc.)
        self.numerical_dense = tf.keras.Sequential([
            tf.keras.layers.Dense(64, activation="relu"),  # Project numerical features to 64 dimensions
            tf.keras.layers.Dense(64)  # Ensure it matches the embedding dimensions
        ])

        self.final_dense = tf.keras.Sequential([
            tf.keras.layers.Dense(64, activation="relu"),
            tf.keras.layers.Dense(32)
        ])

    def aggregate_comments(self, post_text, comments):
        post_embedding = self.description_embedding(post_text)
    
        # Use tf.map_fn to apply the embedding function over the comments tensor
        comment_embeddings = tf.map_fn(self.comment_embedding, comments, fn_output_signature=tf.float32)
        
        post_weight = 0.7
        comment_weight = 0.3
        
        # Combine the post and comment embeddings (using weights)
        combined_embedding = post_weight * post_embedding + comment_weight * tf.reduce_sum(comment_embeddings, axis=0)
        
        return combined_embedding    

    def build(self, input_shape):
        # Build embeddings and numerical_dense based on individual input features
        self.title_embedding.build(input_shape['title'])
        self.description_embedding.build(input_shape['description'])

        # Create an input shape for the concatenated numerical features (since these are individually expanded)
        numerical_input_shape = (input_shape['post_length'][0], 6)  # Assuming 6 numerical features
        self.numerical_dense.build(numerical_input_shape)

        super(PostTower, self).build(input_shape)

    def call(self, inputs):
        # Embed the title and description
        title_embedding = self.title_embedding(inputs["title"])  # Shape: (batch_size, None, 10, 64)
        description_embedding = self.description_embedding(inputs["description"])  # Shape: (batch_size, None, 100, 64)
        
        # Embed the post content and comments (ensure rank is consistent)
        combined_post_embedding = self.aggregate_comments(inputs["description"], inputs["comments"])  # Shape: (batch_size, 64)

        numerical_features = tf.concat([
            tf.expand_dims(inputs["post_length"], axis=1),
            tf.expand_dims(inputs["vote_count"], axis=1),
            tf.expand_dims(inputs["upvote_count"], axis=1),
            tf.expand_dims(inputs["view_count"], axis=1),
            tf.expand_dims(inputs["comment_count"], axis=1),
            tf.expand_dims(inputs["avg_time_viewed"], axis=1)
        ], axis=1)

        
        # Project numerical features to match the embedding dimensions (64)
        numerical_embedding = self.numerical_dense(numerical_features)  # Shape: (batch_size, num_numerical_features, 64)
        # Expand numerical_embedding to match the rank of the other tensors
        numerical_embedding = tf.expand_dims(numerical_embedding, axis=1)  # Shape: [batch_size, 1, 64]


        combined = tf.concat([title_embedding, combined_post_embedding, numerical_embedding], axis=1)
        
        return self.final_dense(combined)

In [977]:
import tensorflow_recommenders as tfrs

class TwoTowerModel(tfrs.models.Model):
    def __init__(self, user_model, post_model):
        super().__init__()
        self.user_model = user_model
        self.post_model = post_model
        self.user_dataset = user_dataset  # Store the user dataset
        self.post_dataset = post_dataset  # Store the post dataset

        self.candidate_index = tfrs.layers.factorized_top_k.BruteForce(post_model)
        self.task = tfrs.tasks.Retrieval(metrics=tfrs.metrics.FactorizedTopK(candidates=self.candidate_index))
        
        '''
        # Map post_dataset to post embeddings for FactorizedTopK metric
        self.task = tfrs.tasks.Retrieval(
            metrics=tfrs.metrics.FactorizedTopK(
                candidates=post_dataset.batch(1).map(lambda post: (
                    post["post_id"],
                    post_model({
                        "title": post["title"],
                        "description": post["description"],
                        "vote_count": post["vote_count"],
                        "upvote_count": post["upvote_count"],
                        "view_count": post["view_count"],
                        "comments": post["comments"],
                        "post_length": post["post_length"],
                        "comment_count": post["comment_count"],
                        "avg_time_viewed": post["avg_time_viewed"]
                    })
                ))
            )
        )
        '''

    def get_user_features(self, user_ids):
        # This function retrieves the corresponding user features (passions, h3_location) based on user_id
        user_features = self.user_dataset.filter(lambda user: tf.reduce_any(user["user_id"] == user_ids))
        
        # Use `get_single_element` to retrieve tensors from the dataset
        user_features = user_features.batch(1)  # Ensure batching
        user_tensors = tf.compat.v1.data.experimental.get_single_element(user_features)

        user_passions = user_tensors["passions"]
        user_h3_location = user_tensors["h3_location"]
        user_project_titles = user_tensors["project_titles"]
        user_bio = user_tensors["bio"]
        user_created_at = user_tensors["created_at"]
        user_viewed_posts = user_tensors["viewed_posts"]
        user_upvoted_posts = user_tensors["upvoted_posts"]
        user_commented_posts = user_tensors["commented_posts"]
        
        return user_passions, user_h3_location, user_project_titles, user_bio, user_created_at, user_viewed_posts, user_upvoted_posts, user_commented_posts

    def get_post_features(self, post_ids):
        # This function retrieves the corresponding post features based on post_id
        post_features = self.post_dataset.filter(lambda post: tf.reduce_any(post["post_id"] == post_ids))
        
        # Use `get_single_element` to retrieve tensors from the dataset
        post_features = post_features.batch(1)  # Ensure batching
        post_tensors = tf.compat.v1.data.experimental.get_single_element(post_features)

        post_title = post_tensors["title"]
        post_description = post_tensors["description"]
        post_vote_count = post_tensors["vote_count"]
        post_upvote_count = post_tensors["upvote_count"]
        post_view_count = post_tensors["view_count"]
        post_comments = post_tensors["comments"]
        post_length = post_tensors["post_length"]
        post_comment_count = post_tensors["comment_count"]
        post_avg_time_viewed = post_tensors["avg_time_viewed"]
        
        return post_title, post_description, post_vote_count, post_upvote_count, post_view_count, post_comments, post_length, post_comment_count, post_avg_time_viewed
        

    def compute_loss(self, features, training=False):

        # Extract IDs from interaction_dataset
        user_ids = features["user_id"]
        post_ids = features["post_id"]

        # Get user/post-specific features using the ids
        user_passions, user_h3_location, user_project_titles, user_bio, user_created_at, user_viewed_posts, user_upvoted_posts, user_commented_posts = self.get_user_features(user_ids)
        post_title, post_description, post_vote_count, post_upvote_count, post_view_count, post_comments, post_length, post_comment_count, post_avg_time_viewed = self.get_post_features(post_ids)
        
        # Compute user embeddings
        user_embeddings = self.user_model({
            "passions": user_passions,
            "h3_location": user_h3_location,
            "project_titles": user_project_titles,
            "bio": user_bio,
            "created_at": user_created_at,
            "viewed_posts": user_viewed_posts,
            "upvoted_posts": user_upvoted_posts,
            "commented_posts": user_commented_posts,
        })

        # Compute post embeddings
        post_embeddings = self.post_model({
            "title": post_title,
            "description": post_description,
            "vote_count": post_vote_count,
            "upvote_count": post_upvote_count,
            "view_count": post_view_count,
            "comments": post_comments,
            "post_length": post_length,
            "comment_count": post_comment_count,
            "avg_time_viewed": post_avg_time_viewed
        })

        # Define weights for each interaction type
        interaction_weights = {
            "view": 0.1,
            "comment": 0.5,
            "upvote": 0.4
        }

        # Apply the weight based on the interaction type
        interaction_type = features['interaction_type']
        interaction_weight = tf.where(
            interaction_type == 'view', interaction_weights['view'],
            tf.where(interaction_type == 'comment', interaction_weights['comment'], interaction_weights['upvote'])
        )

        # Positive interaction mask
        positive_interaction_mask = tf.cast(
            tf.logical_or(tf.logical_or(interaction_type == 'view', interaction_type == 'comment'),
                          interaction_type == 'upvote'),
            dtype=tf.float32
        )

        # Negative interaction mask
        negative_interaction_mask = tf.cast(interaction_type == 'negative', dtype=tf.float32)

        # Compute positive and negative scores
        positive_scores = positive_interaction_mask * tf.reduce_sum(user_embeddings * post_embeddings, axis=1) * interaction_weight
        negative_scores = negative_interaction_mask * tf.reduce_sum(user_embeddings * post_embeddings, axis=1)

        # Loss function: Maximize positive scores, minimize negative scores
        loss = self.task(user_embeddings, post_embeddings)
        return loss - tf.reduce_mean(positive_scores) + tf.reduce_mean(negative_scores)


In [978]:
# Normalize numerical features
normalized_view_count = tf.keras.layers.BatchNormalization()(tf.expand_dims(posts_df["view_count"], -1))
normalized_vote_count = tf.keras.layers.BatchNormalization()(tf.expand_dims(posts_df["vote_count"], -1))

In [979]:
# Split the dataset for training and validation
train_dataset = interaction_dataset.take(36)
test_dataset = interaction_dataset.skip(36)

user_model = UserTower()
post_model = PostTower()
model = TwoTowerModel(user_model, post_model)

model.compile(optimizer=tf.keras.optimizers.Adagrad(learning_rate=0.1))

# Train the model
cached_train = train_dataset.shuffle(10000).batch(1024).cache()
model.fit(cached_train, epochs=3)

Epoch 1/3
Tensor("DatasetToSingleElement:4", shape=(None, 10), dtype=int32) Tensor("DatasetToSingleElement:3", shape=(None,), dtype=string) Tensor("DatasetToSingleElement:5", shape=(None, 10), dtype=int32) Tensor("DatasetToSingleElement:0", shape=(None, 50), dtype=int32) Tensor("DatasetToSingleElement:2", shape=(None,), dtype=string) Tensor("DatasetToSingleElement:8", shape=(None,), dtype=float64) Tensor("DatasetToSingleElement:6", shape=(None,), dtype=float64) Tensor("DatasetToSingleElement:1", shape=(None,), dtype=float64)


ValueError: None values not supported.