### Deep Structured Semantic Model - Prediction Evaluation
This notebook evaluates the prediction performance of the DSSM model on a held-out test split of the reviews. It is separate from the real-time recommendation workflow.

#### Pre-requisites
- The model is trained and the index is created in the notebooks `DSSM Model.ipynb` and `DSSM Index (Faiss).ipynb`.
- All required files are saved in the `Saved_Triplet_Hinge_Loss` folder.

In [1]:
from general_program import *
import warnings
warnings.filterwarnings("ignore")

Loaded 78059 rows from business_details table.
Loaded 360656 rows from business_categories table.
Loaded 980418 rows from review table.
Loaded 229447 rows from user table.
Loaded 173085 rows from tip table.


In [2]:
# Folder that holds the saved artefacts (see the pre-requisites above).
save_folder_path = 'Saved_Triplet_Hinge_Loss/'

# Load the user/item models together with the encoders and scalers that
# load_saved_models returns for this folder.
(
    user_model,
    item_model,
    user_id_encoder,
    business_id_encoder,
    categories_encoder,
    business_geohash_encoder,
    user_scaler,
    business_scaler,
) = load_saved_models(save_folder_path=save_folder_path)





In [3]:
# Run the shared preparation step with use_stage='test', passing in the
# encoders and scalers loaded above.
# NOTE(review): this rebinds user_df / business_df / review_df, so re-running
# the cell feeds the already-prepared frames back into prepare_data; whether
# prepare_data tolerates that is not visible here.
(
    user_df,
    business_df,
    review_df,
    user_continuous_features_scaled,
    business_continuous_features_scaled,
    num_users,
    num_businesses,
    num_categories,
    num_geohashes,
) = prepare_data(
    user_df,
    business_df,
    review_df,
    categories_df,
    user_id_encoder,
    business_id_encoder,
    categories_encoder,
    business_geohash_encoder,
    user_scaler,
    business_scaler,
    use_stage='test',
)

# Lookup Series keyed by the encoded business id.
business_by_encoded_id = business_df.set_index('business_id_encoded')

# encoded business id -> encoded categories
business_category_map = business_by_encoded_id['category_encoded']

# encoded business id -> encoded geohash
business_geohash_map = business_by_encoded_id['geohash_encoded']

In [4]:
# Re-key both continuous-feature tables by encoded id so that later cells can
# select rows with .loc[<encoded ids>].
# NOTE(review): assumes the feature rows are in the same order as user_df /
# business_df — confirm against prepare_data.
user_encoded_index = user_df['user_id_encoded'].values
business_encoded_index = business_df['business_id_encoded'].values

user_continuous_features_scaled = user_continuous_features_scaled.set_index(user_encoded_index)
business_continuous_features_scaled = business_continuous_features_scaled.set_index(business_encoded_index)

In [5]:
# Hold out 20% of review_df as the test set (fixed random_state for repeatability)
train_data, test_data = train_test_split(review_df, test_size=0.2, random_state=42)

# A test review is positive when it has at least 4 stars, negative otherwise
has_high_rating = test_data['stars'] >= 4
has_low_rating = test_data['stars'] < 4
positive_reviews = test_data[has_high_rating]
negative_reviews = test_data[has_low_rating]

# Report the class balance of the test set
num_positive = len(positive_reviews)
num_negative = len(negative_reviews)
print(f"Number of positive reviews: {num_positive}")
print(f"Number of negative reviews: {num_negative}")
print(f"Total number of reviews: {len(test_data)}")
print(f"Ratio of positive to negative reviews: {num_positive / num_negative:.2f}")

Number of positive reviews: 136214
Number of negative reviews: 59870
Total number of reviews: 196084
Ratio of positive to negative reviews: 2.28


In [6]:
def balance_test_data(positive_reviews, negative_reviews):
    """Return a shuffled test set with equally many positive and negative reviews.

    The larger of the two classes is randomly down-sampled (fixed seed) to the
    size of the smaller one; the two parts are then concatenated and shuffled,
    and the resulting class counts are printed.

    Parameters
    ----------
    positive_reviews : pandas.DataFrame
        Reviews treated as positive. Must contain a ``stars`` column.
    negative_reviews : pandas.DataFrame
        Reviews treated as negative. Must contain a ``stars`` column.

    Returns
    -------
    pandas.DataFrame
        The balanced, shuffled reviews with a fresh 0..n-1 index.
    """
    n_positive = len(positive_reviews)
    n_negative = len(negative_reviews)

    # Down-sample whichever class is larger. Sampling only the positives would
    # raise ValueError as soon as the negatives outnumber them.
    if n_positive >= n_negative:
        positive_part = positive_reviews.sample(n=n_negative, random_state=42)
        negative_part = negative_reviews
    else:
        positive_part = positive_reviews
        negative_part = negative_reviews.sample(n=n_positive, random_state=42)

    # combine the two parts, then shuffle so the classes are interleaved
    balanced_test_data = pd.concat([positive_part, negative_part], ignore_index=True)
    balanced_test_data = balanced_test_data.sample(frac=1, random_state=42).reset_index(drop=True)

    # new statistics for the balanced test data (positive means stars >= 4)
    positive_count = int((balanced_test_data['stars'] >= 4).sum())
    negative_count = int((balanced_test_data['stars'] < 4).sum())
    # Guard the ratio so an input without negatives does not crash the report.
    ratio = positive_count / negative_count if negative_count > 0 else float("nan")

    print(f"Number of positive reviews: {positive_count}")
    print(f"Number of negative reviews: {negative_count}")
    print(f"Total number of reviews: {len(balanced_test_data)}")
    print(f"Ratio of positive to negative reviews: {ratio:.2f}")
    return balanced_test_data

In [7]:
# Balance the two classes of the test split; comment out the next line to
# evaluate on the original (imbalanced) test data instead.
test_data = balance_test_data(positive_reviews, negative_reviews)

# One row per user_id, holding the list of business_id values from that user's test reviews
businesses_per_user = test_data.groupby('user_id')['business_id'].apply(list)
test_data_grouped = businesses_per_user.reset_index()

Number of positive reviews: 59870
Number of negative reviews: 59870
Total number of reviews: 119740
Ratio of positive to negative reviews: 1.00


In [8]:
# Number of users to keep for the evaluation.
target_users = 1000

# Keep only the first `target_users` rows (one row per user) of the grouped frame.
test_data_grouped = test_data_grouped.iloc[:target_users]

# Restrict the review-level test data to those users. The explicit .copy()
# makes test_data an independent frame, so adding the prediction columns to it
# later does not raise pandas' SettingWithCopyWarning.
selected_user_ids = test_data_grouped['user_id']
test_data = test_data[test_data['user_id'].isin(selected_user_ids)].copy()

In [11]:
# Step 2: Prepare inputs for user and business embeddings

# --- User features ---
test_user_ids = test_data['user_id_encoded'].values
# NOTE(review): the table is already named "*_scaled" and is passed through
# user_scaler.transform again here — confirm this matches how the training
# inputs were built (the same applies to the business features below).
test_user_cont_features = user_scaler.transform(user_continuous_features_scaled.loc[test_user_ids].values)

# --- Business features ---
test_business_ids = test_data['business_id_encoded'].values
test_business_cont_features = business_scaler.transform(business_continuous_features_scaled.loc[test_business_ids].values)

# Category entry for each test row; anything that is not a list becomes an empty list
test_business_categories = business_category_map.loc[test_business_ids].apply(
    lambda x: x if isinstance(x, list) else []
)
# Pad the category lists to length 5, padding at the end
test_business_category_padded = pad_sequences(test_business_categories.tolist(), maxlen=5, padding="post")

# business_geohash_map is keyed by the encoded business id, so look the ids up
# by label (.loc), exactly like the category lookup above. Series.take is
# positional and only agrees when the index happens to be 0..n-1 in order.
test_business_geohashes = business_geohash_map.loc[test_business_ids].values

In [12]:

# Step 3: Predict embeddings using the loaded models

# User model inputs: encoded user ids and continuous user features
user_model_inputs = [test_user_ids, test_user_cont_features]
test_user_embeddings = user_model.predict(user_model_inputs)

# Item model inputs: encoded business ids, padded categories and continuous
# business features. test_business_geohashes is currently not passed in.
item_model_inputs = [
    test_business_ids,
    test_business_category_padded,
    test_business_cont_features,
]
test_business_embeddings = item_model.predict(item_model_inputs)

# Step 4: Compute cosine similarity for each user-business pair
def compute_cosine_similarity(vec1, vec2):
    """Return the cosine similarity of two 1-D vectors.

    Parameters
    ----------
    vec1, vec2 : array-like
        Vectors of equal length.

    Returns
    -------
    float
        ``dot(vec1, vec2) / (||vec1|| * ||vec2||)``. When either vector has
        zero norm the similarity is undefined; 0.0 is returned in that case
        instead of dividing by zero and producing NaN.
    """
    denominator = np.linalg.norm(vec1) * np.linalg.norm(vec2)
    if denominator == 0:
        return 0.0
    return np.dot(vec1, vec2) / denominator

# Cosine similarity between the user and business embedding of every test row
num_pairs = len(test_data)
pair_similarities = []
for row in range(num_pairs):
    pair_similarities.append(
        compute_cosine_similarity(test_user_embeddings[row], test_business_embeddings[row])
    )
test_data['predicted_similarity'] = pair_similarities

# Step 5: Set the predicted_label based on similarity score
# (1 when the similarity is non-negative, 0 otherwise)
is_non_negative = test_data['predicted_similarity'] >= 0
test_data['predicted_label'] = is_non_negative.astype(int)

[1m69/69[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step  
[1m69/69[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step


In [13]:
# Ground-truth labels and the predicted cosine-similarity scores
y_true = test_data['label'].values
y_pred_scores = test_data['predicted_similarity'].values

# A pair is predicted positive when its cosine similarity is non-negative (threshold = 0)
y_pred_labels = (y_pred_scores >= 0).astype(int)


def _safe_ratio(numerator, denominator):
    """Return numerator / denominator, or 0 when the denominator is not positive."""
    return numerator / denominator if denominator > 0 else 0


# Confusion-matrix counts
actual_positive = (y_true == 1)
actual_negative = (y_true == 0)
predicted_positive = (y_pred_labels == 1)
predicted_negative = (y_pred_labels == 0)

true_positive = np.sum(actual_positive & predicted_positive)
true_negative = np.sum(actual_negative & predicted_negative)
false_positive = np.sum(actual_negative & predicted_positive)
false_negative = np.sum(actual_positive & predicted_negative)

# Class totals of the evaluated rows
total = len(y_true)
total_positive = np.sum(y_true)
total_negative = total - total_positive

# Evaluation metrics (each falls back to 0 when its denominator is not positive)
accuracy = _safe_ratio(true_positive + true_negative, total)
precision = _safe_ratio(true_positive, true_positive + false_positive)
recall = _safe_ratio(true_positive, total_positive)
f1_score = _safe_ratio(2 * precision * recall, precision + recall)

# Weighted F-beta score; beta = 2 weights recall more heavily than precision
beta = 2
f_beta = _safe_ratio((1 + beta**2) * precision * recall, beta**2 * precision + recall)

# Table 1: composition of the evaluated data
background_stats = pd.DataFrame({
    'Total Positive': [total_positive],
    'Total Negative': [total_negative],
    'Total': [total],
    'Ratio': [_safe_ratio(total_positive, total)],
})

print("Testing Data Statistics")
display(background_stats)

# Table 2: evaluation metrics, rounded to 4 decimals
# (a 'Mean Reciprocal Rank' column is not computed here)
evaluation_metric = pd.DataFrame({
    'Accuracy': [accuracy],
    'Precision': [precision],
    'Recall': [recall],
    'F1 Score': [f1_score],
    'F-beta Score': [f_beta],
}).round(4)

print("Evaluation Metrics")
display(evaluation_metric)

# Table 3: confusion-matrix counts
confusion_matrix = pd.DataFrame({
    'True Positive': [true_positive],
    'True Negative': [true_negative],
    'False Positive': [false_positive],
    'False Negative': [false_negative]
})

print("Confusion Matrix")
display(confusion_matrix)


Testing Data Statistics


Unnamed: 0,Total Positive,Total Negative,Total,Ratio
0,1045,1143,2188,0.477605


Evaluation Metrics


Unnamed: 0,Accuracy,Precision,Recall,F1 Score,F-beta Score
0,0.5672,0.5456,0.5608,0.5531,0.5577


Confusion Matrix


Unnamed: 0,True Positive,True Negative,False Positive,False Negative
0,586,655,488,459
