# In [3]:  (Jupyter notebook cell prompt)
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer #convert a collection of text documents, into a numerical representation
# based on TF-IDF (term frequency-inverse document frequency) weighting.
from sklearn.metrics.pairwise import linear_kernel  #computes the linear kernel between two arrays
# It is used here to calculate the cosine similarity between reward descriptions:
# TF-IDF rows are L2-normalised by default, so their dot product is the cosine.

# Load the reward catalogue. The code below reads its 'reward_id' and
# 'description' columns.
data = pd.read_csv('rewards.csv')

if data.empty:
    print("Error: The reward dataset is empty.")
    # Raise SystemExit directly with a non-zero status: the bare exit() helper
    # is injected by the site module for interactive use and would report
    # success (status 0) on this error path.
    raise SystemExit(1)

# Build the TF-IDF matrix: one row per reward, English stop words dropped.
# Missing descriptions are replaced with empty strings because
# TfidfVectorizer refuses NaN documents.
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(data['description'].fillna(''))

# Pairwise similarity between every pair of rewards. TfidfVectorizer
# L2-normalises each row by default, so the plain dot product computed by
# linear_kernel equals the cosine similarity.
cosine_sim = linear_kernel(tfidf_matrix, tfidf_matrix)

def get_recommendations(reward_id, cosine_similarities, *, top_n=5, rewards=None):
    """Return the IDs of the rewards most similar to ``reward_id``.

    Args:
        reward_id: Value to look up in the ``'reward_id'`` column.
        cosine_similarities: Square, row-indexable similarity matrix whose
            row/column ``i`` corresponds to the ``i``-th row (by position)
            of the rewards table.
        top_n: Maximum number of recommendations to return (default 5).
        rewards: Table with a ``'reward_id'`` column. Defaults to the
            module-level ``data`` frame.

    Returns:
        A list of up to ``top_n`` reward IDs, most similar first. The
        queried reward itself is never included.

    Raises:
        SystemExit: If ``reward_id`` is not present (an error message is
            printed first).
    """
    frame = data if rewards is None else rewards
    reward_ids = frame['reward_id'].tolist()

    # Work with the row *position*, not the index label: the similarity
    # matrix is positional, and the two only coincide for a default index.
    idx = next(
        (pos for pos, rid in enumerate(reward_ids) if rid == reward_id),
        None,
    )
    if idx is None:
        print("Error: Reward ID not found in the dataset.")
        raise SystemExit(1)

    scores = cosine_similarities[idx]

    # Drop the queried reward explicitly instead of assuming it sorts first;
    # a reward with an identical description ties with it at the top score.
    candidates = [pos for pos in range(len(scores)) if pos != idx]
    # sorted() is stable, so equally similar rewards keep their table order.
    candidates.sort(key=lambda pos: scores[pos], reverse=True)

    return [reward_ids[pos] for pos in candidates[:max(top_n, 0)]]

# Demo: look up the rewards most similar to one chosen reward and show them.
reward_id = 3  # the reward we want recommendations for
recommendations = get_recommendations(
    reward_id=reward_id,
    cosine_similarities=cosine_sim,
)
print(recommendations)


# Output of the cell above: [10, 1, 2, 4, 5]
