In [2]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel

In [12]:
# Read the travel-agent records from the CSV file into a DataFrame
dataset_path = '../Project/dataset_travel_agent.csv'
df = pd.read_csv(filepath_or_buffer=dataset_path)

In [13]:
# Preprocessing: make 'rating' numeric and blank out missing text fields.
# The placeholder text 'no review yet' is swapped for '0' before the cast
# so the whole column can be converted to float.
df['rating'] = (
    df['rating']
    .replace(to_replace='no review yet', value='0')
    .astype(float)
)
# Missing category/area values become empty strings so that they can be
# concatenated with other strings later on.
df[['category', 'area']] = df[['category', 'area']].fillna('')

In [14]:
# Combine features to create a content string.
# The rating is written as a single token made only of word characters
# (e.g. 4.5 -> "rating_4_5"). With the previous "Rating:4.5" form, the
# default TfidfVectorizer token pattern split the text on ':' and '.' and
# then discarded the one-character digit tokens, so the rating value never
# influenced the similarity scores.
df['content'] = (
    df['category'] + ' ' + df['area']
    + ' rating_' + df['rating'].astype(str).str.replace('.', '_', regex=False)
)

In [15]:
# Turn every agent's content string into a TF-IDF weighted term vector.
# The fitted vectorizer is kept so the learned vocabulary stays available.
vectorizer = TfidfVectorizer()
tfidf_matrix = vectorizer.fit_transform(raw_documents=df['content'])


In [16]:
# Pairwise similarity of every travel agent against every other one.
# linear_kernel compares the matrix with itself when only one argument is
# given; the result is a dot product, which equals cosine similarity as long
# as the TF-IDF rows are L2-normalised (the TfidfVectorizer default).
cosine_sim = linear_kernel(tfidf_matrix)

In [17]:
# Function to recommend travel agents
def recommend_travel_agents(agent_name, df, cosine_sim, top_n=5):
    """Recommend travel agents whose content is most similar to a given agent.

    Args:
        agent_name (str): Name of the travel agent to base recommendations on.
            If several rows share this name, the first one is used.
        df (DataFrame): The dataset containing travel agent details. Must
            contain the columns 'name', 'category', 'area' and 'rating'.
            Row i of ``df`` (by position) must correspond to row/column i of
            ``cosine_sim``.
        cosine_sim (ndarray): Precomputed square similarity matrix.
        top_n (int): Maximum number of recommendations to return. Values
            below zero are treated as zero.

    Returns:
        DataFrame or str: The top recommended travel agents with their
        'name', 'category', 'area' and 'rating' columns, ordered from most
        to least similar. If ``agent_name`` is not present in ``df['name']``,
        a "not found" message string is returned instead.
    """
    # Map each name to its row *position* (not its index label), because
    # cosine_sim is addressed by position and df may carry a non-default index.
    positions = pd.Series(range(len(df)), index=df['name'])
    # Keep only the first occurrence of a repeated name so the lookup below
    # always yields a single scalar position.
    positions = positions[~positions.index.duplicated(keep='first')]

    if agent_name not in positions.index:
        return f"Travel agent '{agent_name}' not found."

    idx = int(positions.loc[agent_name])

    # Pair every other agent's position with its similarity to the query.
    # The query row is excluded explicitly: when other agents tie with it at
    # the maximum score it is not guaranteed to sort into first place.
    ranked = sorted(
        ((pos, score) for pos, score in enumerate(cosine_sim[idx]) if pos != idx),
        key=lambda pair: pair[1],
        reverse=True,
    )

    # Guard against negative values, which would otherwise slice from the end.
    limit = max(int(top_n), 0)
    top_positions = [pos for pos, _ in ranked[:limit]]

    # Return the top recommended travel agents
    return df.iloc[top_positions][['name', 'category', 'area', 'rating']]

In [19]:
# Example: look up agents similar to 'AG HOLIDAY SDN BHD' and show the result
if __name__ == "__main__":
    agent_name = 'AG HOLIDAY SDN BHD'
    recommendations = recommend_travel_agents(agent_name, df, cosine_sim)

    # A string result is the "not found" message and is printed on its own;
    # a table result is preceded by a heading.
    if not isinstance(recommendations, str):
        print("Top Recommendations:")
    print(recommendations)

Travel agent 'ABA' not found.
