In [1]:
import pandas as pd
import numpy as np
import os
from utils import (
    get_visitor_interests,
    get_exhibitor_category_info,
    calculate_match_score
)
from tqdm import tqdm

In [2]:
# Read the two preprocessed CSV exports that feed the recommender.
base_path = '../../source/'
visitors_csv = os.path.join(base_path, 'processed_visitors_answers.csv')
exhibitors_csv = os.path.join(base_path, 'processed_exhibitors_categories.csv')

visitors_df = pd.read_csv(visitors_csv)
exhibitors_df = pd.read_csv(exhibitors_csv)

# Lookup of exhibitor id -> original exhibitor name (one entry per distinct id/name pair).
exhibitor_names = (
    exhibitors_df[['exhibitorid', 'exhibitorName']]
    .drop_duplicates()
    .set_index('exhibitorid')['exhibitorName']
    .to_dict()
)

In [3]:
# Preview the processed visitor answers loaded from processed_visitors_answers.csv.
visitors_df

Unnamed: 0,visitor_id,visitor_email,visitor_gender,questionId,questionText,answerId,answerText,answerTypeId
0,67b70a9f2d21f543a1096602,emilija+100_L8gA@bss.mk,F,5c8a78336d41a10da4f730fe,Reason for Attending the Event,5c8a78336d41a10da4f73100,To obtain general information,Answer
1,67b70a9f2d21f543a1096602,emilija+100_L8gA@bss.mk,F,5c8a78336d41a10da4f73227,Which of the following best describes your job...,5c8a78336d41a10da4f73244,Media,Answer
2,67b70a9f2d21f543a1096602,emilija+100_L8gA@bss.mk,F,5c8a78336d41a10da4f73253,Please indicate your company's main area of bu...,5c8a78336d41a10da4f73291,Travel Agent,Answer
3,67b70a9f2d21f543a1096602,emilija+100_L8gA@bss.mk,F,5c8a78336d41a10da4f7336d,What role do you play in the purchasing decisi...,5c8a78336d41a10da4f73371,No influence,Answer
4,67ada1ee197e604dd2722d1b,aleksandar.dimkov+mitt1_n5eA@bss.com.mk,M,5c8a78336d41a10da4f730fe,Reason for Attending the Event,5c8a78336d41a10da4f730ff,To source products and services,Answer
...,...,...,...,...,...,...,...,...
356,uud1nluegdqmisnfzpg75iqg,daniela.p+200_IzcX_QOfF@bss.com.mk,M,5c8a78336d41a10da4f7336d,What role do you play in the purchasing decisi...,5c8a78336d41a10da4f73370,Advisory role,Answer
357,mv0j9yovwhgmtif9l1naciko,daniela.p+200_IzcX_CIHU@bss.com.mk,M,5c8a78336d41a10da4f730fe,Reason for Attending the Event,5c8a78336d41a10da4f73100,To obtain general information,Answer
358,mv0j9yovwhgmtif9l1naciko,daniela.p+200_IzcX_CIHU@bss.com.mk,M,5c8a78336d41a10da4f73227,Which of the following best describes your job...,5c8a78336d41a10da4f7323d,Guided tour services,Answer
359,mv0j9yovwhgmtif9l1naciko,daniela.p+200_IzcX_CIHU@bss.com.mk,M,5c8a78336d41a10da4f73253,Please indicate your company's main area of bu...,5c8a78336d41a10da4f73291,Travel Agent,Answer


In [4]:
# Preview the processed exhibitor categories loaded from processed_exhibitors_categories.csv.
exhibitors_df

Unnamed: 0,exhibitorid,exhibitorName,categoryId,categoryName,parentCategory
0,90556,Turkey Travels,52276,1.5 Resort hotel,1. Accomodation providers
1,90556,Turkey Travels,52280,2.1 Inbound tour operator,2. Tour operators
2,90556,Turkey Travels,52281,2.2 Outbound tour operator,2. Tour operators
3,92462,Russian Travel Company,52273,1.2 Apartments / Residential hotel,1. Accomodation providers
4,92462,Russian Travel Company,52283,2.4 Mass market tour operators,2. Tour operators
...,...,...,...,...,...
178,17729,Sunny Travel Journeys,52282,2.3 Receptive / ground operator,2. Tour operators
179,17729,Sunny Travel Journeys,52272,1.1 Hotel / Hotel chain / Inn,1. Accomodation providers
180,17729,Sunny Travel Journeys,52329,10.5 Zoo,10. Museums & parks
181,17729,Sunny Travel Journeys,52366,"17.3 Banking, investments",17. Insurance companies & banking


In [5]:
# Inspect the exhibitor id -> exhibitor name lookup built from exhibitors_df.
exhibitor_names

{90556: 'Turkey Travels',
 92462: 'Russian Travel Company',
 92491: 'Indian Travel Company',
 92492: 'Asia Tourism',
 92493: 'SriLanka Adventures',
 92494: 'Exotic Europe Travels',
 92495: 'Luxury Europe Company',
 92496: 'DreamTravel Company ',
 92497: 'Happy Company 2',
 68142: 'Global Tours Voyages',
 27827: 'Exotic Tours Holidays',
 74870: 'Exotic Tours Journeys',
 56923: 'Royal Tours Expeditions',
 80700: 'Exotic Holidays Services',
 46350: 'Prime Travel Journeys',
 55311: 'Sunny Adventures Journeys',
 31331: 'Exotic Adventures Journeys',
 97818: 'Sunny Trips Adventures',
 13033: 'Prime Vacations Expeditions',
 15881: 'Royal Trips Tours',
 10846: 'Royal Adventures Adventures',
 98889: 'Exotic Tours Tours',
 18004: 'Exotic Getaways Journeys',
 44942: 'Royal Holidays Adventures',
 48028: 'Prime Adventures Journeys',
 72153: 'Elite Adventures Holidays',
 32278: 'Exotic Holidays Holidays',
 51839: 'Dream Getaways Expeditions',
 30134: 'Global Holidays Expeditions',
 54579: 'Prime Vaca

In [6]:
# Build the two per-exhibitor lookups consumed by recommend_visitors:
#   exhibitor_categories_map  - exhibitor id -> set of cleaned category terms
#   exhibitor_category_counts - exhibitor id -> category count used for the penalty
# NOTE(review): the exact cleaning/tokenisation lives in utils.get_exhibitor_category_info.
exhibitor_categories_map, exhibitor_category_counts = get_exhibitor_category_info(exhibitors_df)

In [7]:
# Inspect the cleaned category terms derived for each exhibitor id.
exhibitor_categories_map

{10846: {'agencies',
  'agency',
  'distributor',
  'gastro',
  'gastro tourism',
  'helicopter',
  'helicopter services',
  'machinery',
  'manufacturer',
  'medical',
  'medical products machinery manufacturer distributor',
  'national',
  'national park',
  'park',
  'payment',
  'payment service provider',
  'product',
  'products',
  'provider',
  'service',
  'services',
  'tourism',
  'travel',
  'travel agencies'},
 13033: {'banking',
  'catering',
  'companies',
  'company',
  'equipment',
  'equipment restaurants catering',
  'hostel',
  'hostel motel',
  'insurance',
  'insurance companies banking',
  'motel',
  'radio',
  'radio tv',
  'restaurant',
  'restaurants',
  'tv'},
 15642: {'aggregator',
  'apartment',
  'apartments',
  'apartments residential hotel',
  'chain',
  'engine',
  'event',
  'event organizer',
  'glampings',
  'home',
  'homes',
  'hotel',
  'hotel hotel chain inn',
  'inn',
  'mobile',
  'mobile homes glampings',
  'organizer',
  'residential',
  'sea

In [8]:
# Recommendation parameters
PENALTY_ALPHA = 0.5  # Penalty strength (0 = no penalty, higher = stronger)
PENALTY_THRESHOLD = 6  # Number of categories before the penalty increases significantly

# Only these questions are considered for now, because their answers relate
# to exhibitors and their categories.
RELEVANT_QUESTIONS = [
    "Please indicate your company's main area of business",
    "Which of the following best describes your job function?"
]

# Column layout of the recommendations frame; also used for the empty result.
_RECOMMENDATION_COLUMNS = ['visitor_email', 'score', 'numMatches', 'matchedCategories']


def recommend_visitors(
    exhibitor_id,
    processed_visitors_df,
    exhibitor_categories_map,
    exhibitor_category_counts,
    exhibitor_names_map,
    penalty_alpha=PENALTY_ALPHA,
    penalty_threshold=PENALTY_THRESHOLD,
    top_n=None
):
    """
    Rank visitors for a given exhibitor ID by category match score.

    Every unique ``visitor_email`` in ``processed_visitors_df`` is scored
    against the exhibitor's category terms with ``calculate_match_score``;
    visitors with a score of 0 or less are dropped.

    Args:
        exhibitor_id (int): The ID of the exhibitor.
        processed_visitors_df (pd.DataFrame): DataFrame with processed visitor
            answers. Must contain a ``visitor_email`` column; it is also passed
            to ``get_visitor_interests`` together with ``RELEVANT_QUESTIONS``.
        exhibitor_categories_map (dict): Map of exhibitor IDs to their set of cleaned category names.
        exhibitor_category_counts (dict): Map of exhibitor IDs to their total unique category count.
        exhibitor_names_map (dict): Map of exhibitor IDs to their names. A
            missing name is not fatal; a placeholder is used in the log output.
        penalty_alpha (float): Penalty strength parameter.
        penalty_threshold (int): Threshold for penalty calculation.
        top_n (int | None): Number of recommendations to return. ``None``
            (the default) returns every visitor with a positive score.

    Returns:
        pd.DataFrame: Recommended visitors sorted by descending score, with
                      columns ``visitor_email``, ``score``, ``numMatches`` and
                      ``matchedCategories``.
                      Returns empty DataFrame if exhibitor not found or no matches.
    """
    # 1. Get target exhibitor's details
    exhibitor_cats_set = exhibitor_categories_map.get(exhibitor_id)
    total_categories = exhibitor_category_counts.get(exhibitor_id)

    # Honour the documented contract: an unknown exhibitor yields an empty
    # frame instead of a KeyError from the name lookup.
    if exhibitor_cats_set is None or total_categories is None:
        print(f"Exhibitor {exhibitor_id} not found; no recommendations generated.")
        return pd.DataFrame(columns=_RECOMMENDATION_COLUMNS)

    exhibitor_name = exhibitor_names_map.get(exhibitor_id, f"<unknown exhibitor {exhibitor_id}>")

    print(f"\nTarget Exhibitor: {exhibitor_name} (ID: {exhibitor_id})")
    print(f"Categories: {exhibitor_cats_set}")
    print(f"Total Categories (for penalty): {total_categories}")

    scores = []
    # Use the DataFrame passed in, not a notebook-level global, so the
    # function scores exactly the visitors the caller supplied.
    unique_visitors = processed_visitors_df['visitor_email'].unique()

    # 2. Iterate through all unique visitors
    print(f"\nCalculating scores against {len(unique_visitors)} unique visitors...")
    for visitor_email in tqdm(unique_visitors, desc="Matching Visitors"):
        # 3. Get visitor interests
        visitor_interests = get_visitor_interests(visitor_email, processed_visitors_df, RELEVANT_QUESTIONS)

        # 4. Calculate score using the target exhibitor's total category count
        #    for the penalty (constant across visitors, since the exhibitor is fixed)
        score, num_matches, matched_categories = calculate_match_score(
            visitor_interests,
            exhibitor_cats_set,
            total_categories,
            penalty_alpha,
            penalty_threshold
        )

        # 5. Store results if score > 0
        if score > 0:
            scores.append({
                'visitor_email': visitor_email,
                'score': score,
                'numMatches': num_matches,
                'matchedCategories': ', '.join(sorted(matched_categories))
            })

    # 6. Sort and return (optionally only the top N)
    if not scores:
        print(f"No matching visitors found for exhibitor {exhibitor_name}.")
        return pd.DataFrame(columns=_RECOMMENDATION_COLUMNS)

    recommendations_df = pd.DataFrame(scores)
    recommendations_df = recommendations_df.sort_values(by='score', ascending=False)

    if top_n is not None:
        recommendations_df = recommendations_df.head(top_n)

    return recommendations_df

In [9]:
# Try the recommender on one exhibitor: 92462 ('Russian Travel Company').
test_exhibitor_id = 92462

recommendations = recommend_visitors(
    exhibitor_id=test_exhibitor_id,
    processed_visitors_df=visitors_df,
    exhibitor_categories_map=exhibitor_categories_map,
    exhibitor_category_counts=exhibitor_category_counts,
    exhibitor_names_map=exhibitor_names,
)

recommendations


Target Exhibitor: Russian Travel Company (ID: 92462)
Categories: {'facility', 'mass market tour operators', 'apartment', 'operators', 'engine', 'search', 'market', 'mass', 'independent travel agency', 'search engine travel aggregator', 'tour', 'service', 'apartments residential hotel', 'hotel', 'aggregator', 'agency', 'apartments', 'transport services', 'travel', 'operator', 'independent', 'services', 'transport', 'facilities', 'residential', 'services facilities'}
Total Categories (for penalty): 6

Calculating scores against 100 unique visitors...


Matching Visitors: 100%|██████████| 100/100 [00:00<00:00, 642.77it/s]


Unnamed: 0,visitor_email,score,numMatches,matchedCategories
10,daniela.p+200_IzcX@bss.com.mk,4.0,4,"service, services, tour, travel"
69,daniela.p+200_IzcX_vB6s@bss.com.mk,4.0,4,"service, services, tour, travel"
25,daniela.p+200_IzcX_pGIR@bss.com.mk,4.0,4,"service, services, tour, travel"
24,daniela.p+200_IzcX_T1OA@bss.com.mk,4.0,4,"service, services, tour, travel"
74,daniela.p+200_IzcX_QOfF@bss.com.mk,4.0,4,"service, services, tour, travel"
...,...,...,...,...
68,emilija+3_CmwU_tbhk@bss.com.mk,1.0,1,travel
71,emilija+101_jj3C_mXlu@bss.com.mk,1.0,1,travel
70,tanja+182_Ahsx_Lcd6@bss.mk,1.0,1,travel
73,emilija+2_02Wi_UXK5@bss.com.mk,1.0,1,travel


In [10]:
# Show the seven best-scoring visitors; `recommendations` is already sorted by descending score.
top_k = 7
recommendations.iloc[:top_k]

Unnamed: 0,visitor_email,score,numMatches,matchedCategories
10,daniela.p+200_IzcX@bss.com.mk,4.0,4,"service, services, tour, travel"
69,daniela.p+200_IzcX_vB6s@bss.com.mk,4.0,4,"service, services, tour, travel"
25,daniela.p+200_IzcX_pGIR@bss.com.mk,4.0,4,"service, services, tour, travel"
24,daniela.p+200_IzcX_T1OA@bss.com.mk,4.0,4,"service, services, tour, travel"
74,daniela.p+200_IzcX_QOfF@bss.com.mk,4.0,4,"service, services, tour, travel"
75,daniela.p+200_IzcX_CIHU@bss.com.mk,4.0,4,"service, services, tour, travel"
21,aleksandar.dimkov+mitt10_V0iB_UmV3@bss.com.mk,2.0,2,"operator, tour"


In [11]:
# Repeat the check for a second exhibitor: 68142 ('Global Tours Voyages').
test_exhibitor_id = 68142

recommendations = recommend_visitors(
    exhibitor_id=test_exhibitor_id,
    processed_visitors_df=visitors_df,
    exhibitor_categories_map=exhibitor_categories_map,
    exhibitor_category_counts=exhibitor_category_counts,
    exhibitor_names_map=exhibitor_names,
)

recommendations


Target Exhibitor: Global Tours Voyages (ID: 68142)
Categories: {'agencies', 'museums', 'hostel', 'hostel motel', 'travel agencies', 'event', 'provider', 'mouse', 'service', 'motorhomes', 'office', 'religious tourism', 'business', 'management', 'tourism', 'mice event management', 'regional tourism office', 'mice', 'agency', 'religious', 'nature park', 'regional', 'nature', 'travel', 'parks', 'payment', 'services', 'payment service provider', 'museum', 'park', 'motel', 'services sphere business tourism', 'sphere', 'museums parks'}
Total Categories (for penalty): 8

Calculating scores against 100 unique visitors...


Matching Visitors: 100%|██████████| 100/100 [00:00<00:00, 706.10it/s]


Unnamed: 0,visitor_email,score,numMatches,matchedCategories
66,daniela.p+200_IzcX_QOfF@bss.com.mk,1.5,3,"service, services, travel"
22,daniela.p+200_IzcX_pGIR@bss.com.mk,1.5,3,"service, services, travel"
21,daniela.p+200_IzcX_T1OA@bss.com.mk,1.5,3,"service, services, travel"
9,daniela.p+200_IzcX@bss.com.mk,1.5,3,"service, services, travel"
61,daniela.p+200_IzcX_vB6s@bss.com.mk,1.5,3,"service, services, travel"
...,...,...,...,...
60,emilija+3_CmwU_tbhk@bss.com.mk,0.5,1,travel
63,emilija+101_jj3C_mXlu@bss.com.mk,0.5,1,travel
62,tanja+182_Ahsx_Lcd6@bss.mk,0.5,1,travel
65,emilija+2_02Wi_UXK5@bss.com.mk,0.5,1,travel
