# Freedom Ticketon Hackathon - Enhanced Recommendation System

This notebook implements a recommendation system for the Freedom Ticketon hackathon that incorporates:
1. User preferences for genres and event types
2. Temporal patterns (day of week preferences)
3. Attendance frequency patterns
4. City-based preferences
5. Popularity fallbacks

## 1. Import Libraries

In [1]:
import pandas as pd
import numpy as np
from collections import Counter, defaultdict
from datetime import datetime
import matplotlib.pyplot as plt
import seaborn as sns
import sys  # For flushing stdout

try:
    from tqdm.notebook import tqdm
    has_tqdm = True
    print("tqdm imported successfully for progress bars")
except ImportError:
    has_tqdm = False
    print("tqdm not available, will use print statements for progress")

def cell_end():
    print("Cell ended!")
    sys.stdout.flush()
    
cell_end()

tqdm imported successfully for progress bars
Cell ended!


## 2. Define Evaluation Metrics

In [2]:
def apk(actual, predicted, k=10):
    if not actual:
        return 0.0
    if not predicted:
        return 0.0
    predicted = predicted[:k]
    score = 0.0
    num_hits = 0.0
    for i, p in enumerate(predicted):
        if p in actual and p not in predicted[:i]:
            num_hits += 1.0
            score += num_hits / (i + 1.0)
    return score / min(len(actual), k)

def mapk(y_true, y_pred, k=10):
    apk_scores = []
    for u in y_true:
        if u in y_pred:
            apk_scores.append(apk(y_true[u], y_pred[u], k))
        else:
            apk_scores.append(0.0)
    if not apk_scores:
        return 0.0
    return np.mean(apk_scores)

cell_end()

Cell ended!


## 3. Load Data

In [3]:
print("Loading data...")
train_test = pd.read_csv('train_test.csv')
print(f"Loaded train_test.csv - Shape: {train_test.shape}")

events_description = pd.read_csv('events_description.csv')
print(f"Loaded events_description.csv - Shape: {events_description.shape}")

submission = pd.read_csv('submission.csv')
print(f"Loaded submission.csv - Shape: {submission.shape}")

cell_end()

Loading data...
Loaded train_test.csv - Shape: (572032, 10)
Loaded events_description.csv - Shape: (6028, 9)
Loaded submission.csv - Shape: (53825, 2)
Cell ended!


## 4. Process Data

In [4]:
print("Processing data...")
train_test['reservation_time'] = pd.to_datetime(train_test['reservation_time'])
print("Converted timestamps to datetime")

paid_interactions = train_test[train_test['sale_status'] == 'PAID'].copy()
print(f"Filtered to {len(paid_interactions)} PAID interactions")

april_candidates = events_description[events_description['part_dataset'] == 'submission_movies']['item_id'].unique()
march_candidates = events_description[events_description['part_dataset'] == 'test']['item_id'].unique()

print(f"Found {len(april_candidates)} candidate events for April")
print(f"Found {len(march_candidates)} candidate events for March")

cell_end()

Processing data...
Converted timestamps to datetime
Filtered to 464253 PAID interactions
Found 1033 candidate events for April
Found 1023 candidate events for March
Cell ended!


In [5]:
history_interactions = paid_interactions[paid_interactions['part_dataset'] == 'train'].copy()
print(f"History interactions (before March): {len(history_interactions)} rows")

march_true_interactions = paid_interactions[paid_interactions['part_dataset'] == 'test'].copy()
print(f"March interactions: {len(march_true_interactions)} rows")

full_history_interactions = paid_interactions[paid_interactions['part_dataset'].isin(['train', 'test'])].copy()
print(f"Full history: {len(full_history_interactions)} rows")

march_ground_truth = march_true_interactions.groupby('user_id')['item_id'].apply(list).to_dict()
print(f"Created ground truth for {len(march_ground_truth)} users in March")

history_with_details = pd.merge(history_interactions, events_description, on='item_id', how='left')
march_with_details = pd.merge(march_true_interactions, events_description, on='item_id', how='left')
full_history_with_details = pd.merge(full_history_interactions, events_description, on='item_id', how='left')
print("Merged interactions with event details")

cell_end()

History interactions (before March): 358551 rows
March interactions: 105702 rows
Full history: 464253 rows
Created ground truth for 75127 users in March
Merged interactions with event details
Cell ended!


In [6]:
user_city = paid_interactions[['user_id', 'city']].drop_duplicates().set_index('user_id')['city'].to_dict()
print(f"Created city mapping for {len(user_city)} users")

user_gender = paid_interactions[['user_id', 'gender_main']].drop_duplicates().set_index('user_id')['gender_main'].to_dict()
print(f"Created gender mapping for {len(user_gender)} users")

user_age = paid_interactions[['user_id', 'age']].drop_duplicates().set_index('user_id')['age'].to_dict()
print(f"Created age mapping for {len(user_age)} users")

cell_end()

Created city mapping for 106587 users
Created gender mapping for 106587 users
Created age mapping for 106587 users
Cell ended!


## 5. Event and City Mappings

In [7]:
def get_event_city_mappings(df):
    """Extract city information for events from place_name and interactions"""
    event_city = {}
    event_place = {}
    
    for _, row in df.iterrows():
        item_id = row['item_id']
        city = row['city']
        place = row['place_name']
        
        # Store city and place information
        event_city[item_id] = city
        event_place[item_id] = place
    
    return event_city, event_place

event_city, event_place = get_event_city_mappings(paid_interactions)
print(f"Created city mapping for {len(event_city)} events")

event_genre = events_description[['item_id', 'film_genre']].drop_duplicates().set_index('item_id')['film_genre'].to_dict()
print(f"Created genre mapping for {len(event_genre)} events")

event_type = events_description[['item_id', 'film_type']].drop_duplicates().set_index('item_id')['film_type'].to_dict()
print(f"Created type mapping for {len(event_type)} events")

cell_end()

Created city mapping for 3972 events
Created genre mapping for 4597 events
Created type mapping for 4597 events
Cell ended!


In [8]:
def calculate_popularity(interactions_df, candidate_events):
    """Calculate normalized popularity scores for candidate events"""
    event_counts = interactions_df['item_id'].value_counts().to_dict()
    total_interactions = sum(event_counts.values())
    
    if total_interactions > 0:
        popularity_scores = {event_id: event_counts.get(event_id, 0) / total_interactions 
                            for event_id in candidate_events}
    else:
        popularity_scores = {event_id: 0 for event_id in candidate_events}
    
    return popularity_scores

def calculate_city_popularity(interactions_df, candidate_events):
    """Calculate city-specific popularity scores for candidate events"""
    city_event_pop = {}
    
    for city in interactions_df['city'].unique():
        if pd.isna(city):
            continue
            
        city_data = interactions_df[interactions_df['city'] == city]
        city_counts = city_data['item_id'].value_counts().to_dict()
        total = sum(city_counts.values()) if city_counts else 0
        
        if total > 0:
            city_event_pop[city] = {event_id: city_counts.get(event_id, 0) / total 
                                    for event_id in candidate_events}
        else:
            city_event_pop[city] = {event_id: 0 for event_id in candidate_events}
    
    return city_event_pop

print("Calculating popularity scores...")
march_popularity = calculate_popularity(history_interactions, march_candidates)
print(f"Calculated popularity scores for {len(march_popularity)} March events")

april_popularity = calculate_popularity(full_history_interactions, april_candidates)
print(f"Calculated popularity scores for {len(april_popularity)} April events")

march_city_popularity = calculate_city_popularity(history_interactions, march_candidates)
print(f"Calculated city-specific popularity for March events in {len(march_city_popularity)} cities")

april_city_popularity = calculate_city_popularity(full_history_interactions, april_candidates)
print(f"Calculated city-specific popularity for April events in {len(april_city_popularity)} cities")

cell_end()

Calculating popularity scores...
Calculated popularity scores for 1023 March events
Calculated popularity scores for 1033 April events
Calculated city-specific popularity for March events in 4 cities
Calculated city-specific popularity for April events in 4 cities
Cell ended!


## 6. Temporal Pattern Extraction

In [9]:
print("Extracting temporal patterns...")
def extract_user_temporal_patterns(interactions_df):
    """
    Extract day of week preferences and attendance frequency for each user
    """
    print(f"Processing temporal patterns for {len(interactions_df)} interactions...")
    
    interactions_df['day_of_week'] = interactions_df['reservation_time'].dt.day_name()
    interactions_df['hour_of_day'] = interactions_df['reservation_time'].dt.hour
    interactions_df['month'] = interactions_df['reservation_time'].dt.month
    print("Added temporal columns")
    
    user_monthly_frequency = {}
    user_day_preferences = {}
    user_hour_preferences = {}
    
    total_users = len(interactions_df['user_id'].unique())
    print(f"Extracting patterns for {total_users} users...")
    
    for i, user_id in enumerate(interactions_df['user_id'].unique()):
        if i % 100 == 0 or i == total_users - 1:
            progress = (i + 1) / total_users * 100
            print(f"Processing users: {i+1}/{total_users} ({progress:.1f}%)", end="\r")
            sys.stdout.flush()
            
        user_data = interactions_df[interactions_df['user_id'] == user_id]
        
        if len(user_data) > 0:
            min_date = user_data['reservation_time'].min()
            max_date = user_data['reservation_time'].max()
            
            if pd.notna(min_date) and pd.notna(max_date):
                # Calculate months between first and last attendance
                months_active = ((max_date.year - min_date.year) * 12 + 
                                max_date.month - min_date.month + 1)
                
                if months_active > 0:
                    # Events per month
                    user_monthly_frequency[user_id] = len(user_data) / months_active
                else:
                    # All in one month
                    user_monthly_frequency[user_id] = len(user_data)
            else:
                user_monthly_frequency[user_id] = 0
        else:
            user_monthly_frequency[user_id] = 0
        
        # Calculate day of week preferences
        day_counts = user_data['day_of_week'].value_counts()
        total_days = day_counts.sum()
        
        if total_days > 0:
            user_day_preferences[user_id] = {day: count/total_days 
                                           for day, count in day_counts.items()}
        else:
            user_day_preferences[user_id] = {}
        
        hour_counts = user_data['hour_of_day'].value_counts()
        total_hours = hour_counts.sum()
        
        if total_hours > 0:
            user_hour_preferences[user_id] = {hour: count/total_hours 
                                            for hour, count in hour_counts.items()}
        else:
            user_hour_preferences[user_id] = {}
    
    print(f"\nCompleted temporal pattern extraction for {len(user_monthly_frequency)} users")
    return user_monthly_frequency, user_day_preferences, user_hour_preferences

cell_end()

Extracting temporal patterns...
Cell ended!


In [10]:
def extract_event_temporal_patterns(interactions_df, candidate_events):
    """Extract day of week and hour patterns for events"""
    print(f"Extracting temporal patterns for {len(candidate_events)} events...")
    
    if 'day_of_week' not in interactions_df.columns:
        interactions_df['day_of_week'] = interactions_df['reservation_time'].dt.day_name()
    if 'hour_of_day' not in interactions_df.columns:
        interactions_df['hour_of_day'] = interactions_df['reservation_time'].dt.hour
    
    event_day_patterns = {}
    event_hour_patterns = {}
    
    total_events = len(candidate_events)
    for i, item_id in enumerate(candidate_events):
        if i % 100 == 0 or i == total_events - 1:
            progress = (i + 1) / total_events * 100
            print(f"Processing events: {i+1}/{total_events} ({progress:.1f}%)", end="\r")
            sys.stdout.flush()
            
        item_data = interactions_df[interactions_df['item_id'] == item_id]
        
        if not item_data.empty:
            day_counts = item_data['day_of_week'].value_counts()
            total_days = day_counts.sum()
            
            if total_days > 0:
                event_day_patterns[item_id] = {day: count/total_days 
                                             for day, count in day_counts.items()}
            else:
                event_day_patterns[item_id] = {}
            
            hour_counts = item_data['hour_of_day'].value_counts()
            total_hours = hour_counts.sum()
            
            if total_hours > 0:
                event_hour_patterns[item_id] = {hour: count/total_hours 
                                              for hour, count in hour_counts.items()}
            else:
                event_hour_patterns[item_id] = {}
        else:
            event_day_patterns[item_id] = {}
            event_hour_patterns[item_id] = {}
    
    print(f"\nExtracted temporal patterns for {len(event_day_patterns)} events")
    return event_day_patterns, event_hour_patterns

cell_end()

Cell ended!


In [11]:
# Extract temporal patterns
print("Extracting user temporal patterns...")
history_frequency, history_day_prefs, history_hour_prefs = extract_user_temporal_patterns(history_interactions)
print(f"Extracted frequency data for {len(history_frequency)} users from history data")

full_frequency, full_day_prefs, full_hour_prefs = extract_user_temporal_patterns(full_history_interactions)
print(f"Extracted frequency data for {len(full_frequency)} users from full history")

print("Extracting event temporal patterns...")
march_day_patterns, march_hour_patterns = extract_event_temporal_patterns(
    history_interactions, march_candidates)
print(f"Extracted day patterns for {len(march_day_patterns)} March events")

april_day_patterns, april_hour_patterns = extract_event_temporal_patterns(
    full_history_interactions, april_candidates)
print(f"Extracted day patterns for {len(april_day_patterns)} April events")

cell_end()

Extracting user temporal patterns...
Processing temporal patterns for 358551 interactions...
Added temporal columns
Extracting patterns for 102098 users...
Processing users: 102098/102098 (100.0%)
Completed temporal pattern extraction for 102098 users
Extracted frequency data for 102098 users from history data
Processing temporal patterns for 464253 interactions...
Added temporal columns
Extracting patterns for 106587 users...
Processing users: 106587/106587 (100.0%)
Completed temporal pattern extraction for 106587 users
Extracted frequency data for 106587 users from full history
Extracting event temporal patterns...
Extracting temporal patterns for 1023 events...
Processing events: 1023/1023 (100.0%)
Extracted temporal patterns for 1023 events
Extracted day patterns for 1023 March events
Extracting temporal patterns for 1033 events...
Processing events: 1033/1033 (100.0%)
Extracted temporal patterns for 1033 events
Extracted day patterns for 1033 April events
Cell ended!


## 7. User Preference Analysis

In [12]:
print("Analyzing user preferences...")
def get_user_preferences(user_id, interactions_df):
    """Get top genre and type preferences for a user"""
    user_data = interactions_df[interactions_df['user_id'] == user_id]
    
    if user_data.empty:
        return [], []
    
    genre_counts = Counter()
    for _, row in user_data.iterrows():
        item_id = row['item_id']
        if item_id in event_genre and pd.notna(event_genre[item_id]):
            genre_counts[event_genre[item_id]] += 1
    
    type_counts = Counter()
    for _, row in user_data.iterrows():
        item_id = row['item_id']
        if item_id in event_type and pd.notna(event_type[item_id]):
            type_counts[event_type[item_id]] += 1
    
    top_genres = [genre for genre, _ in genre_counts.most_common(3)]
    top_types = [t_type for t_type, _ in type_counts.most_common(2)]
    
    return top_genres, top_types

cell_end()

Analyzing user preferences...
Cell ended!


## 8. Visualization Functions

In [13]:
def plot_day_preferences():
    """Create plot of aggregate day preferences"""
    plt.figure(figsize=(10, 6))
    
    all_days = defaultdict(float)
    for user_days in full_day_prefs.values():
        for day, weight in user_days.items():
            all_days[day] += weight
    
    days_order = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
    day_series = pd.Series({day: all_days.get(day, 0) for day in days_order})
    day_series = day_series / day_series.sum()  # Normalize
    
    ax = day_series.plot(kind='bar', color='skyblue')
    plt.title('User Day of Week Preferences')
    plt.xlabel('Day of Week')
    plt.ylabel('Normalized Preference')
    plt.xticks(rotation=45)
    plt.tight_layout()
    plt.savefig('day_preferences.png')
    plt.close()
    return plt

def plot_user_frequency():
    """Plot distribution of user attendance frequency"""
    plt.figure(figsize=(10, 6))
    
    freq_series = pd.Series(full_frequency)
    freq_series = freq_series[freq_series > 0]  # Filter out zeros
    
    plt.hist(freq_series, bins=20, color='green', alpha=0.7)
    plt.title('Distribution of User Event Attendance Frequency')
    plt.xlabel('Events per Month')
    plt.ylabel('Number of Users')
    plt.tight_layout()
    plt.savefig('user_frequency.png')
    plt.close()
    return plt

# Generate plots
try:
    print("Generating visualizations...")
    plot_day_preferences()
    plot_user_frequency()
    print("Created visualizations: day_preferences.png and user_frequency.png")
except Exception as e:
    print(f"Error generating plots: {e}")

cell_end()

Generating visualizations...
Created visualizations: day_preferences.png and user_frequency.png
Cell ended!


## 9. Recommendation Model

In [14]:
print("Building recommendation model...")
def generate_recommendations(target_user, history_data, candidate_events, 
                           popularity_scores, city_popularity,
                           user_frequency, user_day_prefs, 
                           event_day_patterns,
                           debug=False):  # Add debug parameter for tracking
    """
    Generate recommendations for a user based on preferences, 
    frequency patterns, and day of week preferences
    """
    user_city_val = user_city.get(target_user)
    top_genres, top_types = get_user_preferences(target_user, history_data)
    
    monthly_frequency = user_frequency.get(target_user, 0)
    day_preferences = user_day_prefs.get(target_user, {})
    
    # Debug output
    if debug:
        print(f"Debug for user {target_user}:")
        print(f"  City: {user_city_val}")
        print(f"  Top genres: {top_genres}")
        print(f"  Top types: {top_types}")
        print(f"  Monthly frequency: {monthly_frequency:.2f}")
        print(f"  Day preferences: {day_preferences}")
    
    # Initialize scores
    event_scores = {}
    
    for event_id in candidate_events:
        score = 0
        event_city_val = event_city.get(event_id)
        event_genre_val = event_genre.get(event_id)
        event_type_val = event_type.get(event_id)
        event_day_pattern = event_day_patterns.get(event_id, {})
        
        # 1. Primary boost - genre and type match in user's city
        if user_city_val and event_city_val and user_city_val == event_city_val:
            if event_genre_val and event_genre_val in top_genres:
                score += 6
            if event_type_val and event_type_val in top_types:
                score += 4 
        
        # 2. Secondary boost - genre and type match in any city
        else:
            if event_genre_val and event_genre_val in top_genres:
                score += 3
            if event_type_val and event_type_val in top_types:
                score += 2
        
        # 3. Day of week preference boost
        day_match_score = 0
        for day, user_pref in day_preferences.items():
            if day in event_day_pattern:
                day_match_score += user_pref * event_day_pattern[day]
        
        score += day_match_score * 2
        
        # 4. Frequency-based adjustments
        if monthly_frequency > 0:
            if monthly_frequency > 3:  # High frequency users (>3/month)
                score = score * 1.2  # Boost preference matching
                score += popularity_scores.get(event_id, 0) * 0.1
            elif monthly_frequency > 1:  # Medium frequency (1-3/month)
                score += popularity_scores.get(event_id, 0) * 0.3
            else:  # Low frequency (<1/month)
                score += popularity_scores.get(event_id, 0) * 0.7
        else:
            # 5. Cold start - rely on popularity
            if user_city_val and event_city_val and user_city_val == event_city_val:
                city_pop = city_popularity.get(user_city_val, {}).get(event_id, 0)
                score += city_pop * 3
            else:
                score += popularity_scores.get(event_id, 0) * 1.5
        
        event_scores[event_id] = score
    
    sorted_events = sorted(event_scores.items(), key=lambda x: x[1], reverse=True)
    top_events = [event_id for event_id, _ in sorted_events[:10]]
    
    if debug:
        print("  Top 3 recommended events:")
        for event_id, score in sorted_events[:3]:
            print(f"    {event_id}: {score:.4f} (Genre: {event_genre.get(event_id)}, Type: {event_type.get(event_id)})")
    
    return top_events

cell_end()

Building recommendation model...
Cell ended!


## 10. March Validation

In [16]:
print("Validating on March data...")

march_predictions = {}
march_users = march_true_interactions['user_id'].unique()

print(f"Generating recommendations for {len(march_users)} users in March...")

debug_users = march_users[:3]  # Debug first 3 users

total_users = len(march_users)
for i, user in enumerate(march_users):
    if i % 100 == 0 or i == total_users - 1:
        progress = (i + 1) / total_users * 100
        print(f"Progress: {i+1}/{total_users} users processed ({progress:.1f}%)")
        sys.stdout.flush()
        
    try:
        # Enable debug for sample users
        debug_mode = user in debug_users
        
        recommendations = generate_recommendations(
            user,
            history_with_details,
            march_candidates,
            march_popularity,
            march_city_popularity,
            history_frequency,
            history_day_prefs,
            march_day_patterns,
            debug=debug_mode
        )
        march_predictions[user] = recommendations
    except Exception as e:
        print(f"Error for user {user}: {e}")
        march_predictions[user] = []

print(f"Completed generating recommendations for {len(march_predictions)} March users")

print("Calculating MAP@10 score...")
march_map = mapk(march_ground_truth, march_predictions, 10)
print(f"MAP@10 score for March validation: {march_map}")

cell_end()

Validating on March data...
Generating recommendations for 75127 users in March...
Progress: 1/75127 users processed (0.0%)
Debug for user user_45087:
  City: Шымкент
  Top genres: ['комедия', 'комедия/семейный', '\nкомедия']
  Top types: ['film']
  Monthly frequency: 0.30
  Day preferences: {'Wednesday': np.float64(0.3333333333333333), 'Tuesday': np.float64(0.3333333333333333), 'Sunday': np.float64(0.3333333333333333)}
  Top 3 recommended events:
    event_4425: 10.2517 (Genre: комедия, Type: film)
    event_1720: 6.2541 (Genre: комедия, Type: performance)
    event_2548: 6.0000 (Genre: комедия, Type: performance)
Debug for user user_21325:
  City: Астана
  Top genres: ['комедия', 'Драма/комедия']
  Top types: ['film']
  Monthly frequency: 0.83
  Day preferences: {'Saturday': np.float64(0.2), 'Friday': np.float64(0.2), 'Thursday': np.float64(0.2), 'Sunday': np.float64(0.2), 'Monday': np.float64(0.2)}
  Top 3 recommended events:
    event_751: 6.2872 (Genre: комедия, Type: performance)

## 11. April Predictions

In [17]:
print("Generating April predictions...")

april_predictions = {}
submission_users = submission['user_id'].tolist()

print(f"Generating recommendations for {len(submission_users)} submission users...")

debug_users = submission_users[:3]

total_users = len(submission_users)
for i, user in enumerate(submission_users):
    # Print progress every 100 users
    if i % 100 == 0 or i == total_users - 1:
        progress = (i + 1) / total_users * 100
        print(f"Progress: {i+1}/{total_users} users processed ({progress:.1f}%)")
        sys.stdout.flush()
        
    try:
        # Enable debug for sample users
        debug_mode = user in debug_users
        
        recommendations = generate_recommendations(
            user,
            full_history_with_details,
            april_candidates,
            april_popularity,
            april_city_popularity,
            full_frequency,
            full_day_prefs,
            april_day_patterns,
            debug=debug_mode
        )
        april_predictions[user] = recommendations
    except Exception as e:
        print(f"Error for user {user}: {e}")
        april_predictions[user] = []

print(f"Completed generating recommendations for {len(april_predictions)} submission users")
cell_end()

Generating April predictions...
Generating recommendations for 53825 submission users...
Progress: 1/53825 users processed (0.0%)
Debug for user user_99065:
  City: Алматы
  Top genres: ['мюзикл/фэнтези/комедия']
  Top types: ['film']
  Monthly frequency: 1.00
  Day preferences: {'Monday': np.float64(1.0)}
  Top 3 recommended events:
    event_3852: 4.2110 (Genre: драма, Type: film)
    event_3719: 4.1985 (Genre: комедия, Type: film)
    event_2392: 4.1973 (Genre: фантастика/боевик/драма, Type: film)
Debug for user user_72897:
  City: Алматы
  Top genres: ['балет']
  Top types: ['performance']
  Monthly frequency: 1.00
  Day preferences: {'Wednesday': np.float64(1.0)}
  Top 3 recommended events:
    event_825: 10.4271 (Genre: балет, Type: performance)
    event_682: 10.3899 (Genre: балет, Type: performance)
    event_3929: 10.3896 (Genre: балет, Type: performance)
Debug for user user_88085:
  City: Астана
  Top genres: ['сказка']
  Top types: ['performance']
  Monthly frequency: 1.00
 

## 12. Create Submission File

In [18]:
print("Creating submission file...")
result = []
for user in submission_users:
    items = april_predictions.get(user, [])
    item_str = ','.join(items)
    result.append({'user_id': user, 'item_ids': item_str})

result_df = pd.DataFrame(result)

result_df.to_csv('ticketon_result.csv', index=False, quoting=1)
print("Submission file created: ticketon_result.csv")

print("\nFirst 5 rows of submission file:")
print(result_df.head())
cell_end()

Creating submission file...
Submission file created: ticketon_result.csv

First 5 rows of submission file:
       user_id                                           item_ids
0   user_99065  event_3852,event_3719,event_2392,event_4457,ev...
1   user_72897  event_825,event_682,event_3929,event_183,event...
2   user_88085  event_1855,event_1518,event_920,event_57,event...
3  user_108651  event_4425,event_254,event_2548,event_3719,eve...
4   user_28053  event_3928,event_3939,event_2885,event_3852,ev...
Cell ended!


## 13. Model Summary

In [19]:
print("\nRecommendation Model Summary:")
print("-" * 50)
print("This model incorporates several key behavioral patterns:")
print("1. User preferences for event genres and types")
print("2. Day-of-week attendance patterns")
print("3. User's attendance frequency (events per month)")
print("4. City-based preference weighting")
print("5. Popularity fallbacks for cold-start users")
print("-" * 50)
print(f"MAP@10 on March validation data: {march_map}")
print("Full results saved to: ticketon_result.csv")
print("\nProgram execution completed successfully!")
cell_end()


Recommendation Model Summary:
--------------------------------------------------
This model incorporates several key behavioral patterns:
1. User preferences for event genres and types
2. Day-of-week attendance patterns
3. User's attendance frequency (events per month)
4. City-based preference weighting
5. Popularity fallbacks for cold-start users
--------------------------------------------------
MAP@10 on March validation data: 0.04079090881919163
Full results saved to: ticketon_result.csv

Program execution completed successfully!
Cell ended!
