# 🎵 Event Recommender – Content‑Based Filtering Demo
A self‑contained walkthrough that loads the cleaned events export **`merged_events_clean.csv`** (the exact file name read is set by `CSV_PATH` in Section 1), builds a TF‑IDF + metadata feature matrix, fits a cosine‑similarity Nearest‑Neighbors model, and exposes a helper `recommend()` function.

*Python 3 · Pandas · Scikit‑learn · Joblib*

## 0. Setup & Library Imports

In [None]:
import pandas as pd
import numpy as np
from pathlib import Path

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.neighbors import NearestNeighbors
from scipy.sparse import hstack
import joblib
print('Libraries imported!')

## 1. Load & Inspect the Data

In [None]:
# Relative path to the events CSV; resolved against the current working directory.
CSV_PATH = Path('merged_events_clean - merged_events_clean.csv')  # adjust if needed
df = pd.read_csv(CSV_PATH)
# Sanity check: (rows, columns), followed by a preview of the first rows.
print(df.shape)
df.head()

## 2. Basic Cleaning

In [None]:
# Convert date/time; values that cannot be parsed are coerced to missing
# instead of raising.
df['date'] = pd.to_datetime(df['date'], errors='coerce')
df['time'] = pd.to_datetime(df['time'], format='%H:%M', errors='coerce').dt.time

# Price columns: non-numeric entries are coerced to NaN.
df['price_adv']  = pd.to_numeric(df['in advance'], errors='coerce')
df['price_door'] = pd.to_numeric(df['cover'], errors='coerce')

# Median over every known price (both columns pooled); used as last-resort fill.
mid_price = df[['price_adv', 'price_door']].stack().median()
if pd.isna(mid_price):
    # No usable price anywhere in the data: fall back to 0 so that no NaN
    # survives into the numeric features.
    mid_price = 0.0

# Fill each price from its sibling column first, then from the pooled median.
# The result is assigned back rather than using fillna(inplace=True) on a
# column selection: that form is chained assignment, which pandas deprecates
# and which no longer updates `df` when Copy-on-Write is enabled.
df['price_adv'] = df['price_adv'].fillna(df['price_door'])
df['price_door'] = df['price_door'].fillna(df['price_adv'])
df[['price_adv', 'price_door']] = df[['price_adv', 'price_door']].fillna(mid_price)

# Location (city + district): the leading space-free token of `place` is taken
# as the city and the following token, when present, as the district.
loc_extracted = df['place'].fillna('').str.extract(r'^(?P<city>[^ ]+)\s*(?P<gu>[^ ]+)?')
# Strip the joined label so a city-only place gives 'city' rather than 'city '
# (a trailing space would never match a query location typed without it).
df['loc_sigu'] = (
    loc_extracted['city'].fillna('') + ' ' + loc_extracted['gu'].fillna('')
).str.strip()
df.loc[df['loc_sigu'] == '', 'loc_sigu'] = 'unknown'

df[['date','time','price_adv','price_door','loc_sigu']].head()

## 3. Feature Engineering
### 3‑1. TF‑IDF for text

In [None]:
# One document per row: `content`, `place` and `loc_sigu` joined by single
# spaces, with missing values treated as empty strings.
text_corpus = df['content'].fillna('').str.cat(
    [df['place'].fillna(''), df['loc_sigu'].fillna('')],
    sep=' ',
)

# Unigram + bigram TF-IDF: terms must occur in at least 3 documents, English
# stop words are removed, and the vocabulary is capped at 10,000 features.
tfidf = TfidfVectorizer(
    stop_words='english',
    min_df=3,
    ngram_range=(1, 2),
    max_features=10_000,
)
X_text = tfidf.fit_transform(text_corpus)
X_text

### 3‑2. Numeric & Categorical metadata

In [None]:
# Columns fed to the metadata transformer.
num_cols = ['price_adv', 'price_door']
cat_cols = ['loc_sigu']

# Prices are rescaled with min-max scaling; the location label is one-hot
# encoded, and categories unseen at fit time are ignored at transform time.
pre = ColumnTransformer(
    transformers=[
        ('num', MinMaxScaler(), num_cols),
        ('cat', OneHotEncoder(handle_unknown='ignore'), cat_cols),
    ],
)

X_meta = pre.fit_transform(df)
X_meta

### 3‑3. Combine & Build Final Matrix

In [None]:
from scipy.sparse import csr_matrix
# Place the text features and the metadata features side by side (text columns
# first) and request the stacked result directly in CSR format.
X_all = hstack([X_text, X_meta], format='csr')
X_all

## 4. Fit Cosine Nearest‑Neighbors Model

In [None]:
# Cosine-distance nearest-neighbour index over the combined feature matrix.
# n_neighbors=20 is only the model's default; recommend() passes its own value.
knn = NearestNeighbors(metric='cosine', n_neighbors=20, n_jobs=-1)
knn.fit(X_all)

# Persist everything needed to answer a query in a fresh process.
# `mid_price` is stored as well: it is the fallback price used when a query
# carries no `price_max`, and would otherwise exist only as notebook state.
MODEL_DIR = Path('model')
MODEL_DIR.mkdir(parents=True, exist_ok=True)
joblib.dump(
    {'tfidf': tfidf, 'pre': pre, 'knn': knn, 'df': df, 'mid_price': mid_price},
    MODEL_DIR/'recommender.joblib',
)
print('Model saved to', MODEL_DIR/'recommender.joblib')

## 5. Helper Functions

In [None]:
# Reload the persisted bundle (a dict including 'tfidf', 'pre', 'knn' and 'df')
# so the helpers below work from exactly what was written to disk.
job = joblib.load(MODEL_DIR/'recommender.joblib')

def encode_query(q: dict):
    """Encode a user query into the feature space the KNN model was fitted on.

    Args:
        q: Query mapping with optional keys ``'keywords'`` (free text),
            ``'price_max'`` (number used for both price features) and
            ``'location'`` (compared against the ``loc_sigu`` categories).
            A key that is absent *or* explicitly ``None`` falls back to its
            default: ``''``, the fallback price, and ``'unknown'``.

    Returns:
        A one-row CSR sparse matrix: the TF-IDF block followed by the
        metadata block, the same column order as the training matrix.
    """
    # dict.get(key, default) only applies the default when the key is missing,
    # so explicit None values (e.g. an unset optional API field) are handled
    # separately for every field.
    keywords = q.get('keywords')
    if keywords is None:
        keywords = ''
    txt_vec = job['tfidf'].transform([keywords])

    price = q.get('price_max')
    if price is None:
        # Prefer a fallback price stored in the loaded bundle; otherwise use
        # the notebook-level `mid_price` computed during cleaning.
        price = job['mid_price'] if 'mid_price' in job else mid_price

    location = q.get('location')
    if location is None:
        location = 'unknown'

    meta_df = pd.DataFrame([{
        'price_adv' : price,
        'price_door': price,
        'loc_sigu'  : location,
    }])
    meta_vec = job['pre'].transform(meta_df)
    # CSR keeps the query in the same sparse format as the fitted matrix.
    return hstack([txt_vec, meta_vec]).tocsr()

def recommend(query: dict, top_k=5):
    """Return the events most similar to `query`.

    Args:
        query: Query mapping understood by ``encode_query`` (``'keywords'``,
            ``'price_max'``, ``'location'``).
        top_k: Number of recommendations wanted. Values larger than the number
            of stored events are reduced to that number.

    Returns:
        A DataFrame with the columns ``link``, ``content``, ``place``,
        ``date``, ``time``, ``price_adv``, ``price_door`` and ``score``, in the
        order returned by the nearest-neighbour search. ``score`` is
        ``1 - cosine distance``.
    """
    events = job['df']
    # kneighbors() raises ValueError when asked for more neighbours than there
    # are fitted samples; clamp so an oversized request returns every event.
    n_neighbors = min(top_k, len(events))

    q_vec = encode_query(query)
    dist, idx = job['knn'].kneighbors(q_vec, n_neighbors=n_neighbors)

    # Only one query row is submitted, so row 0 holds all results.
    recs = events.iloc[idx[0]].copy()
    recs['score'] = 1 - dist[0]
    return recs[['link','content','place','date','time','price_adv','price_door','score']]


## 6. Quick Test

In [None]:
# Example query: free-text keywords, a `price_max` value, and a location given
# as "city district".
sample_query = dict(
    keywords='psychedelic rock live',
    price_max=35000,
    location='서울 마포구',
)

recommend(sample_query)

## 7. (Optional) FastAPI Endpoint

In [None]:
"""Run this cell separately (e.g., save as api.py) and launch with:

    uvicorn api:app --host 0.0.0.0 --port 8000
"""
from fastapi import FastAPI
from pydantic import BaseModel
import uvicorn

# NOTE(review): if this cell is saved as a standalone api.py, `joblib` and
# `recommend` (called by the endpoint below) must also be imported/defined
# there; this cell does not bring either into scope.
app = FastAPI()
# NOTE(review): `model` is not referenced by the visible endpoint code, which
# goes through recommend() and the `job` bundle instead. Confirm whether this
# second load is still needed.
model = joblib.load('model/recommender.joblib')

# Request body for POST /recommend. Every field has a default, so a client may
# send any subset of them.
class Query(BaseModel):
    keywords:  str = ''             # free-text search terms
    price_max: float | None = None  # None when the client supplies no price
    location:  str = ''             # location string; empty when not supplied

@app.post('/recommend')
def rec_api(q: Query, top_k:int=5):
    # POST /recommend: run the recommender on the request body and return the
    # matches as a list of per-event records. `top_k` is read from the query
    # string and defaults to 5.
    #
    # Fields the client left unset arrive as None (e.g. `price_max`). They are
    # dropped here so recommend() sees them as missing and applies its own
    # defaults, instead of receiving None as a price value.
    payload = {key: value for key, value in q.dict().items() if value is not None}
    res = recommend(payload, top_k=top_k)
    return res.to_dict(orient='records')

# if __name__ == '__main__':
#     uvicorn.run(app, host='0.0.0.0', port=8000)