In [1]:
!pip install lightfm scikit-learn pandas numpy matplotlib tqdm -q

## 라이브러리

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
import itertools
import pandas as pd
import numpy as np
import pickle
import matplotlib.pyplot as plt
from tqdm import tqdm
import random
import warnings
warnings.filterwarnings('ignore')

import lightfm
from lightfm import LightFM
from lightfm.data import Dataset
from lightfm.evaluation import precision_at_k, recall_at_k, auc_score
from scipy import sparse
from scipy.sparse import lil_matrix

print(f"LightFM version: {lightfm.__version__}")

LightFM version: 1.17


## 하이퍼파라미터

In [4]:
# 하이퍼파라미터 설정
K = 10
TEST_PERCENTAGE = 0.1
LEARNING_RATE = 0.01
NO_COMPONENTS = 5
NO_EPOCHS = 1 # 에폭 값을 1로 변경
NO_THREADS = 4

ITEM_ALPHA = 0.0001
USER_ALPHA = 0.0001

SEED = 3
# np.random.seed(SEED)
random.seed(SEED)

print("설정 완료")

설정 완료


## data & preprocessing


In [5]:
import pandas as pd

github_csv_url = '/content/drive/MyDrive/data_final/'

print("데이터 로딩 중...")

# attendance = pd.read_csv(github_csv_url + 'attendance.csv', index_col=False)
event = pd.read_csv(github_csv_url + 'small_preprocessed_2000.csv', index_col=False)
# event_feature = pd.read_csv(github_csv_url + 'event_feature.csv', index_col=False)
interactions = pd.read_csv(github_csv_url + 'interactions.csv', index_col=False)
user = pd.read_csv(github_csv_url + 'user.csv', index_col=False)
user_feature = pd.read_csv(github_csv_url + 'user_feature.csv', index_col=False)

데이터 로딩 중...


In [6]:
# 범주화
from datetime import datetime

def period_feature(start_date_str, end_date_str):
    """
    start_date_str, end_date_str 예: "2024-07-05"
    """
    start_date = datetime.strptime(start_date_str, "%Y-%m-%d")
    end_date   = datetime.strptime(end_date_str, "%Y-%m-%d")

    days = (end_date - start_date).days

    if days <= 30:
        return "period_short"
    elif days <= 90:
        return "period_medium"
    else:
        return "period_long"


event['period_feature']   = event.apply(lambda row: period_feature(row['봉사기간'].split(' ~ ')[0],
                                                              row['봉사기간'].split(' ~ ')[1]), axis=1)

print(event[['봉사기간', 'period_feature']])

                         봉사기간 period_feature
0     2024-07-05 ~ 2024-09-30  period_medium
1     2024-07-11 ~ 2024-09-30  period_medium
2     2024-07-01 ~ 2024-09-30    period_long
3     2024-07-01 ~ 2024-09-30    period_long
4     2024-07-01 ~ 2024-09-30    period_long
...                       ...            ...
1995  2024-10-01 ~ 2024-11-07  period_medium
1996  2024-10-01 ~ 2024-11-17  period_medium
1997  2024-10-01 ~ 2024-11-19  period_medium
1998  2024-10-01 ~ 2024-11-26  period_medium
1999  2024-10-01 ~ 2024-11-28  period_medium

[2000 rows x 2 columns]


In [7]:
# Feature 컬럼 정의
print("Feature 컬럼 정의...\n")

# 사용자 feature (실제 존재하는 컬럼만)
user_cols = ['region', 'available_time', 'skills', 'preferred_field',
             'age_group', 'job', 'interests', 'similar_group']
user_cols = [col for col in user_cols if col in user.columns]
print(f"사용자 feature: {user_cols}")

# 봉사활동 feature (실제 존재하는 컬럼만)
event_cols = ['period_feature', '봉사자유형',
              '모집기관', '봉사장소', '활동구분']
event_cols = [col for col in event_cols if col in event.columns]
print(f"봉사활동 feature: {event_cols}")

# 추가 feature CSV
user_feature_cols = [col for col in user_feature.columns if col != 'user_id']

print(f"\n사용자 추가 feature: {user_feature_cols}")

Feature 컬럼 정의...

사용자 feature: ['region', 'available_time', 'skills', 'preferred_field']
봉사활동 feature: ['period_feature', '봉사자유형', '모집기관', '봉사장소', '활동구분']

사용자 추가 feature: ['age_group', 'job', 'interests', 'similar_group']


In [8]:
import pandas as pd

# 모든 unique feature 값 수집
print("Feature 값 수집 중...\n")

def extract_features_with_prefixes(df, cols_config):
    """DataFrame에서 feature 값 추출 및 컬럼별 Prefix 적용"""
    features = set()
    for col, config in cols_config.items():
        if col in df.columns:
            prefix = config['prefix']
            splitter = config.get('splitter') # None by default
            for val in df[col].dropna():
                if isinstance(val, str):
                    if splitter and splitter in val:
                        for sub_val in val.split(splitter):
                            features.add(prefix + sub_val.strip())
                    else:
                        features.add(prefix + val.strip())
                else:
                    features.add(prefix + str(val).strip())
    return features

# User features configuration
user_cols_config = {
    'region': {'prefix': 'region_'},
    'available_time': {'prefix': 'available_time_'},
    'skills': {'prefix': 'user_skill_', 'splitter': ','},
    'preferred_field': {'prefix': 'pref_field_'},
}
all_user_features = extract_features_with_prefixes(user, user_cols_config)

# Adding user_feature_cols from user_feature dataframe
user_feature_cols_config = {
    'age_group': {'prefix': 'age_group_'},
    'job': {'prefix': 'job_'},
    'interests': {'prefix': 'interest_', 'splitter': ','},
    'similar_group': {'prefix': 'similar_group_'}
}
all_user_features.update(extract_features_with_prefixes(user_feature, user_feature_cols_config))

print(f"총 사용자 feature: {len(all_user_features)}개")
# print(all_user_features) # Keep commented for brevity

# Event features configuration
event_cols_config = {
    'period_feature': {'prefix': ''}, # Already prefixed like 'period_short'
    '봉사자유형': {'prefix': 'vol_type_'},
    '모집기관': {'prefix': 'org_'},
    '봉사장소': {'prefix': 'location_detail_', 'splitter': ','}, # Split by comma
    '활동구분': {'prefix': 'activity_type_'},
    '봉사분야': {'prefix': 'skill_', 'splitter': '>'}, # Split by '>'
    '등록기관': {'prefix': 'reg_org_loc_', 'splitter': ' '} # Split by ' '
}
all_item_features = extract_features_with_prefixes(event, event_cols_config)

print(f"총 봉사활동 feature: {len(all_item_features)}개")
# print(all_item_features) # Keep commented for brevity

Feature 값 수집 중...

총 사용자 feature: 116개
총 봉사활동 feature: 2662개


In [9]:
#봉사분야, 등록기관 전처리

# The event_cols list is for general understanding, not directly used by Dataset.fit for features
# Features are already correctly extracted into all_item_features by extract_features_with_prefixes
# in cell XFqPoSE2CJWT using event_cols_config.
# The manual update of all_item_features here is redundant and causes inconsistencies if called after dataset.fit.

# Ensure '봉사분야' and '등록기관' are part of event_cols for logical consistency in other parts of the code
# if they are expected, but they are not used for LightFM feature generation directly.
# This block will simply print event_cols and rely on earlier feature generation for LightFM.

if '봉사분야' not in event_cols:
    event_cols.append('봉사분야')
if '등록기관' not in event_cols:
    event_cols.append('등록기관')

print(event_cols)
print(f"총 봉사활동 feature: {len(all_item_features)}개")
print(all_item_features)


['period_feature', '봉사자유형', '모집기관', '봉사장소', '활동구분', '봉사분야', '등록기관']
총 봉사활동 feature: 2662개
{'location_detail_강남구 테헤란로 8길 36', 'org_바른샘어린이도서관', 'location_detail_울산시립미술관', 'reg_org_loc_구례군', 'org_구리시청소년수련관 방과후아카데미(재)', 'location_detail_노틀담복지관', 'org_(주)보배어르신모심터', 'location_detail_설봉공원 내 이천시립박물관', 'location_detail_수택동 외', 'location_detail_나주종합스포츠파크 및 영산강 강변도로 일원', 'org_동탄목동이음터도서관', 'org_서은단기보호시설', 'location_detail_두드림발달센터', 'org_모현도서관', 'reg_org_loc_기장군', 'org_중랑구3호점 면목4동우리동네키움센터', 'reg_org_loc_제주특별자치도', 'org_수원서호노인복지관', 'reg_org_loc_사하구', 'org_다원이음터도서관', 'location_detail_영암군 관내', 'location_detail_동대문노인종합복지관', 'location_detail_경기도 부천시 삼작로 301번길 5', 'location_detail_천안시 서북구 늘푸른3길 7-1 2', 'reg_org_loc_진도군', 'vol_type_청소년', 'org_아름다운가게개봉점', 'location_detail_서울화계초등학교병설유치원', 'org_명일2동 우리동네키움센터', 'org_울산광역시 남구', 'location_detail_오산시 현충로72번길 39', 'location_detail_부산 기장군 정관읍 산단4로139 동원로얄 듀크112동앞 동원1차어린이집', 'location_detail_너른마루실(3층)', 'org_금쪽같은내부모님복지센터', 'location_detail_로뎀나무요양원', 'location_detail

## LightFM

In [10]:
from sklearn.model_selection import RandomizedSearchCV
from sklearn.metrics import make_scorer
from lightfm.evaluation import auc_score
from scipy.stats import loguniform, randint

param_distributions = {
    'no_components': randint(10, 200),  # Number of latent factors
    'learning_rate': loguniform(0.0001, 0.5), # Learning rate
    'item_alpha': loguniform(0.000001, 0.1), # L2 penalty on item embeddings
    'user_alpha': loguniform(0.000001, 0.1) # L2 penalty on user embeddings
}
print("Hyperparameter distributions defined.")

Hyperparameter distributions defined.


In [11]:
# LightFM Dataset 생성
print("LightFM Dataset 생성 중...\n")

dataset = Dataset()

dataset.fit(
    users=user['user_id'].unique(),
    items=event['event_id'].unique(),
    user_features=all_user_features,
    item_features=all_item_features
)

num_users, num_items = dataset.interactions_shape()
print(f'사용자 수: {num_users}')
print(f'봉사활동 수: {num_items}')
print(f'사용자 feature 수: {len(all_user_features)}')
print(f'봉사활동 feature 수: {len(all_item_features)}')

print("\n✓ Dataset 생성 완료")

LightFM Dataset 생성 중...

사용자 수: 1800
봉사활동 수: 2000
사용자 feature 수: 116
봉사활동 feature 수: 2662

✓ Dataset 생성 완료


In [12]:
# 상호작용 행렬 구축
print("상호작용 행렬 구축 중...\n")

# (user_id, item_id, weight) 튜플 리스트
interactions_list = list(zip(
    interactions['user_id'],
    interactions['item_id'],
    interactions['weight']
))

print(f"총 상호작용: {len(interactions_list)}개")
print(f"샘플: {interactions_list[:3]}")

# 전체 상호작용 행렬 생성
(full_interactions, full_weights) = dataset.build_interactions(interactions_list)

# COO matrix일 수 있으므로 CSR matrix로 명시적 변환하여 .indices 접근 가능하게 함
full_interactions = full_interactions.tocsr()

print(f"\n상호작용 행렬 shape: {full_interactions.shape}")
print(f"Non-zero: {full_interactions.nnz}")
print(f"Sparsity: {1 - (full_interactions.nnz / (full_interactions.shape[0] * full_interactions.shape[1])):.4f}")

상호작용 행렬 구축 중...

총 상호작용: 107170개
샘플: [('U1', 'E1939', 3), ('U1', 'E1581', 3), ('U1', 'E936', 1)]

상호작용 행렬 shape: (1800, 2000)
Non-zero: 107170
Sparsity: 0.9702


In [13]:
def user_feature_generator(user_df, user_cols_config):
    """사용자별 feature 리스트 생성"""
    for _, row in user_df.iterrows():
        features = []
        for col, config in user_cols_config.items():
            if col in row and pd.notna(row[col]):
                prefix = config['prefix']
                splitter = config.get('splitter')
                val = str(row[col])
                if splitter and splitter in val:
                    features.extend([prefix + v.strip() for v in val.split(splitter)])
                else:
                    features.append(prefix + val.strip())
        yield (row['user_id'], features)

def event_feature_generator(event_df, event_cols_config):
    for _, row in event_df.iterrows():
        features = []
        for col, config in event_cols_config.items():
            if col in row and pd.notna(row[col]):
                prefix = config['prefix']
                splitter = config.get('splitter')
                val = str(row[col])
                if splitter and splitter in val:
                    features.extend([prefix + v.strip() for v in val.split(splitter)])
                else:
                    features.append(prefix + val.strip())
        yield (row['event_id'], features)

print("Feature generator 함수 정의 완료")

Feature generator 함수 정의 완료


In [14]:
# LightFMWrapper의 에폭(epochs) 값을 1로 설정하시려면, 셀 ib1z-gXeoY40에서 NO_EPOCHS 변수를 변경해주셔야 합니다.
# 현재 선택하신 셀은 feature 행렬 구축과 관련된 부분입니다.
# Feature 행렬 구축
print("Feature 행렬 구축 중...\n")

# 봉사활동 feature 행렬
item_features = dataset.build_item_features(
    event_feature_generator(event, event_cols_config) # Pass event_cols_config here
)
# csr matrix로 변환

print(f"봉사활동 feature 행렬 shape: {item_features.shape}")
print(f"Non-zero: {item_features.nnz}")
print(f"평균 feature/활동: {item_features.nnz / item_features.shape[0]:.2f}")

# 사용자 feature 행렬
user_features = dataset.build_user_features(
    user_feature_generator(pd.concat([user, user_feature.drop(columns='user_id')], axis=1), user_cols_config) # Pass user_cols_config here, combine user and user_feature
)
# csr matrix로 변환

print(f"\n사용자 feature 행렬 shape: {user_features.shape}")
print(f"Non-zero: {user_features.nnz}")
print(f"평균 feature/사용자: {user_features.nnz / user_features.shape[0]:.2f}")

print("\n✓ Feature 행렬 구축 완료")

Feature 행렬 구축 중...

봉사활동 feature 행렬 shape: (2000, 4662)
Non-zero: 20000
평균 feature/활동: 10.00

사용자 feature 행렬 shape: (1800, 1916)
Non-zero: 10596
평균 feature/사용자: 5.89

✓ Feature 행렬 구축 완료


In [15]:
def lightfm_auc_scorer(estimator, X, y=None, user_features=None, item_features=None, num_threads=NO_THREADS):
    # X is the interaction matrix (e.g., test_interactions)
    # y is not used in this context but required by make_scorer
    auc_val = auc_score(
        estimator,
        X,
        user_features=user_features,
        item_features=item_features,
        num_threads=num_threads
    ).mean()
    return auc_val

# Create a custom scorer using make_scorer
auc_scorer = make_scorer(
    lightfm_auc_scorer,
    greater_is_better=True,
    needs_X_for_fit=False, # Interactions matrix X is directly passed to the scorer
    user_features=user_features, # Pass user_features to the scorer
    item_features=item_features,  # Pass item_features to the scorer
    num_threads=NO_THREADS
)

print("Custom AUC scorer defined.")

Custom AUC scorer defined.


In [16]:
from lightfm.cross_validation import random_train_test_split

print("상호작용 데이터 분할 중...")

# 'weights' argument is not supported in this version of random_train_test_split
# The weights are already included in 'full_interactions'
(train_interactions, test_interactions) = random_train_test_split(
    interactions=full_interactions,
    test_percentage=TEST_PERCENTAGE,
    random_state=np.random.RandomState(SEED)
)

print(f"\n훈련 상호작용 행렬 shape: {train_interactions.shape}")
print(f"훈련 상호작용 Non-zero: {train_interactions.nnz}")
print(f"테스트 상호작용 행렬 shape: {test_interactions.shape}")
print(f"테스트 상호작용 Non-zero: {test_interactions.nnz}")

print("\n✓ 데이터 분할 완료")

상호작용 데이터 분할 중...

훈련 상호작용 행렬 shape: (1800, 2000)
훈련 상호작용 Non-zero: 96453
테스트 상호작용 행렬 shape: (1800, 2000)
테스트 상호작용 Non-zero: 10717

✓ 데이터 분할 완료


## LightFMWrapper

In [17]:
# Define a simple wrapper for LightFM to conform to sklearn API for RandomizedSearchCV
class LightFMWrapper(LightFM):
    def __init__(self, loss='warp', no_components=50, learning_rate=0.01, item_alpha=0.0, user_alpha=0.0, random_state=None, learning_schedule='adagrad', **kwargs): # Added **kwargs
        super().__init__(loss=loss, no_components=no_components, learning_rate=learning_rate,
                         item_alpha=item_alpha, user_alpha=user_alpha, random_state=random_state, learning_schedule=learning_schedule, **kwargs) # Pass it to super
        self.user_features = None
        self.item_features = None
        self.train_interactions = None

    def fit(self, interactions, user_features=None, item_features=None, sample_weight=None, epochs=1, num_threads=1, verbose=False):
        self.user_features = user_features
        self.item_features = item_features
        self.train_interactions = interactions
        super().fit(interactions=interactions, user_features=user_features, item_features=item_features,
                      sample_weight=sample_weight, epochs=epochs, num_threads=num_threads, verbose=verbose)
        return self

    def score(self, interactions, y=None, user_features=None, item_features=None, num_threads=1):
        # For scoring, we need access to the features used during training.
        # This is an approximation as RandomizedSearchCV's scoring might
        # not perfectly align with LightFM's specific feature handling.
        if user_features is None: user_features = self.user_features
        if item_features is None: item_features = self.item_features

        # Use AUC score for evaluation as defined in the custom scorer
        # Note: This 'score' method is separate from the 'auc_scorer' defined earlier.
        # RandomizedSearchCV expects the estimator to have a score method or rely on the custom scorer passed.
        # When a custom scorer is passed to RandomizedSearchCV, its 'score' method is not directly used for CV.
        # The custom 'auc_scorer' will handle the evaluation during RandomizedSearchCV's cross-validation.
        # This 'score' method might be called for the final best_estimator_._score.
        return auc_score(
            self,
            interactions,
            user_features=user_features,
            item_features=item_features,
            num_threads=num_threads
        ).mean()

# Re-create RandomizedSearchCV with the wrapper
random_search = RandomizedSearchCV(
    estimator=LightFMWrapper(random_state=SEED), # Removed epochs from here
    param_distributions=param_distributions,
    n_iter=10,  # Number of parameter settings that are sampled
    cv=3,  # Number of cross-validation folds
    scoring=auc_scorer,
    random_state=SEED,
    verbose=2, # Set to a higher value for more detailed output
    n_jobs=NO_THREADS, # Use multiple threads for parallel processing
    error_score=0.0 # Score to assign to errors that occur during cross-validation
)

In [19]:
import os

print("추천 생성에 필요한 매핑 변수 정의 중...")

# Load the best model
model_filename = 'output/best_lightfm_model.pkl'
if not os.path.exists(model_filename):
    print(f"Error: Model file not found at {model_filename}")
    # Optionally, you could try to re-run the model fitting here if best_model is not defined
    # For now, we assume best_model is loaded successfully or will be defined.
    # As an alternative, if best_model is already loaded, use it.
    if 'best_model' not in locals(): # Check if best_model is defined in local scope
        print("Attempting to load best_model from previous context.")
        # This part might need manual intervention if the context is not preserved
        # For now, we proceed assuming it will be loaded or exist.

else:
    with open(model_filename, 'rb') as f:
        best_model = pickle.load(f)
    print("✓ Optimal LightFM model loaded for mapping.")

# Get mappings from the dataset object
user_id_map, _, item_id_map, _ = dataset.mapping()

# Reverse mapping for displaying recommendations
idx_to_event_id = {v: k for k, v in item_id_map.items()}

# Create a DataFrame to map event_id to 봉사활동명
event_titles = event[['event_id', '봉사활동명']].set_index('event_id')

# Internal LightFM item IDs
all_item_ids_internal = np.arange(num_items)

print("✓ 매핑 변수 정의 완료")

추천 생성에 필요한 매핑 변수 정의 중...
✓ Optimal LightFM model loaded for mapping.
✓ 매핑 변수 정의 완료


In [20]:
model_filename = 'output/best_lightfm_model.pkl'

# Check if the file exists before attempting to load
if not os.path.exists(model_filename):
    print(f"Error: Model file not found at {model_filename}")
else:
    print(f"Loading optimal LightFM model from '{model_filename}'...")
    with open(model_filename, 'rb') as f:
        best_model = pickle.load(f)
    print("✓ Optimal LightFM model loaded successfully.")

Loading optimal LightFM model from 'output/best_lightfm_model.pkl'...
✓ Optimal LightFM model loaded successfully.


In [21]:
import numpy as np
import pandas as pd

print("추천 생성에 필요한 매핑 변수 정의 중...")

# Load the best model
model_filename = 'output/best_lightfm_model.pkl'
if not os.path.exists(model_filename):
    print(f"Error: Model file not found at {model_filename}")
    # Optionally, you could try to re-run the model fitting here if best_model is not defined
    # For now, we assume best_model is loaded successfully or will be defined.
    # As an alternative, if best_model is already loaded, use it.
    if 'best_model' not in locals(): # Check if best_model is defined in local scope
        print("Attempting to load best_model from previous context.")
        # This part might need manual intervention if the context is not preserved
        # For now, we proceed assuming it will be loaded or exist.

else:
    with open(model_filename, 'rb') as f:
        best_model = pickle.load(f)
    print("✓ Optimal LightFM model loaded for mapping.")

# Get mappings from the dataset object
user_id_map, _, item_id_map, _ = dataset.mapping()

# Reverse mapping for displaying recommendations
idx_to_event_id = {v: k for k, v in item_id_map.items()}

# Create a DataFrame to map event_id to 봉사활동명
event_titles = event[['event_id', '봉사활동명']].set_index('event_id')

# Internal LightFM item IDs
all_item_ids_internal = np.arange(num_items)

print("✓ 매핑 변수 정의 완료")

추천 생성에 필요한 매핑 변수 정의 중...
✓ Optimal LightFM model loaded for mapping.
✓ 매핑 변수 정의 완료


In [22]:
print("샘플 사용자 선택 및 특징 표시 중...")

# 1. user DataFrame에서 랜덤으로 샘플 사용자 ID 선택
sample_user_id = random.choice(user['user_id'].unique())

# 2. dataset 객체에서 사용자 feature 매핑 가져오기
# dataset.mapping()은 (user_id_map, user_feature_map, item_id_map, item_feature_map)을 반환합니다.
user_id_map, user_feature_map, _, _ = dataset.mapping()

# 3. 선택된 sample_user_id에 해당하는 내부(internal) 사용자 ID 찾기
# user_id_map을 사용하여 외부 ID를 내부 ID로 변환합니다.
user_internal_id = user_id_map[sample_user_id]

# 4. user_features 행렬에서 선택된 샘플 사용자의 활성화된 feature 인덱스 추출
# user_features는 CSR matrix이므로 직접 인덱싱하여 해당 사용자의 feature를 가져옵니다.
active_feature_indices = user_features[user_internal_id].indices

# 5. 추출된 feature 인덱스를 실제 feature 이름으로 변환
# user_feature_map을 역으로 매핑하여 internal index를 feature name으로 변환합니다.
idx_to_user_feature = {idx: name for name, idx in user_feature_map.items()}
readable_features = [idx_to_user_feature[idx] for idx in active_feature_indices]

print(f"\n샘플 사용자 ID: {sample_user_id}")
print(f"LightFM 내부 사용자 ID: {user_internal_id}")
print(f"\nLightFM 모델에 사용된 특징:\n{', '.join(readable_features)}")

# 6. 샘플 사용자의 원본 특징 및 user_feature DataFrame에 있는 추가 특징 출력
print("\n\n사용자 DataFrame의 원본 특징:")
display(user[user['user_id'] == sample_user_id])

combined_user_data = pd.merge(user, user_feature, on='user_id', how='left')
print("\n사용자 feature DataFrame의 추가 특징:")
display(combined_user_data[combined_user_data['user_id'] == sample_user_id])

print("\n✓ 샘플 사용자 특징 표시 완료")

샘플 사용자 선택 및 특징 표시 중...

샘플 사용자 ID: U488
LightFM 내부 사용자 ID: 487

LightFM 모델에 사용된 특징:
U488, pref_field_보건의료, user_skill_통역, user_skill_응급처치, region_경기, available_time_주말 오후


사용자 DataFrame의 원본 특징:


Unnamed: 0,user_id,region,available_time,skills,preferred_field
487,U488,경기,주말 오후,"응급처치,통역",보건의료



사용자 feature DataFrame의 추가 특징:


Unnamed: 0,user_id,region,available_time,skills,preferred_field,age_group,job,interests,similar_group
487,U488,경기,주말 오후,"응급처치,통역",보건의료,20대,학생,"노인,어린이,IT",G5



✓ 샘플 사용자 특징 표시 완료


In [23]:
print(f"\n샘플 사용자 {sample_user_id}에 대한 상위 5개 봉사 활동 추천 생성 중...")

# Get items the sample user has already interacted with in the full dataset
# Use the CSR matrix for efficient slicing
known_positives = full_interactions[user_internal_id].indices

# Predict scores for all items for the sample user
symmetrical_scores = best_model.predict(
    user_ids=user_internal_id,
    item_ids=all_item_ids_internal,
    user_features=user_features,
    item_features=item_features,
    num_threads=NO_THREADS
)

# Filter out items the user has already interacted with
item_mask = np.ones(num_items, dtype=bool)
item_mask[known_positives] = False

unseen_scores = symmetrical_scores[item_mask]
unseen_item_internal_ids = all_item_ids_internal[item_mask]

# Get the top 5 recommended item internal IDs from unseen items
top_5_indices = unseen_scores.argsort()[-5:][::-1]
top_5_internal_item_ids = unseen_item_internal_ids[top_5_indices]

# Map internal item IDs back to original event_ids (string)
top_5_event_ids = [idx_to_event_id[item_idx] for item_idx in top_5_internal_item_ids]

# Map event_ids to 봉사활동명
top_5_event_names = event_titles.loc[top_5_event_ids]['봉사활동명'].tolist()



print(f"\nLightFM 모델에 사용된 특징:\n{', '.join(readable_features)}")

print(f"\n사용자 {sample_user_id} 추천: {top_5_event_names}")
print("\n✓ 샘플 사용자 추천 생성 완료")


샘플 사용자 U488에 대한 상위 5개 봉사 활동 추천 생성 중...

LightFM 모델에 사용된 특징:
U488, pref_field_보건의료, user_skill_통역, user_skill_응급처치, region_경기, available_time_주말 오후

사용자 U488 추천: ['(영종-비대면) 이면지 모아!(원데이 자원봉사 체험의 날)활동', '대전세종충남넥슨후원공공어린이재활병원 삼킴치료실 연하치료 물품제작', '소중한 지구를 위한 탄소중립 캠페인', '[종이팩착한순환] 종이팩 해체 및 수량 확인 자원봉사자를 모집합니다. (망우 마중 마을활력소)', '서초구립반포도서관 자원봉사(10월) 18:00~20:00 도서배가, 이용자 교육 및 안내 활동']

✓ 샘플 사용자 추천 생성 완료
