In [6]:
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Load the movie metadata
data = pd.read_csv('prob-0101 (1).csv')

# Combine each movie's descriptive columns (genre + director + cast) into one text field.
# Missing values are replaced with empty strings first: str + NaN yields NaN, so a
# single missing cell would otherwise turn the whole feature into NaN, which
# CountVectorizer cannot tokenize.
data['features'] = (
    data['장르'].fillna('') + ' ' + data['감독'].fillna('') + ' ' + data['출연진'].fillna('')
)

# Turn the text into token-count vectors
vectorizer = CountVectorizer()
feature_matrix = vectorizer.fit_transform(data['features'])

# Pairwise cosine similarity between all movies (row/column i corresponds to row i of `data`)
cosine_sim = cosine_similarity(feature_matrix, feature_matrix)

# Movie recommendation function
def recommend_movies(title, data, cosine_sim, top_n=5):
    """Return the titles of the movies most similar to ``title``.

    Args:
        title: Value to look up in the '제목' (title) column of ``data``.
        data: DataFrame with a '제목' column. Its row order must match the
            rows/columns of ``cosine_sim``.
        cosine_sim: Square, positionally indexed similarity matrix, where
            ``cosine_sim[i][j]`` is the similarity between rows i and j of ``data``.
        top_n: Maximum number of recommendations to return (default 5).

    Returns:
        A pandas Series with up to ``top_n`` titles, most similar first.
        The queried movie itself is never part of the result.

    Raises:
        IndexError: If ``title`` does not occur in ``data['제목']``.
    """
    # Locate the movie by *position*, not by index label: cosine_sim is indexed
    # positionally, so a label taken from a filtered or re-indexed frame would
    # select the wrong row (or fall outside the matrix).
    positions = [pos for pos, name in enumerate(data['제목']) if name == title]
    if not positions:
        # IndexError is kept so callers of the previous `.index[0]` lookup see the same type
        raise IndexError(f"title {title!r} not found in data['제목']")
    movie_pos = positions[0]

    # Exclude the movie itself explicitly instead of assuming it sorts first:
    # when another movie ties with it, the stable sort may leave the queried
    # movie outside slot 0 and it would leak into the recommendations.
    sim_scores = [
        (pos, score)
        for pos, score in enumerate(cosine_sim[movie_pos])
        if pos != movie_pos
    ]
    # sorted() is stable, so equally similar movies keep their original row order
    sim_scores = sorted(sim_scores, key=lambda pair: pair[1], reverse=True)

    # Keep the top_n most similar movies (a negative top_n is treated as 0)
    sim_indices = [pos for pos, _ in sim_scores[:max(top_n, 0)]]
    return data['제목'].iloc[sim_indices]

# Example: recommend movies similar to a specific title
# (replace '스위치' with any other movie title to try it out)
print("추천된 영화 목록:")
similar_titles = recommend_movies('스위치', data, cosine_sim)
print(similar_titles)


추천된 영화 목록:
46          스프린터
1           강남좀비
2     별 볼일 없는 인생
3       10일간의 애인
4             교섭
Name: 제목, dtype: object


In [7]:
pip install surprise

Defaulting to user installation because normal site-packages is not writeable
Collecting surprise
  Using cached surprise-0.1-py2.py3-none-any.whl.metadata (327 bytes)
Collecting scikit-surprise (from surprise)
  Using cached scikit_surprise-1.1.4.tar.gz (154 kB)
  Installing build dependencies: started
  Installing build dependencies: finished with status 'done'
  Getting requirements to build wheel: started
  Getting requirements to build wheel: finished with status 'done'
  Preparing metadata (pyproject.toml): started
  Preparing metadata (pyproject.toml): finished with status 'done'
Using cached surprise-0.1-py2.py3-none-any.whl (1.8 kB)
Building wheels for collected packages: scikit-surprise
  Building wheel for scikit-surprise (pyproject.toml): started
  Building wheel for scikit-surprise (pyproject.toml): finished with status 'error'
Failed to build scikit-surprise
Note: you may need to restart the kernel to use updated packages.


  error: subprocess-exited-with-error
  
  Building wheel for scikit-surprise (pyproject.toml) did not run successfully.
  exit code: 1
  
  [115 lines of output]
  running bdist_wheel
  running build
  running build_py
  creating build\lib.win-amd64-cpython-312\surprise
  copying surprise\accuracy.py -> build\lib.win-amd64-cpython-312\surprise
  copying surprise\builtin_datasets.py -> build\lib.win-amd64-cpython-312\surprise
  copying surprise\dataset.py -> build\lib.win-amd64-cpython-312\surprise
  copying surprise\dump.py -> build\lib.win-amd64-cpython-312\surprise
  copying surprise\reader.py -> build\lib.win-amd64-cpython-312\surprise
  copying surprise\trainset.py -> build\lib.win-amd64-cpython-312\surprise
  copying surprise\utils.py -> build\lib.win-amd64-cpython-312\surprise
  copying surprise\__init__.py -> build\lib.win-amd64-cpython-312\surprise
  copying surprise\__main__.py -> build\lib.win-amd64-cpython-312\surprise
  creating build\lib.win-amd64-cpython-312\surprise\mod

In [8]:
import pandas as pd
from surprise import SVD
from surprise import Dataset, Reader
from surprise.model_selection import train_test_split
from surprise import accuracy

import random

# One seed shared by every source of randomness in this cell
SEED = 42

# Load the prob dataset
data_path = 'prob-0101 (1).csv'
prob_data = pd.read_csv(data_path)

# Build synthetic user ratings: assume users rated the movies at random.
# NOTE: this yields exactly one rating per row of prob_data, so if titles are unique
# each movie is rated by a single user - enough to exercise the API, but not enough
# to learn meaningful collaborative-filtering preferences.
random.seed(SEED)
user_ids = [random.randint(1, 5) for _ in range(len(prob_data))]  # user IDs (users 1-5)
ratings = [random.randint(1, 5) for _ in range(len(prob_data))]  # ratings (random, 1-5)

# Combine into a new ratings DataFrame
ratings_df = pd.DataFrame({
    "user_id": user_ids,
    "item_id": prob_data['제목'],
    "rating": ratings
})

# Convert to a Surprise dataset
reader = Reader(rating_scale=(1, 5))
# NOTE(review): this rebinds `data`, which the content-based cell uses for the movie
# DataFrame; re-running that cell's recommend_movies(...) call afterwards will fail.
data = Dataset.load_from_df(ratings_df[['user_id', 'item_id', 'rating']], reader)
# random.seed() above only seeds the stdlib RNG; Surprise needs an explicit
# random_state for the split to be reproducible.
trainset, testset = train_test_split(data, test_size=0.25, random_state=SEED)

# Fit the collaborative-filtering model (SVD); seeded so factor initialisation is repeatable
model = SVD(random_state=SEED)
model.fit(trainset)

# Predict on the held-out ratings
predictions = model.test(testset)
accuracy.rmse(predictions)  # report the model's RMSE

# Recommend movies for a specific user
def recommend_for_user(user_id, df, model, n=3):
    """Return the ``n`` not-yet-rated movies with the highest predicted rating.

    Args:
        user_id: Identifier matched against ``df['user_id']``.
        df: Ratings DataFrame with 'user_id' and 'item_id' columns.
        model: Fitted predictor exposing ``predict(user_id, item_id)`` whose
            result has an ``est`` attribute (the estimated rating).
        n: Number of recommendations to return (default 3).

    Returns:
        A list of ``(item_id, estimated_rating)`` tuples, highest estimate first.
    """
    all_movies = df['item_id'].unique()
    # A set gives constant-time "already rated?" checks instead of scanning an
    # array once per candidate movie.
    watched_movies = set(df.loc[df['user_id'] == user_id, 'item_id'])

    # Score every movie the user has not rated yet
    recommendations = [
        (movie, model.predict(user_id, movie).est)
        for movie in all_movies
        if movie not in watched_movies
    ]

    # Stable sort: movies with equal estimates keep their first-seen order
    recommendations.sort(key=lambda pair: pair[1], reverse=True)
    return recommendations[:n]

# Example: recommend movies for user 1
print("사용자 1을 위한 추천 영화:")
user_1_recommendations = recommend_for_user(1, ratings_df, model)
print(user_1_recommendations)


ModuleNotFoundError: No module named 'surprise'