In [1]:
# Gerekli kütüphaneler
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity

print("FilmMate projesine hoş geldiniz! 🎬")

FilmMate projesine hoş geldiniz! 🎬


In [2]:
# Veri setini oku
movies_df = pd.read_csv('../data/movies.csv')
print(f"Veri seti boyutu: {movies_df.shape[0]} satır, {movies_df.shape[1]} sütun\n")
movies_df.head()

Veri seti boyutu: 87585 satır, 3 sütun



Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy


In [3]:
# Türleri vektörize et
movies_df['genres_str'] = movies_df['genres'].str.replace('(no genres listed)', 'no_genres')
movies_df['genres_str'] = movies_df['genres_str'].str.replace('|', ' ')

# CountVectorizer'ı özel karakterleri koruyacak şekilde ayarla
cv = CountVectorizer(token_pattern=r'[a-zA-Z\-_]+')
genre_matrix = cv.fit_transform(movies_df['genres_str'])

In [4]:
def get_recommendations_efficient(movie_title, genre_matrix, n=5):
    idx = movies_df[movies_df['title'] == movie_title].index[0]
    movie_vector = genre_matrix[idx:idx+1]
    sim_scores = cosine_similarity(movie_vector, genre_matrix).flatten()
    
    # Film yılını çıkar
    try:
        movie_year = int(movies_df.iloc[idx]['title'].split('(')[-1].strip(')'))
    except:
        movie_year = 2000
    
    # En benzer 20 filmi al
    top_20_indices = sim_scores.argsort()[::-1][1:21]
    recommendations = movies_df.iloc[top_20_indices][['title', 'genres']]
    recommendations['similarity'] = sim_scores[top_20_indices]
    
    # Geçici olarak yıl sütunu ekle
    recommendations['temp_year'] = recommendations['title'].apply(
        lambda x: int(x.split('(')[-1].strip(')')) if '(' in x else 2000
    )
    
    # Yıl farkına göre benzerliği güncelle
    recommendations['similarity'] = recommendations.apply(
        lambda x: x['similarity'] * (1 - min(abs(movie_year - x['temp_year']) * 0.01, 0.5)), 
        axis=1
    )
    
    # Geçici yıl sütununu kaldır
    recommendations = recommendations.drop('temp_year', axis=1)
    
    # Son sıralamayı yap
    recommendations = recommendations.sort_values('similarity', ascending=False).head(n)
    return recommendations

In [5]:
# Test edelim
print("Güncellenmiş öneriler - Toy Story (1995) için:\n")
print(get_recommendations_efficient('Toy Story (1995)', genre_matrix))

print("\nGüncellenmiş öneriler - Jumanji (1995) için:\n")
print(get_recommendations_efficient('Jumanji (1995)', genre_matrix))

Güncellenmiş öneriler - Toy Story (1995) için:

                                                   title  \
22298                                     Aladdin (1992)   
2203                                         Antz (1998)   
3021                                  Toy Story 2 (1999)   
3913                    Emperor's New Groove, The (2000)   
17436  Asterix and the Vikings (Astérix et les Viking...   

                                            genres  similarity  
22298  Adventure|Animation|Children|Comedy|Fantasy        0.97  
2203   Adventure|Animation|Children|Comedy|Fantasy        0.97  
3021   Adventure|Animation|Children|Comedy|Fantasy        0.96  
3913   Adventure|Animation|Children|Comedy|Fantasy        0.95  
17436  Adventure|Animation|Children|Comedy|Fantasy        0.89  

Güncellenmiş öneriler - Jumanji (1995) için:

                                                   title  \
43363                    Escape to Witch Mountain (1995)   
9559                           Mag