In [1]:
import pandas as pd
import numpy as np

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Notebook-wide pandas display settings.
pd.set_option('display.width', 500)                     # allow up to 500 characters per output line
pd.set_option('display.expand_frame_repr', False)       # do not wrap wide frames across several blocks
pd.set_option('display.max_columns', None)              # never elide columns
pd.set_option('display.float_format', '{:.3f}'.format)  # render floats with three decimals

In [2]:
# Load the movie dataset. low_memory=False makes pandas parse the file in one
# pass instead of in chunks, which avoids mixed-type column inference.
df = pd.read_csv('movies_31-tem.csv', low_memory=False)
# Preview the first rows.
df.head()

In [3]:
# Print column names, dtypes and non-null counts of the loaded frame.
df.info()

In [4]:
# Reduce YEAR to a plain calendar year.
#
# pd.to_datetime interprets numeric input as an offset from the Unix epoch
# (nanoseconds by default), so running the conversion a second time on the
# already converted column would turn every year into 1970. Parsing only when
# the column is not numeric yet keeps this cell safe to re-run.
# NOTE(review): this assumes a numeric YEAR column already holds calendar
# years - confirm against the raw CSV.
if not pd.api.types.is_numeric_dtype(df['YEAR']):
    df['YEAR'] = pd.to_datetime(df['YEAR']).dt.year

In [5]:
# Remove the row-display limit. This is a global pandas option, so it stays in
# effect for every cell executed afterwards.
pd.set_option('display.max_rows', None)
# Show the five largest YEAR values.
df['YEAR'].sort_values(ascending=False).head(5)

In [6]:
# Era categories by release year (first year, last year), both inclusive:
# 'Nostalji': (1906, 1979), 'Retro': (1980, 1989), 'Mazi': (1990, 1999), 'Yeniler': (2000, 2009), 'Günümüz': (2010, 2022)

In [7]:
# Bin edges for right-inclusive intervals:
# (1905, 1979], (1979, 1989], (1989, 1999], (1999, 2009], (2009, 2022].
bins = [1905, 1979, 1989, 1999, 2009, 2022]
# One era label per interval, in the same order as the edges above.
labels = ['Nostalji', 'Retro', 'Mazi', 'Yeniler', 'Günümüz']
# Label every row by its YEAR. Missing years and years outside (1905, 2022]
# fall into no interval and get NaN as CATEGORY.
df['CATEGORY'] = pd.cut(df['YEAR'], bins=bins, labels=labels, right=True)

In [8]:
# Preview the first rows of df.
df.head()

In [9]:
# 1) Direct column-based selection: return the requested number of films for the chosen category (the first matches in DataFrame order, not a random sample)
def get_titles_by_category(df, category, num_titles):
    """
    Return the first 'ORIGINAL_TITLE' values of the rows in a given category.

    Args:
    df (pd.DataFrame): Frame holding 'CATEGORY' and 'ORIGINAL_TITLE' columns
    category (str): Category value the rows must match
    num_titles (int): How many titles to take from the top of the match

    Returns:
    list: Titles of the matching rows, in the row order of df
    """
    # Boolean mask of the rows that belong to the requested category
    in_category = df['CATEGORY'] == category

    # Pick the title column of those rows in a single selection
    matching_titles = df.loc[in_category, 'ORIGINAL_TITLE']

    # Keep only the leading 'num_titles' entries
    return matching_titles.head(num_titles).tolist()

# Try the function out: first three titles of the 'Nostalji' category
category_to_filter = 'Nostalji'
number_of_titles = 3
titles = get_titles_by_category(df, category_to_filter, number_of_titles)
print(titles)

In [10]:
# 2) Most popular film of each category (note: this is Tolga's most-popular function, "recommend_most_popular", with a "CATEGORY" condition added)
def recommend_most_popular_per_category(df):
    """Return the single most popular row of every category.

    Popularity is ranked by 'VOTE_COUNT' first and 'AVG_RATING' second, both
    in descending order. Rows whose 'CATEGORY' is missing are ignored.

    Args:
        df (pd.DataFrame): Frame with 'CATEGORY', 'VOTE_COUNT' and
            'AVG_RATING' columns.

    Returns:
        pd.DataFrame: One row per category, limited to the display columns
        that exist in the result. If no category is present, the frame is
        empty and has no columns.
    """
    display_columns = ['TCONST', 'ORIGINAL_TITLE', 'TYPE', 'AVG_RATING', 'VOTE_COUNT', 'CATEGORY']
    ranking_keys = ['VOTE_COUNT', 'AVG_RATING']

    # Collect the best-ranked row (a Series) of each category.
    winners = []
    for current_category in df['CATEGORY'].dropna().unique():
        members = df.loc[df['CATEGORY'] == current_category]
        if members.empty:
            continue
        ranked = members.sort_values(by=ranking_keys, ascending=False)
        winners.append(ranked.iloc[0])

    # Each collected Series becomes one row of the result.
    result = pd.DataFrame(winners)
    if result.empty:
        return result

    # Keep only the display columns that are actually available.
    available = [name for name in display_columns if name in result.columns]
    return result[available]

# Example usage: print the most popular row of every category in df.
print(recommend_most_popular_per_category(df))

In [11]:
# 3) Top 5 most popular films of each category (note: the most-popular function with a CATEGORY condition added)
def recommend_top_5_per_category(df):
    """Return the five most popular rows of every category in one DataFrame.

    Popularity is ranked by 'VOTE_COUNT' first and 'AVG_RATING' second, both
    in descending order. Rows whose 'CATEGORY' is missing are ignored.

    Args:
        df (pd.DataFrame): Frame with 'CATEGORY', 'VOTE_COUNT' and
            'AVG_RATING' columns.

    Returns:
        pd.DataFrame: Up to five rows per category, categories in order of
        first appearance, limited to the display columns that exist in df.
        When df contains no category at all, an empty frame carrying all
        display columns is returned.
    """
    columns_to_display = ['TCONST', 'ORIGINAL_TITLE', 'TYPE', 'AVG_RATING', 'VOTE_COUNT', 'CATEGORY']
    sort_keys = ['VOTE_COUNT', 'AVG_RATING']

    # Collect one "top 5" frame per category.
    top_frames = []
    for category in df['CATEGORY'].dropna().unique():
        in_category = df[df['CATEGORY'] == category]
        if not in_category.empty:
            top_frames.append(in_category.sort_values(by=sort_keys, ascending=False).head(5))

    # pd.concat raises ValueError when given an empty list, so the
    # "no categories" case has to be answered before concatenating.
    if not top_frames:
        return pd.DataFrame(columns=columns_to_display)

    recommendations_df = pd.concat(top_frames)

    # Keep only the display columns that are actually available.
    return recommendations_df[[
        col for col in columns_to_display if col in recommendations_df.columns
    ]]

# Example usage: print the top five rows of every category in df.
# NOTE: a later cell defines a function with this same name, so this call
# depends on which definition was executed last.
print(recommend_top_5_per_category(df))

In [12]:
# 4) Most popular films of a category we select (note: the most-popular function with a CATEGORY condition added)
def recommend_top_5_per_category(df, category=None):
    """Return the five most popular rows of one category, or of every category.

    Popularity is ranked by 'VOTE_COUNT' first and 'AVG_RATING' second, both
    in descending order.

    An earlier cell defines a function with this same name that takes only
    ``df``. Making ``category`` optional keeps calls written for that earlier
    definition working after this cell has been executed.

    Args:
        df (pd.DataFrame): Frame with 'CATEGORY', 'VOTE_COUNT' and
            'AVG_RATING' columns.
        category (str, optional): Category to restrict the result to. When
            omitted (None), the top five rows of every non-missing category
            are returned, categories in order of first appearance.

    Returns:
        pd.DataFrame: The selected rows, limited to the display columns that
        exist in df. When nothing matches, an empty frame carrying all
        display columns is returned.
    """
    columns_to_display = ['TCONST', 'ORIGINAL_TITLE', 'TYPE', 'AVG_RATING', 'VOTE_COUNT', 'CATEGORY']
    sort_keys = ['VOTE_COUNT', 'AVG_RATING']

    # Decide which categories to rank: the requested one, or all of them.
    if category is None:
        categories = df['CATEGORY'].dropna().unique()
    else:
        categories = [category]

    # Collect one "top 5" frame per category that has at least one row.
    top_frames = []
    for current in categories:
        in_category = df[df['CATEGORY'] == current]
        if not in_category.empty:
            top_frames.append(in_category.sort_values(by=sort_keys, ascending=False).head(5))

    # Nothing matched (unknown category or no categories at all).
    if not top_frames:
        return pd.DataFrame(columns=columns_to_display)

    recommendations_df = pd.concat(top_frames)

    # Keep only the display columns that are actually available.
    return recommendations_df[[
        col for col in columns_to_display if col in recommendations_df.columns
    ]]


# Example usage: print the top five rows of the 'Retro' category.
category = 'Retro'
print(recommend_top_5_per_category(df, category))