In [1]:
import pandas as pd
import numpy as np

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Notebook-wide pandas display settings, applied in a single call
# (set_option accepts alternating option-name / value pairs):
#   - show every column, on one line (no wrapping of wide frames)
#   - render floats with three decimals
#   - allow up to 500 characters of console width
pd.set_option(
    'display.max_columns', None,
    'display.float_format', lambda x: '%.3f' % x,
    'display.width', 500,
    'display.expand_frame_repr', False,
)

In [2]:
# Load the movie dataset. low_memory=False makes pandas parse the file in one
# pass instead of in chunks, which avoids mixed-type inference within a column.
df = pd.read_csv('movies_31-tem.csv', low_memory=False)
# Preview the first rows.
df.head()

Unnamed: 0,TCONST,ORIGINAL_TITLE,TYPE,AVG_RATING,VOTE_COUNT,GENRES,DIRECTORS,YEAR,OVERVIEW,COMBINED_FEATURES
0,tt0000574,The Story of the Kelly Gang,movie,6.0,916,"Action,Adventure,Biography",Charles Tait,1906-01-01,Just as Fritz Lang’s Metropolis (1927) is test...,"the story of the kelly gang action,adventure,b..."
1,tt0003643,The Avenging Conscience: or 'Thou Shalt Not Kill',movie,6.4,1446,"Crime,Drama,Horror",D.W. Griffith,1914-01-01,Thwarted by his despotic uncle from continuing...,the avenging conscience: or 'thou shalt not ki...
2,tt0004181,Judith of Bethulia,movie,6.2,1455,Drama,D.W. Griffith,1914-01-01,The story is from the Biblical Book of Judith....,judith of bethulia drama d.w. griffith the sto...
3,tt0004972,The Birth of a Nation,movie,6.1,26531,"Drama,History,War",D.W. Griffith,1915-01-01,The Birth of A Nation is a silent film from 19...,"the birth of a nation drama,history,war d.w. g..."
4,tt0006864,Intolerance: Love's Struggle Throughout the Ages,movie,7.7,16868,"Drama,History",D.W. Griffith,1916-01-01,"The story of a poor young woman, separated by ...",intolerance: love's struggle throughout the ag...


In [3]:
# Column dtypes, non-null counts and memory footprint of the loaded frame.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 29573 entries, 0 to 29572
Data columns (total 10 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   TCONST             29573 non-null  object 
 1   ORIGINAL_TITLE     29573 non-null  object 
 2   TYPE               29573 non-null  object 
 3   AVG_RATING         29573 non-null  float64
 4   VOTE_COUNT         29573 non-null  int64  
 5   GENRES             29573 non-null  object 
 6   DIRECTORS          29573 non-null  object 
 7   YEAR               29573 non-null  object 
 8   OVERVIEW           29573 non-null  object 
 9   COMBINED_FEATURES  29573 non-null  object 
dtypes: float64(1), int64(1), object(8)
memory usage: 2.3+ MB


In [4]:
# Reduce YEAR from a date string (e.g. '1906-01-01') to the integer calendar year.
# The dtype guard keeps this cell idempotent: once YEAR holds integers, running
# pd.to_datetime on it again would interpret the values as nanosecond offsets
# from the Unix epoch and collapse every year to 1970.
if not pd.api.types.is_integer_dtype(df['YEAR']):
    df['YEAR'] = pd.to_datetime(df['YEAR']).dt.year

In [5]:
# Remove the row-display limit (this option stays in effect for later cells).
pd.set_option('display.max_rows', None)
# The five largest YEAR values, i.e. the most recent release years in the data.
df['YEAR'].sort_values(ascending=False).iloc[:5]

17010    2022
13144    2022
2986     2018
21688    2017
27546    2017
Name: YEAR, dtype: int64

In [6]:
# Era buckets as label: (first year, last year), both ends inclusive:
# 'Nostalji': (1906, 1979), 'Retro': (1980, 1989), 'Mazi': (1990, 1999), 'Yeniler': (2000, 2009), 'Günümüz': (2010, 2022)

In [7]:
# Era labels, oldest to newest; labels[i] names the interval (bins[i], bins[i + 1]].
labels = ['Nostalji', 'Retro', 'Mazi', 'Yeniler', 'Günümüz']
# Interval edges. With right=True each interval excludes its left edge and
# includes its right edge, so 1905 is the exclusive lower bound that admits 1906.
bins = [1905, 1979, 1989, 1999, 2009, 2022]
df['CATEGORY'] = pd.cut(x=df['YEAR'], bins=bins, right=True, labels=labels)

In [8]:
# Preview the frame again to check the integer YEAR values and the new CATEGORY column.
df.head()

Unnamed: 0,TCONST,ORIGINAL_TITLE,TYPE,AVG_RATING,VOTE_COUNT,GENRES,DIRECTORS,YEAR,OVERVIEW,COMBINED_FEATURES,CATEGORY
0,tt0000574,The Story of the Kelly Gang,movie,6.0,916,"Action,Adventure,Biography",Charles Tait,1906,Just as Fritz Lang’s Metropolis (1927) is test...,"the story of the kelly gang action,adventure,b...",Nostalji
1,tt0003643,The Avenging Conscience: or 'Thou Shalt Not Kill',movie,6.4,1446,"Crime,Drama,Horror",D.W. Griffith,1914,Thwarted by his despotic uncle from continuing...,the avenging conscience: or 'thou shalt not ki...,Nostalji
2,tt0004181,Judith of Bethulia,movie,6.2,1455,Drama,D.W. Griffith,1914,The story is from the Biblical Book of Judith....,judith of bethulia drama d.w. griffith the sto...,Nostalji
3,tt0004972,The Birth of a Nation,movie,6.1,26531,"Drama,History,War",D.W. Griffith,1915,The Birth of A Nation is a silent film from 19...,"the birth of a nation drama,history,war d.w. g...",Nostalji
4,tt0006864,Intolerance: Love's Struggle Throughout the Ages,movie,7.7,16868,"Drama,History",D.W. Griffith,1916,"The story of a poor young woman, separated by ...",intolerance: love's struggle throughout the ag...,Nostalji


In [9]:
# 1) Direct column-based selection: fetch the requested number of film titles
#    for a chosen category, optionally picked at random.
def get_titles_by_category(df, category, num_titles, shuffle=False, random_state=None):
    """
    Return up to ``num_titles`` values of 'ORIGINAL_TITLE' for one category.

    Args:
    df (pd.DataFrame): Frame with 'CATEGORY' and 'ORIGINAL_TITLE' columns
    category (str): Category value to filter on
    num_titles (int): Maximum number of titles to return; values <= 0 yield []
    shuffle (bool): False (default) returns the first matching rows in frame
        order; True returns a random selection of the matching rows instead
    random_state: Seed forwarded to ``Series.sample`` when ``shuffle`` is True,
        for reproducible picks

    Returns:
    list: Titles of the matching rows (fewer than ``num_titles`` when the
        category has fewer rows; empty when nothing matches)
    """
    # Titles of the rows that belong to the requested category
    category_titles = df.loc[df['CATEGORY'] == category, 'ORIGINAL_TITLE']

    # head() with a negative count would return "all but the last n" rows,
    # which is never what a caller asking for n titles wants.
    count = max(num_titles, 0)
    if count == 0:
        return []

    if shuffle:
        # sample() refuses n larger than the population, so cap it first
        count = min(count, len(category_titles))
        if count == 0:
            return []
        return category_titles.sample(n=count, random_state=random_state).tolist()

    # Default: the first 'count' titles in frame order
    return category_titles.head(count).tolist()

# Try the function out: first three titles of the 'Nostalji' category
number_of_titles = 3
category_to_filter = 'Nostalji'
titles = get_titles_by_category(
    df,
    category=category_to_filter,
    num_titles=number_of_titles,
)
print(titles)

['The Story of the Kelly Gang', "The Avenging Conscience: or 'Thou Shalt Not Kill'", 'Judith of Bethulia']


In [10]:
## 2) Most popular film of each category
## (per the original note: the existing "recommend_most_popular" function with a CATEGORY condition added)
def recommend_most_popular_per_category(df):
    """Return the most popular row of every category found in ``df``.

    "Most popular" is the row with the highest VOTE_COUNT; ties are broken by
    the higher AVG_RATING.

    Args:
        df (pd.DataFrame): Frame with 'CATEGORY', 'VOTE_COUNT' and 'AVG_RATING'
            columns.

    Returns:
        pd.DataFrame: One row per non-null category, in order of each
        category's first appearance in ``df`` and keeping the original row
        labels, restricted to the display columns that exist. When ``df`` has
        no non-null category, an empty frame carrying the display columns.
    """
    columns_to_display = ['TCONST', 'ORIGINAL_TITLE', 'TYPE', 'AVG_RATING', 'VOTE_COUNT', 'CATEGORY']

    # One winning row (as a Series) per category
    winners = []
    for category in df['CATEGORY'].dropna().unique():
        members = df[df['CATEGORY'] == category]
        if members.empty:
            continue
        ranked = members.sort_values(by=['VOTE_COUNT', 'AVG_RATING'], ascending=False)
        winners.append(ranked.iloc[0])

    # Without this guard an empty input would produce a frame with no columns
    # at all; return the same empty-but-labelled shape callers get elsewhere.
    if not winners:
        return pd.DataFrame(columns=columns_to_display)

    recommendations_df = pd.DataFrame(winners)

    # Keep only the display columns that are actually present
    return recommendations_df[[
        col for col in columns_to_display if col in recommendations_df.columns
    ]]

# Example usage: print the top film of every category.
print(recommend_most_popular_per_category(df))

          TCONST                                  ORIGINAL_TITLE   TYPE  AVG_RATING  VOTE_COUNT  CATEGORY
7179   tt0068646                                   The Godfather  movie       9.200     2034349  Nostalji
6152   tt0080684  Star Wars: Episode V - The Empire Strikes Back  movie       8.700     1392829     Retro
12856  tt0111161                        The Shawshank Redemption  movie       9.300     2919274      Mazi
18927  tt0468569                                 The Dark Knight  movie       9.000     2900164   Yeniler
18931  tt1375666                                       Inception  movie       8.800     2576105   Günümüz


In [11]:
### 3) Five most popular films of each category
### (per the original note: the "most popular" function with a CATEGORY condition added)
def recommend_top_5_per_category(df):
    """Return the five most popular rows of every category found in ``df``.

    Rows are ranked by VOTE_COUNT, then AVG_RATING, both descending.

    NOTE: a later cell in this notebook defines a function with the same name
    that also takes a ``category`` argument; once that cell has run, it
    replaces this definition.

    Args:
        df (pd.DataFrame): Frame with 'CATEGORY', 'VOTE_COUNT' and 'AVG_RATING'
            columns.

    Returns:
        pd.DataFrame: Up to five rows per non-null category, grouped by
        category in order of first appearance, restricted to the display
        columns that exist. When ``df`` has no non-null category, an empty
        frame carrying the display columns.
    """
    columns_to_display = ['TCONST', 'ORIGINAL_TITLE', 'TYPE', 'AVG_RATING', 'VOTE_COUNT', 'CATEGORY']

    # One frame of top rows per category
    recommendations = []
    for category in df['CATEGORY'].dropna().unique():
        filtered_df = df[df['CATEGORY'] == category]
        if filtered_df.empty:
            continue
        top_5 = filtered_df.sort_values(by=['VOTE_COUNT', 'AVG_RATING'], ascending=False).head(5)
        recommendations.append(top_5)

    # pd.concat raises ValueError on an empty list, so handle "no categories"
    # (empty frame, or CATEGORY entirely null) explicitly.
    if not recommendations:
        return pd.DataFrame(columns=columns_to_display)

    recommendations_df = pd.concat(recommendations)

    # Keep only the display columns that are actually present
    return recommendations_df[[
        col for col in columns_to_display if col in recommendations_df.columns
    ]]

# Example usage: print the top five films of every category.
print(recommend_top_5_per_category(df))

          TCONST                                     ORIGINAL_TITLE   TYPE  AVG_RATING  VOTE_COUNT  CATEGORY
7179   tt0068646                                      The Godfather  movie       9.200     2034349  Nostalji
7739   tt0076759                                          Star Wars  movie       8.600     1462432  Nostalji
7181   tt0071562                              The Godfather Part II  movie       9.000     1376877  Nostalji
7154   tt0073486                    One Flew Over the Cuckoo's Nest  movie       8.700     1080467  Nostalji
10888  tt0078748                                              Alien  movie       8.500      961342  Nostalji
6152   tt0080684     Star Wars: Episode V - The Empire Strikes Back  movie       8.700     1392829     Retro
9865   tt0088763                                 Back to the Future  movie       8.500     1321558     Retro
11359  tt0086190         Star Wars: Episode VI - Return of the Jedi  movie       8.300     1131104     Retro
4538   tt0081505   

In [12]:
#### 4) Five most popular films of a chosen category
#### (per the original note: the "most popular" function with a CATEGORY condition added)
def recommend_top_5_per_category(df, category=None):
    """Return the five most popular rows of one category, or of every category.

    Rows are ranked by VOTE_COUNT, then AVG_RATING, both descending.

    This name is also defined by an earlier cell with a ``(df)``-only
    signature. ``category`` is optional here so that calls written for that
    earlier definition keep working after this cell has run.

    Args:
        df (pd.DataFrame): Frame with 'CATEGORY', 'VOTE_COUNT' and 'AVG_RATING'
            columns.
        category (str, optional): Category to report on. When None (default),
            every non-null category in ``df`` is reported, grouped in order of
            first appearance.

    Returns:
        pd.DataFrame: Up to five rows per reported category, restricted to the
        display columns that exist in ``df``. When nothing matches, an empty
        frame carrying the display columns.
    """
    columns_to_display = ['TCONST', 'ORIGINAL_TITLE', 'TYPE', 'AVG_RATING', 'VOTE_COUNT', 'CATEGORY']

    if category is None:
        categories = df['CATEGORY'].dropna().unique()
    else:
        categories = [category]

    # One frame of top rows per reported category
    top_frames = []
    for current in categories:
        filtered_df = df[df['CATEGORY'] == current]
        if filtered_df.empty:
            continue
        top_frames.append(
            filtered_df.sort_values(by=['VOTE_COUNT', 'AVG_RATING'], ascending=False).head(5)
        )

    # Unknown category or empty input: empty result with the expected columns
    if not top_frames:
        return pd.DataFrame(columns=columns_to_display)

    recommendations_df = pd.concat(top_frames)

    # Keep only the display columns that are actually present
    return recommendations_df[[
        col for col in columns_to_display if col in recommendations_df.columns
    ]]


# Example usage: print the top five films of the 'Retro' category.
category = 'Retro'
print(recommend_top_5_per_category(df, category))

          TCONST                                  ORIGINAL_TITLE   TYPE  AVG_RATING  VOTE_COUNT CATEGORY
6152   tt0080684  Star Wars: Episode V - The Empire Strikes Back  movie       8.700     1392829    Retro
9865   tt0088763                              Back to the Future  movie       8.500     1321558    Retro
11359  tt0086190      Star Wars: Episode VI - Return of the Jedi  movie       8.300     1131104    Retro
4538   tt0081505                                     The Shining  movie       8.400     1118289    Retro
6280   tt0082971                         Raiders of the Lost Ark  movie       8.400     1044159    Retro
