In [2]:
import pandas as pd
import numpy as np
import ast
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.cluster import KMeans
from sklearn.metrics.pairwise import linear_kernel
from tabulate import tabulate 

In [3]:
meta = pd.read_csv('movies_metadata.csv', low_memory=False)
keywords = pd.read_csv('keywords.csv')
credits = pd.read_csv('credits.csv')

In [4]:
meta['id'] = pd.to_numeric(meta['id'], errors='coerce').fillna(0).astype(int)
keywords['id'] = keywords['id'].astype(int)
credits['id'] = credits['id'].astype(int)

In [5]:
df = meta.merge(keywords, on='id').merge(credits, on='id')

In [6]:
df = df.sort_values('popularity', ascending=False).head(5000).reset_index(drop=True)

In [7]:
def get_list(x):
    try:
        if isinstance(x, str):
            names = [i['name'] for i in ast.literal_eval(x)]
            return names[:3]
    except: return []
    return []

df['genres_list'] = df['genres'].apply(get_list)
df['cast_list'] = df['cast'].apply(get_list)
df['genres_display'] = df['genres_list'].apply(lambda x: ', '.join(x) if x else 'Other')


In [8]:
def create_soup(x):
    gen = ' '.join([i.lower().replace(" ","") for i in x['genres_list']])
    cast = ' '.join([i.lower().replace(" ","") for i in x['cast_list']])
    desc = str(x['overview']).lower()
    return f"{gen} {cast} {desc}"

df['metadata_soup'] = df.apply(create_soup, axis=1)

In [9]:
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(df['metadata_soup'])

kmeans = KMeans(n_clusters=30, random_state=42, n_init=10)
df['cluster_id'] = kmeans.fit_predict(tfidf_matrix)


In [13]:
clusters = df.groupby('cluster_id')['title'].apply(lambda x: ', '.join(x.head(5))).reset_index()
clusters['count'] = df.groupby('cluster_id')['title'].count().values
clusters.columns = ['ID кластера', 'Примеры фильмов', 'Всего фильмов']
print(tabulate(clusters[['ID кластера', 'Всего фильмов', 'Примеры фильмов']], 
               headers='keys', tablefmt='grid', showindex=False))


+---------------+-----------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
|   ID кластера |   Всего фильмов | Примеры фильмов                                                                                                                                                                                                                 |
|             0 |             117 | Me, Myself & Irene, Analyze That, The Aviator, The X Files: I Want to Believe, Kramer vs. Kramer                                                                                                                                |
+---------------+-----------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

In [18]:
def recommend_me(movie_name):
    search = movie_name.lower().strip()
    df['title_lower'] = df['title'].str.lower()
    
    if search not in df['title_lower'].values:
        suggestion = df[df['title_lower'].str.contains(m_search, na=False)]['title'].head(3).tolist()
        return f"Фильм '{movie_name}' не найден. Возможно, вы имели в виду: {suggestion}"
    
    movie_row = df[df['title_lower'] == search].iloc[0]
    cluster = movie_row['cluster_id']
    
    recommendations = df[df['cluster_id'] == cluster][['title', 'genres_display']].head(10)
    recommendations = recommendations[recommendations['title'].str.lower() != search].head(6)
    
    recommendations.columns = ['Название фильма', 'Жанры']
    print(f"Фильм: {movie_row['title']} (Кластер №{cluster})")
    print(f"Похожие фильмы :")
    
    return tabulate(recommendations, headers='keys', tablefmt='grid', showindex=False)

print(recommend_me('Toy Story That Time Forgot'))


Фильм: Toy Story That Time Forgot (Кластер №7)
Похожие фильмы :
+------------------------+------------------------------+
| Название фильма        | Жанры                        |
| Dinosaur               | Animation, Family            |
+------------------------+------------------------------+
| Shaun the Sheep Movie  | Family, Animation, Comedy    |
+------------------------+------------------------------+
| The Summer I Turned 15 | Drama                        |
+------------------------+------------------------------+
| The Substitute         | Thriller, Comedy             |
+------------------------+------------------------------+
| Flushed Away           | Adventure, Animation, Comedy |
+------------------------+------------------------------+
| Small Fry              | Animation, Family            |
+------------------------+------------------------------+
