In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from itertools import islice, cycle, product

import warnings
warnings.filterwarnings('ignore')

In [2]:
# Load the raw interaction log and the movie metadata table from local parquet files.
# NOTE(review): 'movies_metdata' looks like a typo for 'metadata' — confirm the file name on disk.
interactions = pd.read_parquet(
    'interactions.parquet',
    engine='pyarrow',
)
movies_md = pd.read_parquet(
    'movies_metdata.parquet',
    engine='pyarrow',
)

In [3]:
# Keep only the interactions whose movie_id also appears in the metadata table,
# then show the row/column counts before and after the filter.
interactions_filtered = interactions.loc[
    interactions['movie_id'].isin(movies_md['movie_id'])
]
print(interactions.shape, interactions_filtered.shape)

(1800000, 6) (1782240, 6)


In [4]:
# One row per distinct user_id found in `interactions`, with a fresh 0..n-1 index.
users = (
    interactions[['user_id']]
    .drop_duplicates()
    .reset_index(drop=True)
)

In [5]:
# Lookup table movie_id -> title; if a movie_id repeats, the last title seen wins.
item_name_mapper = {
    movie_id: title
    for movie_id, title in zip(movies_md['movie_id'], movies_md['title'])
}

In [6]:
# Two-column slice of the metadata: the join key and the movie duration.
duration = movies_md.loc[:, ['movie_id', 'duration']]

In [7]:
# Attach each movie's duration to its interactions (left join keeps every filtered interaction).
# Note: this rebinds the name `interactions` to the merged frame.
interactions = interactions_filtered.merge(
    duration,
    how='left',
    on='movie_id',
)

In [8]:
# Drop every row that has a missing value in any column (not only in `duration`).
interactions = interactions.dropna(axis=0, how='any')

In [9]:
# Share of the movie that was watched, stored as a ratio (it is not multiplied by 100).
# Assumes `watch_duration_minutes` and `duration` use the same unit — TODO confirm.
interactions['watch_percentage'] = interactions['watch_duration_minutes'].div(
    interactions['duration']
)

In [10]:
def compute_popularity(
    df: pd.DataFrame,
    item_id: str,
    max_candidates: int,
    value_col: str = 'watch_percentage',
) -> np.ndarray:
    """
    Rank items by their summed engagement score and return the top ones.

    Parameters
    ----------
    df : pd.DataFrame
        Interaction log containing at least the ``item_id`` and ``value_col`` columns.
    item_id : str
        Name of the column that identifies the item.
    max_candidates : int
        Maximum number of item ids to return.
    value_col : str, default 'watch_percentage'
        Column whose per-item sum is used as the popularity score.

    Returns
    -------
    np.ndarray
        Item ids ordered by descending summed ``value_col``, at most
        ``max_candidates`` entries.
    """
    # Use the groupby `sum` method rather than passing the builtin `sum` to `.agg`:
    # pandas 2.1+ deprecates builtin callables there and will change their behaviour.
    item_scores = df.groupby(item_id)[value_col].sum()
    top_items = item_scores.sort_values(ascending=False).head(max_candidates)

    return top_items.index.values

In [11]:
# Column names shared by the cells below.
ITEM_COLUMN = 'movie_id'
USER_COLUMN = 'user_id'

# Baseline: the 20 most popular movies according to compute_popularity.
base_recommendations = compute_popularity(
    df=interactions,
    item_id=ITEM_COLUMN,
    max_candidates=20,
)

In [12]:
# Map every user to the list of movies they have already interacted with.
# `to_dict` has to be *called*: without the parentheses the name would hold the
# bound method object instead of the user -> [movie_id, ...] dictionary.
known_items = (
    interactions_filtered
    .groupby(USER_COLUMN)[ITEM_COLUMN]
    .apply(list)
    .to_dict()
)

In [13]:
def fit(
    data: pd.DataFrame,
    item_col: str,
    max_candidates: int = 20
    ):
    """
    "Train" the popularity baseline.

    Delegates to ``compute_popularity`` and returns its result unchanged:
    the ids of up to ``max_candidates`` most popular items found in ``data``,
    where ``item_col`` names the item-id column.
    """
    return compute_popularity(data, item_col, max_candidates)

In [14]:
# Fit the popularity baseline on the prepared interactions, keeping the top 20 items.
recommendations = fit(
    data=interactions,
    item_col=ITEM_COLUMN,
    max_candidates=20,
)

In [15]:
def recommend(
    users: pd.DataFrame,
    recommendations: np.ndarray
    ) -> pd.DataFrame:
    """
    Attach the same recommendation list to every user.

    Parameters
    ----------
    users : pd.DataFrame
        One row per user. The frame is not modified; a deep copy is returned.
    recommendations : np.ndarray or any sequence
        Item ids to recommend, e.g. the output of ``fit``.

    Returns
    -------
    pd.DataFrame
        Copy of ``users`` with an extra ``rekkos`` column in which every row
        holds ``recommendations``.
    """
    output = users.copy(deep=True)
    # Every row references the same `recommendations` object. The row count is
    # taken from the frame itself, so no particular id column name is required.
    output['rekkos'] = [recommendations] * len(output)

    return output

In [16]:
# Display the per-user recommendation table (every user gets the same popular list).
recommend(users=users, recommendations=recommendations)

Unnamed: 0,user_id,rekkos
0,58073,"[e088637d-967f-478b-8158-98b90aded146, 42f5cb3..."
1,63698,"[e088637d-967f-478b-8158-98b90aded146, 42f5cb3..."
2,66655,"[e088637d-967f-478b-8158-98b90aded146, 42f5cb3..."
3,67981,"[e088637d-967f-478b-8158-98b90aded146, 42f5cb3..."
4,68523,"[e088637d-967f-478b-8158-98b90aded146, 42f5cb3..."
...,...,...
127055,219430078,"[e088637d-967f-478b-8158-98b90aded146, 42f5cb3..."
127056,219433995,"[e088637d-967f-478b-8158-98b90aded146, 42f5cb3..."
127057,239015456,"[e088637d-967f-478b-8158-98b90aded146, 42f5cb3..."
127058,211248863,"[e088637d-967f-478b-8158-98b90aded146, 42f5cb3..."


In [17]:
# Print the human-readable title of each baseline recommendation, in rank order.
for movie_id in base_recommendations:
    title = item_name_mapper[movie_id]
    print(title)

Сердце Пармы
Вышка
Барбоскины Team
Три тысячи лет желаний
Гринч
Три кота и море приключений
Босс-молокосос
Один дома
Либерея: Охотники за сокровищами
Пес-самурай и город кошек
Щенячий патруль в кино
Начать сначала
Красная Шапочка
Ирония судьбы в Голливуде
Зверопой 2
Любовники
Холодное сердце
Холодное сердце 2
Гадкий я
Один дома 2: Затерянный в Нью-Йорке
