In [52]:
import pandas as pd
import numpy as np
from datetime import date
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import MinMaxScaler,RobustScaler
import random

In [2]:
interactions = pd.read_parquet('interactions.parquet',engine='pyarrow')
movies_md = pd.read_parquet('movies_metdata.parquet',engine='pyarrow')
movies_rd = pd.read_excel('movies_rd.xlsx')

In [3]:
def preprocess_movies_rd(movies_rd):
    movies_rd['rating'] = [str(x) for x in movies_rd['rating'] .values]
    movies_rd['rating'] = [x.replace('dict_values([','') for x in movies_rd['rating'].values]
    movies_rd['rating'] = [x.replace('])','') for x in movies_rd['rating'].values]
    movies_rd['rating'] = [float(x) for x in movies_rd['rating'] .values]
    movies_rd['description'] = [str(x) for x in movies_rd['description'] .values]
    movies_rd = movies_rd.drop('Unnamed: 0', axis=1)
    movies_rd.columns = ['rating', 'description', 'title']
    return movies_rd

In [4]:
movies_rd = preprocess_movies_rd(movies_rd)

In [5]:
movies_rd.head(1)

Unnamed: 0,rating,description,title
0,6.664,"Мама, папа и… пятеро щенят. У Барбоскиных все,...",Барбоскины


In [6]:
def get_int_coef(interactions):    
    interactions['count'] = interactions['count']= [1 for x in interactions['user_id']]
    int_coef = interactions.groupby(['user_id']).sum()
    int_coef['interactions_coefficient'] = np.clip(int_coef['count'],1,20)
    int_coef['interactions_coefficient'] = np.log(int_coef['interactions_coefficient'])
    int_coef = int_coef.drop('count',axis=1)
    int_coef = int_coef[['interactions_coefficient','day']]
    int_coef = int_coef.drop('day',axis=1)
    scaler = MinMaxScaler()
    int_coef['interactions_coefficient'] = scaler.fit_transform(np.array(int_coef['interactions_coefficient']).reshape(-1,1))
    return int_coef

In [7]:
int_coef = get_int_coef(interactions)

In [8]:
interactions = interactions.merge(pd.DataFrame(int_coef),how='left',on='user_id')
interactions

Unnamed: 0,year,month,day,user_id,movie_id,watch_duration_minutes,count,interactions_coefficient
0,2022,10,1,58073,a6889772-f1f4-45bc-9663-85b46fc8499d,4.800000,1,0.000000
1,2022,10,1,63698,c829f262-ddf1-46b7-a896-a7efc205028c,15.000000,1,0.231378
2,2022,10,1,66655,91f9d892-a508-4962-91e9-abacd93e0830,51.916668,1,0.768622
3,2022,10,1,66655,58b805e1-a65c-49f4-b302-865cacaaed8a,22.316668,1,0.768622
4,2022,10,1,67981,dbc44c07-46e2-4fb0-b57c-8b5172421683,117.000000,1,0.231378
...,...,...,...,...,...,...,...,...
1799995,2022,12,30,211250020,35251c71-6198-4016-9135-6e30746667bd,20.366667,1,0.733452
1799996,2022,12,30,211250020,70503f6b-b4a3-4241-9175-f030537083cd,85.166664,1,0.733452
1799997,2022,12,30,211250020,b2d334f4-fd62-4acf-a4d7-56716dfe6cea,9.666667,1,0.733452
1799998,2022,12,30,211250174,9db5e3ea-2b9c-4258-8288-79622e87626a,22.516666,1,1.000000


In [9]:
def get_popularity(interactions):
    interactions['count']= [1 for x in interactions['movie_id']]
    popularity = interactions.groupby(['movie_id']).sum()
    popularity['num_users_watched'] = popularity['count']
    popularity = popularity.drop('count',axis=1)
    popularity = popularity[['watch_duration_minutes','num_users_watched']]
    return popularity

In [10]:
def get_avg(interactions):
    avg = interactions.groupby(['movie_id']).mean()
    avg = avg[['watch_duration_minutes','day']]
    avg['avg_watch_coef'] = avg['watch_duration_minutes']
    avg = avg.drop(['watch_duration_minutes','day'],axis=1)
    return avg

In [11]:
popularity = get_popularity(interactions)

In [12]:
avg = get_avg(interactions)

In [13]:
def get_movies_upd(movies_md, popularity, avg):
    movies_md = movies_md.merge(popularity, how='right', on= 'movie_id')
    movies_md['time_watched'] = np.clip(movies_md['watch_duration_minutes']/movies_md['duration'],0,1)
    movies_md = movies_md.merge(avg, how='right', on= 'movie_id')
    return movies_md

In [14]:
movies_upd = get_movies_upd(movies_md, popularity, avg)

In [15]:
columns_list = ['genres','actors','director','country']
def convert_columns(df,columns_list):
    for column in columns_list:
        df[column] = [str(x) for x in df[column].values]
        df[column] = [x.replace('"','') for x in df[column].values]
        df[column] = [x.replace('[','') for x in df[column].values]
        df[column] = [x.replace(']','') for x in df[column].values]
        df[column] = [x.replace(',',' ') for x in df[column].values]
    return df

In [16]:
movies_upd.head(2)

Unnamed: 0,movie_id,title,entity_type,genres,actors,director,country,release_world,age_rating,duration,watch_duration_minutes,num_users_watched,time_watched,avg_watch_coef
0,0001694e-cdb6-43d8-bd83-516b6cbb85b2,Everything counts — Depeche Mode,Фильм,"[""Караоке""]","[""Караоке""]","[""Караоке""]","[""Россия""]",2020-12-01,16.0,5.0,0.133333,1,0.026667,0.133333
1,000d6913-68a1-4135-9bad-9bcd71708b2e,Против ночи,Фильм,"[""Детективы"",""Ужасы"",""Триллеры""]","[""Джош Кан"",""Ли Холлеран"",""Ханна Климан"",""Эрик...","[""Брайан Кавалларо""]","[""США""]",2017-09-15,16.0,83.0,379.616666,7,1.0,54.230952


In [17]:
movies_rd.head(2)

Unnamed: 0,rating,description,title
0,6.664,"Мама, папа и… пятеро щенят. У Барбоскиных все,...",Барбоскины
1,6.383,"Эми — амбициозная девушка, всегда мечтавшая о ...",Эскортницы


In [18]:
movies_upd = convert_columns(movies_upd,columns_list)
movies_upd = movies_upd.join(movies_rd.set_index('title'), how='left', on='title')

In [19]:
movies_upd.head(2)

Unnamed: 0,movie_id,title,entity_type,genres,actors,director,country,release_world,age_rating,duration,watch_duration_minutes,num_users_watched,time_watched,avg_watch_coef,rating,description
0,0001694e-cdb6-43d8-bd83-516b6cbb85b2,Everything counts — Depeche Mode,Фильм,Караоке,Караоке,Караоке,Россия,2020-12-01,16.0,5.0,0.133333,1,0.026667,0.133333,-1.0,
1,000d6913-68a1-4135-9bad-9bcd71708b2e,Против ночи,Фильм,Детективы Ужасы Триллеры,Джош Кан Ли Холлеран Ханна Климан Эрик Кохенбе...,Брайан Кавалларо,США,2017-09-15,16.0,83.0,379.616666,7,1.0,54.230952,4.258,Компания друзей отправляется искать призраков ...


In [20]:
def get_desc(movies_upd):
    movies_upd['desc_md'] = movies_upd['title'] + ' ' + movies_upd['entity_type'] + ' ' + movies_upd['genres'] + ' ' + movies_upd['director']
    movies_upd = movies_upd.dropna(subset=['desc_md'])
    return movies_upd

In [21]:
movies_upd = get_desc(movies_upd)

In [22]:
movies_upd.head(2)

Unnamed: 0,movie_id,title,entity_type,genres,actors,director,country,release_world,age_rating,duration,watch_duration_minutes,num_users_watched,time_watched,avg_watch_coef,rating,description,desc_md
0,0001694e-cdb6-43d8-bd83-516b6cbb85b2,Everything counts — Depeche Mode,Фильм,Караоке,Караоке,Караоке,Россия,2020-12-01,16.0,5.0,0.133333,1,0.026667,0.133333,-1.0,,Everything counts — Depeche Mode Фильм Караоке...
1,000d6913-68a1-4135-9bad-9bcd71708b2e,Против ночи,Фильм,Детективы Ужасы Триллеры,Джош Кан Ли Холлеран Ханна Климан Эрик Кохенбе...,Брайан Кавалларо,США,2017-09-15,16.0,83.0,379.616666,7,1.0,54.230952,4.258,Компания друзей отправляется искать призраков ...,Против ночи Фильм Детективы Ужасы Триллеры Бра...


In [23]:
def get_cosine_mat(df,desc):
    tfidf = TfidfVectorizer(min_df=5)
    tfidf_matrix = tfidf.fit_transform(df[desc])
    cos_sim = cosine_similarity(tfidf_matrix)
    return cos_sim

In [24]:
cos_sim_md = get_cosine_mat(movies_upd,'desc_md')

In [25]:
cos_sim_desc = get_cosine_mat(movies_upd,'description')

In [26]:
movies_upd.columns

Index(['movie_id', 'title', 'entity_type', 'genres', 'actors', 'director',
       'country', 'release_world', 'age_rating', 'duration',
       'watch_duration_minutes', 'num_users_watched', 'time_watched',
       'avg_watch_coef', 'rating', 'description', 'desc_md'],
      dtype='object')

In [27]:
def get_score(df):
    df['rating'] = df['rating'].replace(-1,0)
    df['time_watched'] = df['time_watched'].replace(np.nan,0)
    scaler = MinMaxScaler()
    df['time_watched'] = scaler.fit_transform(np.array(df['time_watched']).reshape(-1,1))
    df['num_users_watched'] = scaler.fit_transform(np.array(df['num_users_watched']).reshape(-1,1))
    df['avg_watch_coef'] = scaler.fit_transform(np.array(df['avg_watch_coef']).reshape(-1,1))
    df['rating'] = scaler.fit_transform(np.array(df['rating']).reshape(-1,1))
    df['score'] = df['time_watched']*0.1 + df['num_users_watched']*0.4+df['avg_watch_coef']*0.1 + df['rating']*0.4
    #df['score'] = np.clip(df['score'],0,100)
    df['score'] = scaler.fit_transform(np.array(df['score']).reshape(-1,1))
    return df

In [28]:
movies_upd = get_score(movies_upd)

In [29]:
movies_upd['score'].describe()

count    18565.000000
mean         0.400925
std          0.222644
min          0.000000
25%          0.148395
50%          0.495758
75%          0.578383
max          1.000000
Name: score, dtype: float64

In [30]:
for row in movies_md:
    print(row)

movie_id
title
entity_type
genres
actors
director
country
release_world
age_rating
duration


In [31]:
def get_dur(movies_md,interactions):
    interactions = interactions.merge(movies_md,how='left',on='movie_id')
    interactions['watched'] = interactions['watch_duration_minutes_x']/interactions['duration']
    interactions['watched'] = np.clip(interactions['watched'],0,1)
    interactions = interactions.replace(np.nan,interactions['watched'].mean())
    interactions =  interactions[['year','month','day','user_id','movie_id','watch_duration_minutes_x','watched']]
    interactions.rename({'watch_duration_minutes_x':'watch_duration_minutes'})
    return interactions
    

In [32]:
interactions = get_dur(movies_upd,interactions)

In [33]:
interactions

Unnamed: 0,year,month,day,user_id,movie_id,watch_duration_minutes_x,watched
0,2022,10,1,58073,a6889772-f1f4-45bc-9663-85b46fc8499d,4.800000,0.522748
1,2022,10,1,63698,c829f262-ddf1-46b7-a896-a7efc205028c,15.000000,0.104167
2,2022,10,1,66655,91f9d892-a508-4962-91e9-abacd93e0830,51.916668,0.522748
3,2022,10,1,66655,58b805e1-a65c-49f4-b302-865cacaaed8a,22.316668,0.522748
4,2022,10,1,67981,dbc44c07-46e2-4fb0-b57c-8b5172421683,117.000000,1.000000
...,...,...,...,...,...,...,...
1799995,2022,12,30,211250020,35251c71-6198-4016-9135-6e30746667bd,20.366667,0.522748
1799996,2022,12,30,211250020,70503f6b-b4a3-4241-9175-f030537083cd,85.166664,0.906028
1799997,2022,12,30,211250020,b2d334f4-fd62-4acf-a4d7-56716dfe6cea,9.666667,0.095710
1799998,2022,12,30,211250174,9db5e3ea-2b9c-4258-8288-79622e87626a,22.516666,0.147168


In [34]:
def get_highest_value(row):
    if row['similarity_md'] - row['similarity_desc'] <0.2:
        return max(row['similarity_md'], row['similarity_desc'])
    elif row['similarity_md'] ==0:
        return row['similarity_desc']
    else:
        return row['similarity_desc']
        
def get_lowest_value(row):
    return min(row['similarity_md'], row['similarity_desc'])
def get_mean_value(row):
    return (row['similarity_md'] + row['similarity_desc'])/2
def get_idk_value(row):
    if row['similarity_md'] !=0 and row['similarity_desc']!=0:
        print(0)
        return row['similarity_md'] * row['similarity_desc']
    elif row['similarity_md'] == 0 and row['similarity_desc']!=0:
        print(1)
        return row['similarity_desc']
    elif row['similarity_desc'] ==0 and row['similarity_md']!=0:
        print(3)
        return row['similarity_md']

In [35]:
def predict(df,title,userid, similarity_weight=0.7, top_n=10):
    data = df.reset_index()
    index_movie = data[data['movie_id'] == title].index
    similarity_md = cos_sim_md[index_movie].T
    similarity_desc = cos_sim_desc[index_movie].T
    dur_coef = float(interactions.loc[(interactions['movie_id'] == title)&(interactions['user_id'] == userid)]['watched'])
    nan = float(np.nan)
    if dur_coef > 0 :
        dur = dur_coef
    else: 
        dur = 0.5
    sim_df_md = pd.DataFrame(similarity_md, columns=['similarity_md'])
    sim_df_desc = pd.DataFrame(similarity_desc, columns=['similarity_desc'])
    final_df = pd.concat([data, sim_df_md,sim_df_desc], axis=1)
    final_df['similarity'] = final_df.apply(get_highest_value,axis=1) 
    final_df['final_score'] = (final_df['score']*(1-similarity_weight) +   final_df['similarity'] *similarity_weight)*dur
    
    final_df_sorted = final_df.sort_values(by='final_score', ascending=False).head(top_n)
    final_df_sorted = final_df_sorted.loc[final_df_sorted['similarity'] < 1]
    return final_df_sorted[['title','score','similarity', 'similarity_md','similarity_desc','final_score']]

In [36]:
predict(movies_upd,'a6889772-f1f4-45bc-9663-85b46fc8499d',238476537, top_n=10)

Unnamed: 0,title,score,similarity,similarity_md,similarity_desc,final_score
10495,1703,1.0,0.082103,0.082103,0.0,0.186868
15545,Барбоскины Team,0.866987,0.093853,0.093853,0.035489,0.170308
3509,Дошколята,0.507771,0.238737,0.238737,0.048936,0.166991
13094,Билал,0.601725,0.198135,0.198135,0.017566,0.166868
1636,Грибок,0.621861,0.175655,0.175655,0.0,0.161799
10732,ДжиФайтерс. Город супергероев,0.549176,0.197266,0.197266,0.005313,0.158309
5426,Бемби 2,0.631528,0.159377,0.159377,0.007322,0.157359
3862,Гравити Фолз,0.586893,0.178332,0.178332,0.0,0.157295


In [37]:
def get_movies_list(userID,df):
    ml = df.loc[df['user_id']==userID]['movie_id'].to_list()
    return ml

In [38]:
print(len(set(get_movies_list(238476537,interactions))))

51


In [39]:
movies_upd.head(1)

Unnamed: 0,movie_id,title,entity_type,genres,actors,director,country,release_world,age_rating,duration,watch_duration_minutes,num_users_watched,time_watched,avg_watch_coef,rating,description,desc_md,score
0,0001694e-cdb6-43d8-bd83-516b6cbb85b2,Everything counts — Depeche Mode,Фильм,Караоке,Караоке,Караоке,Россия,2020-12-01,16.0,5.0,0.133333,0.0,0.026667,0.000236,0.0,,Everything counts — Depeche Mode Фильм Караоке...,0.003816


In [72]:
def get_user_pred(user_id, interactions):
    coef = float(int_coef.loc[int_coef.index == user_id]['interactions_coefficient'].values)
    print(coef)
    z=pd.DataFrame()
    ml = set(get_movies_list(user_id, interactions))
    for i in ml:
        try:
            title = movies_upd.loc[movies_upd['movie_id']== i]['title']
            df = predict(movies_upd,i,user_id,similarity_weight=coef)
            df['based_on'] = str(title)
            z = pd.concat([z,df])
            z = z.loc[z['similarity']<1]
            z = z.drop_duplicates(subset=['title'])
            z = z.sort_values('final_score',ascending=False)
            z = z.sort_values('final_score',ascending=False)
        except:
            z = z
    return z

In [73]:
a = get_user_pred(238476537, interactions)
a

1.0


Unnamed: 0,title,score,similarity,similarity_md,similarity_desc,final_score,based_on
1323,Ёлки 5,0.467609,0.323242,0.323242,0.140796,0.314022,"16495 Ёлки 3\nName: title, dtype: object"
6601,"Первая встреча, последняя встреча",0.580041,0.298005,0.025116,0.298005,0.294678,"11844 Подельники\nName: title, dtype: object"
1933,Домовой,0.592010,0.341965,0.341965,0.150517,0.289634,"17207 Финник\nName: title, dtype: object"
11892,Музыкальная история,0.601889,0.261630,0.007505,0.261630,0.258709,"11844 Подельники\nName: title, dtype: object"
13444,Спирит Непокорный,0.577854,0.246027,0.246027,0.066672,0.233067,"6599 Айнбо. Сердце Амазонии\nName: title, d..."
...,...,...,...,...,...,...,...
7937,В постели с Викторией,0.509870,0.224004,0.224004,0.030389,0.000226,"12176 Счастье в конверте\nName: title, dtyp..."
4036,Кощей. Похититель невест,0.693836,0.222007,0.222007,0.035663,0.000224,"12176 Счастье в конверте\nName: title, dtyp..."
9312,История Золушки,0.578707,0.217769,0.217769,0.047606,0.000220,"12176 Счастье в конверте\nName: title, dtyp..."
3437,Серебряные коньки,0.653811,0.217404,0.217404,0.026357,0.000220,"12176 Счастье в конверте\nName: title, dtyp..."


In [44]:
get_movies_list(238476537,interactions)

['a6889772-f1f4-45bc-9663-85b46fc8499d',
 '16843f1b-7b21-4443-9a52-2409295205ff',
 'af99d6a2-21ee-4281-82d3-472abbf92de7',
 '3330698d-80cb-4506-b976-5b4253427c30',
 'c0d3b165-9d24-4b73-ad03-9673b0f4718d',
 'd364dea2-4a3b-4499-b619-1f58897e4e99',
 'd722f09d-a99d-4c7f-853c-0c77fcd45f35',
 '5b54d128-feb5-4ae6-bd32-44795b43f16c',
 'd722f09d-a99d-4c7f-853c-0c77fcd45f35',
 'd722f09d-a99d-4c7f-853c-0c77fcd45f35',
 'd722f09d-a99d-4c7f-853c-0c77fcd45f35',
 'd722f09d-a99d-4c7f-853c-0c77fcd45f35',
 '67341b61-f875-4be5-988f-42fa0968e3cb',
 'd722f09d-a99d-4c7f-853c-0c77fcd45f35',
 '29ed8083-b33b-45bb-8f88-9df070ced049',
 'd177bdf8-642e-4325-80ac-750881a4535f',
 '537ece24-2358-4108-a419-85cfd5952607',
 'ba46e96f-ee9b-4392-887a-753460ccca07',
 'ba46e96f-ee9b-4392-887a-753460ccca07',
 '42f5cb31-f333-426c-a95b-8e106741de38',
 '3c8f2117-3853-4682-b024-814a15de1b69',
 '8c0e0114-9e33-476b-b17b-b8eab7d24c38',
 '59e15ed8-219d-4b11-800f-0b92b056c108',
 'd722f09d-a99d-4c7f-853c-0c77fcd45f35',
 '0eb840a2-e900-

In [45]:
movies_upd.loc[movies_upd['movie_id']== 'a6889772-f1f4-45bc-9663-85b46fc8499d']['title']

12207    Барбоскины
Name: title, dtype: object

In [46]:
a = a.drop_duplicates(subset=['title'])

In [47]:
a.final_score.describe()

count    214.000000
mean       0.117497
std        0.089704
min        0.000219
25%        0.018160
50%        0.114564
75%        0.203296
max        0.314022
Name: final_score, dtype: float64

In [48]:
a

Unnamed: 0,title,score,similarity,similarity_md,similarity_desc,final_score,based_on
1323,Ёлки 5,0.467609,0.323242,0.323242,0.140796,0.314022,"16495 Ёлки 3\nName: title, dtype: object"
6601,"Первая встреча, последняя встреча",0.580041,0.298005,0.025116,0.298005,0.294678,"11844 Подельники\nName: title, dtype: object"
1933,Домовой,0.592010,0.341965,0.341965,0.150517,0.289634,"17207 Финник\nName: title, dtype: object"
11892,Музыкальная история,0.601889,0.261630,0.007505,0.261630,0.258709,"11844 Подельники\nName: title, dtype: object"
2725,Ёлки Последние,0.525703,0.259815,0.259815,0.147274,0.252405,"16495 Ёлки 3\nName: title, dtype: object"
...,...,...,...,...,...,...,...
7937,В постели с Викторией,0.509870,0.224004,0.224004,0.030389,0.000226,"12176 Счастье в конверте\nName: title, dtyp..."
4036,Кощей. Похититель невест,0.693836,0.222007,0.222007,0.035663,0.000224,"12176 Счастье в конверте\nName: title, dtyp..."
9312,История Золушки,0.578707,0.217769,0.217769,0.047606,0.000220,"12176 Счастье в конверте\nName: title, dtyp..."
3437,Серебряные коньки,0.653811,0.217404,0.217404,0.026357,0.000220,"12176 Счастье в конверте\nName: title, dtyp..."


In [49]:
def calculate_precision_at_k(actual, predicted, k):
    if k == 0:
        return 0
    
    predicted_k = predicted[:k]
    num_relevant = np.isin(predicted_k, actual).sum()
    precision = num_relevant / k
    return precision


def calculate_average_precision(actual, predicted, k):
    if len(actual) == 0:
        return 0
    
    precision_values = []
    num_relevant = 0
    
    for i, item in enumerate(predicted[:k]):
        if item in actual:
            num_relevant += 1
            precision_values.append(num_relevant / (i + 1))
    
    if len(precision_values) == 0:
        return 0
    
    average_precision = np.mean(precision_values)
    return average_precision


def calculate_map_at_k(test_data, recommendations, k):
    map_values = []
    
    for user_id in test_data['user_id'].unique():
        user_actual = test_data[test_data['user_id'] == user_id]['movie_id'].values
        user_predicted = recommendations.get(user_id, [])
        average_precision = calculate_average_precision(user_actual, user_predicted, k)
        map_values.append(average_precision)
    
    map_at_k = np.mean(map_values)
    return map_at_k

In [50]:
interactions['date'] = pd.to_datetime(interactions[['year', 'month', 'day']])

In [51]:
split_date = '2022-12-01'

train_data = interactions[interactions['date'] < split_date]
test_data = interactions[interactions['date'] >= split_date]

In [62]:
b = test_data.groupby('user_id')['movie_id'].count().sort_values(ascending=False).reset_index()
b = b[b['movie_id'] >= 15]
b_calc = random.sample(b.user_id.to_list(), 15)
b_calc

[202926595,
 1283435,
 228049131,
 202896229,
 75753305,
 1268893,
 228056759,
 228082349,
 1103713,
 72119152,
 1633292,
 23464672,
 75757204,
 211296390,
 228112155]

In [74]:
reccos_dict = {}

for i in b_calc:
    try:
        recs = get_user_pred(i, train_data).title[:10].to_list()
        reccos_dict[i] = recs
    except AttributeError:
        pass

1.0
1.0
1.0
1.0
1.0
0.945749848565416
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0


In [81]:
test_data = test_data.join(movies_md[['movie_id', 'title']].set_index('movie_id'), how='left', on='movie_id')

In [83]:
test_map = test_data[test_data['user_id'].isin(b_calc) == True]
user_movie_dict = test_map.groupby('user_id')['title'].apply(list).to_dict()

In [85]:
map_at_k = calculate_map_at_k(test_data, reccos_dict, k=10)

In [87]:
print("MAP@{}: {:.4f}".format(10, map_at_k))

MAP@10: 0.0000


In [None]:
66655
10
movies_md_upd_link = 'data/movies_md_upd.parquet'
interactions_upd_link = 'data/interactions_upd.parquet'
int_coef_link = 'data/int_coef.parquet'
cos_sim_md_link = 'matrix/cos_sim_md.pkl'
cos_sim_desc_link = 'matrix/cos_sim_desc.pkl'

docker run -p 4000:80 -e P1=66655 -e P2=10 -e P3='data/interactions_upd.parquet' -e P4='data/movies_md_upd.parquet' -e P5='data/int_coef.parquet' -e P6='matrix/cos_sim_md.pkl' -e P7='matrix/cos_sim_desc.pkl' your-image-name