In [67]:
from __future__ import annotations

import pandas as pd
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [83]:
from typing import Union

from data_import import load_data
from abc import ABC, abstractmethod
import pandas as pd

In [69]:
# Load the raw data through the project-local `data_import.load_data` helper.
# The call is unpacked into two frames: per-user ratings (df_user) and per-movie data (df_movie).
# NOTE(review): the schemas are defined in data_import, not here; later cells rely on
# User/Title/Year/Rating in df_user and Title/Year/Avg_rating/Number_of_ratings in df_movie.
df_user, df_movie = load_data()

Data cleaning

In [70]:
# Keep only movies whose average rating is at most 10
# (presumably the rating scale tops out at 10 -- TODO confirm against the data source).
# Rows with a missing Avg_rating are dropped as well, because NaN <= 10 evaluates to False.
has_valid_avg_rating = df_movie['Avg_rating'].le(10)
df_movie = df_movie.loc[has_valid_avg_rating]

Feature engineering

In [71]:
# Relative difference between a user's rating and the movie's average rating:
# (user - avg) / avg

# One Avg_rating per (Title, Year); duplicate movie rows in df_movie are collapsed with max().
avg_rating_per_movie = df_movie.groupby(['Title', 'Year'])['Avg_rating'].max().reset_index()

# The right-hand side is unique on the join keys, so the left merge yields exactly one row
# per df_user row, in df_user's order -- but with a fresh 0..n-1 RangeIndex.
df_user_temp = df_user.merge(avg_rating_per_movie, on=['Title', 'Year'], how='left')

# Assign positionally (.to_numpy()) instead of by index label. Label-based alignment is only
# correct while df_user still has its default RangeIndex; once rows have been filtered out
# (e.g. when this cell is re-run after the validation split) it would silently misalign values.
# Ratings of movies missing from df_movie, and missing user ratings, result in NaN.
df_user['Avg_user_rating_diff'] = (
    (df_user_temp['Rating'] - df_user_temp['Avg_rating']) / df_user_temp['Avg_rating']
).to_numpy()
df_user

Unnamed: 0,User,Title,Year,Rating,Avg_user_rating_diff
0,79villemo,Cząstki kobiety,2020,7.0,-0.014085
1,79villemo,Palmy w śniegu,2015,7.0,-0.078947
2,79villemo,Małe kobietki,2019,8.0,0.081081
3,79villemo,Kwiat pustyni,2009,7.0,-0.090909
4,79villemo,To,2017,6.0,-0.104478
...,...,...,...,...,...
37114,_paranoid_,Nie!,2022,7.0,0.147541
37115,_paranoid_,Titane,2021,,
37116,_paranoid_,W głowie kota,2022,7.0,0.093750
37117,_paranoid_,Lekcje języka,2021,7.0,0.029412


Hold out the final validation users: the notebook authors themselves ("us").

In [72]:
# Accounts held out for the final validation (the notebook authors).
val_user_list = ['sokoly35', 'honorciak', 'piotrr99']

# Build the membership mask once and use it for both sides of the split. Splitting on the
# mask (rather than on index labels) stays correct even if df_user's index is not unique.
is_validation_user = df_user['User'].isin(val_user_list)
df_user_validation = df_user[is_validation_user]

# exclude us from df_user
# NOTE: this cell is not idempotent -- df_user is overwritten, so re-running it without
# reloading the data produces an empty df_user_validation.
df_user = df_user[~is_validation_user]

In [73]:
# Display the held-out validation ratings (last expression of the cell is rendered as output).
df_user_validation

Unnamed: 0,User,Title,Year,Rating,Avg_user_rating_diff
9900,honorciak,Skazani na Shawshank,1994,9.0,0.022727
9901,honorciak,Zielona mila,1999,10.0,0.162791
9902,honorciak,Anna,2019,6.0,-0.076923
9903,honorciak,Gwiezdne wojny: Część III - Zemsta Sithów,2005,3.0,-0.605263
9904,honorciak,Cztery wesela i pogrzeb,1994,7.0,0.014493
...,...,...,...,...,...
32486,sokoly35,Teoria wszystkiego,2014,8.0,0.052632
32487,sokoly35,Iron Man,2008,7.0,-0.078947
32488,sokoly35,Iluzja,2013,7.0,-0.054054
32489,sokoly35,Zjawa,2015,5.0,-0.305556


# 0. Base model

In [81]:



class BaseRecommender(ABC):
    """Abstract base class for recommendation techniques.

    Concrete recommenders must implement both ``train`` and ``fit``. Because the
    methods are declared with ``@abstractmethod``, this class itself (and any
    subclass that leaves one of them unimplemented) cannot be instantiated.

    Note: despite its name, ``fit`` is the step that receives the number of
    recommendations to produce; ``train`` is the step that receives the data
    the recommender is built from.
    """

    @abstractmethod
    def train(self, df_user: Union[pd.DataFrame, None], df_movie: pd.DataFrame, **kwargs):
        """Build the recommender's internal state.

        Args:
            df_user: User ratings frame, or ``None`` for techniques that do not need it.
            df_movie: Movie frame.
            **kwargs: Technique-specific options.

        Raises:
            NotImplementedError: Always, when reached via ``super().train(...)``.
        """
        raise NotImplementedError

    @abstractmethod
    def fit(self, df_user: pd.DataFrame, num_of_recomendations: int, **kwargs):
        """Produce recommendations for the users in ``df_user``.

        Args:
            df_user: Frame identifying the users to recommend for.
            num_of_recomendations: Number of recommendations requested per user.
            **kwargs: Technique-specific options.

        Raises:
            NotImplementedError: Always, when reached via ``super().fit(...)``.
        """
        raise NotImplementedError

# 1. User cold start
* recommendation based on the popularity and overall rating of each movie, for users who have rated only a few movies
* we have no data about the user (age, gender, ...) so we don't use it as a feature in collaborative filtering.

In [97]:
class UCSRecommender(BaseRecommender):
    """User-cold-start recommender.

    Every user receives the same popularity-based list: ``train`` keeps the
    ``n_top_movies`` movies with the most ratings and orders them by
    ``Avg_rating`` (descending); ``fit`` hands the first
    ``num_of_recomendations`` of them to each user.
    """
    # Movie frame passed to ``train`` (stored as-is).
    movie = None
    # Ranked table (Title, Year, Avg_rating, Number_of_ratings) built by ``train``;
    # ``None`` until ``train`` has been called.
    recommendation_table = None

    def train(self, df_user: Union[pd.DataFrame, None], df_movie: pd.DataFrame, n_top_movies: int = 20, **kwargs):
        """Build the ranked table of popular movies.

        Args:
            df_user: Unused by this recommender; accepted for interface compatibility.
            df_movie: Movie frame with ``Title``, ``Year``, ``Avg_rating`` and
                ``Number_of_ratings`` columns.
            n_top_movies: How many of the most-rated movies to keep. This is also
                the upper bound on the number of recommendations ``fit`` can return
                per user.
            **kwargs: Ignored.
        """
        self.movie = df_movie
        self.recommendation_table = (
            df_movie
            # Collapse duplicate (Title, Year) rows, taking the column-wise minimum.
            .groupby(['Title', 'Year'])[['Avg_rating', 'Number_of_ratings']]
            .min()
            .reset_index()
            # Popularity filter first, then rank the survivors by rating.
            .sort_values('Number_of_ratings', ascending=False)
            .head(n_top_movies)
            .sort_values('Avg_rating', ascending=False)
        )

    def fit(self, df_user: pd.DataFrame, num_of_recomendations: int, **kwargs):
        """Return the top recommendations for every distinct user in ``df_user``.

        Args:
            df_user: Frame with a ``User`` column; one block of recommendations is
                produced per unique value, in order of first appearance.
            num_of_recomendations: Number of movies per user (capped by the size of
                the table built in ``train``).
            **kwargs: Ignored.

        Returns:
            DataFrame with columns ``User``, ``Title``, ``Year``. Row labels are those
            of the ranked table, repeated for each user. Empty (with the same
            columns) when ``df_user`` contains no users.

        Raises:
            RuntimeError: If called before ``train``.
        """
        if self.recommendation_table is None:
            raise RuntimeError("UCSRecommender.fit() called before train().")

        columns = ['User', 'Title', 'Year']
        # The list is identical for every user, so slice it once.
        top_movies = self.recommendation_table.head(num_of_recomendations)[['Title', 'Year']]
        # assign() returns a new frame, so the shared slice is never mutated; the
        # per-user frames are concatenated once instead of growing a frame in a loop.
        per_user = [top_movies.assign(User=user) for user in df_user['User'].unique()]
        if not per_user:
            return pd.DataFrame(columns=columns)
        return pd.concat(per_user)[columns]

In [98]:
# Usage example: build the popularity table from the training data, then
# request 10 recommendations for each held-out validation user.
ucs_recommender = UCSRecommender()
ucs_recommender.train(df_user=df_user, df_movie=df_movie)
# Last expression of the cell -> rendered as the cell output.
ucs_recommender.fit(df_user=df_user_validation, num_of_recomendations=10)

Unnamed: 0,User,Title,Year
5744,honorciak,Skazani na Shawshank,1994
7433,honorciak,Zielona mila,1999
4163,honorciak,Nietykalni,2011
1867,honorciak,Forrest Gump,1994
5264,honorciak,Pulp Fiction,1994
3621,honorciak,Milczenie owiec,1991
3258,honorciak,Leon zawodowiec,1994
6077,honorciak,Szeregowiec Ryan,1998
2006,honorciak,Gladiator,2000
7177,honorciak,Władca Pierścieni: Drużyna Pierścienia,2001


# 2. Collaborative filtering
* user similarity

In [None]:
# TODO: implement a user-similarity (collaborative filtering) recommender as a BaseRecommender subclass

# 3. Content based
* movie similarity

In [None]:
# TODO: implement a movie-similarity (content-based) recommender as a BaseRecommender subclass

# 4. Final model