## Priporočilni sistem

Za to seminarsko sem se odločil za imdb dataset, ker sem navajen gledati review-e na njihovi strani, ter želel sem delati nad malo večjo zbirko podatkov.

In [156]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from datetime import date
import random

In [107]:
df = pd.read_csv("data/user_ratedmovies.dat", sep="\t")

df_unique_movieID = df.drop_duplicates(subset=["movieID"])
df_unique_movieID

Unnamed: 0,userID,movieID,rating,date_day,date_month,date_year,date_hour,date_minute,date_second
0,75,3,1.0,29,10,2006,23,17,16
1,75,32,4.5,29,10,2006,23,23,44
2,75,110,4.0,29,10,2006,23,30,8
3,75,160,2.0,29,10,2006,23,16,52
4,75,163,4.0,29,10,2006,23,29,30
...,...,...,...,...,...,...,...,...,...
850645,71331,62796,3.0,19,11,2008,23,10,40
851696,71420,3585,4.0,22,11,2007,22,43,7
852654,71420,59065,1.5,30,10,2008,15,41,38
853182,71483,4421,2.5,2,9,2003,22,18,25


Dodajanje stolpcev date, ki ga sestavimo iz podatkov in ratings_count, kejer prešejemo število ocen za posamezen film (movieID)

In [108]:
df["date"] = pd.to_datetime(df["date_day"].astype(str) + "." + df["date_month"].astype(str) + "." + df["date_year"].astype(str), format="%d.%m.%Y")
df["ratings_count"] = df.groupby("movieID")["rating"].transform("count")

In [109]:
df

Unnamed: 0,userID,movieID,rating,date_day,date_month,date_year,date_hour,date_minute,date_second,date,ratings_count
0,75,3,1.0,29,10,2006,23,17,16,2006-10-29,252
1,75,32,4.5,29,10,2006,23,23,44,2006-10-29,1158
2,75,110,4.0,29,10,2006,23,30,8,2006-10-29,1237
3,75,160,2.0,29,10,2006,23,16,52,2006-10-29,298
4,75,163,4.0,29,10,2006,23,29,30,2006-10-29,467
...,...,...,...,...,...,...,...,...,...,...,...
855593,71534,44555,4.0,3,12,2007,3,5,38,2007-12-03,294
855594,71534,46578,4.0,3,12,2007,2,56,44,2007-12-03,731
855595,71534,48516,4.5,3,12,2007,2,53,46,2007-12-03,804
855596,71534,61075,5.0,10,10,2008,9,56,5,2008-10-10,16


Tle sm meu težave, ker moram naprej sortirati po datumu in potem šele po ratingih

In [153]:
class UserItemData:
    def __init__(self, path, start_date=None, end_date=None, min_ratings=None):
        self.path = path
        self.start_date = pd.to_datetime(start_date, dayfirst=True) if start_date else None
        self.end_date = pd.to_datetime(end_date, dayfirst=True) if end_date else None
        self.min_ratings = min_ratings

    def nratings(self):
        df = pd.read_csv(self.path, sep="\t")
        # Naprej filter po datumih
        df["date"] = pd.to_datetime( df["date_day"].astype(str) + "." + df["date_month"].astype(str) + "." + df["date_year"].astype(str), format="%d.%m.%Y", dayfirst=True)

        if self.start_date is not None:
            df = df[df["date"] >= self.start_date]

        if self.end_date is not None:
            df = df[df["date"] <= self.end_date]

        # Zdaj pa po ratingih
        df["ratings_count"] = df.groupby("movieID")["rating"].transform("count")

        if self.min_ratings is not None:
            df = df[df["ratings_count"] >= self.min_ratings]

        #print(df)
        return df.count()["userID"]
    
    def get_movie_ids(self):
        df = pd.read_csv(self.path, sep="\t")

        df["date"] = pd.to_datetime( df["date_day"].astype(str) + "." + df["date_month"].astype(str) + "." + df["date_year"].astype(str), format="%d.%m.%Y", dayfirst=True)

        if self.start_date is not None:
            df = df[df["date"] >= self.start_date]

        if self.end_date is not None:
            df = df[df["date"] <= self.end_date]

        # Zdaj pa po ratingih
        df["ratings_count"] = df.groupby("movieID")["rating"].transform("count")

        if self.min_ratings is not None:
            df = df[df["ratings_count"] >= self.min_ratings]

        return df["movieID"].unique()

In [159]:
class MovieData:
    def __init__(self, path):
        self.path = path


    def get_title(self, movieID):
        df = pd.read_csv("data/movies.dat", sep="\t", encoding="latin1")
        return df.loc[df['id'] == movieID]["title"].iloc[0]

In [None]:
class RandomPredictor:
    def __init__(self, minRating, maxRating):
        self.minRating = minRating
        self.maxRating = maxRating
        self.items = None
        
    def predict(self, user_id):
        myDict = dict()
        for item in self.items:
            myDict[item] = random.randint(self.minRating, self.maxRating)

        return myDict

    def fit(self, X):
        self.items = X.get_movie_ids()

In [164]:
md = MovieData('data/movies.dat')
uim = UserItemData('data/user_ratedmovies.dat')
rp = RandomPredictor(1, 5)
rp.fit(uim)
pred = rp.predict(78)
print(type(pred))
items = [1, 3, 20, 50, 100]
for item in items:
    print("Film: {}, ocena: {}".format(md.get_title(item), pred[item]))


<class 'dict'>
Film: Toy story, ocena: 3
Film: Grumpy Old Men, ocena: 5
Film: Money Train, ocena: 3
Film: The Usual Suspects, ocena: 3
Film: City Hall, ocena: 3


In [None]:
class Recommender:
    def __init__(self, rp):
        self.rp = rp
        self.items = None

    def recommend(self, userID, n=10, rec_seen=True):
        myDict = dict()
        for item in self.items:
            myDict[item] = random.randint(self.minRating, self.maxRating)

        return myDict

    def fit(self, X):
        self.items = X.get_movie_ids()

In [163]:

md = MovieData('data/movies.dat')
uim = UserItemData('data/user_ratedmovies.dat')
rp = RandomPredictor(1, 5)
rec = Recommender(rp)
rec.fit(uim)
rec_items = rec.recommend(78, n=5, rec_seen=False)
for idmovie, val in rec_items:
    print("Film: {}, ocena: {}".format(md.get_title(idmovie), val))  

TypeError: 'NoneType' object is not iterable

In [148]:
uim = UserItemData('data/user_ratedmovies.dat')
print(uim.nratings())

uim = UserItemData('data/user_ratedmovies.dat', start_date = '12.1.2007', end_date='16.2.2008', min_ratings=100)
print(uim.nratings())

855598
73657


In [149]:
uim.get_movie_ids()

array([    3,    32,   110, ..., 59065,  4421,  8393], dtype=int64)

In [136]:
md = MovieData('data/movies.dat')
print(md.get_title(1))

0    Toy story
Name: title, dtype: object


In [None]:
df = pd.read_csv("data/movies.dat", sep="\t", encoding="latin1")
df

Unnamed: 0,id,title,imdbID,spanishTitle,imdbPictureURL,year,rtID,rtAllCriticsRating,rtAllCriticsNumReviews,rtAllCriticsNumFresh,...,rtAllCriticsScore,rtTopCriticsRating,rtTopCriticsNumReviews,rtTopCriticsNumFresh,rtTopCriticsNumRotten,rtTopCriticsScore,rtAudienceRating,rtAudienceNumRatings,rtAudienceScore,rtPictureURL
0,1,Toy story,114709,Toy story (juguetes),http://ia.media-imdb.com/images/M/MV5BMTMwNDU0...,1995,toy_story,9,73,73,...,100,8.5,17,17,0,100,3.7,102338,81,http://content7.flixster.com/movie/10/93/63/10...
1,2,Jumanji,113497,Jumanji,http://ia.media-imdb.com/images/M/MV5BMzM5NjE1...,1995,1068044-jumanji,5.6,28,13,...,46,5.8,5,2,3,40,3.2,44587,61,http://content8.flixster.com/movie/56/79/73/56...
2,3,Grumpy Old Men,107050,Dos viejos gruñones,http://ia.media-imdb.com/images/M/MV5BMTI5MTgy...,1993,grumpy_old_men,5.9,36,24,...,66,7,6,5,1,83,3.2,10489,66,http://content6.flixster.com/movie/25/60/25602...
3,4,Waiting to Exhale,114885,Esperando un respiro,http://ia.media-imdb.com/images/M/MV5BMTczMTMy...,1995,waiting_to_exhale,5.6,25,14,...,56,5.5,11,5,6,45,3.3,5666,79,http://content9.flixster.com/movie/10/94/17/10...
4,5,Father of the Bride Part II,113041,Vuelve el padre de la novia (Ahora también abu...,http://ia.media-imdb.com/images/M/MV5BMTg1NDc2...,1995,father_of_the_bride_part_ii,5.3,19,9,...,47,5.4,5,1,4,20,3,13761,64,http://content8.flixster.com/movie/25/54/25542...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10192,65088,Bedtime Stories,960731,Más allá de los sueños,http://ia.media-imdb.com/images/M/MV5BMjA5Njk5...,2008,bedtime_stories,4.4,104,26,...,25,4.7,26,6,20,23,3.5,108877,63,http://content6.flixster.com/movie/10/94/33/10...
10193,65091,Manhattan Melodrama,25464,El enemigo público número 1,http://ia.media-imdb.com/images/M/MV5BMTUyODE3...,1934,manhattan_melodrama,7,12,10,...,83,0,4,2,2,50,3.7,344,71,http://content9.flixster.com/movie/66/44/64/66...
10194,65126,Choke,1024715,Choke,http://ia.media-imdb.com/images/M/MV5BMTMxMDI4...,2008,choke,5.6,135,73,...,54,4.9,26,8,18,30,3.3,13893,55,http://content6.flixster.com/movie/10/85/09/10...
10195,65130,Revolutionary Road,959337,Revolutionary Road,http://ia.media-imdb.com/images/M/MV5BMTI2MzY2...,2008,revolutionary_road,6.7,194,133,...,68,6.9,36,25,11,69,3.5,46044,70,http://content8.flixster.com/movie/10/88/40/10...
