In [1]:
import pandas as pd
import numpy as np

from scipy.sparse import csr_matrix
%matplotlib inline
import matplotlib.pyplot as plt

In [2]:
JSON_DATA_PATH = "data/reviews.json"
N = 10

In [3]:
import json

def iter_json_data(path):
    with open(path) as f:
        for line in f:
            data = json.loads(line)
            yield data
            
def get_data_frame():
    uid_to_id = {}
    iid_to_id = {}
    
    cols = ["uid", "iid", "review", "rating", "dt", "helpful", "summary"]
    rows = []
    for d in iter_json_data(JSON_DATA_PATH):
        uid = uid_to_id.setdefault(d["reviewerID"], len(uid_to_id))
        iid = iid_to_id.setdefault(d["asin"], len(iid_to_id))
        review = d["reviewText"]
        rating = float(d["overall"])
        dt = int(d["unixReviewTime"])
        helpful = 0 if d["helpful"][1] == 0 else d["helpful"][0]/d["helpful"][1]
        summary = d["summary"]
        rows.append((uid, iid, review, rating, dt, helpful, summary))
        
    return pd.DataFrame(rows, columns=cols)

In [4]:
df = get_data_frame()
df.head()

Unnamed: 0,uid,iid,review,rating,dt,helpful,summary
0,0,0,Installing the game was a struggle (because of...,1.0,1341792000,0.666667,Pay to unlock content? I don't think so.
1,1,0,If you like rally cars get this game you will ...,4.0,1372550400,0.0,Good rally game
2,2,0,1st shipment received a book instead of the ga...,1.0,1403913600,0.0,Wrong key
3,3,0,"I got this version instead of the PS3 version,...",3.0,1315958400,0.7,"awesome game, if it did not crash frequently !!"
4,4,0,I had Dirt 2 on Xbox 360 and it was an okay ga...,4.0,1308009600,1.0,DIRT 3


In [5]:
def split_df_by_dt(df, p=0.8):
    """Функция разбивает df на тестовую и тренировочную выборки по времени 
    публикации отзывов (значение времени в поле dt)
    
    :param p: персентиль значений dt, которые образуют тренировочную выборку. Например p=0.8 означает, что в 
    тренировочной части будут отзывы, соответствующие первым 80% временного интервала 
    :return: два pd.DataFrame объекта
    """
    border_dt = df.dt.quantile(p)
    print("Min=%s, border=%s, max=%s" % (df.dt.min(), border_dt, df.dt.max()))
    training_df, test_df  = df[df.dt <= border_dt], df[df.dt > border_dt]
    print("Размер до очистки:", training_df.shape, test_df.shape)
    # удаляем из тестовых данных строки, соответствующие пользователям или объектам, 
    # которых нет в тренировочных данных 
    # (пользователи - избегаем проблем для персональных систем, объекты - для всех)
    test_df = test_df[test_df.uid.isin(training_df.uid) & test_df.iid.isin(training_df.iid)]
    print("Размер после очистки:", training_df.shape, test_df.shape)
    return training_df, test_df

In [6]:
def hit_ratio(recs_dict, test_dict):
    """Функция считает метрику hit-ration для двух словарей
    :recs_dict: словарь рекомендаций типа {uid: {iid: score, ...}, ...}
    :test_dict: тестовый словарь типа {uid: {iid: score, ...}, ...}
    """
    hits = 0
    for uid in test_dict:
#         print(test_dict[uid].keys())
#         print(recs_dict.get(uid, {}))
        if set(test_dict[uid].keys()).intersection(recs_dict.get(uid, {})):
            hits += 1
    return hits / len(test_dict)

In [7]:
training_df, test_df = split_df_by_dt(df)
del df

Min=939859200, border=1377129600.0, max=1405987200
Размер до очистки: (185427, 7) (46353, 7)
Размер после очистки: (185427, 7) (19174, 7)


In [8]:
training_df.head()

Unnamed: 0,uid,iid,review,rating,dt,helpful,summary
0,0,0,Installing the game was a struggle (because of...,1.0,1341792000,0.666667,Pay to unlock content? I don't think so.
1,1,0,If you like rally cars get this game you will ...,4.0,1372550400,0.0,Good rally game
3,3,0,"I got this version instead of the PS3 version,...",3.0,1315958400,0.7,"awesome game, if it did not crash frequently !!"
4,4,0,I had Dirt 2 on Xbox 360 and it was an okay ga...,4.0,1308009600,1.0,DIRT 3
5,5,0,"Overall this is a well done racing game, with ...",4.0,1368230400,0.0,"Good racing game, terrible Windows Live Requir..."


In [9]:
from scipy.sparse import find
class BasicRecommender(object):
    def __init__(self):
        pass
    
    def get_recs(self, uid, top):
        """Строит рекомендации для пользователя uid
        :return: словарь типа {iid: score, ...}
        """
        return {}
    
    def get_batch_recs(self, uids, top):
        """Строит рекомендации для нескольких пользователей uids
        :return: словарь типа {uid: {iid: score, ...}, ...}
        """
        return {uid: self.get_recs(uid, top) for uid in uids}
    
class NonPersRecommender(BasicRecommender):

    def __init__(self, df):
        super(NonPersRecommender, self).__init__()
        self.recs = self._prepare_recs(df)
        
    def _prepare_recs(self, df):
        
        similarities_sparse = get_topk(nullify_main_diagonal(cosine_similarity(
            csr_normalized.T.tocsr(), dense_output=False)), 30)
        row, col, data = find(similarities_sparse)
        recs = csr_normalized.dot(similarities_sparse.T)
        
        return pd.Series([])
    
    def get_recs(self, uid, top):
        
        index = np.where(row==uid)
        recs = csr_normalized[index].dot(similarities_sparse.T)
        
        return self.recs[:top].to_dict()
    
    def get_batch_recs(self, uids, top):
        non_pers_recs = self.get_recs(None, top)
        return {uid: non_pers_recs for uid in uids}

In [21]:
class ItemBasedCollaborativeFilteringRS(BasicRecommender):
    
    
    def __init__(self, df):
        super(ItemBasedCollaborativeFilteringRS, self).__init__()
        self.recs = self._prepare_recs(df)
    
    def get_recs(self, uid, top):
        
        index = np.where(self.rows == uid)[0]
        iid_dict = {}
#         print(self.recs[0])

        
        has_iid = list(self.group_by_user_df[self.group_by_user_df.uid == uid].iid)[0]
        
        for arg_id in np.argsort(self.recs[index].data)[-top*3:][::-1]:
            iid = self.recs[index].indices[arg_id]
            score = self.recs[index].data[arg_id]
#             print(self.cols[iid])
            if not iid is has_iid:
                iid_dict[self.cols[iid]] = score
#         recs = csr_normalized[index].dot(similarities_sparse.T)
#             self.recs[:top]


        return iid_dict
    
    def get_batch_recs(self, uids, top):
#         non_pers_recs = self.get_recs(None, top)
        return {uid: self.get_recs(uid, top) for uid in uids}
    
    def _prepare_recs(self, df):
        # считаем количество отзывов для каждого объекта (pandas сортирует их по убыванию)
        self.csr, self.rows, self.cols = get_user_item_matrix(df)
        print("log 0")
        csr_normalized = normalize(self.csr, norm='l1', axis=1)
        print("log 1")
        self.similarities_sparse = get_topk(nullify_main_diagonal(cosine_similarity(
            csr_normalized.T.tocsr(), dense_output=False)), 50)
        print("log 2")
        row, col, self.data = find(self.similarities_sparse)
        print("log 3")
        recs = csr_normalized.dot(self.similarities_sparse.T)
        iid_group_by_user_df = training_df.groupby('uid')['iid'].apply(list).reset_index()
        rating_group_by_user_df = training_df.groupby('uid')['rating'].apply(list).reset_index()
        self.group_by_user_df = iid_group_by_user_df.merge(rating_group_by_user_df, on=['uid'], how='left')
        return recs

In [11]:
def get_topk(matrix, top, axis=1):
    """Converts source matrix to Top-K matrix
    where each row or column contains only top K values

    :param matrix: source matrix
    :param top: number of top items to be stored
    :param axis: 0 - top by column, 1 - top by row
    :return:
    """
    rows = []
    cols = []
    data = []

    if axis == 0:
        matrix = matrix.T.tocsr()

    for row_id, row in enumerate(matrix):
        if top is not None and row.nnz > top:
            top_args = np.argsort(row.data)[-top:]

            rows += [row_id] * top
            cols += row.indices[top_args].tolist()
            data += row.data[top_args].tolist()
        elif row.nnz > 0:
            rows += [row_id] * row.nnz
            cols += row.indices.tolist()
            data += row.data.tolist()

    topk_m = csr_matrix((data, (rows, cols)), (matrix.shape[0], matrix.shape[1]))

    if axis == 0:
        topk_m = topk_m.T.tocsr()

    return topk_m

In [12]:
# вспомогательные функции, которые могут пригодиться при построении Item-based CF
def nullify_main_diagonal(m):
    positions = range(m.shape[0])
    eye = csr_matrix((np.ones(len(positions)), (positions, positions)), m.shape)
    return m - m.multiply(eye)

In [13]:
from scipy.sparse import csr_matrix
def get_user_item_matrix(df):
    values = []
    rows = []
    cols = []
    
    uniq_iids = df.iid.unique()
    
    iid_group_by_user_df = training_df.groupby('uid')['iid'].apply(list).reset_index()
    rating_group_by_user_df = training_df.groupby('uid')['rating'].apply(list).reset_index()
    
    group_by_user_df = iid_group_by_user_df.merge(rating_group_by_user_df, on=['uid'], how='left')
    
    for index, row in group_by_user_df.iterrows():
        rows.extend([index] * len(row['iid']))
        cols.extend([np.where(uniq_iids==el)[0][0] for el in row['iid']])
        values.extend([el for el in row['rating']])
        
    csr = csr_matrix((values, (rows, cols)))
    
    return csr, group_by_user_df.uid, uniq_iids 

In [14]:
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import normalize

def get_sim_item_matrix(csr):
    
    csr_normalized = normalize(csr, norm='l1', axis=1)
    similarities_csr = get_topk(nullify_main_diagonal(cosine_similarity(csr_normalized.T.tocsr(),
                                                                        dense_output=False)), 30)
    
    return similarities_csr 

In [15]:
csr_normalized = normalize(csr, norm='l1', axis=1)

NameError: name 'csr' is not defined

In [None]:
csr_normalized.todense()

In [None]:
similarities_sparse = get_topk(nullify_main_diagonal(cosine_similarity(csr_normalized.T.tocsr(), dense_output=False)), 30)

In [29]:
similarities_sparse.todense()

NameError: name 'similarities_sparse' is not defined

In [30]:
from scipy.sparse import find
row, col, data = find(similarities_sparse)

NameError: name 'similarities_sparse' is not defined

In [None]:
len(row)

In [17]:
len(col)

NameError: name 'col' is not defined

In [18]:
len(data)

NameError: name 'data' is not defined

In [19]:
data.max()

NameError: name 'data' is not defined

In [20]:

np.where(data == data.max())

NameError: name 'data' is not defined

In [21]:
col[5641093]

NameError: name 'col' is not defined

In [22]:
row[5641093]

NameError: name 'row' is not defined

In [23]:
cols[11551]

NameError: name 'cols' is not defined

In [24]:
cols[4243]

NameError: name 'cols' is not defined

In [25]:
pd.options.display.max_colwidth = 2000
training_df[(training_df['iid'] == 4100) | (training_df['iid'] == 40)][['review', 'iid']]

Unnamed: 0,review,iid
619,"DO not get me wrong. I am one of The BIGGEST Zelda fans in the world. and I bought this one as soon as it came out with the gold cartige and all.But this Just isn't a Zelda game....Its missing the Fancy tunes, the story just doesn't seem that addictive.WHen you first get the game its great and it looks to be one of the best games. but then after you beat it.... its kinda boring...Zelda games should never get boring! Zelda NES I STILL paly that damn game.' Link Not as good as the first but still I like playing it. Zelda A Link To the pass on SNES is the BEST Zelda game.I just cannot love this game.. it did horrorble for a Zelda game.",40
620,"Ocarina of Time is the best video game of the last 100 years.If you have never played it, your missing out. Plus, you are a tool.",40
621,"The first Zelda game for the 64, The Legend of Zelda: Ocarina of time won game of the year for 1999. As Link, your quest to save the tri-force from the evil Ganondorf will take you all over Hyrule, not only in the present, but also the future with the help of the Ocarina of time. If you like the Zelda series, this game is definently for you (or even if you just like to save the world in your spare time:])",40
622,"I love this game. I love this game. I love this game. I just got Ocarina of Time and it's been a few years since it has come out - it still rocks even in comparison to what's out now. I've had a Playstation 2 for a couple of years and have had a good game here and there, but after playing Ocarina of Time, I realize that I had forgotten the beauty of Nintendo games. They just have a special feel to them (and they make me remember the years that I was a little tike playing old Nintendo games). I just beat the game about 5 minutes ago. I certainly won't spoil anything, but I will tell you that the ending is over 30 seconds long and is something worth playing through the game to see... A video game hasn't brought me this level of satisfaction in a while. Bravo Nintendo, bravo.",40
623,"I just bought this game a few weeks ago,and it's awesome. It just has that ""This is going to be an epic game"" from the get go. It has great graphics,even to this day.First of all,this is an Action Adventure,not a RPG/Role Playing Game.Next,I recommend buying the Strategy Guide. This game is what gamers call ""Nintendo Hard"". It's a challenging game,with quite a number of puzzle parts.Anyway,buy it if you have a 64,or Virtual Console it if you have a Wii.",40
624,"This game is blew me away! I didn't expect Ocarina to meet with my standards after playing the previous Zelda games. They usually go stale and predictable, but Ocarina didn't do that. It's a game for true fans of the adventure genre! The dungeons are long and perilous, there are numerous side-quests, a great cast of characters, and a large world to travel! If you have any doubts about this game, eliminate them. You won't be dissapointed!",40
625,This could be the best game of all time. What else needs to be said? If you haven't played this game before and still have a working N64 you need to pickup this game. If you haven't played the game you can also find it on the Wii Virtual Console and an upcoming remake for 3DS.,40
626,"I'm what gamers call a ""Zelda Veteran"" , I've played almost ALL of the Zelda games and managed to beat only one: this one. I love the Zelda games because they are challenging, different and fun to play.In this one, Link lives in a forest with the forest folk. Link is soon inside the Deku Tree to defeat the evil monsters that moved into the tree. After a battle with the queen spider, the Deku Tree sadly dies and tells Link to find Zelda, the Princess of Hyrule.Hyrule is a kingdom that's always in trouble, and its up to Link to save the land from a terrible enemy. Link has the help of Navi, a fairy that gives you advice or tells you what you should do next.During his adventures, Link will learn spells, be swallowed by a huge fish, and saving the land from the evil Ganondorf. This game is a classic and worth the money.",40
627,"For all those of you that have followed the lineage of Zelda from its early days on the classic 8-bit system, and onward to its SNES days with &quot;A Link to the Past&quot; (great game), and loved every second of it, then you have to buy this game! For those of you that have never played any other Zelda game, what are you waiting for? Now is a great time to start.Ocarina of Time is amazing. All the elements of classic Zelda are there - dozens of powerful items and weapons, lots of secrets to find, at least 10 new 3D-rendered challenging dungeons, plenty of secrets and side-games, and something that all the other Zelda's never had: a truly great story!If you enjoyed &quot;A Link to the Past&quot; you will most certainly like this game as well. The story of Ocarina of Time involves going back and forth in time, between a happy peaceful Hyrule, and a dark evil one, much like ALTTP; but don't think this is a rip-off on its predecessor. The complex dungeons, challenging bosses, and deepening story lines will blow you away. The 3D element of the game also adds so much more that was never possible before. Even the music is catchy and well orchestrated. (I actually downloaded the mp3 soundtrack online!)The gameplay is a little tough to get used to - afterall, you've never played Zelda in 3D before - so you actually have to consider things sneaking up behind you, and flying overhead! But once you get used to the controls and learn to use the &quot;Z-Targeting&quot; feature (using the trigger button will solve just about any control problem you may encounter), the gameplay gets easier.The only thing that annoyed me was Navi, a little fairy that travels with Link throughout the game. She is always bugging you to give you painfully obvious information. I suppose she is a helpful guide for younger or more inexperienced players, but I found her annoying. But once you come to realize that Navi is an important aspect of the game's control, you will let it slide.I would recomme...",40
628,"Really though, could an old, out dated game still be awesome 4 years later? The answer is yes, because this game is just too good. It gives you a complete story on Hyrule and the Triforce, not to mention the best video game music ever (especially the boss music). Being able to play as young link and adult link in two similar, yet in a way completely different worlds as one would find out sooner or later. The graphics are still, and when this was released, it was the best game graphicwise as well. The bosses are easy, yet in a way challenging, and the dungeons and temples are amazingly designed. However, there are two faults, one minor, one somewhat major.-Link is missing the strap on his frontside that holds his sword case thingy! Well, it just kinda looks odd.-The &quot;final boss&quot; is far too easy...but don't be too sure of that...",40


In [26]:
# recs = ui_m_normalized[uid].dot(ii_sim_m.T) 
recs = csr_normalized.dot(similarities_sparse.T)

NameError: name 'csr_normalized' is not defined

In [27]:
# np.argsort(recs.data)[-5:][::-1]
recs.todense()

NameError: name 'recs' is not defined

In [28]:
for arg_id in np.argsort(recs.data)[-30:][::-1]:
    iid = recs.indices[arg_id]
    score = recs.data[arg_id]
    print(iid, score)

NameError: name 'recs' is not defined

In [29]:
recs.indices

NameError: name 'recs' is not defined

In [30]:
csr_normalized.T[0].shape

NameError: name 'csr_normalized' is not defined

In [31]:
similarities_sparse.shape

NameError: name 'similarities_sparse' is not defined

In [32]:
# training_df.pivot(index='uid', columns='iid', values='rating')
# f = {'A':['sum','mean'], 'B':['prod']}
iid_group_by_user_df = training_df.groupby('uid')['iid'].apply(list).reset_index()
rating_group_by_user_df = training_df.groupby('uid')['rating'].apply(list).reset_index()

In [33]:
group_by_user_df = iid_group_by_user_df.merge(rating_group_by_user_df, on=['uid'], how='left')

In [34]:
group_by_user_df.head()

Unnamed: 0,uid,iid,rating
0,0,"[0, 1432, 3081, 3135, 3964, 6598]","[1.0, 5.0, 1.0, 5.0, 1.0, 2.0]"
1,1,"[0, 2223, 4380, 8388, 10211]","[4.0, 5.0, 1.0, 5.0, 2.0]"
2,2,"[3760, 5782, 6898, 7141, 7823, 7930]","[1.0, 3.0, 5.0, 3.0, 5.0, 1.0]"
3,3,"[0, 5363, 6293, 6308, 6339, 6590, 6843, 8069]","[3.0, 5.0, 3.0, 2.0, 5.0, 5.0, 5.0, 4.0]"
4,4,"[0, 5390, 6842, 6944, 7580, 7582, 7823, 7947, 7956, 8159, 8320, 8527, 8590, 8596, 8606, 8612, 8626, 9181, 9267, 9439, 9488]","[4.0, 5.0, 5.0, 4.0, 3.0, 1.0, 3.0, 5.0, 4.0, 4.0, 2.0, 2.0, 3.0, 4.0, 5.0, 5.0, 5.0, 2.0, 4.0, 5.0, 2.0]"


In [35]:
# zipped = list(zip(iid_group_by_user_df.uid, iid_group_by_user_df.iid))


In [36]:
csr.todense()

NameError: name 'csr' is not defined

In [None]:
[[(elem[0], el) for el in elem[1]] for elem in zipped][0]

In [None]:
lst = list(zipped)

In [None]:
zipped[0][0]

In [None]:
a = csr_matrix([training_df['rating'], training_df['uid'], training_df['iid']])

In [None]:
a.todense()

In [None]:
iid_group_by_user_df.head()

In [None]:
group_by_user_df.iid.values[0]

In [None]:
rows = []
cols = []
    
group_by_user_df = training_df.groupby('uid')['iid'].apply(list).reset_index()
row
csr = csr_matrix((data, (rows, cols)))

In [None]:
from scipy.sparse import csr_matrix
from sklearn.preprocessing import normalize
from scipy.sparse import vstack
def load_data(df):
    rows = []
    cols = []
    data = []
    
    uid_to_row = {}
    iid_to_col = {}
    
    for t in df.itertuples():
        row_id = uid_to_row.setdefault(t.uid, len(uid_to_row))
        col_id = iid_to_col.setdefault(t.iid, len(iid_to_col))
        rating = t.rating
        
        rows.append(row_id)
        cols.append(col_id)
        data.append(rating)    
    ui_m = csr_matrix((data, (rows, cols)))
    return ui_m, uid_to_row, iid_to_col

In [None]:
ui_m, uid_to_row, iid_to_col = load_data(training_df)

In [None]:
from sklearn.preprocessing import normalize
ui_m_normalized = normalize(ui_m, norm='l1', axis=1)

In [None]:
from sklearn.metrics.pairwise import cosine_similarity
ii_sim_m = cosine_similarity(ui_m_normalized.T.tocsr(), dense_output=False)

In [37]:
ii_sim_m.data

NameError: name 'ii_sim_m' is not defined

In [38]:
def get_topk(matrix, top, axis=1):
    """Converts source matrix to Top-K matrix
    where each row or column contains only top K values

    :param matrix: source matrix
    :param top: number of top items to be stored
    :param axis: 0 - top by column, 1 - top by row
    :return:
    """
    rows = []
    cols = []
    data = []

    if axis == 0:
        matrix = matrix.T.tocsr()

    for row_id, row in enumerate(matrix):
        if top is not None and row.nnz > top:
            top_args = np.argsort(row.data)[-top:]

            rows += [row_id] * top
            cols += row.indices[top_args].tolist()
            data += row.data[top_args].tolist()
        elif row.nnz > 0:
            rows += [row_id] * row.nnz
            cols += row.indices.tolist()
            data += row.data.tolist()

    topk_m = csr_matrix((data, (rows, cols)), (matrix.shape[0], matrix.shape[1]))

    if axis == 0:
        topk_m = topk_m.T.tocsr()

    return topk_m

In [39]:
ii_sim_m = nullify_main_diagonal(ii_sim_m)

NameError: name 'ii_sim_m' is not defined

In [40]:
print("Density", ii_sim_m.nnz / (ii_sim_m.shape[0] * ii_sim_m.shape[1]))

NameError: name 'ii_sim_m' is not defined

In [41]:
ii_sim_m = get_topk(ii_sim_m, 10)

NameError: name 'ii_sim_m' is not defined

In [42]:
print("Density", ii_sim_m.nnz / (ii_sim_m.shape[0] * ii_sim_m.shape[1]))

NameError: name 'ii_sim_m' is not defined

In [43]:
ii_sim_m.shape

NameError: name 'ii_sim_m' is not defined

In [44]:
uid = 0
recs = ui_m_normalized[uid].dot(ii_sim_m.T) 

NameError: name 'ui_m_normalized' is not defined

In [45]:
recs.todense()

NameError: name 'recs' is not defined

In [46]:
for arg_id in np.argsort(recs.data)[-5:][::-1]:
    iid = recs.indices[arg_id]
    score = recs.data[arg_id]
    print(iid, score)

NameError: name 'recs' is not defined

In [16]:
def get_test_dict(test_df):
    """Функция, конвертирующая тестовый df в словарь
    """
    test_dict = {}
    for t in test_df.itertuples():
        test_dict.setdefault(t.uid, {})
        test_dict[t.uid][t.iid] = t.rating
    return test_dict

In [17]:
test_dict = get_test_dict(test_df)

In [22]:
ibcf = ItemBasedCollaborativeFilteringRS(training_df)

log 0
log 1
log 2
log 3


In [23]:
recs_dict = ibcf.get_batch_recs(test_df.uid.unique(), N)

In [24]:
hit_ratio(recs_dict, test_dict)

0.08730741012472487

In [32]:
iid_group_by_user_df = training_df.groupby('uid')['iid'].apply(list).reset_index()
rating_group_by_user_df = training_df.groupby('uid')['rating'].apply(list).reset_index()
group_by_user_df = iid_group_by_user_df.merge(rating_group_by_user_df, on=['uid'], how='left')

In [33]:
s = [0, 3, 5,3081]

In [44]:
list(group_by_user_df[group_by_user_df.uid == 607].iid)

[[40, 544, 551, 1006, 1482, 2785]]

In [47]:
set(s).symmetric_difference(list(group_by_user_df[group_by_user_df.uid == 607].iid)[0])

{0, 3, 5, 40, 544, 551, 1006, 1482, 2785, 3081}

In [45]:
group_by_user_df.head()

Unnamed: 0,uid,iid,rating
0,0,"[0, 1432, 3081, 3135, 3964, 6598]","[1.0, 5.0, 1.0, 5.0, 1.0, 2.0]"
1,1,"[0, 2223, 4380, 8388, 10211]","[4.0, 5.0, 1.0, 5.0, 2.0]"
2,2,"[3760, 5782, 6898, 7141, 7823, 7930]","[1.0, 3.0, 5.0, 3.0, 5.0, 1.0]"
3,3,"[0, 5363, 6293, 6308, 6339, 6590, 6843, 8069]","[3.0, 5.0, 3.0, 2.0, 5.0, 5.0, 5.0, 4.0]"
4,4,"[0, 5390, 6842, 6944, 7580, 7582, 7823, 7947, ...","[4.0, 5.0, 5.0, 4.0, 3.0, 1.0, 3.0, 5.0, 4.0, ..."
