In [1]:
import random
import numpy as np
import os
import zipfile as zf
import requests
import pandas as pd
from tqdm import tqdm
import pickle
import implicit
from rectools.models import ImplicitALSWrapperModel
from implicit.als import AlternatingLeastSquares
from rectools import Columns
from rectools.tools import UserToItemAnnRecommender
from rectools.dataset import Dataset, Interactions

In [2]:
# Single seed shared by every source of randomness used in this notebook.
RANDOM_STATE = 42

# NOTE: assigning PYTHONHASHSEED at runtime does not re-seed the hash function
# of the already-running interpreter; it is only inherited by child processes.
os.environ["PYTHONHASHSEED"] = str(RANDOM_STATE)

# Seed the stdlib and the legacy global NumPy generators.
np.random.seed(RANDOM_STATE)
random.seed(RANDOM_STATE)

In [30]:
# File (relative to the working directory) where the model is pickled.
MODEL_PATH = "als.pkl"

## Инициализация датасета

In [4]:
# Location of the KION dataset archive; the path embeds a fixed revision hash,
# so repeated downloads should fetch the same file.
url = (
    "https://github.com/irsafilo/KION_DATASET/raw/"
    "f69775be31fa5779907cf0a92ddedb70037fb5ae/data_original.zip"
)

In [5]:
# Stream the dataset archive to "kion.zip" in 1 MiB chunks with a progress bar.
# The response, the output file and the progress bar are all managed by `with`
# so each of them is closed even if the download is interrupted.
# timeout=60 stops the cell from hanging forever on a stalled connection.
with requests.get(url, stream=True, timeout=60) as req:
    # Fail fast on HTTP errors instead of saving an error page as "kion.zip".
    req.raise_for_status()

    # Content-Length may be absent; 0 then means "size unknown".
    total_size_in_bytes = int(req.headers.get("Content-Length", 0))

    with open("kion.zip", "wb") as fd, tqdm(
        desc="kion dataset download",
        # Pass None (not 0) for an unknown size so tqdm does not draw a 0-length bar.
        total=total_size_in_bytes or None,
        unit="iB",
        unit_scale=True,
    ) as progress_bar:
        for chunk in req.iter_content(chunk_size=2**20):
            fd.write(chunk)
            progress_bar.update(len(chunk))

kion dataset download: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋| 78.6M/78.8M [00:17<00:00, 4.07MiB/s]

In [6]:
# Unpack the downloaded archive into the current working directory.
# The context manager closes the archive even if extraction raises.
with zf.ZipFile("kion.zip", "r") as files:
    files.extractall()

In [37]:
# Load the interaction log (parsing the watch date) and rename the date and
# duration columns to the names RecTools exposes via `Columns`.
interactions_df = pd.read_csv(
    "data_original/interactions.csv",
    parse_dates=["last_watch_dt"],
).rename(
    columns={
        "last_watch_dt": Columns.Datetime,
        "total_dur": Columns.Weight,
    }
)

In [38]:
# Wrap the renamed interactions frame in a RecTools `Interactions` object.
interactions = Interactions(interactions_df)

In [39]:
# Preview the first rows of the wrapped interactions table.
interactions.df.head()

Unnamed: 0,user_id,item_id,datetime,weight,watched_pct
0,176549,9506,2021-05-11,4250.0,72.0
1,699317,1659,2021-05-29,8317.0,100.0
2,656683,7107,2021-05-09,10.0,0.0
3,864613,7638,2021-07-05,14483.0,100.0
4,964868,9506,2021-04-30,6725.0,100.0


In [40]:
# Load the user and item tables from the extracted archive.
# NOTE(review): neither frame is referenced by the cells visible in this
# notebook — confirm they are still needed.
users = pd.read_csv("data_original/users.csv")
items = pd.read_csv("data_original/items.csv")

## Обучение модели

Будем обучать модель ImplicitALS, как было показано в лекции.

In [41]:
# RecTools wrapper around an implicit ALS model with 32 latent factors,
# seeded with the notebook-wide RANDOM_STATE and limited to 8 threads.
model = ImplicitALSWrapperModel(
    model=AlternatingLeastSquares(
        factors=32,
        random_state=RANDOM_STATE,
        num_threads=8,
    ),
)

In [42]:
# Display the wrapper object to confirm it was constructed.
model

<rectools.models.implicit_als.ImplicitALSWrapperModel at 0x7fd43abbcc10>

In [43]:
# Build the RecTools dataset directly from the renamed interactions frame
# (the `interactions` wrapper created earlier is not used here).
dataset = Dataset.construct(interactions_df)

In [None]:
# Train the ALS wrapper on the constructed dataset.
model.fit(dataset)

In [29]:
# Pull the learned user and item embeddings out of the fitted model.
# Nothing else in the visible notebook defines `user_v` / `item_v`, so without
# this line the cell (and the ANN cell that follows) fails on a fresh kernel.
user_v, item_v = model.get_vectors()

# Show the user embedding matrix (NumPy truncates the repr for large arrays).
user_v

array([[ 1.3105050e-01,  9.5244055e+00,  9.2103491e+00, ...,
         2.4374983e+00,  8.8954216e-01,  1.1701256e+01],
       [ 5.1834965e+00,  2.9833522e+00,  1.1783665e+01, ...,
         4.3121619e+00,  6.5011787e+00,  2.7034140e+00],
       [-4.3731823e+00,  3.6713746e+00,  1.2461555e+00, ...,
        -2.0232971e+00,  5.8753810e+00,  3.4910135e+00],
       ...,
       [-5.6007956e-03,  9.7842216e-02, -4.5963153e-03, ...,
         5.0578274e-02, -3.6342427e-02,  9.0891722e-04],
       [-6.5714540e+00, -4.9895878e+00,  5.4431945e-01, ...,
        -5.2779121e+00, -3.0052602e+00,  4.0550637e+00],
       [ 8.4534197e+00,  1.1404150e+01, -8.8084656e-01, ...,
        -3.0177834e+00,  1.1427036e+01, -6.0125761e+00]], dtype=float32)

In [26]:
# Build the user-to-item ANN recommender from the embedding matrices and the
# dataset's external<->internal id maps.
ann = UserToItemAnnRecommender(
    user_v,
    item_v,
    dataset.user_id_map,
    dataset.item_id_map,
)

# Kept as the final expression so the cell still displays the returned object.
ann.fit()

<rectools.tools.ann.UserToItemAnnRecommender at 0x7fd42af972b0>

In [31]:
# Persist the trained model to MODEL_PATH. The `with` block guarantees the file
# is flushed and closed; the bare `open(...)` previously left that to the
# garbage collector.
# NOTE: only unpickle this file from a trusted source — pickle.load can run
# arbitrary code.
with open(MODEL_PATH, "wb") as model_file:
    pickle.dump(model, model_file)