In [5]:
import torch
from transformers import AutoTokenizer, AutoModel
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

In [6]:
# Load the movie table from kinogo.csv (path is relative to the working directory);
# later cells read its 'description' column.
kinogo = pd.read_csv(filepath_or_buffer='kinogo.csv')

In [7]:
# Single source of truth for the checkpoint, so the tokenizer and the encoder
# are always loaded from the same model.
MODEL_NAME = "cointegrated/rubert-tiny"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModel.from_pretrained(MODEL_NAME)
# Pick the GPU automatically when one is available instead of relying on a
# manually uncommented line; embed_bert_cls moves its inputs to model.device,
# so it works unchanged on either device.
model = model.to("cuda" if torch.cuda.is_available() else "cpu")

def embed_bert_cls(text, model, tokenizer):
    """Embed `text` with `model` and return one L2-normalised vector.

    Args:
        text: Input passed straight to `tokenizer` (called with padding and
            truncation enabled, returning PyTorch tensors).
        model: Encoder whose output exposes `last_hidden_state`; the encoded
            inputs are moved to `model.device` before the forward pass.
        tokenizer: Callable producing a mapping of tensors for `model`.

    Returns:
        A 1-D NumPy array: the hidden state at sequence position 0 (the
        leading token, i.e. [CLS] for BERT-style tokenizers) of the FIRST
        item in the batch, scaled to unit L2 norm.
    """
    encoded = tokenizer(text, padding=True, truncation=True, return_tensors='pt')
    # Put every input tensor on the same device as the model weights.
    on_device = {name: tensor.to(model.device) for name, tensor in encoded.items()}
    # Inference only: no autograd graph is needed.
    with torch.no_grad():
        output = model(**on_device)
    first_token_states = output.last_hidden_state[:, 0, :]
    unit_vectors = torch.nn.functional.normalize(first_token_states)
    # Only the first row of the batch is returned.
    return unit_vectors[0].cpu().numpy()

# Replace each description text with its embedding vector. The column is
# overwritten in place, so re-running this cell needs the CSV reloaded first
# (the tokenizer would otherwise receive arrays instead of text).
kinogo['description'] = kinogo['description'].apply(embed_bert_cls, args=(model, tokenizer))

In [8]:
# Preview the first ten rows to check the 'description' column now holds vectors.
kinogo.head(n=10)

Unnamed: 0,page_url,image_url,movie_title,description
0,https://kinogo.online/filmy/102112-hanna-v-igr...,kinogo.online/uploads/mini/fullstory/75/d88deb...,Ханна. В игре (2024),"[0.06898928, 0.05668675, 0.04241454, -0.028486..."
1,https://kinogo.online/filmy/102111-bajkery.html,kinogo.online/uploads/mini/fullstory/f5/fdcada...,Байкеры (2023),"[0.098050155, -0.009207887, 0.020433985, 0.004..."
2,https://kinogo.online/filmy/102021-jekzorcizm....,kinogo.online/uploads/mini/fullstory/f3/f419ff...,Экзорцизм (2024),"[0.023994667, 0.034180086, 0.0059999726, -0.01..."
3,https://kinogo.online/filmy/102142-bolero-dush...,kinogo.online/uploads/mini/fullstory/30/43d33a...,Болеро. Душа Парижа (2024),"[0.086606205, 0.034672655, 0.031870462, 0.0117..."
4,https://kinogo.online/filmy/102271-zombi-v-seu...,kinogo.online/uploads/mini/fullstory/1c/c07ec9...,Зомби в Сеуле (2024),"[0.08400867, 0.028651478, 0.023705158, -0.0121..."
5,https://kinogo.online/filmy/54528-kto-ugodno-k...,kinogo.online/uploads/mini/fullstory/4e/894333...,"Кто угодно, кроме тебя (2023)","[0.09322011, 0.046440534, 0.028097121, -0.0126..."
6,https://kinogo.online/filmy/51097-kniga-reshen...,kinogo.online/uploads/mini/fullstory/8d/36364f...,Книга решений (2023),"[0.04336082, 0.034105003, 0.010053288, -0.0103..."
7,https://kinogo.online/filmy/1970-vmesto-nas-dv...,kinogo.online/uploads/mini/fullstory/a5/fd307d...,Вместо нас двоих (2016),"[0.075716354, 0.019997299, 0.015330185, -0.005..."
8,https://kinogo.online/filmy/100588-nuzhdy-pute...,kinogo.online/uploads/mini/fullstory/a0/e268da...,Нужды путешественника (2024),"[0.12101997, 0.03829831, 0.030036554, 0.010132..."
9,https://kinogo.online/filmy/102188-svezhie-ubi...,kinogo.online/uploads/mini/fullstory/d8/37d0ae...,Свежие убийства (2023),"[0.07419462, 0.014064944, 0.040318046, -0.0103..."


In [9]:
# Free-text query, embedded with the same model and tokenizer as the movie
# descriptions so the two kinds of vectors are directly comparable.
search_phrase = 'Фильм про художника и кражу картин из музея'
search_e = embed_bert_cls(text=search_phrase, model=model, tokenizer=tokenizer)

In [10]:
# Cosine similarity between every description embedding and the query embedding.
# Stack the per-row vectors into one (n_movies, dim) matrix and score them in a
# single cosine_similarity call rather than one sklearn call per row; the
# resulting (n_movies, 1) column is flattened and assigned positionally, so
# row order is preserved.
description_matrix = np.stack(kinogo['description'].tolist())
kinogo['sim'] = cosine_similarity(description_matrix, search_e.reshape(1, -1)).ravel()

In [13]:
# Rank the movies by similarity to the query, best match first.
kinogo.sort_values('sim', ascending=False)

Unnamed: 0,page_url,image_url,movie_title,description,sim
2941,https://kinogo.online/filmy/49321-bardo.html,kinogo.online/uploads/mini/fullstory/03/92858a...,Бардо (2022),"[0.024128417, 0.04807458, -0.0019212511, -0.01...",0.594907
1830,https://kinogo.online/filmy/17767-korotkometra...,kinogo.online/uploads/mini/fullstory/4f/29ff3f...,Короткометражка Marvel: Забавный случай на пут...,"[0.04542778, 0.018167058, -0.016959837, -9.803...",0.588107
2873,https://kinogo.online/filmy/49407-pelevin.html,kinogo.online/uploads/mini/fullstory/28/2d2460...,Пелевин (2022),"[0.08690137, -0.0060871853, -0.031809866, -0.0...",0.586097
1285,https://kinogo.online/filmy/51469-che-serdce-b...,kinogo.online/uploads/mini/fullstory/bf/41b6f6...,Чьё сердце бьётся громче (2023),"[0.10496634, 0.043373264, -0.022618491, -0.026...",0.581895
2768,https://kinogo.online/filmy/49539-muzhik-pod-s...,kinogo.online/uploads/mini/fullstory/fd/e295f6...,Мужик под столом (2021),"[0.060782067, 0.06255164, 0.031844564, -0.0150...",0.576064
...,...,...,...,...,...
203,https://kinogo.online/filmy/99268-akvarium.html,kinogo.online/uploads/mini/fullstory/17/a4ad77...,Аквариум (2023),"[0.052646972, -0.011974158, 0.011002517, -0.02...",0.281427
430,https://kinogo.online/filmy/100209-preduprezhd...,kinogo.online/uploads/mini/fullstory/23/4b8e0b...,Предупреждение 2 (2024),"[0.030190356, 0.021085294, -0.023226669, -0.04...",0.281266
1742,https://kinogo.online/filmy/50824-dzhohan-2.html,kinogo.online/uploads/mini/fullstory/eb/d4b0ac...,Джохан 2 (2023),"[0.056193564, 0.03568418, -0.0010929449, -0.03...",0.279053
2343,https://kinogo.online/filmy/50085-dostavschik....,kinogo.online/uploads/mini/fullstory/47/b654fe...,Доставщик (2022),"[0.0832286, 0.042199157, 0.024896657, -0.01531...",0.277711
