In [1]:
import torch
from transformers import AutoTokenizer, AutoModel
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

from sentence_transformers import SentenceTransformer, util
from tqdm import tqdm

In [3]:
# Load the scraped movie table together with its precomputed embedding columns
# (the embeddings arrive from the CSV as comma-separated strings).
# NOTE(review): the preview shows an 'Unnamed: 0' column, which looks like a
# previously saved index -- consider index_col=0 after confirming how the CSV was written.
EMBEDDINGS_CSV = 'data/combined_parsing_with_embeddings.csv'
combined_parsing = pd.read_csv(EMBEDDINGS_CSV)
combined_parsing.head(n=10)

Unnamed: 0,page_url,image_url,movie_title,description,embeddings,emb_bert
0,https://kinogo.online/filmy/102112-hanna-v-igr...,kinogo.online/uploads/mini/fullstory/75/d88deb...,Ханна. В игре (2024),"Ханна – молодая девушка, страдающая от агорафо...","-0.46084476,-0.16368349,0.07980391,-0.6657749,...","0.014485924,0.063853845,0.04404931,-0.07017968..."
1,https://kinogo.online/filmy/102111-bajkery.html,kinogo.online/uploads/mini/fullstory/f5/fdcada...,Байкеры (2023),"Остросюжетная картина, разворачивающаяся на Ср...","-0.5326852,-0.23060842,0.16051687,-0.56171346,...","0.04075556,-0.047768865,-0.023799775,-0.038850..."
2,https://kinogo.online/filmy/102021-jekzorcizm....,kinogo.online/uploads/mini/fullstory/f3/f419ff...,Экзорцизм (2024),"История об актере по имени Энтони, согласившем...","-0.61657906,-0.15813282,0.29164374,-0.5973491,...","-0.0682673,0.042816307,-0.06475405,-0.08741514..."
3,https://kinogo.online/filmy/102142-bolero-dush...,kinogo.online/uploads/mini/fullstory/30/43d33a...,Болеро. Душа Парижа (2024),События картины разворачиваются в Париже и зна...,"-0.5000128,-0.20215347,0.24276419,-0.63409877,...","0.02861084,0.010292943,-0.008946151,-0.0205987..."
4,https://kinogo.online/filmy/102271-zombi-v-seu...,kinogo.online/uploads/mini/fullstory/1c/c07ec9...,Зомби в Сеуле (2024),Фильм ужасов о молодой танцовщице по имени Си ...,"-0.5201405,-0.1288207,0.3313515,-0.6584862,0.6...","-0.030309511,0.03474685,-0.0334166,-0.07098175..."
5,https://kinogo.online/filmy/54528-kto-ugodno-k...,kinogo.online/uploads/mini/fullstory/4e/894333...,"Кто угодно, кроме тебя (2023)","Би – молодая девушка, всегда мечтавшая обзавес...","-0.41063425,-0.0830526,0.049538035,-0.74925107...","0.0847645,0.0651581,0.032940105,-0.034403432,-..."
6,https://kinogo.online/filmy/51097-kniga-reshen...,kinogo.online/uploads/mini/fullstory/8d/36364f...,Книга решений (2023),Фильм рассказывает о жизни необычного парня по...,"-0.5318866,-0.0229408,0.14009441,-0.60780185,0...","-0.018684108,0.0017345467,-0.025695119,-0.0441..."
7,https://kinogo.online/filmy/1970-vmesto-nas-dv...,kinogo.online/uploads/mini/fullstory/a5/fd307d...,Вместо нас двоих (2016),В небольшой деревушке молодая девушка по имени...,"-0.46826285,-0.054471053,-0.013140891,-0.59218...","0.00017356129,0.056439854,-0.008188479,-0.0607..."
8,https://kinogo.online/filmy/100588-nuzhdy-pute...,kinogo.online/uploads/mini/fullstory/a0/e268da...,Нужды путешественника (2024),"Ирис – молодая девушка, о которой никто ничего...","-0.581347,-0.14413577,0.09750812,-0.6048381,0....","0.09578059,0.051227942,0.0054312362,-0.0047080..."
9,https://kinogo.online/filmy/102188-svezhie-ubi...,kinogo.online/uploads/mini/fullstory/d8/37d0ae...,Свежие убийства (2023),Мы переносимся в Нью-Йорк 80-х годов. Франсин ...,"-0.5330533,-0.21125765,0.37169516,-0.6022329,0...","0.0633663,-0.03365965,0.008783014,-0.021746349..."


In [4]:
def _parse_embedding(raw):
    """Convert one serialized embedding into a 1-D float32 tensor.

    Args:
        raw: Either a comma-separated string of floats (the form read from
            the CSV) or a ``torch.Tensor`` left behind by an earlier run of
            this cell.

    Returns:
        torch.Tensor: the embedding values as a float32 tensor.

    Raises:
        ValueError: if a string item cannot be parsed as a float.
    """
    if isinstance(raw, torch.Tensor):
        # Already converted: pass it through so re-running the cell is a no-op
        # instead of failing on `.split` of a tensor.
        return raw
    return torch.tensor([float(value) for value in raw.split(',')], dtype=torch.float32)


# The column is overwritten in place because later cells read 'embeddings' as tensors.
combined_parsing['embeddings'] = combined_parsing['embeddings'].map(_parse_embedding)

In [5]:
# Preview the first ten rows again to inspect the converted 'embeddings' column
combined_parsing.head(n=10)

Unnamed: 0,page_url,image_url,movie_title,description,embeddings,emb_bert
0,https://kinogo.online/filmy/102112-hanna-v-igr...,kinogo.online/uploads/mini/fullstory/75/d88deb...,Ханна. В игре (2024),"Ханна – молодая девушка, страдающая от агорафо...","[tensor(-0.4608), tensor(-0.1637), tensor(0.07...","0.014485924,0.063853845,0.04404931,-0.07017968..."
1,https://kinogo.online/filmy/102111-bajkery.html,kinogo.online/uploads/mini/fullstory/f5/fdcada...,Байкеры (2023),"Остросюжетная картина, разворачивающаяся на Ср...","[tensor(-0.5327), tensor(-0.2306), tensor(0.16...","0.04075556,-0.047768865,-0.023799775,-0.038850..."
2,https://kinogo.online/filmy/102021-jekzorcizm....,kinogo.online/uploads/mini/fullstory/f3/f419ff...,Экзорцизм (2024),"История об актере по имени Энтони, согласившем...","[tensor(-0.6166), tensor(-0.1581), tensor(0.29...","-0.0682673,0.042816307,-0.06475405,-0.08741514..."
3,https://kinogo.online/filmy/102142-bolero-dush...,kinogo.online/uploads/mini/fullstory/30/43d33a...,Болеро. Душа Парижа (2024),События картины разворачиваются в Париже и зна...,"[tensor(-0.5000), tensor(-0.2022), tensor(0.24...","0.02861084,0.010292943,-0.008946151,-0.0205987..."
4,https://kinogo.online/filmy/102271-zombi-v-seu...,kinogo.online/uploads/mini/fullstory/1c/c07ec9...,Зомби в Сеуле (2024),Фильм ужасов о молодой танцовщице по имени Си ...,"[tensor(-0.5201), tensor(-0.1288), tensor(0.33...","-0.030309511,0.03474685,-0.0334166,-0.07098175..."
5,https://kinogo.online/filmy/54528-kto-ugodno-k...,kinogo.online/uploads/mini/fullstory/4e/894333...,"Кто угодно, кроме тебя (2023)","Би – молодая девушка, всегда мечтавшая обзавес...","[tensor(-0.4106), tensor(-0.0831), tensor(0.04...","0.0847645,0.0651581,0.032940105,-0.034403432,-..."
6,https://kinogo.online/filmy/51097-kniga-reshen...,kinogo.online/uploads/mini/fullstory/8d/36364f...,Книга решений (2023),Фильм рассказывает о жизни необычного парня по...,"[tensor(-0.5319), tensor(-0.0229), tensor(0.14...","-0.018684108,0.0017345467,-0.025695119,-0.0441..."
7,https://kinogo.online/filmy/1970-vmesto-nas-dv...,kinogo.online/uploads/mini/fullstory/a5/fd307d...,Вместо нас двоих (2016),В небольшой деревушке молодая девушка по имени...,"[tensor(-0.4683), tensor(-0.0545), tensor(-0.0...","0.00017356129,0.056439854,-0.008188479,-0.0607..."
8,https://kinogo.online/filmy/100588-nuzhdy-pute...,kinogo.online/uploads/mini/fullstory/a0/e268da...,Нужды путешественника (2024),"Ирис – молодая девушка, о которой никто ничего...","[tensor(-0.5813), tensor(-0.1441), tensor(0.09...","0.09578059,0.051227942,0.0054312362,-0.0047080..."
9,https://kinogo.online/filmy/102188-svezhie-ubi...,kinogo.online/uploads/mini/fullstory/d8/37d0ae...,Свежие убийства (2023),Мы переносимся в Нью-Йорк 80-х годов. Франсин ...,"[tensor(-0.5331), tensor(-0.2113), tensor(0.37...","0.0633663,-0.03365965,0.008783014,-0.021746349..."


In [6]:
# Load the sentence-embedding model used to encode search queries.
#
# SECURITY: a Hugging Face access token used to be hardcoded in this call.
# Credentials must never be committed to a notebook; the token that was
# committed should be revoked. If authentication is ever required, export
# HF_TOKEN or run `huggingface-cli login` -- huggingface_hub picks the token up
# from there, so it never has to appear in code.
#
# NOTE(review): the checkpoint name suggests an MS MARCO model, whereas the movie
# descriptions and the query are Russian. Confirm that the 'embeddings' column
# was produced with this same model; otherwise the similarities are not meaningful.
MODEL_NAME = 'sentence-transformers/msmarco-distilbert-base-v3'
model = SentenceTransformer(MODEL_NAME)

In [7]:
# Free-text search request describing the movie we are looking for
query = 'Фильм про наемного киллера с психологическими проблемами за которым охотится полицейский'

# Encode the request with the loaded model so it can be compared with the stored vectors
query_embedding = model.encode(query)

In [8]:
# Compute the cosine similarity between the query and every movie embedding,
# then keep the ten best matches.
#
# The per-movie tensors are stacked into a single 2-D matrix so that one
# batched cos_sim call replaces one call per row, and the scores are stored
# as plain Python floats rather than 0-dim tensors in an object column.
if len(combined_parsing):
    embedding_matrix = torch.stack(combined_parsing['embeddings'].tolist())
    # cos_sim returns a (1, n_rows) matrix for a single query; take its only row.
    similarity_scores = util.cos_sim(query_embedding, embedding_matrix)[0].tolist()
else:
    # torch.stack rejects an empty list; an empty table simply has no scores.
    similarity_scores = []

combined_parsing['similarity'] = pd.Series(
    similarity_scores, index=combined_parsing.index, dtype='float64'
)
top_movies = combined_parsing.sort_values(by='similarity', ascending=False).head(10)

In [9]:
# Report the ten best-matching titles together with their similarity scores
print("Top 10 Similar Movies:")
for title, score in zip(top_movies['movie_title'], top_movies['similarity']):
    print(f"Title: {title} - Similarity: {score:.4f}")

Top 10 Similar Movies:
Title: Семья по-быстрому - Similarity: 0.9716
Title: Ватиканские записи - Similarity: 0.9684
Title: Первый снег - Similarity: 0.9676
Title: Родители строгого режима - Similarity: 0.9659
Title: Вихрь - Similarity: 0.9654
Title: Игра в Пусане - Similarity: 0.9639
Title: Джулс - Similarity: 0.9637
Title: Не входи - Similarity: 0.9622
Title: Проникновение - Similarity: 0.9620
Title: Ночной рейд - Similarity: 0.9620


In [10]:
# Sanity check of the model on an English question/passage pair.
# A dedicated name is used for the question embedding: reusing `query_embedding`
# here would overwrite the movie-search query, so re-running the ranking cell
# afterwards would silently rank movies against this unrelated question.
sanity_query_embedding = model.encode("How big is London")
passage_embedding = model.encode("London has 9,787,426 inhabitants at the 2011 census")

print("Similarity:", util.cos_sim(sanity_query_embedding, passage_embedding))

Similarity: tensor([[0.6082]])
