In [1]:
from google.cloud import aiplatform
from vertexai.language_models import TextEmbeddingModel
import pandas as pd

aiplatform.init(
    project='vidio-quiz-prod',
    location='asia-southeast1',
    staging_bucket='gs://genai_hackathon_2024',
)

In [3]:
model = TextEmbeddingModel.from_pretrained("textembedding-gecko-multilingual")

In [10]:
def search_text(title="", description="", group_l1="", group_l2="", main_genre="", genres="", directors="", actors="", country=""):
    return f"""title: {title}
description: {description}
group: {group_l1} > {group_l2}
genres: {main_genre}, {genres}
directors: {directors}
actors: {actors}
country: {country}"""

In [5]:
def embedding_text(model, text):
    embeddings = model.get_embeddings([text])
    for embedding in embeddings:
        vector = embedding.values
        # print(f"Length of Embedding Vector: {len(vector)}")
    return vector

In [6]:
final_df = pd.read_json('film_metadata.json', lines=True)

In [33]:
from annoy import AnnoyIndex
annoy_index = AnnoyIndex(768, 'dot')
for index, row in final_df.iterrows():
    annoy_index.add_item(row['id'], row['embedding'])
annoy_index.build(10)

True

In [39]:
query_vector = embedding_text(model, "action movies")
results = annoy_index.get_nns_by_vector(query_vector, 10, search_k=-1)
[print(search_text + "\n") for search_text in final_df[final_df['id'].isin(results)]['search_text'].values.tolist()]

title: man on fire
description: Di Mexico City, seorang mantan agen CIA bersumpah akan membalas dendam pada mereka yang melakukan tindakan tak terkatakan terhadap keluarga yang disewanya untuk dilindungi.
group: movies > western
genres: action, action, crime, drama, thriller
directors: tony scott
actors: adrian grunberg, carmen salinas, christopher walken, dakota fanning, denzel washington, gerardo taracena, marc anthony, mickey rourke, radha mitchell, stacy perskie
country: western

title: free fire
description: Film ini bergenre action komedi, menceritan pertemuan dua kelompok gengster yang hendak melakukan transaksi jualbeli senjata. Namun karena salah satu kelompok hendak menipu kelompok yang lain maka baku tembakpun tek dapat dihindari.
group: movies > western
genres: crime, action, comedy, crime, drama
directors: ben wheatley
actors: armie hammer, brie larson, sharlto copley
country: western

title: point break
description: Seorang agen FBI menyusup ke tim luar biasa yang terdiri

[None, None, None, None, None, None, None, None, None, None]

In [40]:
results = annoy_index.get_nns_by_item(4487, 10, -1)
final_df[final_df['id'].isin(results)]

[print(search_text + "\n") for search_text in final_df[final_df['id'].isin(results)]['search_text'].values.tolist()]

title: suka duka berduka
description: Meninggalnya Rauf membuat seluruh keluarganya harus terus berkumpul untuk pemakaman dan pengajian. Konflik antara para anak, mantu, cucu dan sang ibu tiri pun mulai terungkap. Apakah mereka berhasil mendapatkan apa yang mereka masing-masing harapkan?
group: series > vidio original
genres: drama, comedy, drama, family, romance
directors: andri cung,nia dinata
actors: atiqah hasiholan, ersamayori aurora yatim, jihane almira chedid, krisjiana baharudin, luna maya, oka antara, samudra taylor, tora sudiro
country: indonesia

title: sosmed
description: Popularitas Nadya dan Young di sosial media membuat mereka digemari. Namun, dibalik itu semua banyak problema yang tidak diketahui.
group: series > vidio original
genres: drama, drama, romance
directors: pritagita arianegara
actors: aurora ribero, bryan domani, carmela van der kruk, harris vriza, naufal samudra
country: indonesia

title: ikatan suami takut istri
description: Toro, Ario, Zulkifli malam itu 

[None, None, None, None, None, None, None, None, None, None]