In [1]:
import vertexai
from vertexai.preview.generative_models import GenerativeModel
from vertexai.language_models import TextEmbeddingModel
from annoy import AnnoyIndex
import pandas as pd

vertexai.init(project="vidio-quiz-prod", location="asia-southeast1")
llm_model = GenerativeModel("gemini-1.0-pro")
embedding_model = TextEmbeddingModel.from_pretrained("textembedding-gecko-multilingual")

In [2]:
final_df = pd.read_json('film_metadata.json', lines=True)

annoy_index = AnnoyIndex(768, 'dot')
for index, row in final_df.iterrows():
    annoy_index.add_item(row['id'], row['embedding'])
annoy_index.build(10)

True

In [61]:
import requests
import os
from dotenv import load_dotenv
import json

load_dotenv()

def prompt_need_recommendation(user_query):
    return f"""You are Chat Recommendation Engine. Your task is to determine if user prompt need recommendation of movies or not.Answer False if the prompt not related in your task for recommending movies
User prompt: {user_query}
You need to answer in True or False only. 
"""

def get_play_history_film(user_id):
    api_key = os.environ.get("LARVA_APPLICATION_KEY", "")
    larva_response = requests.get("http://localhost:6000/vidio/user-play-history-films", params={'user_id': user_id}, headers={'key': api_key})
    print(larva_response)
    return [play_history['film_id'] for play_history in json.loads(larva_response.content)['records']][:5]

def prompt_vector_search(user_query):
    return f"""You are Chat Recommendation Engine, your job is to decide if we need to summarize User Query and look up to Vector Search or not.
User Query: {user_query}
Summarize the User Query as context to look up to Vector Search. If there is no context need to look up, you can answer with empty.
"""

def get_grounding(response, history_film_ids):
    vector_search_result = ""

    if len(response) == 0:
        vector_film_ids = vector_search(embedding_model, response)
        vector_search_result = lookup_film(vector_film_ids)
    else:
        if len(history_film_ids) > 0:
            film_ids = []
            vector_search_result
            for film_id in history_film_ids:
                film_search_query = lookup_film([film_id])
                if film_search_query != "":
                    film_ids += vector_search(embedding_model, film_search_query)
            vector_search_result = lookup_film(set(film_ids))
    return vector_search_result

def embedding_text(model, text):
    embeddings = model.get_embeddings([text])
    for embedding in embeddings:
        vector = embedding.values
        # print(f"Length of Embedding Vector: {len(vector)}")
    return vector

def vector_search(model, query, limit=10):
    query_vector = embedding_text(model, query)
    return annoy_index.get_nns_by_vector(query_vector, limit, search_k=-1)

def lookup_film(film_ids):
    return "\n\n".join([search_text for search_text in final_df[final_df['id'].isin(film_ids)]['search_text'].values.tolist()])

def prompt_recommendation(user_query, vector_search_result, user_history=""):
    return f"""You are content expert from OTT company.
Your task is to give recommendation based on User Query, Movie Data retrieved from Vector Search and User History. The answer in Bahasa Indonesia, please give 5 recommendation, re-rank the recommendation based on User Query and User History relevancy. 
Also give the explanation for each content why it relevant for the user based on the User Query and User History. The explanation should show only if it is relevant to be shown to user and summarize it between based on User Query and User History.

Here is the context.
User Query: {user_query}

Vector Search Result:
{vector_search_result}

User History:
{user_history}
"""

def call_llm(prompt):
    responses = llm_model.generate_content(prompt)
    return responses.candidates[0].content.parts[0].text

In [46]:
user_query = "gw lagi pengen nonton film science fiction, apa ya yang cocok?"
user_id = 54936340

In [49]:
response = call_llm(prompt_need_recommendation(user_query))
need_recommendation = response.lower() in ['true', '1', 't', 'y', 'yes', 'yeah', 'yup', 'certainly', 'uh-huh']
print(need_recommendation)

True


In [52]:
history_film_ids = get_play_history_film(user_id)
user_history = lookup_film(history_film_ids)
print(user_history)

<Response [200]>
Title: The Moon
Group: Movies > Korea > nan
Genres: adventure,action,adventure,drama,sci-fi
Directors: kim yong-hwa
Actors: do kyung-soo,kim hee ae,park byung-eun,sol kyung-gu
Country: Korea

Title: Resep Masak Dapur Abu Tosca
Group: Entertainment > Lifestyle > nan
Genres: cooking,cooking,hobbies
Directors: various
Actors: various
Country: Indonesia

Title: The 100
Group: Series > Western > nan
Genres: sci-fi,adventure,drama,mystery,sci-fi
Directors: ed fraiman,ian samoil,pj pesce
Actors: bob morley,eliza taylor,jarod joseph,marie avgeropoulos
Country: Western

Title: Bad Teacher
Group: Movies > Western > nan
Genres: romance,comedy,romance
Directors: jake kasdan
Actors: cameron diaz,jason segel,john michael higgins,justin timberlake,lee eisenberg,lucy punch
Country: Western


In [56]:
print(prompt_vector_search(user_query))
response = call_llm(prompt_vector_search(user_query))
print(response)

You are Chat Recommendation Engine, your job is to decide if we need to summarize User Query and look up to Vector Search or not.
User Query: gw lagi pengen nonton film science fiction, apa ya yang cocok?
Summarize the User Query as context to look up to Vector Search. If there is no context need to look up, you can answer with empty

film science fiction


In [58]:
vector_search_result = get_grounding(response, history_film_ids)
print(vector_search_result)

Title: The American
Group: Movies > Western > nan
Genres: action,action,thriller
Directors: anton corbijn
Actors: george clooney,paolo bonacelli,thekla reuten,violante placido
Country: Western

Title: Unfaithful
Group: Movies > Western > nan
Genres: drama,drama,romance,thriller
Directors: adrian lyne
Actors: chad lowe,diane lane,dominic chianese,erik per sullivan,gary basaraba,kate burton,margaret colin,olivier martinez,richard gere,zeljko ivanek
Country: Western

Title: Endeus TV - Resep Cemilan Saat Lebaran
Group: Entertainment > Lifestyle > nan
Genres: cooking,cooking,food
Directors: various
Actors: various
Country: Indonesia

Title: Endeus TV - Resep Olahan Aneka Dessert
Group: Entertainment > Lifestyle > nan
Genres: cooking,cooking,food
Directors: various
Actors: various
Country: Indonesia

Title: Switch
Group: Movies > Korea > nan
Genres: comedy,comedy,drama,fantasy
Directors: ma dae-yoon
Actors: kim mi-kyung,kwon sang-woo,lee min-jung,oh jung-se,park so-yi
Country: Korea

Title:

In [62]:
if need_recommendation:
    response = call_llm(prompt_recommendation(user_query, vector_search_result, user_history))
else:
    response = "Mohon maaf, saya tidak mengerti pertanyaan Anda. Mohon coba lagi."


In [63]:
print(f"user prompt: {user_query}\n")
print(response)

user prompt: gw lagi pengen nonton film science fiction, apa ya yang cocok?

**1. The Moon (Film Korea)**
Relevansi: Anda pernah menonton film serupa, **The Moon**, yang juga bergenre fiksi ilmiah.

**2. The 100 (Serial Barat)**
Relevansi: Anda juga pernah menonton serial **The 100** yang memiliki genre fiksi ilmiah dan petualangan.

**3. Super 8 (Film Barat)**
Relevansi: Film ini bergenre fiksi ilmiah dan misteri, mirip dengan preferensi Anda pada **The Moon**.

**4. The International (Film Barat)**
Relevansi: Meski tidak bergenre fiksi ilmiah, film ini menampilkan aksi dan misteri yang mungkin menarik minat Anda.

**5. Elektra (Film Barat)**
Relevansi: Film ini bergenre aksi dan petualangan, yang juga terdapat dalam film **The Moon** yang Anda tonton sebelumnya.


In [64]:
summarize_previous_prompt = f"""Summarize previous prompt and your answer to create context for next user query
Prompt: {prompt_recommendation(user_query, vector_search_result, user_history)}
Your Answer: {response}"""
response = call_llm(summarize_previous_prompt)
print(response)

**Konteks Sebelumnya:**

Pengguna: Saya ingin nonton film fiksi ilmiah, ada rekomendasi?

**Rekomendasi yang Diberikan:**

1. **The Moon (Film Korea)**: Pernah menonton film serupa bergenre fiksi ilmiah.
2. **The 100 (Serial Barat)**: Pernah menonton serial bergenre fiksi ilmiah dan petualangan.
3. **Super 8 (Film Barat)**: Bergenre fiksi ilmiah dan misteri, mirip preferensi pengguna.
4. **The International (Film Barat)**: Menampilkan aksi dan misteri yang menarik minat pengguna.
5. **Elektra (Film Barat)**: Bergenre aksi dan petualangan, serupa dengan "The Moon" yang pernah ditonton pengguna.


In [67]:
subsequent_query = """Mengapa kamu memberikan rekomendasi Super 8?"""
following_prompt = f"""You are recommendation engine, user ask question again about your answer. Here is user question: {subsequent_query}
Please answer user question, you can re-rank or explain based on user question.
Previous Prompt:
{response}"""
response = call_llm(following_prompt)
print(response)

Saya merekomendasikan "Super 8" karena beberapa alasan:

* **Mirip dengan "The Moon"**: Film ini memiliki kemiripan dengan "The Moon" dalam genre fiks ilmiah dan misteri.
* **Kisah yang menarik**: "Super 8" memiliki alur cerita yang seru dan menarik dengan kombinasi aksi, misteri, dan fiksi ilmiah.
* **Efek visual yang memukau**: Film ini menampilkan efek visual yang mengesankan, terutama dalam adegan-adegan yang melibatkan makhluk luar angkasa.
* **Referensi masa kecil**: Film ini menampilkan tema nostalgia dan eksplorasi masa kanak-kanak, yang mungkin menarik bagi Anda setelah menonton "The Moon".
