In [12]:
import vertexai
from vertexai.language_models import TextEmbeddingModel
from annoy import AnnoyIndex
import pandas as pd
from langchain_google_vertexai import VertexAI
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate, PromptTemplate
from langchain_core.runnables import RunnableLambda

# Point the Vertex AI SDK at the GCP project and region used by the model handles below.
vertexai.init(project="vidio-quiz-prod", location="asia-southeast1")
# Handle to the multilingual text-embedding model.
embedding_model = TextEmbeddingModel.from_pretrained("textembedding-gecko-multilingual")
# LangChain LLM wrapper (Gemini Pro) that every chain in this notebook pipes its prompt into.
model = VertexAI(model_name="gemini-pro", temperature=0.4)

In [13]:
# Load the film catalogue; lines=True reads the file as JSON Lines (one record per line).
# `final_df` is also read as a global by parse_final_response (image_url / content_url lookup).
final_df = pd.read_json('data/film_metadata.json', lines=True)

# Annoy index over 768-dimensional vectors compared with the dot-product metric.
# NOTE(review): VectorSearch.__init__ builds an equivalent index from the same file;
# this module-level `annoy_index` is not referenced by the other visible cells — confirm it is still needed.
annoy_index = AnnoyIndex(768, 'dot')
for index, row in final_df.iterrows():
    # The film id is used as the Annoy item id, so neighbour lookups return film ids.
    annoy_index.add_item(row['id'], row['embedding'])
# 10 is the number of trees passed to Annoy's build().
annoy_index.build(10)

True

In [14]:
import requests
import os
from dotenv import load_dotenv
import json

load_dotenv()

class Larva:
    """Minimal HTTP client for the Larva play-history endpoint.

    Configuration is read from the environment on every call:
    ``LARVA_URL`` (default ``http://localhost:6000``) and
    ``LARVA_APPLICATION_KEY`` (sent as the ``key`` request header).
    """

    # Seconds to wait for Larva. Without an explicit timeout `requests` can block forever.
    REQUEST_TIMEOUT_SECONDS = 10
    # Only this many leading history entries are returned to the caller.
    MAX_HISTORY_FILMS = 5

    @staticmethod
    def get_play_history_film(user_id):
        """Return film ids from the play history of ``user_id``.

        Args:
            user_id: Value sent as the ``user_id`` query parameter.

        Returns:
            list: The ``film_id`` of each entry in the response's ``records``
            list, truncated to the first ``MAX_HISTORY_FILMS`` entries.

        Raises:
            requests.HTTPError: Larva answered with a 4xx/5xx status.
            requests.Timeout: Larva did not answer within the timeout.
            KeyError: The JSON body has no ``records`` key or an entry has no ``film_id``.
        """
        api_key = os.environ.get("LARVA_APPLICATION_KEY", "")
        larva_url = os.environ.get("LARVA_URL", "http://localhost:6000")
        larva_response = requests.get(
            f"{larva_url}/vidio/user-play-start-history-films",
            params={'user_id': user_id},
            headers={'key': api_key},
            timeout=Larva.REQUEST_TIMEOUT_SECONDS,
        )
        # Fail with the real HTTP error instead of an opaque JSON/KeyError further down.
        larva_response.raise_for_status()
        records = json.loads(larva_response.content)['records']
        film_ids = [play_history['film_id'] for play_history in records]
        return film_ids[:Larva.MAX_HISTORY_FILMS]

# please give the interface here:
def get_grounding(response, history_film_ids):
    """Build the grounding text for the recommendation prompt.

    Module-level convenience wrapper that delegates to the shared
    ``vector_search`` instance, so the grounding logic lives in exactly one
    place (``VectorSearch.get_grounding``). ``vector_search`` is resolved when
    this function is called, so it only needs to exist by then.

    Args:
        response: Search query text; an empty value means "fall back to the
            user's play history".
        history_film_ids: Film ids from the user's play history.

    Returns:
        str: The text returned by ``vector_search.get_grounding``.
    """
    return vector_search.get_grounding(response, history_film_ids)

class Embedding:
    """Adapter that turns one piece of text into its embedding vector.

    Args:
        model: Object exposing ``get_embeddings(list_of_texts)``; each returned
            element must carry its vector in a ``.values`` attribute.
    """

    def __init__(self, model) -> None:
        self.model = model

    def embedding_text(self, text):
        """Embed ``text`` and return the resulting vector.

        Args:
            text: The text to embed; it is sent to the model as a one-element list.

        Returns:
            The ``.values`` of the embedding returned for ``text``.

        Raises:
            ValueError: The model returned no embeddings.
        """
        embeddings = self.model.get_embeddings([text])
        if not embeddings:
            # Previously this path fell through to an unbound local variable.
            raise ValueError("embedding model returned no embeddings for the given text")
        # Exactly one text was submitted, so its embedding is the first result.
        return embeddings[0].values
    

# NOTE(review): the setup cell already binds `embedding_model` to this same pretrained
# model name; loading it again here looks redundant — confirm before removing.
embedding_model = TextEmbeddingModel.from_pretrained("textembedding-gecko-multilingual")
# Shared text-to-vector adapter; handed to VectorSearch below.
embedding = Embedding(embedding_model)

class VectorSearch:
    """Nearest-neighbour search over the film catalogue, plus text lookups.

    The catalogue is a JSON Lines file whose records provide at least ``id``,
    ``embedding`` and ``search_text``. Film ids are used as Annoy item ids, so
    ``search`` returns film ids directly.

    Args:
        embedding: Object with an ``embedding_text(text)`` method returning the
            query vector (an ``Embedding`` instance in this notebook).
        metadata_path: Path of the JSON Lines catalogue file.
        dimensions: Length of the stored embedding vectors.
        n_trees: Number of trees passed to Annoy's ``build``.
    """

    def __init__(self, embedding: "Embedding", metadata_path='./data/film_metadata.json',
                 dimensions=768, n_trees=10) -> None:
        self.embedding = embedding
        self.final_df = pd.read_json(metadata_path, lines=True)
        self.annoy_index = AnnoyIndex(dimensions, 'dot')
        for _, row in self.final_df.iterrows():
            self.annoy_index.add_item(row['id'], row['embedding'])
        self.annoy_index.build(n_trees)

    def search(self, query, limit=10):
        """Return the ids of up to ``limit`` films nearest to ``query``.

        Args:
            query: Free text; embedded with ``self.embedding`` before the lookup.
            limit: Maximum number of neighbours to return.

        Returns:
            list: Annoy item ids (film ids) as returned by ``get_nns_by_vector``.
        """
        query_vector = self.embedding.embedding_text(query)
        return self.annoy_index.get_nns_by_vector(query_vector, limit, search_k=-1)

    def _rows_for(self, film_ids):
        """Return the catalogue rows whose ``id`` is in ``film_ids`` (catalogue order)."""
        return self.final_df[self.final_df['id'].isin(film_ids)]

    def lookup_film(self, film_ids):
        """Return the ``search_text`` of the given films, separated by blank lines.

        Rows come back in catalogue order, not in the order of ``film_ids``.
        Unknown ids are ignored; the result is ``""`` when nothing matches.
        """
        return "\n\n".join(self._rows_for(film_ids)['search_text'].values.tolist())

    def lookup_film_complete_text(self, film_ids):
        """Like ``lookup_film`` but each entry is prefixed with an ``ID: <id>`` line.

        Rows come back in catalogue order, not in the order of ``film_ids``.
        """
        rows = self._rows_for(film_ids)[['id', 'search_text']].values.tolist()
        return "\n\n".join(
            f"ID: {str(film_id)}\n{search_text}" for film_id, search_text in rows
        )

    def get_grounding(self, response, history_film_ids):
        """Build the candidate-film text used to ground the recommendation prompt.

        Args:
            response: Search query text (the LLM's summary of the user query).
                Empty or whitespace-only text means there is nothing to search
                for, and the play history is used instead.
            history_film_ids: Film ids from the user's play history.

        Returns:
            str: ``lookup_film_complete_text`` output for the neighbours of the
            query, or for the neighbours of each known history film when there
            is no query. ``""`` when there is neither a query nor any history.
        """
        # LLM answers that are meant to be "empty" often still carry whitespace.
        query = response.strip() if isinstance(response, str) else response
        if len(query) != 0:
            return self.lookup_film_complete_text(self.search(query))

        if len(history_film_ids) == 0:
            return ""

        film_ids = []
        for film_id in history_film_ids:
            # Use each watched film's own text as the query; skip ids missing from the catalogue.
            film_search_query = self.lookup_film([film_id])
            if film_search_query != "":
                film_ids += self.search(film_search_query)
        return self.lookup_film_complete_text(set(film_ids))

# Shared search service used by the cells below. Constructing it reads the catalogue
# file and builds the Annoy index (see VectorSearch.__init__).
vector_search = VectorSearch(embedding)

In [15]:
# Smoke test: show the "ID: ..." + search_text block produced for one film id.
result = vector_search.lookup_film_complete_text([9372])
print(result)

ID: 9372
title: ratu adil
description: Lasja mendapati suami dan ayahnya terlibat dalam konflik bisnis mafia. Ia bertekad melindungi keluarganya meski harus mengangkat kembali pistolnya.
group: series > vidio original
genres: crime, action, crime, drama
directors: ginanti rona, tommy dewo
actors: abdurrahman arif, andri mashadi, budi ros, dian sastrowardoyo, donny damara, hana malasan, ira wibowo, khiva iskak, muhammad khan, nino fernandez
country: indonesia
release year: 2024
age rating: 18 or more
popularity: trending


In [23]:
from langchain_core.pydantic_v1 import BaseModel, Field
from typing import List
from langchain_core.output_parsers import JsonOutputParser


class Recommendation(BaseModel):
    """One recommended title as the LLM is asked to return it.

    parse_final_response reads the ``id``, ``title`` and ``explanation`` keys of
    each item. The Field descriptions are presumably carried into the JSON schema
    that json_parser.get_format_instructions() injects into the prompt — confirm.
    """
    id: int = Field(description="ID of the content")
    title: str = Field(description="title of the content, use title case for this")
    explanation: str = Field(description="why you recommend this content")

class RecommendationList(BaseModel):
    """Top-level answer shape requested from the LLM: ``{"items": [...]}``.

    The list is exposed under an explicit ``items`` key (rather than as a bare
    root list) so that the schema in the prompt's format instructions matches
    what ``parse_final_response`` reads: ``final_response['items']``.
    """
    items: List[Recommendation] = Field(description="list of recommended contents")

def parse_final_response(final_response):
    """Render parsed recommendations as a Markdown bullet list.

    Args:
        final_response: Mapping with an ``items`` list; every item provides
            ``id``, ``title`` and ``explanation``.

    Returns:
        str: One bullet per item (title, explanation, and the poster image
        linking to the content page), joined by two blank lines. Items whose
        ``id`` is not present in the module-level ``final_df`` catalogue are
        left out.
    """
    rendered_items = []
    for item in final_response['items']:
        matches = final_df[final_df['id'] == item['id']][['image_url', 'content_url']].values
        if len(matches) == 0:
            # The id comes from LLM output and may not exist in the catalogue;
            # skip it instead of failing the whole response with an IndexError.
            continue
        image, link = matches[0]
        rendered_items.append(f"* Judul: **{item['title']}**\n\n  Kenapa kamu suka: {item['explanation']}\n\n  [![{item['title']}]({image})]({link})")
    return "\n\n\n".join(rendered_items)

def need_recommendation(text):
    """Interpret an LLM yes/no answer as a boolean.

    Surrounding whitespace, quotes, Markdown emphasis markers and sentence
    punctuation are ignored, so answers such as ``"True\\n"``, ``" yes"`` or
    ``"True."`` count as affirmative. The comparison is case-insensitive.

    Args:
        text: Raw answer text from the model.

    Returns:
        bool: True only when the cleaned answer is one of the accepted
        affirmative words; anything else (including ``""``) is False.
    """
    affirmative_answers = ('true', '1', 't', 'y', 'yes', 'yeah', 'yup', 'certainly', 'uh-huh')
    # Characters that models commonly wrap around a one-word answer.
    ignored_edge_chars = " \t\r\n\"'`*.!"
    return text.strip(ignored_edge_chars).lower() in affirmative_answers

# Plain-string parser appended to the chains whose answer is consumed as text.
output_parser = StrOutputParser()
# In the visible cells this parser is only used for get_format_instructions();
# it is not attached to chain_recommendation (see the commented-out pipe below).
json_parser = JsonOutputParser(pydantic_object=RecommendationList)

# Step 1 prompt: ask the model for a True/False answer on whether the query wants recommendations.
template_need_recommendation = """You are Chat Recommendation Engine. Your task is to determine if user prompt need recommendation of movies or not. Answer False if the prompt not related in your task for recommending movies
User prompt: {user_query}
You need to answer in True or False only."""
prompt_need_recommendation = ChatPromptTemplate.from_template(template_need_recommendation)

# Step 2 prompt: ask the model to condense the query into search text
# (the prompt allows an empty answer when there is nothing to look up).
template_need_vector_search = """You are Chat Recommendation Engine, your job is to decide if we need to summarize User Query and look up to Vector Search or not.
User Query: {user_query}
Summarize the User Query as context to look up to Vector Search. If there is no context need to look up, you can answer with empty."""
prompt_need_vector_search = ChatPromptTemplate.from_template(template_need_vector_search)

# Step 3 prompt: produce the recommendations from the query, the vector-search text
# and the user's history text. {format_instructions} is filled in via partial_variables.
template_recommendation = """You are content expert from OTT company.
Your task is to give recommendation based on User Query, Movie Data retrieved from Vector Search and User History.
The answer in **Bahasa Indonesia**, give **5 (five)** recommendation, re-rank the recommendation based on User Query and User History relevancy.
{format_instructions}
Always use title case for title field.
Do not recommend content from User History unless it really relevant for them based on their query.
Give the explanation for each content why it relevant for the user based on the User Query and User History. The explanation should show only if it is relevant to be shown to user and summarize it based on User Query and User History.

Here is the context.
User Query: {user_query}

Vector Search Result:
{vector_search_result}

User History:
{user_history}"""
prompt_recommendation = PromptTemplate(
    template=template_recommendation,
    input_variables=["user_query", "vector_search_result", "user_history"],
    partial_variables={"format_instructions": json_parser.get_format_instructions()},
)
# Canned replies (in Indonesian) for queries classified as not asking for a recommendation.
# Only the "no watch history" message is returned by ask_recommendation in the visible code;
# the "has watch history" one appears only in a commented-out line there.
unclear_prompt_no_watch_history = """Hmm, saya masih belajar menjawab pertanyaan seperti itu. Tapi soal rekomendasi film dan series saya jagonya!\n\nCoba tanya 'film komedi romantis yang lucu' atau 'film thriller yang menegangkan'"""
unclear_prompt_has_watch_history = """Wah, pertanyaan bagus! Namun saat ini, fokus saya membantu kamu menemukan film dan series terbaik.\n\nBerdasarkan film & series yang kamu tonton sebelumnya, sepertinya kamu menyukai film-film berikut ini."""

# Classification chain: prompt -> LLM -> text -> bool (via need_recommendation).
chain_need_recommendation = prompt_need_recommendation | model | output_parser | RunnableLambda(need_recommendation)
# Query-summarisation chain: prompt -> LLM -> text.
chain_need_vector_search = prompt_need_vector_search | model | output_parser
# Recommendation chain: JSON parsing is disabled here, so invoke() yields the model's
# unparsed output rather than a dict.
chain_recommendation = prompt_recommendation | model #| json_parser

def ask_recommendation(user_query, user_id):
    """Answer ``user_query`` with film recommendations for ``user_id``.

    Flow:
      1. Classify whether the query asks for a recommendation.
      2. If it does, summarise it into a search query and fetch the play history.
      3. If it does not, fetch the play history and recommend from it alone;
         with no history, return the canned "unclear prompt" message.

    Args:
        user_query: The user's message.
        user_id: Identifier used to fetch the play history from Larva.

    Returns:
        The output of ``chain_recommendation`` (not post-processed by
        ``parse_final_response``), or ``unclear_prompt_no_watch_history``.
    """
    wants_recommendation = chain_need_recommendation.invoke({"user_query": user_query})

    if wants_recommendation:
        grounding_query = chain_need_vector_search.invoke({"user_query": user_query})
        watched_film_ids = Larva.get_play_history_film(user_id)
    else:
        # No usable query: ground purely on what the user watched before.
        grounding_query = ""
        watched_film_ids = Larva.get_play_history_film(user_id)
        if not len(watched_film_ids) > 0:
            return unclear_prompt_no_watch_history

    watched_films_text = vector_search.lookup_film(watched_film_ids)
    grounding_text = vector_search.get_grounding(grounding_query, watched_film_ids)
    return chain_recommendation.invoke({
        "user_query": user_query,
        "vector_search_result": grounding_text,
        "user_history": watched_films_text,
    })

In [30]:
# Example request: an informal Indonesian query for one user id.
user_query = "gw lagi bosen nih cocoknya nonton film apa ya?"
user_id = 54936340

# Run the recommendation pipeline end to end and print whatever it returns.
final_response = ask_recommendation(user_query, user_id)
print(final_response)

{"items": [{"id": 5902, "title": "Cosmopolitan - Exclusive Interview", "explanation": "Ini adalah konten hiburan yang ringan dan cocok untuk mengisi waktu luang Anda."}, {"id": 5815, "title": "Keluyuran - Ngeluyurin Bandung", "explanation": "Konten ini berisi rekomendasi tempat-tempat seru di Bandung, cocok untuk Anda yang sedang mencari hiburan di sekitar kota."}, {"id": 6698, "title": "Kpop on Class - Koc Clip", "explanation": "Konten ini berisi cover video klip Korea, cocok untuk Anda yang menyukai musik dan budaya Korea."}, {"id": 5930, "title": "Love O2O", "explanation": "Ini adalah drama Mandarin yang ringan dan menghibur, cocok untuk Anda yang suka cerita romantis."}, {"id": 9590, "title": "Wanted", "explanation": "Film action yang seru dan menegangkan, cocok untuk Anda yang mencari hiburan yang memacu adrenalin."}]}


In [32]:
# Prompt for free-form questions; it uses no vector search or play history, only the query.
template_general = """Given a user's query on any topic, use your extensive database and understanding of various subjects to provide a clear, accurate, and helpful answer.
Prioritize directness and relevance in your response, ensuring it is informative and accessible to the user. If the question falls outside your expertise, offer guidance on where or how they might find the desired information.
Always communicate in a friendly and professional tone, fostering a positive user experience. Please answer according to user's query.

User Query: {user_query}
Answer: """
prompt_general = ChatPromptTemplate.from_template(template_general)
# General Q&A chain: prompt -> LLM -> text.
chain_general = prompt_general | model | output_parser

def ask_general_question(user_query, user_id=0):
    """Answer a free-form question through ``chain_general``.

    Args:
        user_query: The user's question.
        user_id: Accepted but currently unused by this function.

    Returns:
        The text produced by ``chain_general`` for ``user_query``.
    """
    return chain_general.invoke({"user_query": user_query})

In [33]:
# Quick check of the general Q&A chain; the bare expression displays the returned text.
ask_general_question("say potato")

'Potato'

* Judul: **No Mercy**

  Kenapa kamu suka: Film ini memiliki genre action dan thriller yang sesuai dengan keinginan Anda untuk menonton film yang seru dan menegangkan.

  [![No Mercy](https://thumbor.prod.vidiocdn.com/tHAsMdvfP4PVd_kL7z1YlnW2JPE=/223x332/filters:quality(75)/vidio-web-prod-film/uploads/film/image_portrait/7332/no-mercy-eec4d8.jpg)](https://www.vidio.com/premier/7332)


* Judul: **She's on Duty**

  Kenapa kamu suka: Film ini bergenre action dan komedi yang dapat menghibur Anda dengan adegan-adegan aksi yang seru dan humor yang menggelitik.

  [![She's on Duty](https://thumbor.prod.vidiocdn.com/qhUMGd9EV02eJP8Cwap7EkTuW44=/223x332/filters:quality(75)/vidio-web-prod-film/uploads/film/image_portrait/7334/she-s-on-duty-93f542.png)](https://www.vidio.com/premier/7334)


* Judul: **The Suspect**

  Kenapa kamu suka: Film ini bergenre action dan thriller yang akan membuat Anda tegang dan penasaran dengan alur ceritanya yang menegangkan.

  [![The Suspect](https://thumbor.prod.

In [191]:
from IPython.display import display, Markdown

# Render the response text as Markdown (bare last expression -> rich display).
# NOTE(review): presumably expects the Markdown produced by parse_final_response — confirm.
Markdown(final_response)

* Judul: **Serial Indonesia 2024**

  Kenapa kamu suka: Kamu baru aja nonton ini, jadi kali ini kita rekomendasikan konten lain ya

  [![Serial Indonesia 2024](https://thumbor.prod.vidiocdn.com/8U8zySHN9koEGPXDp_84z_vAgrE=/223x332/filters:quality(75)/vidio-web-prod-film/uploads/film/image_portrait/9642/serial-indonesia-2024-a48abe.jpg)](https://www.vidio.com/premier/9642)


* Judul: **Sosmed**

  Kenapa kamu suka: Ini series terbaru dari Indonesia, temanya anak muda dan media sosial. Pas banget kayaknya sama yang lagi kamu cari

  [![Sosmed](https://thumbor.prod.vidiocdn.com/0zk1vw8DyFKRoX5xqYMz9O23oPE=/223x332/filters:quality(75)/vidio-web-prod-film/uploads/film/image_portrait/2567/sosmed-76e90a.jpg)](https://www.vidio.com/premier/2567)


* Judul: **Hotel Del Luna**

  Kenapa kamu suka: Hotel Del Luna ini series Korea yang tayang di Vidio. Selain ceritanya yang bagus, aktor dan aktrisnya juga keren-keren, lho

  [![Hotel Del Luna](https://thumbor.prod.vidiocdn.com/oIrFVaaUAJDdTEt0hWajK03st70=/223x332/filters:quality(75)/vidio-web-prod-film/uploads/film/image_portrait/1935/hotel-del-luna-964581.jpg)](https://www.vidio.com/premier/1935)


* Judul: **Decision to Leave**

  Kenapa kamu suka: Kalau kamu suka film action dan misteri, Decision to Leave ini cocok banget. Film ini juga lagi trending di Vidio

  [![Decision to Leave](https://thumbor.prod.vidiocdn.com/nyn2kBAwsM_vOVHT8zfKCPJlNG0=/223x332/filters:quality(75)/vidio-web-prod-film/uploads/film/image_portrait/7379/decision-to-leave-62b684.jpg)](https://www.vidio.com/premier/7379)


* Judul: **The Accidental Detective**

  Kenapa kamu suka: The Accidental Detective ini film Korea yang juga ada di Vidio. Film ini kocak dan seru, cocok buat yang lagi butuh hiburan

  [![The Accidental Detective](https://thumbor.prod.vidiocdn.com/-8zd8vxV__oH8D39vDXsbNmF1Kw=/223x332/filters:quality(75)/vidio-web-prod-film/uploads/film/image_portrait/1560/the-accidental-detective-43fea1.jpg)](https://www.vidio.com/premier/1560)

In [55]:
# Show the query next to the response it produced.
print(f"user prompt: {user_query}\n")
print(final_response)

user prompt: hari senin cocok nonton film apa ya?

1. **Descendants of the Sun**
- Kisah cinta antara ahli bedah dan kapten pasukan khusus.
- Genre: Romance, action, comedy, drama.
- Relevan karena sama-sama bertemakan kisah cinta.

2. **The Moon**
- Seorang astronot yang terdampar di luar angkasa mencoba untuk kembali ke Bumi.
- Genre: Adventure, action, adventure, drama, sci-fi.
- Relevan karena sama-sama bergenre adventure.

3. **The 100**
- Sekelompok remaja dikirim ke Bumi untuk menguji kelayakan planet ini setelah perang nuklir.
- Genre: Sci-fi, adventure, drama, mystery, sci-fi.
- Relevan karena sama-sama bertemakan survival.

4. **Bad Teacher**
- Seorang guru sekolah yang malas dan tidak kompeten berusaha untuk mendapatkan uang untuk implan payudara.
- Genre: Romance, comedy, romance.
- Relevan karena sama-sama bertemakan kehidupan sekolah.

5. **Resep Masak Dapur Abu Tosca**
- Kumpulan resep masakan dari dapur Abu Tosca.
- Genre: Cooking, cooking, hobbies.
- Relevan karena sam

In [56]:
# Condense the previous turn (query, full prompt, answer) into context for a follow-up question.
prompt_summarize_previous_chat = ChatPromptTemplate.from_template("""Summarize previous chat and your answer to create context for next user query
Previous User Query: {user_query}
Prompt: {last_prompt}
Your Answer: {final_response}""")
chain_summarize_previous_chat = prompt_summarize_previous_chat | model | output_parser
# NOTE(review): `vector_search_result` and `user_history` are only assigned inside functions
# in the visible cells (as locals), never at module level. This call therefore appears to
# rely on kernel state from cells not shown here — confirm it survives Restart & Run All.
previous_response = chain_summarize_previous_chat.invoke({
    "user_query": user_query,
    "last_prompt": prompt_recommendation.format(user_query=user_query, vector_search_result=vector_search_result, user_history=user_history), 
    "final_response": final_response
    })
print(previous_response)

**Ringkasan Chat Sebelumnya:**

Kamu sedang mencari film yang cocok untuk ditonton pada hari Senin. Beberapa rekomendasi telah diberikan, mempertimbangkan riwayat tontonan kamu.

**Rekomendasi:**

1. **Descendants of the Sun** (Serial Korea)
2. **The Moon** (Film Korea)
3. **The 100** (Serial Barat)
4. **Bad Teacher** (Film Barat)
5. **Resep Masak Dapur Abu Tosca** (Konten Kuliner)

**Penjelasan Relevansi:**

**Descendants of the Sun:** Bertemakan kisah cinta, seperti rekomendasi sebelumnya.

**The Moon:** Bergenre petualangan, seperti rekomendasi sebelumnya.

**The 100:** Bertemakan bertahan hidup, seperti rekomendasi sebelumnya.

**Bad Teacher:** Bertemakan kehidupan sekolah, seperti rekomendasi sebelumnya.

**Resep Masak Dapur Abu Tosca:** Bertemakan memasak, meski dengan konteks berbeda.


In [57]:
# Follow-up turn: answer a question about the earlier recommendations, using the
# summary in `previous_response` as the only conversation context.
subsequent_query = """Mengapa kamu memberikan rekomendasi Descendants of the Sun?"""
prompt_user_follow_up = ChatPromptTemplate.from_template("""You are recommendation engine, user ask question again about your answer. Here is user question: {subsequent_query}
Please answer user question, you can re-rank or explain based on user question.
Previous Prompt:
{response}""")
chain_user_follow_up = prompt_user_follow_up | model | output_parser
# The template variable named `response` receives the summary text, not the original prompt.
response = chain_user_follow_up.invoke({"subsequent_query": subsequent_query, "response": previous_response})
print(response)

Saya merekomendasikan "Descendants of the Sun" karena memiliki elemen-elemen yang mirip dengan preferensi menonton Anda sebelumnya, yaitu:

* **Kisah Cinta:** Serial ini berfokus pada kisah cinta yang berkembang antara seorang tentara dan seorang dokter.
* **Adegan Aksi:** Sebagai serial bertema militer, "Descendants of the Sun" memiliki banyak adegan aksi yang menegangkan.
* **Drama yang Menyentuh:** Serial ini juga mengeksplorasi tema-tema seperti persahabatan, pengorbanan, dan rasa kehilangan, yang dapat membangkitkan emosi Anda.
