In [20]:
import vertexai
from vertexai.language_models import TextEmbeddingModel
from annoy import AnnoyIndex
import pandas as pd

import requests
import os
from dotenv import load_dotenv
import json
from IPython.display import display, Markdown

from langchain_google_vertexai import VertexAI
from langchain_core.prompts import PromptTemplate
from langchain_core.prompts import ChatPromptTemplate, PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnableBranch

# Pull settings from a local .env file into the process environment.
load_dotenv()

# Vertex AI settings, named so they are easy to find and change.
GCP_PROJECT = "vidio-quiz-prod"
GCP_LOCATION = "asia-southeast1"
EMBEDDING_MODEL_NAME = "textembedding-gecko-multilingual"

vertexai.init(project=GCP_PROJECT, location=GCP_LOCATION)
embedding_model = TextEmbeddingModel.from_pretrained(EMBEDDING_MODEL_NAME)

In [4]:
# Vector length the index is created with; the stored embeddings are expected
# to have this many components (TODO confirm against the embedding model).
EMBEDDING_DIM = 768
# Number of trees handed to AnnoyIndex.build.
ANNOY_N_TREES = 10

# One JSON object per line; the code below reads the `id` and `embedding`
# columns, and VectorSearchFaq later reads `markdown`.
final_df = pd.read_json('./data/faq_embed.json', lines=True)

annoy_index = AnnoyIndex(EMBEDDING_DIM, 'dot')
# Walk the two needed columns directly instead of iterrows(), which builds a
# full Series per row and left an unused `index` variable behind.
for faq_id, faq_vector in zip(final_df['id'], final_df['embedding']):
    annoy_index.add_item(faq_id, faq_vector)
annoy_index.build(ANNOY_N_TREES)

True

In [21]:
class Larva:
    """Small client for the Larva play-history HTTP endpoint."""

    @staticmethod
    def get_play_history_film(user_id, timeout=10):
        """Return up to five film ids from a user's play-start history.

        The base URL is read from the ``LARVA_URL`` environment variable
        (default ``http://localhost:6000``) and the API key from
        ``LARVA_APPLICATION_KEY`` (default empty string).

        Args:
            user_id: Sent as the ``user_id`` query parameter.
            timeout: Seconds passed to ``requests.get`` so a stalled server
                cannot block the caller indefinitely.

        Returns:
            A list with the ``film_id`` of the first five entries of the
            response's ``records`` list (fewer if the list is shorter).

        Raises:
            requests.HTTPError: If the server answers with an error status.
            KeyError: If the payload has no ``records`` key or one of the
                first five records has no ``film_id``.
        """
        api_key = os.environ.get("LARVA_APPLICATION_KEY", "")
        larva_url = os.environ.get("LARVA_URL", "http://localhost:6000")
        larva_response = requests.get(
            f"{larva_url}/vidio/user-play-start-history-films",
            params={'user_id': user_id},
            headers={'key': api_key},
            timeout=timeout,
        )
        # Fail loudly on 4xx/5xx instead of trying to parse an error body.
        larva_response.raise_for_status()
        records = larva_response.json()['records']
        # Slice first so only the records that are actually returned are read.
        return [play_history['film_id'] for play_history in records[:5]]

class Embedding:
    """Wraps a text-embedding model and embeds one text at a time."""

    def __init__(self, model) -> None:
        # `model` must expose get_embeddings(list_of_texts); every returned
        # item is read through its `.values` attribute.
        self.model = model

    def embedding_text(self, text):
        """Return the embedding vector for a single text.

        Args:
            text: The text to embed; it is sent to the model as a one-item list.

        Returns:
            The ``.values`` of the last embedding returned by the model (with a
            single input text there is exactly one).

        Raises:
            ValueError: If the model returns no embeddings. The previous
                loop-based version failed with UnboundLocalError here.
        """
        embeddings = list(self.model.get_embeddings([text]))
        if not embeddings:
            raise ValueError("embedding model returned no embeddings")
        return embeddings[-1].values

# Reuse the `embedding_model` already loaded in the setup cell instead of
# loading the same pretrained model a second time.
embedding = Embedding(embedding_model)

class VectorSearchFaq:
    """Nearest-neighbour FAQ lookup: an Annoy index plus a DataFrame of FAQ markdown."""

    def __init__(self, embedding: "Embedding", faq_df=None, index=None) -> None:
        """Store the collaborators used by `search` and `lookup_md`.

        Args:
            embedding: Object whose ``embedding_text(text)`` returns a query vector.
            faq_df: DataFrame with ``id`` and ``markdown`` columns. Defaults to
                the notebook-level ``final_df``.
            index: Object exposing ``get_nns_by_vector``. Defaults to the
                notebook-level ``annoy_index``.
        """
        self.embedding = embedding

        # Only fall back to the notebook globals when nothing was injected, so
        # VectorSearchFaq(embedding) keeps working exactly as before.
        self.final_df = final_df if faq_df is None else faq_df
        self.annoy_index = annoy_index if index is None else index

    def search(self, query, limit=10):
        """Embed `query` and return the ids of up to `limit` nearest index items."""
        query_vector = self.embedding.embedding_text(query)
        return self.annoy_index.get_nns_by_vector(query_vector, limit, search_k=-1)

    def lookup_md(self, ids):
        """Return the ``markdown`` of the rows whose ``id`` is in `ids`.

        Results follow the order of `ids` (first occurrence wins), so the
        ranking produced by `search` is preserved. The previous ``isin``-only
        version returned rows in DataFrame order. Ids that are not in the
        DataFrame are skipped.
        """
        # First position of each requested id; also serves as the unique id set.
        rank = {}
        for position, doc_id in enumerate(ids):
            rank.setdefault(doc_id, position)

        frame = self.final_df
        matches = frame[frame['id'].isin(list(rank))]
        if matches.empty:
            return []

        # Stable sort keeps DataFrame order among rows that share an id.
        order = matches['id'].map(rank).to_numpy().argsort(kind='stable')
        return matches['markdown'].iloc[order].tolist()

# Notebook-wide FAQ searcher; the chain helpers further down refer to it by this name.
vector_search = VectorSearchFaq(embedding)

In [7]:
# ids = vector_search.search("berpa harga langganan")
# # results = vector_search.lookup_md([714399])
# results = vector_search.lookup_md(ids)
# tmp = ""
# for result in results:
#     tmp = tmp + "\n# ========="+ result
# Markdown(tmp)

In [23]:
# Shared LLM handle; every chain defined in the cells below pipes its prompt into it.
model = VertexAI(model_name="gemini-pro")

def tanya(user_query):
    """Answer `user_query` from the FAQ index and wrap the reply in `Markdown`.

    The question is run through `vector_search` to collect FAQ markdown, which
    is placed in the prompt as `{information}` next to the question itself.
    """
    # Collect the FAQ snippets for the question's nearest neighbours.
    faq_ids = vector_search.search(user_query)
    # Every snippet is preceded by a newline, so the context starts with "\n".
    information = "".join("\n" + snippet for snippet in vector_search.lookup_md(faq_ids))

    template = """
You are customer service for vidio OTT platform.
Your task is to answer to user question

Here are the useful information:
{information} 

User question: {user_query}
    """
    answer_chain = ChatPromptTemplate.from_template(template) | model | StrOutputParser()
    answer = answer_chain.invoke({"information": information, "user_query": user_query})
    return Markdown(answer)

In [9]:
# Example: FAQ question about registering for the student plan ("paket mahasiswa").
tanya("bagaimana mendaftar paket mahasiswa?")

Untuk membeli paket Vidio Platinum Mahasiswa, ikuti langkah berikut : 

1. Masuk atau daftar dengan email kampus
2. Pastikan sedang tidak memiliki paket aktif Platinum di Vidio
3. Verifikasi email-mu
4. Beli paketnya via [Paywall](https://www.vidio.com/plans?fpc=80) atau halaman [Platinum Mahasiswa](https://www.vidio.com/pages/234/paket-mahasiswa?layout=false&utm_source=product&utm_medium=faq_page&utm_campaign=platinum_mahasiswa_2023)
5. Langsung nonton di Vidio!

In [10]:
# Example where the recorded reply says the supplied information does not cover the question.
tanya("bagaimana cara main vidio arcade")

Informasi yang Anda berikan tidak menyertakan cara main Vidio Arcade, sehingga saya tidak dapat menjawab pertanyaan tersebut.

In [11]:
# Example: FAQ question about how to top up Energy.
tanya("bagaimana cara topup energy")

Berikut cara untuk melakukan pembelian/top-up Energy:

1. Pastikan kamu sudah mempunyai akun dan login di aplikasi Vidio
2. Klik icon Energy yang ada di menu Arcade
3. Setelah klik, kamu akan diarahkan ke halaman pembelian Energy
4. Kamu bisa langsung pilih jumlah Energy yang kamu inginkan dan lakukan pembayaran
5. Setelah itu, Energy akan langsung ditambahkan ke akun kamu dan siap kamu pakai untuk bermain games Vidio Arcade!

In [60]:

def cs_chain(user_query):
    """Answer a customer-service question from the FAQ index and return plain text.

    FAQ markdown retrieved through `vector_search` is injected into the prompt
    as `{information}`; the model's reply is returned as a string.
    """
    template = """
You are customer service for vidio OTT platform.
Your task is to answer to user question

Here are the useful information:
{information} 

User question: {user_query}
"""
    answer_chain = ChatPromptTemplate.from_template(template) | model | StrOutputParser()

    # Nearest-neighbour ids first, then their markdown bodies.
    snippets = vector_search.lookup_md(vector_search.search(user_query))
    # Each snippet is prefixed with a newline; an empty result gives "".
    information = "".join("\n" + snippet for snippet in snippets)

    payload = {"user_query": user_query, "information": information}
    return answer_chain.invoke(payload)

In [61]:

def reco_chain(user_query):
    """Build the film-recommendation chain (prompt | model | string parser).

    The chain is returned un-invoked. `user_query` is accepted for signature
    compatibility but is not used while building it; whoever invokes the chain
    supplies the ``user_query`` template variable.

    The prompt used to end with the cue ``Classification:`` (copied from the
    routing prompt), which steers the model towards emitting labels instead of
    a recommendation. The cue is now ``Recommendation:``.
    """
    chain = ( PromptTemplate.from_template(
"""
You are chatbot for Vidio OTT platform        
Recommend film from vidio base on user query

<user_query>
{user_query}
</user_query>

Recommendation:
"""
    ) | model | StrOutputParser() ) 

    return chain

In [62]:

# Classifier chain: asks the model for a one-word topic label for the query.
# The label list had a missing opening backtick before `recommendation`,
# which left the three options inconsistently quoted in the prompt.
route_chain = (
    PromptTemplate.from_template(
        """
You are chatbot for Vidio OTT platform        
Given the user question below, classify it as either being about `customer service`, `recommendation`, or `other`.

Do not respond with more than one word.

<user_query>
{user_query}
</user_query>

Classification:
"""
    ) | model | StrOutputParser() 
) 

In [63]:
def run(user_query):
    """Classify `user_query`, dispatch it to the matching chain, and return `Markdown`.

    `route_chain` produces a topic label. Queries labelled as customer service
    go to `cs_chain`; everything else (recommendation, other, or any unexpected
    label) goes to `reco_chain`.
    """
    def _topic(x):
        # The label is free-form LLM text, so normalise case and surrounding
        # whitespace before substring matching ("Customer Service\n" must match).
        return x["topic"].strip().lower()

    branch = RunnableBranch(
        (lambda x: "customer service" in _topic(x), lambda x: cs_chain(x["user_query"])),
        (lambda x: "recommendation" in _topic(x), lambda x: reco_chain(x["user_query"])),
        # Default: hand over the query string like the branches above, rather
        # than passing the bare function (which received the whole input dict).
        # reco_chain returns a chain; a callable branch target that returns a
        # Runnable is expected to have it invoked with the same input.
        lambda x: reco_chain(x["user_query"]),
    )
    full_chain = {"topic": route_chain, "user_query": lambda x: x["user_query"]} | branch | StrOutputParser()
    resp = full_chain.invoke({"user_query": user_query})

    return Markdown(resp)

In [64]:
# Routed example: the recorded reply below matches the FAQ answer for the student plan.
run("bagaimana mendaftar paket mahasiswa")

Untuk membeli paket Vidio Platinum Mahasiswa, ikuti langkah berikut :

1. Masuk atau daftar dengan email kampus
2. Pastikan sedang tidak memiliki paket aktif Platinum di Vidio
3. Verifikasi email-mu
4. Beli paketnya via [Paywall](https://www.vidio.com/plans?fpc=80) atau halaman [Platinum Mahasiswa](https://www.vidio.com/pages/234/paket-mahasiswa?layout=false&utm_source=product&utm_medium=faq_page&utm_campaign=platinum_mahasiswa_2023)
5. Langsung nonton di Vidio!

In [54]:
# Routed example: a request to watch an action film. The recorded reply below
# contains category/value labels rather than film titles.
run("saya mau nonton film action")

- Category: Genre
- Value: Action