In [1]:

import vertexai
from vertexai.language_models import TextEmbeddingModel
from annoy import AnnoyIndex
import pandas as pd

import requests
import os
from dotenv import load_dotenv
import json
from IPython.display import display, Markdown

from langchain_google_vertexai import VertexAI
from langchain_core.prompts import PromptTemplate
from langchain_core.prompts import ChatPromptTemplate, PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnableBranch

from tqdm._tqdm_notebook import tqdm_notebook
tqdm_notebook.pandas()
load_dotenv()

vertexai.init(project="vidio-quiz-prod", location="asia-southeast1")
embedding_model = TextEmbeddingModel.from_pretrained("textembedding-gecko-multilingual")
model = VertexAI(model_name="gemini-pro")

Please use `tqdm.notebook.*` instead of `tqdm._tqdm_notebook.*`
  from tqdm._tqdm_notebook import tqdm_notebook


In [16]:
history_df = pd.read_excel('data/route.xlsx')
test_df = history_df.copy()

history_df['actual_route'] = history_df.apply(lambda x: x["history_route"], axis=1)
history_df['is_tp'] = history_df.apply(lambda x: 1 if x["actual_route"] == x["expected_route"] else 0, axis=1)
history_df = history_df.drop('history_route', axis=1)

test_df = test_df.drop('history_route', axis=1)

In [31]:
prompt_1 = """You are chatbot for Vidio OTT platform
Given the user question below, classify it as either being about
`recommendation` if question related to user want film recommendation
`vidio` if question related to vidio
`other` otherwise

Do not respond with more than one word.

<user_query>
{user_query}
</user_query>

Classification:
"""

prompt_2 = """You are chatbot for Vidio OTT platform
Given the user question below, classify it as either being about
`recommendation` if question related to user want film recommendation, not games or acade
`customer service` if question related to user problem and trouble
`vidio` if question related about vidio product, feature, faq, film, games, or arcade
`other` otherwise

Do not respond with more than one word.

Input:
<user_query>
{user_query}
</user_query>
Classification:
"""

prompt_3 = """You are chatbot for Vidio OTT platform
Given the user question below, classify it as either being about
`recommendation` if question related to user want film recommendation, not games or acade
`customer service` if question related to user problem and trouble
`vidio` if question related about vidio product, feature, faq, film, games, or arcade
`other` otherwise

Do not respond with more than one word.


Input: bagaimana topup energy
Classification: vidio

Input: berapa harga paket diamond
Classification: vidio

Input: kenapa aliza dijodohkan
Classification: vidio

Input: Tidak bisa menonton epl padahal sudah bayar
Classification: customer service

Input: pembayaran gagal
Classification: customer service

Input: Lagi patah hati nih
Classification: recommendation

Input: film yang dibintangi wulan guritno
Classification: recommendation

Input: film terbaru di vidio
Classification: recommendation

Input: saya mau nonton santri pilihan bunda
Classification: recommendation

Input: 1 + 1
Classification: other

Input: apa ibukota indonesia
Classification: other

Input:
<user_query>
{user_query}
</user_query>
Classification:
"""

In [52]:

user_query = "bagaimana mendaftar paket mahasiswa"

route_chain = (
    PromptTemplate.from_template(
        prompt_3
    ) | model | StrOutputParser() 
) 

resp = route_chain.invoke({"user_query": user_query}) 

def get_route(query):
    return route_chain.invoke({"user_query": query}) 

resp

'vidio'

In [53]:
test_df['actual_route'] = test_df.progress_apply(lambda x: get_route(x["query"]), axis=1)
test_df['is_tp'] = test_df.apply(lambda x: 1 if x["actual_route"] == x["expected_route"] else 0, axis=1)
test_df

  0%|          | 0/73 [00:00<?, ?it/s]

Unnamed: 0,query,expected_route,actual_route,is_tp
0,berapa harga paket mahasiswa,vidio,vidio,1
1,berapa harga paket mahasiswa,vidio,vidio,1
2,bagaimana topup energy,vidio,vidio,1
3,tontonan patah patah,customer service,vidio,0
4,bayar vidio bisa pakai apa saja,vidio,vidio,1
...,...,...,...,...
68,saya ingin menonton film romance tapi ada acti...,recommendation,recommendation,1
69,apa anime terpopuler sekarang,recommendation,recommendation,1
70,lagi mood sedih\n,recommendation,recommendation,1
71,mau nonton film sedih,recommendation,recommendation,1


In [57]:
# prompt_3
test_df[test_df['is_tp'] == 0]

Unnamed: 0,query,expected_route,actual_route,is_tp
3,tontonan patah patah,customer service,vidio,0
23,siapa Aditya Haikal,other,vidio,0
30,episode berapa saat ayah Lasja tewas ditembak ...,vidio,recommendation,0
32,film cinta pertama ayah bisa di tonton segala ...,vidio,recommendation,0
34,tears of the sun menceritakan tentang apa?,vidio,recommendation,0
35,Is John Wick available in Vidio?,recommendation,vidio,0
56,apa yang baru di vidio?,recommendation,vidio,0
57,apa tontonan di Vidio?,recommendation,vidio,0


In [56]:
# prompt_3
test_df[['is_tp']].describe()

Unnamed: 0,is_tp
count,73.0
mean,0.890411
std,0.314539
min,0.0
25%,1.0
50%,1.0
75%,1.0
max,1.0


In [48]:
# prompt_2
test_df[test_df['is_tp'] == 0]

Unnamed: 0,query,expected_route,actual_route,is_tp
2,bagaimana topup energy,vidio,other,0
5,berapa harga pake mahasiswa,vidio,customer service,0
23,siapa Aditya Haikal,other,vidio,0
32,film cinta pertama ayah bisa di tonton segala ...,vidio,recommendation,0
35,Is John Wick available in Vidio?,recommendation,vidio,0
37,Ada film dari Martin Scorsese gak di Vidio?,recommendation,vidio,0
40,Saya mau nonton ratu adil,recommendation,vidio,0
41,Saya mau nonton film dian sastro,recommendation,vidio,0
44,film yang dibintangi dian sastro,recommendation,vidio,0
56,apa yang baru di vidio?,recommendation,vidio,0


In [50]:
# prompt_2
test_df[['is_tp']].describe()

Unnamed: 0,is_tp
count,73.0
mean,0.808219
std,0.396426
min,0.0
25%,1.0
50%,1.0
75%,1.0
max,1.0


In [47]:
sample_df = test_df[test_df['is_tp'] == 0].copy()
test_df[test_df['is_tp'] == 0]


Unnamed: 0,query,history_route,expected_route,actual_route,is_tp
2,bagaimana topup energy,customer service,customer service,other,0
10,apa itu vidio arcade\t,other,customer service,other,0
13,apa saja paket yang bisa di tonton di semua pe...,recommendation,customer service,other,0
19,1 + 1,other,other,Other,0
31,games yang paling seru di vidio?,recommendation,other,recommendation,0
32,tears of the sun menceritakan tentang apa?,recommendation,other,recommendation,0
33,Is John Wick available in Vidio?,other,recommendation,other,0
34,Lagi patah hati nih,other,recommendation,other,0
45,lagi mood sedih2,recommendation,recommendation,other,0
56,"I want to watch movie ""How to Catch a Dragon""....",recommendation,recommendation,customer service,0


In [48]:


user_query = "bagaimana mendaftar paket mahasiswa"

route_chain = (
    PromptTemplate.from_template(
        """
You are chatbot for Vidio OTT platform        
Given the user question below, classify it as either being about `customer service`, `recommendation`, or `other`.
Only classify to recommendation if user ask for reccomendation

Do not respond with more than one word.

<user_query>
{user_query}
</user_query>

Classification:
"""
    ) | model | StrOutputParser() 
) 

resp = route_chain.invoke({"user_query": user_query}) 

def get_route(query):
    return route_chain.invoke({"user_query": query}) 

sample_df['actual_route'] = sample_df.progress_apply(lambda x: get_route(x["query"]), axis=1)
sample_df['is_tp'] = sample_df.apply(lambda x: 1 if x["actual_route"] == x["expected_route"] else 0, axis=1)

sample_df

  0%|          | 0/11 [00:00<?, ?it/s]

Unnamed: 0,query,history_route,expected_route,actual_route,is_tp
2,bagaimana topup energy,customer service,customer service,customer service,1
10,apa itu vidio arcade\t,other,customer service,other,0
13,apa saja paket yang bisa di tonton di semua pe...,recommendation,customer service,recommendation,0
19,1 + 1,other,other,other,1
31,games yang paling seru di vidio?,recommendation,other,recommendation,0
32,tears of the sun menceritakan tentang apa?,recommendation,other,other,1
33,Is John Wick available in Vidio?,other,recommendation,other,0
34,Lagi patah hati nih,other,recommendation,other,0
45,lagi mood sedih2,recommendation,recommendation,recommendation,1
56,"I want to watch movie ""How to Catch a Dragon""....",recommendation,recommendation,customer service,0


In [49]:
test_df_2 = test_df.copy()
test_df_2['actual_route'] = test_df_2.progress_apply(lambda x: get_route(x["query"]), axis=1)
test_df_2['is_tp'] = test_df_2.apply(lambda x: 1 if x["actual_route"] == x["expected_route"] else 0, axis=1)
test_df_2[['is_tp']].describe()

  0%|          | 0/70 [00:00<?, ?it/s]

Unnamed: 0,is_tp
count,70.0
mean,0.828571
std,0.379604
min,0.0
25%,1.0
50%,1.0
75%,1.0
max,1.0


In [50]:
test_df_2[test_df_2['is_tp'] == 0]

Unnamed: 0,query,history_route,expected_route,actual_route,is_tp
0,berapa harga paket mahasiswa,customer service,customer service,other,0
2,bagaimana topup energy,customer service,customer service,other,0
5,berapa harga pake mahasiswa,customer service,customer service,other,0
10,apa itu vidio arcade\t,other,customer service,other,0
11,bagaimana topup energy\t,other,customer service,other,0
12,berapa device yang bisa nonton sekaligus,other,customer service,other,0
13,apa saja paket yang bisa di tonton di semua pe...,recommendation,customer service,other,0
18,3* 38,other,other,Other,0
31,games yang paling seru di vidio?,recommendation,other,recommendation,0
33,Is John Wick available in Vidio?,other,recommendation,other,0


In [1]:


user_query = "bagaimana mendaftar paket mahasiswa"

route_chain = (
    PromptTemplate.from_template(
        """
You are chatbot for Vidio OTT platform. Your task is to answer to user question
Question can is related to vidio. Like about vidio product, and film.
Or question can be unrelated to vidio

Here are the useful information:
<information>
{information} 
</information>

User Question:
<user_query>
{user_query}
</user_query>

Classification:
"""
    ) | model | StrOutputParser() 
) 

resp = route_chain.invoke({"user_query": user_query}) 

def get_route(query):
    return route_chain.invoke({"user_query": query}) 

resp

NameError: name 'PromptTemplate' is not defined