In [1]:
import pandas as pd
from google.cloud import aiplatform
from tqdm._tqdm_notebook import tqdm_notebook

aiplatform.init(
    project='vidio-quiz-prod',
    location='asia-southeast1',
    staging_bucket='gs://genai_hackathon_2024',
)
model = aiplatform.Endpoint("7738653107357220864")
tqdm_notebook.pandas()

def embedding_text(model, text):
    prediction = model.predict(instances=[{
        "content": text,
        "task_type": "DEFAULT",
        "title": ""
    }])
    for embedding in prediction.predictions:
        vector = embedding
    return vector

Please use `tqdm.notebook.*` instead of `tqdm._tqdm_notebook.*`
  from tqdm._tqdm_notebook import tqdm_notebook


In [85]:
from langchain_google_vertexai import VertexAI
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import PromptTemplate

model = VertexAI(model_name="gemini-pro")

route_chain = (
    PromptTemplate.from_template(
        """
You are chatbot for Vidio OTT platform        
Given the user question below, classify it as either being about `customer service`, recommendation`, or `other`.

Do not respond with more than one word.

<user_query>
{user_query}
</user_query>

Classification:
"""
    ) | model | StrOutputParser() 
) 

def route(user_query):
    return route_chain.invoke({"user_query": user_query})

template_general = """Given a user's query on any topic, use your extensive database and understanding of various subjects to provide a clear, accurate, and helpful answer.
Prioritize directness and relevance in your response, ensuring it is informative and accessible to the user. If the question falls outside your expertise, offer guidance on where or how they might find the desired information.
Always communicate in a friendly and professional tone, fostering a positive user experience. Please answer according to user's query.

User Query: {user_query}
Answer: """
prompt_general = PromptTemplate.from_template(template_general)
chain_general = prompt_general | model | StrOutputParser()

def ask_general_question(user_query, user_id=0):
    final_response = chain_general.invoke({"user_query": user_query})
    return final_response

template_need_vector_search = """You are Chat Recommendation Engine, your job is to decide if we need to summarize User Query and look up to Vector Search or not.
User Query: {user_query}
Summarize the User Query as context to look up to Vector Search. If there is no context need to look up, you can answer with empty."""
prompt_need_vector_search = PromptTemplate.from_template(template_need_vector_search)
chain_need_vector_search = prompt_need_vector_search | model | StrOutputParser()

def summarize_query_vector_search(user_query):
    return chain_need_vector_search.invoke({"user_query": user_query})

In [52]:
df = pd.read_excel('data/history.xlsx')

In [16]:
def paginate_dataframe(df, page=1, items_per_page=10):
    """
    Paginate a DataFrame.

    Parameters:
    - df: Pandas DataFrame to paginate.
    - page: The page number (1-indexed).
    - items_per_page: Number of items per page.

    Returns:
    - A portion of the DataFrame corresponding to the page and items_per_page.
    """
    # Calculate start and end indices
    start = (page - 1) * items_per_page
    end = start + items_per_page
    
    # Use iloc to select the desired rows
    return df.iloc[start:end]

In [97]:
paginate_dataframe(df, page=1)

Unnamed: 0.1,Unnamed: 0,session_id,user_id,datetime,query,response
0,0,715,30220883,2024-03-15 14:39:13.152,laper nih,"Maaf, saya tidak dapat menjawab pertanyaan itu..."
1,1,715,30220883,2024-03-15 14:39:32.043,saya bosan ingin menonton,* Judul: **Doa Mengancam**\n\n Kenapa kamu su...
2,2,715,30220883,2024-03-15 14:39:46.408,gw lagi pengen nonton horror,* Judul: **Sleep**\n\n Kenapa kamu suka: Film...
3,3,715,30220883,2024-03-15 14:40:02.970,gw lagi pengen lucu,* Judul: **Gila Lu Ndro!**\n\n Kenapa kamu su...
4,4,715,30220883,2024-03-15 14:40:17.316,yang xedih,* Judul: **D'Academy Asia 6**\n\n Kenapa kamu...
5,5,715,30220883,2024-03-15 14:40:37.745,film porno xxx,"I'm sorry, but I'm unable to provide any infor..."
6,6,715,30220883,2024-03-15 14:40:51.775,film netflix,"Sure, here are some films that are currently a..."
7,7,712,46808216,2024-03-15 14:38:08.998,di mana saya bisa nonton film gratis,"Maaf, tampaknya ada kesalahan sistem yang tida..."
8,8,712,46808216,2024-03-15 14:38:18.577,di mana saya bisa nonton film gratis?,"Maaf, saya tidak seharusnya memberikan tanggap..."
9,9,712,46808216,2024-03-15 14:38:49.189,di mana saya bisa nonton series gratis?,"Wah, pertanyaan bagus! Namun saat ini, fokus s..."


In [36]:
history_df = df.iloc[0:500]

In [98]:
paginate_dataframe(history_df, page=1)

Unnamed: 0.1,Unnamed: 0,session_id,user_id,time,query,response,routing
0,0,661,85126055,10:21:45,Berapa harga langganan paket di vidio?,**Vidio Premier League Mobile (khusus apps mob...,customer service
1,1,658,85126055,10:20:51,"Saya ingin berlangganan di Vidio, ada paket ap...","Wah, pertanyaan bagus! Namun saat ini, fokus s...",customer service
2,2,653,140666214,10:19:47,give me a recommendation series,"Maaf, tampaknya ada kesalahan sistem yang tida...",recommendation
3,3,653,140666214,10:20:35,Berikan saya rekomendasi series action,"Maaf, tampaknya ada kesalahan sistem yang tida...",recommendation
4,4,645,30489874,10:16:19,Filmnya Zoey Deutch yang tentang mental illnes...,* Judul: **Ek Thi Daayan**\n\n Kenapa kamu su...,recommendation
5,5,645,30489874,10:18:25,ada film yang jadi nominasi Oscar 2022 tidak d...,"Maaf, saya tidak memiliki akses ke informasi r...",other
6,6,645,30489874,10:20:34,Apa film dari Martin Scorsese gak di Vidio?,Berikut daftar film Martin Scorsese yang tidak...,other
7,7,641,30940591,10:13:35,games yang paling seru di vidio,"Wah, pertanyaan bagus! Namun saat ini, fokus s...",recommendation
8,8,641,30940591,10:16:47,vidio arcade di vidio,"Maaf, saya tidak tahu apa-apa tentang topik in...",other
9,9,641,30940591,10:17:26,arcade games seru di vidio,"Wah, pertanyaan bagus! Namun saat ini, fokus s...",recommendation


In [39]:
history_df['routing'] = history_df['query'].progress_apply(lambda x: route(x))

  0%|          | 0/500 [00:00<?, ?it/s]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  history_df['routing'] = history_df['query'].progress_apply(lambda x: route(x))


In [96]:
history_df[history_df['query'].str.contains("Ada film dari Martin Scorsese gak di Vidio?", case=False, na=False)][['response', 'routing']].values

array([], shape=(0, 2), dtype=object)

In [95]:
route('Ada film dari Martin Scorsese gak di Vidio?')

'recommendation'

In [106]:
ask_general_question('Ada film dari Martin Scorsese gak di Vidio?')

'Ya, ada beberapa film Martin Scorsese yang tidak tersedia di Vidio, di antaranya:\n\n* Hugo\n* Gangs of New York\n* The Age of Innocence\n* After Hours\n* Bringing Out the Dead'

In [94]:
summarize_query_vector_search("Filmnya Zoey Deutch yang tentang mental illness atau bullying itu apa judulnya? Ada di Vidio gak?")

'Mental illness or bullying films starring Zoey Deutch'

In [105]:
df[(df['query'].str.contains("Martin Scorsese", case=False, na=False))]['response'].values

array(["Berikut daftar film Martin Scorsese yang tidak tersedia di platform streaming Vidio:\n\n* After Hours (1985)\n* Alice Doesn't Live Here Anymore (1974)\n* Bringing Out the Dead (1999)\n* Casino (1995)\n* The Color of Money (1986)\n* The Departed (2006)\n* Gangs of New York (2002)\n* Hugo (2011)\n* Kundun (1997)\n* The Last Temptation of Christ (1988)\n* Shutter Island (2010)\n* Silence (2016)\n* Taxi Driver (1976)\n* Who's That Knocking at My Door (1967)"],
      dtype=object)