In [7]:
import requests
import os
import dotenv
dotenv.load_dotenv()

True

In [12]:
api_key = os.environ.get("VIDIO_X_API_AUTH", "")
params = {
    'content_size': 20,
    'filter[state]': 'upcoming'
}
headers = {
    'x-api-key': api_key,
    'x-secure-level': '2', 
    'x-api-platform': 'app-js', 
    'x-api-app-info': 'js/www.vidio.com', 
    'accept': 'application/json'
}
response = requests.get(f"https://api.vidio.com/sport_events", params=params, headers=headers)

In [13]:
def join_included_data(data, included):
    included_map = {(item['type'], item['id']): item for item in included}
    
    for item in data:
        for relation_key, relation_value in item.get('relationships', {}).items():
            relation_data = relation_value.get('data')
            if relation_data:
                key = (relation_data['type'], relation_data['id'])
                if key in included_map:
                    item[relation_key] = included_map[key]['attributes']
    
    return data

# Assuming 'response' is your JSON object from above
print(response)
data = response.json()['data']
included = response.json()['included']
result = join_included_data(data, included)

<Response [200]>


In [14]:
from datetime import datetime


def parse_result(result):
    date_format = '%Y-%m-%dT%H:%M:%S%z'
    output_date_format = '%d %B %Y %H:%M'
    return (
        f"{result['home_team']['name']}"
        " vs "
        f"{result['away_team']['name']}"
        " - "
        f"{datetime.strptime(result['attributes']['start_time'], date_format).strftime(output_date_format)}"
    )

parsed_result = [parse_result(i) for i in result]

In [15]:
parsed_result

['ACF Fiorentina vs Viktoria Plzeň - 18 April 2024 23:45',
 'Lille OSC Métropole vs Aston Villa - 18 April 2024 23:45',
 'Fenerbahce S.K. vs Olympiacos F.C. - 19 April 2024 02:00',
 'PAOK FC vs Club Brugge KV - 19 April 2024 02:00',
 'AS Roma vs A.C. Milan - 19 April 2024 02:00',
 'Atalanta B.C. vs Liverpool - 19 April 2024 02:00',
 'Olympique de Marseille vs S.L. Benfica - 19 April 2024 02:00',
 'West Ham United vs Bayer 04 Leverkusen - 19 April 2024 02:00',
 'Bima Perkasa Jogja vs Pelita Jaya Bakrie Jakarta - 19 April 2024 18:00',
 'Prawira Harum Bandung vs Bali United Basketball - 19 April 2024 20:00',
 'Amartha Hangtuah Jakarta vs Kesatria Bengawan Solo - 20 April 2024 14:00',
 'Dewa United Banten vs Rajawali Medan - 20 April 2024 16:00',
 'Satria Muda Pertamina Jakarta vs Satya Wacana Salatiga - 20 April 2024 18:00',
 'Prawira Harum Bandung vs RANS Simba Bogor - 20 April 2024 20:00',
 'Borneo Hornbills vs Pelita Jaya Bakrie Jakarta - 21 April 2024 14:00',
 'Dewa United Banten vs B

In [43]:
from langchain_google_vertexai import VertexAI
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate, PromptTemplate
from langchain_core.runnables import RunnableLambda
import vertexai


vertexai.init(project="vidio-quiz-prod", location="asia-southeast1")
model = VertexAI(model_name="gemini-pro", temperature=0.4)
joined_result = "\n".join(parsed_result)

template_schedule_question = """"Given the following list of matches with their scheduled dates:

{context}

Considering today's date is {today}, and given the user's question: "{user_question}", please identify and list only the future matches that involve the team mentioned in the user's question. Answer you don't know if no match that you identify from user_question.
"""

prompt_schedule = ChatPromptTemplate.from_template(template_schedule_question)
chain_schedule = prompt_schedule | model | StrOutputParser()

In [46]:
today = '03 April 2024'
chain_schedule.invoke({"user_question": "kapan liverpool berlaga dan melawan siapa?", "context": joined_result, "today": today})

'05 April 2024 - Liverpool vs Sheffield United FC\n07 April 2024 - Manchester United vs Liverpool'

In [35]:
from google.cloud import storage

def upload_blob(bucket_name, source_file_name, destination_blob_name):
  storage.blob._DEFAULT_CHUNKSIZE = 35 * 1024 * 1024  # 35 MB
  storage.blob._MAX_MULTIPART_SIZE = 35 * 1024 * 1024  # 35 MB
  
  storage_client = storage.Client()
  bucket = storage_client.bucket(bucket_name)
  blob = bucket.blob(destination_blob_name)

  blob.upload_from_filename(source_file_name)

  print(
      "File {} uploaded to {}.".format(
          source_file_name, destination_blob_name
      )
  )

with open('data/schedule.txt', 'w') as file:
  # Writing the string to the file
  file.write(joined_result)


upload_blob("genai_hackathon_2024", "data/schedule.txt", "data/schedule.txt")

File data/schedule.txt uploaded to data/schedule.txt.


In [64]:
from base64 import b64encode
import json
import google.auth
import google.auth.transport.requests
creds, project = google.auth.default()

# creds.valid is False, and creds.token is None
# Need to refresh credentials to populate those

auth_req = google.auth.transport.requests.Request()
creds.refresh(auth_req)
token = creds.token


project_id = "vidio-quiz-prod"
location = "global"
data_store_id = "vidio-info-v3_1711610635224"


In [63]:
def reindex_from_gcs(token, project_id, location, data_store_id, gcs_url):
    headers={
        "Authorization": f"Bearer {token}",
        "Content-Type": "application/json"
    }
    data={
      "reconciliationMode": "INCREMENTAL",
      "gcsSource":{
        "inputUris": [
          gcs_url
        ],
        "dataSchema": "content"
      }
    }
    response = requests.post(
        f"https://discoveryengine.googleapis.com/v1beta/projects/{project_id}/locations/{location}/collections/default_collection/dataStores/{data_store_id}/branches/0/documents:import",
        headers=headers,
        json=data
    )
    return response

In [65]:
reindex_from_gcs(token, project_id, location, data_store_id, "gs://genai_hackathon_2024/data/schedule.txt")

<Response [200]>

In [61]:
from hashlib import sha256


sha256("gs://genai_hackathon_2024/data/vidio_info/2024-03-28b/43000713400.html".encode('utf-8')).hexdigest()[:32]

'762b7cb3d9c418b5722d707a61aa3f72'