In [None]:
%pip install google-genai
%pip install os
%pip install dotenv
%pip install pydantic

In [None]:
from google import genai
from dotenv import load_dotenv
from google.genai import types
from enum import Enum
from pydantic import BaseModel, Field

In [None]:
def create_vertexai_client():
    """Create a `genai.Client` in Vertex AI mode from an API key in the environment.

    Reads the ``GOOGLE_CLOUD_API_KEY`` environment variable and passes it to
    ``genai.Client(vertexai=True, api_key=...)``.

    Returns:
        The constructed ``genai.Client``.

    Raises:
        ValueError: If ``GOOGLE_CLOUD_API_KEY`` is unset or empty.
    """
    import os

    api_key = os.environ.get("GOOGLE_CLOUD_API_KEY")
    if api_key:
        # Key is present: build the client and hand it straight back.
        return genai.Client(vertexai=True, api_key=api_key)

    raise ValueError("GOOGLE_CLOUD_API_KEY not found in .env file")

In [None]:
# Must run before the client is created: create_vertexai_client() reads
# GOOGLE_CLOUD_API_KEY from the process environment, and load_dotenv() is what
# is expected to populate it from a local .env file.
load_dotenv()

# Module-level client shared by the cells below (used by get_player_stats).
# Raises ValueError here if GOOGLE_CLOUD_API_KEY is missing or empty.
client = create_vertexai_client()

In [None]:
def get_citations(response: types.GenerateContentResponse) -> list[str]:
    """Collect the web URIs referenced by the first candidate's grounding supports.

    For every grounding support of ``response.candidates[0]``, each entry in
    ``grounding_chunk_indices`` is looked up in ``grounding_chunks`` and the
    chunk's ``web.uri`` is appended to the result when it is set.

    Args:
        response: The model response to inspect.

    Returns:
        URIs in the order the supports reference them. A URI referenced by
        several supports appears once per reference. The list is empty when
        there are no candidates, no grounding metadata, or no web chunks.
    """
    citations: list[str] = []
    if not response.candidates:
        return citations

    grounding_metadata = response.candidates[0].grounding_metadata
    if not grounding_metadata:
        return citations

    grounding_chunks = grounding_metadata.grounding_chunks or []
    num_chunks = len(grounding_chunks)

    for support in grounding_metadata.grounding_supports or []:
        for chunk_id in support.grounding_chunk_indices or []:
            # An index outside the chunk list has nothing to cite. Skip it
            # instead of dereferencing a missing chunk (which used to raise
            # AttributeError on `None.web`).
            if not 0 <= chunk_id < num_chunks:
                continue
            web = grounding_chunks[chunk_id].web
            if web is not None and web.uri:
                citations.append(web.uri)

    return citations

In [None]:
# Closed set of values accepted for PlayerStats.marital_status; every member's
# value is the same upper-case string as its name.
# NOTE(review): documented with `#` comments rather than a class docstring on
# purpose. This enum is part of PlayerStats.model_json_schema(), and pydantic
# may copy a class docstring into the generated schema — confirm before
# converting this to a docstring.
class MaritalStatus(Enum):
    SINGLE = "SINGLE"
    MARRIED = "MARRIED"
    DIVORCED = "DIVORCED"
    WIDOWED = "WIDOWED"
    UNKNOWN = "UNKNOWN"  # the prompt in get_player_stats does not say when to use this value

# Structured result requested from the model in get_player_stats: its JSON
# schema (PlayerStats.model_json_schema()) is sent as the response schema, and
# print_player_stats validates the reply against this model.
# All fields are required (no defaults; `Field(...)` marks a required field).
# NOTE(review): comments are used instead of a class docstring so the generated
# JSON schema stays exactly as it is today — pydantic may surface a class
# docstring as the schema description; confirm before adding one.
class PlayerStats(BaseModel):
    # --- identity / verification ---
    name: str
    marital_status: MaritalStatus = Field(..., description="Marital status of a player")
    current_club: str
    is_professional_player: bool = Field(..., description="Must be True if found in PL records, False otherwise")
    verification_status: str = Field(..., description="Explanation of where the data was found or why it failed")
    position: str
    # --- season counters (the prompt asks for 0 when the player is not found) ---
    goals: int
    goal_assists: int
    total_shots: int  
    total_passes: int
    dribbles: int  # NOTE(review): get_player_stats lists no stats URL for this metric
    appearances: int
    total_tackles: int
    minutes_played: int

# One entry of URL-context metadata: a URL together with its retrieval status.
# Built by get_url_context_metadata from each item's `retrieved_url` and
# `url_retrieval_status`. Both fields accept None but have no default, so they
# must always be passed explicitly.
class Retrieved_Url_Status(BaseModel):
    url: str | None
    status: types.UrlRetrievalStatus | None
In [None]:
def get_url_context_metadata(response: types.GenerateContentResponse) -> list[Retrieved_Url_Status]:
    """Return the URL-context retrieval results of the first candidate.

    Args:
        response: The model response to inspect.

    Returns:
        One ``Retrieved_Url_Status`` per entry in the first candidate's
        ``url_context_metadata.url_metadata``, in the same order. The list is
        empty when the response has no candidates or the first candidate has
        no (or empty) URL-context metadata.
    """
    if not response.candidates:
        return []

    url_context = response.candidates[0].url_context_metadata
    if not url_context or not url_context.url_metadata:
        return []

    statuses = []
    for entry in url_context.url_metadata:
        statuses.append(
            Retrieved_Url_Status(url=entry.retrieved_url, status=entry.url_retrieval_status)
        )
    return statuses

In [None]:
def _build_player_stats_prompt(player: str) -> str:
    """Build the prompt text (stat URLs, instructions, JSON template) for one player."""
    # statMetric query value -> human-readable label. A single mapping keeps
    # each metric paired with its label instead of two index-coupled lists.
    # NOTE(review): PlayerStats also has a `dribbles` field, but no metric URL
    # is listed for it here.
    stat_metrics = {
        "goals": "goals",
        "goalAssists": "assists",
        "appearances": "Appearances",
        "totalTackles": "Total Tackles",
        "totalShots": "Total Shots",
        "totalPasses": "Total Passes",
        "timePlayed": "Minutes Played",
    }

    urls = "\n".join(
        f"URL {i} ({label}): https://www.premierleague.com/en/stats/top/players?statMetric={metric}&season=2025"
        for i, (metric, label) in enumerate(stat_metrics.items())
    )

    # Spell out the allowed enum values so the template agrees with MaritalStatus.
    marital_statuses = ", ".join(status.value for status in MaritalStatus)

    # The template below is well-formed JSON apart from the <...> placeholders:
    # fields are comma-separated, string values are quoted, and numeric/boolean
    # placeholders are left unquoted to match the PlayerStats field types.
    return f"""
        Please get the season 2025/2026 player stats of a player from the following URLs.
        You MUST use the URL Context tool to deep-read these specific links as your primary source of truth. 
        Only use the Google Search tool if the provided URLs are unreachable or do not contain the specific data required to satisfy the JSON schema.
        
        STRICT INSTRUCTIONS:
        - If the player is NOT a professional Premier League player or no data exists for the 2025/2026 season, 
        set all numeric fields to 0, is_professional_player to False and "current_club" to "Unknown/Not Found".
        - DO NOT invent or hallucinate statistics. 
        - Use the "verification_status" field in the JSON to explain if the player was found or not.

        URLs:
        {urls}

        Player:
        {player}

        Output:
        ```json
        {{
            "name": "<name of the player>",
            "marital_status": "<one of: {marital_statuses}>",
            "current_club": "<player's club>",
            "is_professional_player": <true or false>,
            "verification_status": "<verification status>",
            "position": "<player's position>",
            "goals": <number of goals>,
            "goal_assists": <number of assists>,
            "total_shots": <number of total shots>,
            "total_passes": <number of total passes>,
            "dribbles": <number of dribbles>,
            "appearances": <number of appearances>,
            "total_tackles": <number of tackles>,
            "minutes_played": <number of minutes played>
        }}
        ```
        
        If a stat is 0, record it as 0. Do not leave fields blank.
        return the response in the JSON object specified above
    """


def get_player_stats(player: str, model: str = "gemini-3-flash-preview") -> types.GenerateContentResponse:
    """Ask the model for a player's 2025/2026 Premier League stats as JSON.

    Sends one user message built from the stat URLs and instructions, with the
    URL Context and Google Search tools enabled, and requests a JSON response
    that follows ``PlayerStats.model_json_schema()``. Uses the module-level
    ``client``.

    Args:
        player: Name of the player to look up. Must not be blank.
        model: Model identifier passed to ``generate_content``. Defaults to
            the model this notebook was written against.

    Returns:
        The raw ``GenerateContentResponse``. Use ``print_player_stats`` to
        validate and display it.

    Raises:
        ValueError: If ``player`` is empty or only whitespace.
    """
    # Reject a blank name up front rather than spending an API call on it.
    if not player or not player.strip():
        raise ValueError("player must be a non-empty name")

    prompt = _build_player_stats_prompt(player)

    response = client.models.generate_content(
        model=model,
        contents=types.Content(
            role="user",
            parts=[
                types.Part(text=prompt)
            ]
        ),
        config=types.GenerateContentConfig(
            response_mime_type="application/json",
            response_json_schema=PlayerStats.model_json_schema(),
            tools=[
                types.Tool(url_context=types.UrlContext()),
                types.Tool(google_search=types.GoogleSearch()),
            ]
        )
    )

    return response

In [None]:
def clean_json_string(raw_string):
    """Strip surrounding whitespace and an optional Markdown code fence.

    Handles an opening fence with or without a ``json`` language tag (matched
    case-insensitively) and a closing fence. Each is removed independently, so
    a string carrying only one of them is still cleaned.

    Args:
        raw_string: Text that may be wrapped in a fenced code block. ``None``
            and the empty string are accepted.

    Returns:
        The inner text with leading and trailing whitespace removed, or ``""``
        when ``raw_string`` is ``None`` or empty.
    """
    if not raw_string:
        return ""

    fence = "```"
    cleaned = raw_string.strip()

    if cleaned.startswith(fence):
        cleaned = cleaned[len(fence):]
        # Drop the optional language tag that directly follows the opening fence.
        if cleaned[:4].lower() == "json":
            cleaned = cleaned[4:]

    if cleaned.endswith(fence):
        cleaned = cleaned[: -len(fence)]

    return cleaned.strip()

def print_citations_by_response(response: types.GenerateContentResponse):
    """Print each URI returned by ``get_citations(response)``, numbered from 0.

    Prints nothing when the response yields no citations.
    """
    for index, uri in enumerate(get_citations(response)):
        print(f"Citation {index}: {uri}")

def print_url_context_by_response(response: types.GenerateContentResponse):
    """Print the URL-context retrieval status for every URL in the response.

    Prints a single notice instead when ``get_url_context_metadata`` returns
    no entries.
    """
    entries = get_url_context_metadata(response)
    if not entries:
        print("URL Context Tool is not triggered")
        return

    for index, entry in enumerate(entries):
        print(f"URL Metadata {index}: {entry.url}, status: {entry.status}")

In [None]:
def print_player_stats(response: types.GenerateContentResponse):
    """Validate a model response as ``PlayerStats`` and print it as indented JSON.

    Uses ``response.parsed`` when it is set. Otherwise it falls back to
    ``response.text``, removing any Markdown code fence before validating it
    as JSON.

    Args:
        response: The response returned by ``get_player_stats``.

    Raises:
        ValueError: If the response has neither parsed data nor any text.
            Validation failures from ``PlayerStats`` propagate unchanged.
    """
    if response.parsed:
        player_stats = PlayerStats.model_validate(response.parsed)
    else:
        raw_text = response.text
        # With no text there is nothing to clean or validate; fail with a clear
        # message instead of an AttributeError from calling .strip() on None.
        if not raw_text:
            raise ValueError("Response contains no parsed data and no text to validate as PlayerStats")
        player_stats = PlayerStats.model_validate_json(clean_json_string(raw_text))

    print(player_stats.model_dump_json(indent=2))

In [None]:
# response = get_player_stats(player="Connie Leung")

# player_stats= PlayerStats.model_validate_json(clean_json_string(response.text))

# print(player_stats.model_dump_json(indent=2))

In [None]:
# print_citations_by_response(response)
# print_url_context_by_response(response)

In [None]:
# response = get_player_stats(player="Erling Haaland")

# player_stats= PlayerStats.model_validate_json(clean_json_string(response.text))

# print(player_stats.model_dump_json(indent=2))

In [None]:
# print_citations_by_response(response)
# print_url_context_by_response(response)

In [None]:
# Live request: calls the model through the module-level `client`, so it needs
# a valid GOOGLE_CLOUD_API_KEY and network access. `response` is kept as a
# global because the next cell inspects its citations and URL-context metadata.
response = get_player_stats(player="Bruno Fernandes")
print_player_stats(response=response)

In [None]:
# Show where the answer came from: grounding citations first, then the
# retrieval status of each URL reported in the URL-context metadata.
# Depends on `response` from the previous cell.
print_citations_by_response(response)
print_url_context_by_response(response)

In [None]:
# response = get_player_stats(player="Alisson Becker")

# player_stats= PlayerStats.model_validate_json(clean_json_string(response.text))

# print(player_stats.model_dump_json(indent=2))

In [None]:
# print_citations_by_response(response)
# print_url_context_by_response(response)