In [None]:
%pip install google-genai
# %pip install os
%pip install dotenv
%pip install pydantic

In [None]:
from google import genai
from dotenv import load_dotenv
from google.genai import types
from pydantic import BaseModel, Field
from typing import Literal
import os

In [None]:
def create_vertexai_client():
    """Build a Gen AI client in Vertex AI mode from the environment's API key.

    The key is read from the ``GOOGLE_CLOUD_API_KEY`` environment variable.

    Returns:
        The ``genai.Client`` constructed with ``vertexai=True`` and that key.

    Raises:
        ValueError: If ``GOOGLE_CLOUD_API_KEY`` is unset or empty.
    """
    api_key = os.getenv("GOOGLE_CLOUD_API_KEY")

    # Happy path first: a non-empty key is all the client needs.
    if api_key:
        return genai.Client(vertexai=True, api_key=api_key)

    raise ValueError("GOOGLE_CLOUD_API_KEY not found in .env file")

In [None]:
# Load the .env file first: create_vertexai_client() reads GOOGLE_CLOUD_API_KEY
# via os.getenv and raises ValueError if the variable is missing.
load_dotenv()

# Module-level client shared by the cells below (get_player_stats uses it).
client = create_vertexai_client()

In [None]:
class PlayerField(BaseModel):
    # One extracted statistic plus the provenance the model reports for it.
    # Documented with `#` comments on purpose: a class docstring would be added to
    # the JSON schema that get_player_stats sends to the model.

    # The extracted value: text (e.g. net worth, preferred foot) or a number.
    value: str | int | float
    # Snippet that backs `value`. Required but nullable, because the prompt instructs
    # the model to return null here when the player cannot be found.
    source_quote: str | None
    uri: str | None = Field(None, description="The EXACT, UNEDITED URL provided by the tool. Do not guess or shorten. None if the source is internal training data")
    # Nullable to match both the declared default (None) and the prompt's
    # "player not found" payload, which sets source_type to null.
    source_type: Literal["GOOGLE_SEARCH", "URL_CONTEXT", "INTERNAL_KNOWLEDGE"] | None = Field(None, description="Categorize the source of this specific field. Must not be None when uri is not null.")

class PlayerStats(BaseModel):
    # Structured result for one player; its JSON schema is passed to the model as
    # the response schema in get_player_stats, and print_player_stats validates
    # the model's reply against it.

    # Player name as reported by the model.
    name: str
    net_worth: PlayerField = Field(..., description="Net worth of the PL player.")
    is_professional_player: bool = Field(..., description="Must be True if found in PL records, False otherwise")
    verification_status: str = Field(..., description="Explanation of where the data was found or why it failed")
    # The remaining statistics each carry their own value and provenance
    # (source quote, uri, source type) through PlayerField.
    height: PlayerField
    shirt_number: PlayerField
    preferred_foot: PlayerField
    goals: PlayerField
    goal_assists: PlayerField
    appearances: PlayerField
    minutes_played: PlayerField

In [None]:
def get_player_stats(
    player: str,
    url_list: list[str] | None = None,
    model: str = "gemini-3-flash-preview",
) -> types.GenerateContentResponse:
    """Ask the model for a player's Premier League 2025/2026 statistics.

    Builds a prompt that names ``player`` and lists the reference URLs, then calls
    ``client.models.generate_content`` with the URL-context and Google Search tools
    enabled and the response constrained to the ``PlayerStats`` JSON schema.

    Args:
        player: Name of the player to look up; interpolated into the prompt.
        url_list: URLs listed under "PROVIDED URLS" in the prompt. ``None`` (the
            default) uses the built-in list of four premierleague.com stats pages;
            an empty list sends no URLs.
        model: Model identifier passed to ``generate_content``.

    Returns:
        The unmodified response object returned by ``generate_content``.

    Note:
        Relies on the module-level ``client`` created earlier in the notebook.
    """
    if url_list is None:
        url_list = [
            "https://www.premierleague.com/en/players/141746/bruno-fernandes/stats",
            "https://www.premierleague.com/en/players/223094/erling-haaland/stats",
            "https://www.premierleague.com/en/players/97032/virgil-van-dijk/stats",
            "https://www.premierleague.com/en/players/244851/cole-palmer/stats"
        ]

    urls = "\n".join(url_list)

    # The source_type values in the OUTPUT FORMAT example must match the
    # PlayerField enum ("GOOGLE_SEARCH" / "URL_CONTEXT" / "INTERNAL_KNOWLEDGE")
    # and section 2 of the prompt, so the example uses the enum spellings.
    prompt = f"""
        **OBJECTIVE:**
        Search and identify the Premier League 2025/2026 Player Statistics of {player}.
        
        ---

        ### **1. DYNAMIC SOURCE IDENTIFICATION**
        1.  **IF a Premier League URL is provided:**
            *   You **MUST** execute the `url_context` tool first. This is your **Primary Source**.
        2.  **IF NO URL is provided (or if the player is non-PL):**
            *   The **Web Citations** (Google Search results) become your **Primary Source**. 
        3.  **PRIORITY:** Official URL > Web Citations > Internal Training Data.

        ### **2. MANDATORY SOURCE_TYPE CLASSIFICATION RULES**
        You are strictly forbidden from returning `null` for `source_type` if a `uri` is present.
        *   **MATCHING RULE:** If the `uri` matches one of the URLs provided below, you MUST use "URL_CONTEXT".
        *   **SEARCH RULE:** If the `uri` is a search result (e.g., Transfermarkt, Wikipedia, vertexaisearch links), you MUST use "GOOGLE_SEARCH".
        *   **FALLBACK RULE:** If no tool found the data and you use internal memory, `uri` must be `null` and `source_type` must be "INTERNAL_KNOWLEDGE".

        ### **3. INACTIVE / NON-PROFESSIONAL PLAYER LOGIC**
        If the player cannot be found in active professional records for the 2025/26 season:
        *   `is_professional_player`: `false`.
        *   **All Numeric Fields:** `{{"value": 0, "source_quote": null, "uri": null, "source_type": null}}`.
        *   **All String Fields:** `{{"value": "n/a", "source_quote": null, "uri": null, "source_type": null}}`.
        *   **Verification Status:** "Player not found in active professional databases."

        ### **4. URI EXTRACTION RULES (STRICT):**
        1.  **NO GUESSING:** You are strictly forbidden from constructing, autocompleting, or guessing a URL based on the website name. 
        2.  **LITERAL COPY:** You must copy the `uri` exactly as it appears in the search result that provided the `source_quote`. 
        3.  **THE JOIN RULE:** Before finalizing the JSON, verify that the `source_quote` actually appears in the content/snippet associated with the `uri` you provided.
        4.  **IF IN DOUBT:** If you found a fact in your training data but cannot find a specific, working URI for it in the search results, you MUST set `source_type` to `INTERNAL_KNOWLEDGE` and `uri` to `null`.

        ### **5. DATA VALIDATION & AUDIT**
        *   **`net_worth`**: Must be a string (e.g., `100 million dollars`).
        *   **`height`**: Must be a float (e.g., `1.85`).
     
        ### PROVIDED URLS:
        { urls }

        ### OUTPUT FORMAT:
        Return a JSON object exactly as follows:
        ```json
        {{
            "name": "string",
            "net_worth": {{ "value": "string", "source_quote": "...", "uri": "...", "source_type": "GOOGLE_SEARCH" }},
            "is_professional_player": boolean,
            "verification_status": "Detailed confirmation of Premier League status for 2025/26",
            "height": {{ "value": float, "source_quote": "...", "uri": "...", "source_type": "URL_CONTEXT" }},
            "shirt_number": {{ "value": int, "source_quote": "...", "uri": "...", "source_type": "URL_CONTEXT" }},
            "preferred_foot": {{ "value": "string", "source_quote": "...", "uri": "...", "source_type": "URL_CONTEXT" }},
            "goals": {{ "value": int, "source_quote": "...", "uri": "...", "source_type": "URL_CONTEXT" }},
            "goal_assists": {{ "value": int, "source_quote": "...", "uri": "...", "source_type": "URL_CONTEXT" }},
            "appearances": {{ "value": int, "source_quote": "...", "uri": "...", "source_type": "URL_CONTEXT" }},
            "minutes_played": {{ "value": int, "source_quote": "...", "uri": "...", "source_type": "URL_CONTEXT" }}
        }}
        ```
    """

    response = client.models.generate_content(
        model=model,
        contents=types.Content(
            role="user",
            parts=[types.Part(text=prompt)]
        ),
        config=types.GenerateContentConfig(
            # Request JSON constrained to the PlayerStats schema.
            response_mime_type="application/json",
            response_json_schema=PlayerStats.model_json_schema(),
            thinking_config=types.ThinkingConfig(
                thinking_level=types.ThinkingLevel.HIGH,
            ),
            # Both tools referenced by the prompt's source-identification rules.
            tools=[
                types.Tool(url_context=types.UrlContext()),
                types.Tool(google_search=types.GoogleSearch()),
            ]
        )
    )

    return response

In [None]:
def clean_json_string(raw_string):
    """Strip a surrounding Markdown code fence from a JSON payload.

    Handles an opening fence with or without a ``json`` language tag (in any
    letter case) and an optional closing fence. Text without fences is returned
    with only its surrounding whitespace trimmed.

    Args:
        raw_string: The raw text to clean. ``None`` is treated as an empty payload.

    Returns:
        The unfenced text with leading and trailing whitespace removed.
    """
    if raw_string is None:
        return ""

    clean_str = raw_string.strip()

    # Opening fence: "```" optionally followed by a "json" tag in any case.
    if clean_str.startswith("```"):
        clean_str = clean_str[3:]
        if clean_str[:4].lower() == "json":
            clean_str = clean_str[4:]

    # Closing fence.
    if clean_str.endswith("```"):
        clean_str = clean_str[:-3]

    return clean_str.strip()

In [None]:
def print_player_stats(response: types.GenerateContentResponse):
    """Validate a model response as ``PlayerStats`` and print it as indented JSON.

    If ``response.parsed`` is truthy it is validated directly; otherwise the raw
    ``response.text`` is passed through ``clean_json_string`` and parsed as JSON.

    Args:
        response: The response object returned by ``get_player_stats``.
    """
    parsed_payload = response.parsed

    if not parsed_payload:
        # No parsed payload: fall back to the raw text, minus any Markdown fence.
        raw_json = clean_json_string(response.text)
        stats = PlayerStats.model_validate_json(raw_json)
    else:
        stats = PlayerStats.model_validate(parsed_payload)

    rendered = stats.model_dump_json(indent=2)
    print(rendered)

In [None]:
# Example run: query the model for one player, then validate and print the result.
response = get_player_stats(player="Erling Haaland")
print_player_stats(response=response)

In [None]:
# response = get_player_stats(player="Bruno Fernandes")
# print_player_stats(response=response)

In [None]:
# response = get_player_stats(player="Leny Yoro")
# print_player_stats(response)

In [None]:
# response = get_player_stats(player="Kaoru Mitoma")
# print_player_stats(response)