In [3]:
%pip install google-genai
%pip install os
%pip install dotenv
%pip install pydantic


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.2[0m[39;49m -> [0m[32;49m25.3[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip3 install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.
[31mERROR: Could not find a version that satisfies the requirement os (from versions: none)[0m[31m
[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.2[0m[39;49m -> [0m[32;49m25.3[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip3 install --upgrade pip[0m
[31mERROR: No matching distribution found for os[0m[31m
[0mNote: you may need to restart the kernel to use updated packages.

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.2[0m[39;49m -> [0m[32;49m25.3[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To updat

In [4]:
from google import genai
from dotenv import load_dotenv
from google.genai import types
from pydantic import BaseModel, Field

In [6]:
def create_vertexai_client():
    """Build a `genai.Client` in Vertex AI mode from an API key in the environment.

    The key is read from the ``GOOGLE_CLOUD_API_KEY`` environment variable.

    Returns:
        The client created by ``genai.Client(vertexai=True, api_key=...)``.

    Raises:
        ValueError: If ``GOOGLE_CLOUD_API_KEY`` is unset or empty.
    """
    import os

    api_key = os.getenv("GOOGLE_CLOUD_API_KEY")
    if api_key:
        # Happy path: hand the key straight to the SDK.
        return genai.Client(vertexai=True, api_key=api_key)

    raise ValueError("GOOGLE_CLOUD_API_KEY not found in .env file")

In [7]:
# Must run before create_vertexai_client(): load_dotenv() is expected to copy
# entries from a local .env file into the process environment, which is where
# GOOGLE_CLOUD_API_KEY is read from.
load_dotenv()

# Notebook-wide client used by get_player_stats(); this raises ValueError if
# GOOGLE_CLOUD_API_KEY is missing or empty.
client = create_vertexai_client()

In [13]:
from typing import Literal

class PlayerField(BaseModel):
    # One extracted value together with the provenance the model reported for it.
    # Documented with `#` comments on purpose: a class docstring would be copied
    # into the JSON schema generated from this model and sent to the API.

    # The statistic itself; text, integer or decimal depending on the field.
    value: str | int | float
    # Still a required key, but nullable: the prompt's "inactive player" branch
    # instructs the model to emit `"source_quote": null`, which a plain `str`
    # annotation would reject at validation time.
    source_quote: str | None
    uri: str | None = Field(None, description="The EXACT, UNEDITED URL provided by the tool. Do not guess or shorten. None if the source is internal training data")
    # `| None` makes the annotation agree with the `None` default and with the
    # prompt, which asks for `"source_type": null` when no source exists.
    source_type: Literal["GOOGLE_SEARCH", "URL_CONTEXT", "INTERNAL_KNOWLEDGE"] | None = Field(None, description="Categorize the source of this specific field. Must not be None when uri is not null.")

class PlayerStats(BaseModel):
    # Structured answer for a single player. Its JSON schema is passed to the
    # model as `response_json_schema` in get_player_stats(), and
    # print_player_stats() validates the reply against it.
    # Field order is left untouched because it determines the order of keys in
    # the generated schema and in the dumped JSON.

    # Player name as returned by the model.
    name: str
    # The key is required (`...`), but its value may be null.
    net_worth: PlayerField | None = Field(..., description="Net worth of the PL player.")
    is_professional_player: bool = Field(..., description="Must be True if found in PL records, False otherwise")
    verification_status: str = Field(..., description="Explanation of where the data was found or why it failed")
    # The remaining statistics are all required and each carries its own
    # value / quote / uri / source_type provenance via PlayerField.
    height: PlayerField
    shirt_number: PlayerField
    preferred_foot: PlayerField
    goals: PlayerField
    goal_assists: PlayerField
    appearances: PlayerField
    minutes_played: PlayerField
    minutes_played: PlayerField

In [30]:
def get_player_stats(player: str, url_list: list[str] | None = None) -> types.GenerateContentResponse:
    """Ask Gemini for a player's Premier League 2025/26 statistics as structured JSON.

    A single `generate_content` request is sent through the notebook-level
    `client` to `gemini-3-flash-preview`, with the URL-context and Google Search
    tools enabled and the reply constrained to the `PlayerStats` JSON schema.

    Args:
        player: Name of the player to look up; interpolated into the prompt.
        url_list: Premier League stats URLs to list under "PROVIDED URLS" in the
            prompt. When omitted, the four stats pages this notebook originally
            hard-coded are used, so existing calls behave as before.

    Returns:
        The raw `types.GenerateContentResponse`; parsing and validation are left
        to the caller (see `print_player_stats`).
    """
    if url_list is None:
        url_list = [
            "https://www.premierleague.com/en/players/141746/bruno-fernandes/stats",
            "https://www.premierleague.com/en/players/223094/erling-haaland/stats",
            "https://www.premierleague.com/en/players/97032/virgil-van-dijk/stats",
            "https://www.premierleague.com/en/players/244851/cole-palmer/stats"
        ]

    urls = "\n".join(url_list)

    # NOTE: the `source_type` values in the OUTPUT FORMAT example must be the
    # exact enum members declared on PlayerField.source_type, and every stat
    # must use the same four-key shape. The example previously showed
    # "Google Search" / "URL Context" and a stray `chunk_id` shape for
    # `goal_assists`, contradicting both section 2 of the prompt and the schema.
    prompt = f"""
        **OBJECTIVE:**
        Search and identify the Premier League 2025/2026 Player Statistics of {player}.
        
        ---

        ### **1. DYNAMIC SOURCE IDENTIFICATION**
        1.  **IF a Premier League URL is provided:**
            *   You **MUST** execute the `url_context` tool first. This is your **Primary Source**.
        2.  **IF NO URL is provided (or if the player is non-PL):**
            *   The **Web Citations** (Google Search results) become your **Primary Source**. 
        3.  **PRIORITY:** Official URL > Web Citations > Internal Training Data.

        ### **2. MANDATORY SOURCE_TYPE CLASSIFICATION RULES**
        You are strictly forbidden from returning `null` for `source_type` if a `uri` is present.
        *   **MATCHING RULE:** If the `uri` matches one of the URLs provided below, you MUST use "URL_CONTEXT".
        *   **SEARCH RULE:** If the `uri` is a search result (e.g., Transfermarkt, Wikipedia, vertexaisearch links), you MUST use "GOOGLE_SEARCH".
        *   **FALLBACK RULE:** If no tool found the data and you use internal memory, `uri` must be `null` and `source_type` must be "INTERNAL_KNOWLEDGE".

        ### **3. INACTIVE / NON-PROFESSIONAL PLAYER LOGIC**
        If the player cannot be found in active professional records for the 2025/26 season:
        *   `is_professional_player`: `false`.
        *   **All Numeric Fields:** `{{"value": 0, "source_quote": null, "uri": null, "source_type": null}}`.
        *   **All String Fields:** `{{"value": "n/a", "source_quote": null, "uri": null, "source_type": null}}`.
        *   **Verification Status:** "Player not found in active professional databases."

        ### **4. URI EXTRACTION RULES (STRICT):**
        1.  **NO GUESSING:** You are strictly forbidden from constructing, autocompleting, or guessing a URL based on the website name. 
        2.  **LITERAL COPY:** You must copy the `uri` exactly as it appears in the search result that provided the `source_quote`. 
        3.  **THE JOIN RULE:** Before finalizing the JSON, verify that the `source_quote` actually appears in the content/snippet associated with the `uri` you provided.
        4.  **IF IN DOUBT:** If you found a fact in your training data but cannot find a specific, working URI for it in the search results, you MUST set `source_type` to `INTERNAL_KNOWLEDGE` and `uri` to `null`.

        ### **5. DATA VALIDATION & AUDIT**
        *   **`net_worth`**: Must be a string (e.g., `100 million dollars`).
        *   **`height`**: Must be a float (e.g., `1.85`).
     
        ### PROVIDED URLS:
        { urls }

        ### OUTPUT FORMAT:
        Return a JSON object exactly as follows:
        ```json
        {{
            "name": "string",
            "net_worth": {{ "value": "string", "source_quote": "...", "uri": "...", "source_type": "GOOGLE_SEARCH" }},
            "is_professional_player": boolean,
            "verification_status": "Detailed confirmation of Premier League status for 2025/26",
            "height": {{ "value": float, "source_quote": "...", "uri": "...", "source_type": "URL_CONTEXT" }},
            "shirt_number": {{ "value": int, "source_quote": "...", "uri": "...", "source_type": "URL_CONTEXT" }},
            "preferred_foot": {{ "value": "string", "source_quote": "...", "uri": "...", "source_type": "URL_CONTEXT" }},
            "goals": {{ "value": int, "source_quote": "...", "uri": "...", "source_type": "URL_CONTEXT" }},
            "goal_assists": {{ "value": int, "source_quote": "...", "uri": "...", "source_type": "URL_CONTEXT" }},
            "appearances": {{ "value": int, "source_quote": "...", "uri": "...", "source_type": "URL_CONTEXT" }},
            "minutes_played": {{ "value": int, "source_quote": "...", "uri": "...", "source_type": "URL_CONTEXT" }}
        }}
        ```
    """

    response = client.models.generate_content(
        model='gemini-3-flash-preview',
        contents=types.Content(
            role="user",
            parts=[types.Part(text=prompt)]
        ),
        config=types.GenerateContentConfig(
            # Ask for JSON constrained to the PlayerStats schema.
            response_mime_type="application/json",
            response_json_schema=PlayerStats.model_json_schema(),
            thinking_config=types.ThinkingConfig(
                thinking_level=types.ThinkingLevel.HIGH,
            ),
            # Both grounding tools are offered; the prompt tells the model which
            # one to prefer.
            tools=[
                types.Tool(url_context=types.UrlContext()),
                types.Tool(google_search=types.GoogleSearch()),
            ]
        )
    )

    return response

In [16]:
def clean_json_string(raw_string):
    """Strip a surrounding Markdown code fence from a JSON payload.

    Handles an opening fence with a `json` language tag in any letter case as
    well as a bare triple-backtick fence, plus an optional closing fence.
    Text without fences is returned with only its outer whitespace trimmed.

    Args:
        raw_string: Text returned by the model. `None` is treated as an empty
            string so the caller's JSON validation reports the problem instead
            of this helper raising `AttributeError`.

    Returns:
        The payload with the fences and surrounding whitespace removed.
    """
    text = (raw_string or "").strip()

    if text.startswith("```"):
        text = text[3:]
        # Optional language tag directly after the opening fence.
        if text[:4].lower() == "json":
            text = text[4:]

    if text.endswith("```"):
        text = text[:-3]

    return text.strip()

In [17]:
def print_player_stats(response: types.GenerateContentResponse):
    """Validate a model response as `PlayerStats` and pretty-print it as JSON.

    Uses `response.parsed` when it is truthy. Otherwise falls back to
    `response.text`, after `clean_json_string` has removed any Markdown fence.

    Args:
        response: The response returned by `get_player_stats`.
    """
    parsed = response.parsed
    player_stats = (
        PlayerStats.model_validate(parsed)
        if parsed
        else PlayerStats.model_validate_json(clean_json_string(response.text))
    )
    print(player_stats.model_dump_json(indent=2))

In [36]:
# Erling Haaland's stats page is one of the Premier League URLs that
# get_player_stats() embeds in its prompt.
response = get_player_stats(player="Erling Haaland")
print_player_stats(response=response)

{
  "name": "Erling Haaland",
  "net_worth": {
    "value": "$80 million",
    "source_quote": "Haaland's net worth, which according to Forbes is $80m (£59.5m)",
    "uri": "https://vertexaisearch.cloud.google.com/grounding-api-redirect/AUZIYQHwe5-b036A0QaSzU9G0uAgOVZs1OJ0NMe0MmnMy-wRh4Qh9PtR_PhO6UV4S4-6ZJXJL4cicMEt0CFm_hwEER-d5WI5uY3H6DySLRcQZOmbeXF0PQU68ny6xyWF-64jJ_2Jnht_hkr7Kk3FasVjBki_2Q-n8jvr6PIcYUkTrFyJa2wZjPt4jkkdrFDo4Y6A2_OahUrg7unsUbzWJBxPp6e52tzg9w==",
    "source_type": "GOOGLE_SEARCH"
  },
  "is_professional_player": true,
  "verification_status": "Confirmed active for Manchester City in the Premier League for the 2025/26 season via official Premier League statistics.",
  "height": {
    "value": 1.95,
    "source_quote": "Height, 1.95 m (6 ft 5 in).",
    "uri": "https://vertexaisearch.cloud.google.com/grounding-api-redirect/AUZIYQHFUkt6uBYUgbC5fgMofMPjKps_9QszoJClzsHz1zkpovNWK1-OueQEj_2oFq--1dOaL7L4X9Aef7iLamfT1f4iN2aH3BxaYfJUznngJ2vXuawp-SAWl4x3R1nxHpiSCKF_pjnPg-rL",
  

In [31]:
# Bruno Fernandes' stats page is one of the Premier League URLs that
# get_player_stats() embeds in its prompt.
response = get_player_stats(player="Bruno Fernandes")
print_player_stats(response=response)

{
  "name": "Bruno Fernandes",
  "net_worth": {
    "value": "50 million pounds",
    "source_quote": "Fernandes's net worth at current is around £50 million (~$63.5 million)",
    "uri": "https://vertexaisearch.cloud.google.com/grounding-api-redirect/AUZIYQE8anoOkuCS6lZdKmGDzZHxI8qzN86GKJnH4hvkWlEgu8X-MuDBPuw072vwORC5MnAvZJz-d9rpXd6CRIAu4TIJ13SjfjUKadeJVJLsZ4dIVguFedJ690y5UiiaMGLPFOOGdl8NIcfvl0_dhjIjwpXYJTojOqyiMuTFT6943E49EWwXLaI_pVvJB9QgundsZgp0zKiIwi1sJFbnNNNeHO4GlC5B67zjtluJ-hx3Q-0Q5ALq-LqFHHzpPs-qp_VIq-nSIaY4eX-3orqBvtHcurm6xl168e0l_Wg2wrm4xr5So_PFfKKOvutbCA_-Y_PUon2r0FlfCQMHyq60DW-Vjk3oi1nj_g==",
    "source_type": "GOOGLE_SEARCH"
  },
  "is_professional_player": true,
  "verification_status": "Bruno Fernandes is confirmed as an active professional player and captain for Manchester United in the 2025/26 Premier League season, with 19 appearances and 5 goals recorded.",
  "height": {
    "value": 1.79,
    "source_quote": "Height: 1,79 m",
    "uri": "https://vertexaisearch.cloud

In [25]:
# Leny Yoro has no entry among the Premier League URLs that
# get_player_stats() embeds in its prompt.
response = get_player_stats(player="Leny Yoro")
print_player_stats(response)

{
  "name": "Leny Yoro",
  "net_worth": {
    "value": "$5 million to $10 million",
    "source_quote": "Leny Yoro's net worth in 2025 is estimated to be between $5 million and $10 million.",
    "uri": "https://vertexaisearch.cloud.google.com/grounding-api-redirect/AUZIYQFltEwsHVuaLxmXHF0h8oVlOX6al8b6sygr89Rs6rgkh9vcrP7k0gq2EPM81HrMZNY0uZ6fh82J75Hh6WurPgU44UgNX_Ri2uEdFTy-ybecQad3Bjb3Egj5M1hR9yxtXK00QPlJSfrWzvbGWTpyhwCZpS2epWew2LqAVWnmsfPMMvJIy1F_sNCnWL9n5TumQiQvxJ5sH9vyzet_TTmEwqx12A==",
    "source_type": "GOOGLE_SEARCH"
  },
  "is_professional_player": true,
  "verification_status": "Confirmed active Premier League player for Manchester United in the 2025/26 season with 21 recorded appearances.",
  "height": {
    "value": 1.9,
    "source_quote": "Height: 1,90 m",
    "uri": "https://vertexaisearch.cloud.google.com/grounding-api-redirect/AUZIYQHxHVbh3C7-5EZbfu6Qc17RS7ppYCJWwSlID0TYEHYvtnyzXJcbKv1sn0wNyVjo4Rzi4yCJM-6ur9henyNIuC9xuv5tgTadQhL_N67in9EuVg-TNZefG5JvYUCtlTVXvTLcpT5xdaloZR

In [35]:
# Kaoru Mitoma has no entry among the Premier League URLs that
# get_player_stats() embeds in its prompt.
response = get_player_stats(player="Kaoru Mitoma")
print_player_stats(response)

{
  "name": "Kaoru Mitoma",
  "net_worth": {
    "value": "£14,924,000",
    "source_quote": "Kaoru Mitoma's net worth is £14,924,000.",
    "uri": "https://vertexaisearch.cloud.google.com/grounding-api-redirect/AUZIYQEdretvYCKW4O59gRm5RYnJHJ5hexKWa57fMrjcNQFZLRECj_qDJmmefNRkfctFMOndlWJHTFNNHAmKtKzBWdcM1QiD7dT2BOwWndGe1a07MIO74up-OGN1ASjpTPn-uE8pBtXJEkZMhSFAHuY5rjoH",
    "source_type": "GOOGLE_SEARCH"
  },
  "is_professional_player": true,
  "verification_status": "Confirmed active professional player for Brighton & Hove Albion in the 2025/26 Premier League season, with 11 appearances and 2 goals recorded as of January 2026.",
  "height": {
    "value": 1.78,
    "source_quote": "Height, 1.78 m (5 ft 10 in).",
    "uri": "https://vertexaisearch.cloud.google.com/grounding-api-redirect/AUZIYQE_tkgLhG5M6pk5yR84vWWVwCJPikxnO-s30ayWBug4j1xXjwX-CvACu-4Owj6HHSZfkyqRcslyLltARMHMuW1brlyrexSfmZwts9a5znZjlG4WTSDIIyaBkZeATdFLFtVeJTnaKQ==",
    "source_type": "GOOGLE_SEARCH"
  },
  "shirt_number":