In [0]:
%pip install langchain langchain-core langchain-community databricks-langchain
%pip install python-dotenv

[43mNote: you may need to restart the kernel using %restart_python or dbutils.library.restartPython() to use updated packages.[0m
[43mNote: you may need to restart the kernel using %restart_python or dbutils.library.restartPython() to use updated packages.[0m


In [0]:
# Restart the Python process so the packages installed by the %pip cell above
# can be used (pip's own output note recommends this after installing).
dbutils.library.restartPython()

In [0]:
from pyspark.sql import functions as F
from dotenv import load_dotenv  
import os


load_dotenv()

# ==============================================================================
# STEP 1: Load "Submissions" Data (Stadium enrichment)
# ==============================================================================


# 1. Define variables for the submissions container
storage_account = "lab94290"
container_submissions = "submissions"

# Fail fast with a clear message when the token is absent; without this check
# the .lstrip() below raises an opaque AttributeError on None.
sas_token_submissions = os.getenv('SAS_SUBMISSIONS')
if not sas_token_submissions:
    raise RuntimeError(
        "Environment variable SAS_SUBMISSIONS is not set. "
        "Define it in the environment or in a .env file before running this cell."
    )
# Drop any leading '?' before handing the token to Spark.
sas_token_submissions = sas_token_submissions.lstrip('?')

# 2. Configure Spark for Submissions
print("🔄 Connecting to 'submissions' container...")
spark.conf.set(f"fs.azure.account.auth.type.{storage_account}.dfs.core.windows.net", "SAS")
spark.conf.set(f"fs.azure.sas.token.provider.type.{storage_account}.dfs.core.windows.net", "org.apache.hadoop.fs.azurebfs.sas.FixedSASTokenProvider")
spark.conf.set(f"fs.azure.sas.fixed.token.{storage_account}.dfs.core.windows.net", sas_token_submissions)

# 3. Load Data
base_path_submissions = f"abfss://{container_submissions}@{storage_account}.dfs.core.windows.net/Lior_Shachar_Raz/"

# NOTE(review): the displayed sample looks like it contains a line break inside
# a quoted text field. If the raw CSV really has embedded newlines, the reader
# would need .option("multiLine", "true") — confirm against the source file.
stadium_enrichment_df = spark.read.format("csv") \
    .option("header", "true") \
    .option("inferSchema", "true") \
    .load(f"{base_path_submissions}Stadium_LLM_Enrichment.csv")

# 4. CRITICAL: Cache the DataFrame
# We must do this NOW because we are about to change the SAS token.
# If we don't cache, Spark will try to read the files again later and fail.
print("⏳ Caching Match and Stadium data into RAM...")
stadium_enrichment_df = stadium_enrichment_df.cache()

# cache() is lazy: trigger an action (.count()) so the data is materialized
# immediately, while the current SAS token is still configured.
print(f"   - Stadiums loaded: {stadium_enrichment_df.count()} rows")

# 5. Preview a few rows
print("Stadium Enrichment:")
display(stadium_enrichment_df.limit(5))



🔄 Connecting to 'submissions' container...
⏳ Caching Match and Stadium data into RAM...
   - Stadiums loaded: 274 rows
Stadium Enrichment:


League,Country,Team,Stadium,Latitude,Longitude,Directions,Food_and_Stay,Ticket_Info
Premier League,England,Arsenal,Emirates Stadium,51.55667,-0.10611,"The Emirates Stadium is located in the Islington area, toward the north of London at just over 2 miles from Kings Cross St Pancras railway station. There are multiple ways to reach the stadium by public transport. Theunderground(tube) is one option – the nearest tube station is Arsenal, which is on the Piccadilly line. Decent alternatives are stations Finsbury Park (Victoria and Piccadilly line) and Highbury & Islington (Victoria line and London overground). From both stations it is an approximate 10-minute walk to the stadium. On non-matchdays, Holloway Road station (Piccadilly line) is located closest, but will be closed pre-match and is exit-only after the match. Alternatively, one can catch atrainto Finsbury Park or Highbury & Islington main line stations. It is a 5 to 10-minute journey coming from Kings Cross station. During the week trains depart from Moorgate as well. Drayton Park rail station, closest to the stadium, closes on matchdays. Supporters are not advised to arrive by car on matchdays, and there is little parking available around the Emirates Stadium on non-matchdays. Address: Emirates Stadium, London N5 1BU","The Emirates Stadium is located in the recently regenerated and rather quiet Highbury area. There are a few pubs and cafés in the immediate surroundings of the stadium, which is mainly residential, and some options to eat and drink along Holloway Road and Highbury Park road. More nightlife can be found in the Islington area on Upper Street starting south of Highbury & Islington tube station – an approximate 15 to 20-minute walk from the stadium. There are no hotels right near the Emirates Stadium, but a fair few options further north on Seven Sisters Road near Finsbury Park, which is a 15-minute walk from the stadium. 
TheQueens HotelandBest Western Highburyget the best reviews and go for about £100 a night, whereas thePembury Hotel,Woodberry Down Hotel, andCentral Park Hotelare more basic options. Clickherefor all hotels near the Emirates Stadium. Of course, as there are various tube lines passing by the stadium, you can just as well choose a stadium in any part ofCentral London.","Tickets for Arsenal matches can be boughtonline, or by phone 0844 277 3625 (+44 207 649 9003 if calling from abroad). Arsenal is one of the clubs in the Premier League for which it is hardest to get tickets and one will often need an Arsenal membership to be able to acquire tickets. The most basic Red level membership currently costs £33.00 and with it tickets will generally be available in pre-sale. Arsenal matches fall into three pricing categories. Tickets for category C matches, the cheapest, start at £26.00 for most lower-tier seats and range up to £51.00 for a central upper-tier seat. Tickets for category A matches, the most expensive, range from £63.50 for most lower-tier seats to £126.00 for a central upper-tier seat. A further booking fee of about £2.00 applies. Tickets for members are a little bit cheaper."
Premier League,England,Aston Villa,Villa Park,52.50917,-1.88472,Info not available,"Villa Park is located in a typical English urban area. This means that there are a few pubs around as well as some easy eating options, though if you have more time on your hands, Birmingham’s city centre might be a better option. If you arrive by car, you could stop at theStar Cityshopping centre just off exit 6 of the M6. It has the typical food outlets you tend to find at a shopping centre as well as various entertainment options. The nightlife in Birmingham’s city centre is mostly located west and south of Birmingham New Street Station. There are few hotels in the immediate vicinity of the stadium. TheHoliday Inn Expresslocated across Star City shopping centre is probably closest, but is mostly convenient if you get in by car. You can about just walk to the stadium from there. Further toward the centre, there are aCampanile HotelandPremier Inn. They are affordable and get good reviews, but are again most of all convenient for those arriving by car. If travelling by public transport, it is likely best to find a hotel in Birmingham’s centre, and take a bus or train to the match. For all hotels near Villa Park clickhere. For all options in the city centre gohere.","Tickets for Aston Villa matches can be boughtonline, by phone +44 (0) 800 6120 970, or at the Villa Village store at Villa Park. Tickets can also be bought at the same store before the start of the match. Aston Villa only very occasionally sell out. Aston Villa have divided their home games in three pricing categories. Tickets for category A matches, the most expensive, range in price from £25.00 for a lower-tier seat at the North Stand to £45.00 for a central seat at one of the long sides. Tickets for category VV matches, the cheapest, range in price from £20.00 to £37.00."
Premier League,England,Bournemouth,Dean Court,50.73528,-1.83833,"The Vitality Stadium is located in the north-east of Bournemouth at a little over 2 miles from The Square and the Bournemouth Pier in the town centre. The main railway station lies roughly halfway the town centre and the stadium. The walk from the mainrailstation to the stadium will take just under half an hour. The stadium furthermore lies at walking distance from Pokesdown station, which lies south-east of the stadium. If arriving by car, take the A338 towards Bournemouth and exit towards Boscombe. Take the second exit on the roundabout and follow King’s Park Drive to the stadium. There are signs that point to the stadium (or Dean Court) along the way. Address: Dean Court, Kings Park, Bournemouth BH7 7AF","The Vitality Stadium is located in a quiet residential neighbourhood, and apart from a local pub, there is therefore little in the immediate vicinity of the stadium in terms of eating and drinking, which is best done in and around Bournemouth’s town centre. There are neither any hotels directly near the Vitality Stadium, but plenty of options in Bournemouth’stown centre, or even closer in the nearby Boscombe area (which while regenerated, still has a somewhat negative reputation in terms of safety).","Tickets for Bournemouth games can be boughtonline, over the phone +44 (0) 344 576 1910, or in person at the ticket office at the Vitality Stadium or the BIC & Pavilion ticket office on Exeter Road right off The Square in Bournemouth’s town centre. Ticket prices range from £32.00 for a seat behind the goal to £45.00 for a central seat at the Main Stand. Bournemouth currently sell out every match in their first season in the Premier League and one generally needs to have accrued a certain number of loyalty points to be able to obtain tickets. Emailtickets@afcb.co.ukor call +44 (0) 344 576 1910 for more information."
Premier League,England,Brentford,Brentford Community Stadium,51.49083,-0.28861,"Brentford Community Stadium is located in the west of London just north of the river Thames at about 6 miles from central London. There are multiple ways to travel to Brentford Community Stadium by public transport. The nearest station is Kew Bridge station, which is on the rail network. Trains depart from Waterloo station roughly every 20-30 minutes on the weekend. The journey takes about half an hour. Kew Bridge station is right next to the stadium. If you prefer the London Underground, get a District Line train to Gunnersbury, which can be boarded at any of various stop in Central London including Bank, Westminster, Victoria, and South Kensington. The journey takes max. 30 minutes and it is a further 15-minute walk to the stadium. Gunnersbury is also a station on the Overground network with trains running from Stratford in the east and Richmond in the south. It is particularly useful if coming from various places in North or East London. Finally, the stadium is also at walking distance from Acton Town underground station (app. 25 minutes), which is on the Piccadilly Line and runs through central London to Heathrow.",Info not available,"Brentford tickets can be boughtonline, or at the ticket office at Brentford Community Stadium. Ticket prices depend on the game. Games are split between high-profile Cat A games and regular Cat B games. Tickets for Cat A games range in price from £40.00 to £50.00 and those in Cat B games from £35.00 to £45.00. Emailtickets@brentfordfc.comfor more information."
Premier League,England,Brighton & Hove Albion,Falmer Stadium,50.86194,-0.08333,Info not available,"The Amex is located outside of the city of Brighton, bordered by the University of Sussex campus, some residential housing and farmlands. There is little around in terms of eating and drinking, which is recommended to be done in pleasant Brighton. The are neither any hotels in the close vicinity of The Amex, but there is plenty of choice in Brighton on the seafront. Clickherefor an overview of hotels in Brighton.","Tickets for Brighton matches can be boughtonline, or at the Ticket Office at The Amex Stadium. Tickets are also sold at the stadium on the day of the match. Brighton often sell out though so booking in advance is recommended. Ticket prices depend on the opponent. Prices for category C matches, the cheapest, range from £30.00 for a seat behind the goal to £46.00 for a central seat at one of the sides. Prices for category A matches, the most expensive, start at £45.00 and range up to £65.00 for the most expensive seats."


In [0]:
# ==============================================================================
# Part Y: Generate Stadium Guides JSON and Integrate into HTML
# ==============================================================================

import requests
import json
import time

# Load stadium data: convert the Spark DataFrame into a pandas DataFrame so the
# rows can be iterated one at a time in plain Python further down.
stadiums_df = stadium_enrichment_df.toPandas()
# ------------------------------------------------------------------------------
# Function to call LLM and generate stadium guide
# ------------------------------------------------------------------------------
def generate_stadium_guide(team, stadium, directions, food_and_stay, ticket_info):
    """
    Generates a user-friendly guide for a stadium using LLM
    """
    # Handle missing values
    directions = directions if directions and str(directions) != "nan" and directions != "Info not available" else "Information not available"
    food_and_stay = food_and_stay if food_and_stay and str(food_and_stay) != "nan" and food_and_stay != "Info not available" else "Information not available"
    ticket_info = ticket_info if ticket_info and str(ticket_info) != "nan" and ticket_info != "Info not available" else "Information not available"
    
    # If all info is missing, return appropriate message
    if all(x == "Information not available" for x in [directions, food_and_stay, ticket_info]):
        return "No information available for this stadium."
    
    prompt = f"""Create a SHORT guide for {stadium} ({team}). MAX 120 words total.

RAW DATA:
- Directions: {directions}
- Food/Stay: {food_and_stay}
- Tickets: {ticket_info}

Format:
🚗 **Getting There** (3 bullet points)
🍽️ **Food & Stay** (2-3 bullet points)
🎫 **Tickets** (1-2 bullet points only - just price range and where to buy)

Rules: 
- Be very brief. Only essential facts. No intro/outro.
- If a section has "Information not available", SKIP that section entirely. Do not print it."""

    # Call LLM via Databricks Foundation Model API
    try:
        from databricks.sdk import WorkspaceClient
        from databricks.sdk.service.serving import ChatMessage, ChatMessageRole
        
        w = WorkspaceClient()
        
        response = w.serving_endpoints.query(
            name="databricks-meta-llama-3-3-70b-instruct",
            messages=[
                ChatMessage(
                    role=ChatMessageRole.USER,
                    content=prompt
                )
            ],
            max_tokens=400,
            temperature=0.7
        )
        
        return response.choices[0].message.content
        
    except Exception as e:
        # Fallback: Use direct REST API
        try:
            token = dbutils.notebook.entry_point.getDbutils().notebook().getContext().apiToken().get()
            host = spark.conf.get("spark.databricks.workspaceUrl")
            
            url = f"https://{host}/serving-endpoints/databricks-meta-llama-3-3-70b-instruct/invocations"
            
            headers = {
                "Authorization": f"Bearer {token}",
                "Content-Type": "application/json"
            }
            
            payload = {
                "messages": [
                    {"role": "user", "content": prompt}
                ],
                "max_tokens": 400,
                "temperature": 0.7
            }
            
            response = requests.post(url, headers=headers, json=payload, timeout=60)
            response.raise_for_status()
            
            result = response.json()
            return result["choices"][0]["message"]["content"]
            
        except Exception as e2:
            return f"Error generating guide: {str(e2)}"


# ------------------------------------------------------------------------------
# Generate guides for all stadiums and create JSON
# ------------------------------------------------------------------------------
def create_stadium_guides_dict(batch_size=10, delay_seconds=3, stadiums=None):
    """
    Generate guides for all stadiums.

    Args:
        batch_size: Pause after every `batch_size` stadiums. A value of 0 or
            less disables the pause.
        delay_seconds: Length of each pause, in seconds.
        stadiums: pandas DataFrame with the columns Team, Stadium, Country,
            League, Directions, Food_and_Stay and Ticket_Info. Defaults to the
            notebook-level `stadiums_df`.

    Returns:
        dict: Keyed by stadium name; each value holds team, stadium, country,
        league and the generated guide text. When two rows share a stadium
        name, the later row replaces the earlier one (a warning is printed).
    """
    if stadiums is None:
        stadiums = stadiums_df

    guides_dict = {}
    total = len(stadiums)
    failed = 0

    print(f"🚀 Generating guides for {total} stadiums...")

    # Count positions with enumerate: the DataFrame index labels are not
    # guaranteed to be 0..n-1, so they must not drive progress or batching.
    for position, (_, row) in enumerate(stadiums.iterrows(), start=1):
        # Key by stadium name for easy lookup
        stadium_key = row['Stadium']

        print(f"Processing {position}/{total}: {stadium_key}...")

        guide = generate_stadium_guide(
            row["Team"],
            row["Stadium"],
            row["Directions"],
            row["Food_and_Stay"],
            row["Ticket_Info"]
        )

        # The generator reports failures as text instead of raising.
        if str(guide).startswith("Error generating guide:"):
            failed += 1

        if stadium_key in guides_dict:
            print(f"⚠️ Duplicate stadium name '{stadium_key}': replacing the earlier entry.")

        guides_dict[stadium_key] = {
            "team": row["Team"],
            "stadium": row["Stadium"],
            "country": row["Country"],
            "league": row["League"],
            "guide": guide
        }

        # Rate limiting protection (skipped after the final row)
        if batch_size > 0 and position % batch_size == 0 and position < total:
            print(f"⏳ Waiting {delay_seconds} seconds...")
            time.sleep(delay_seconds)

    if failed:
        print(f"⚠️ Generated {total - failed}/{total} guides; {failed} failed (see 'Error generating guide' entries).")
    else:
        print(f"✅ Successfully generated {total} guides!")

    overwritten = total - len(guides_dict)
    if overwritten:
        print(f"ℹ️ {len(guides_dict)} unique stadium keys stored ({overwritten} duplicate names overwritten).")

    return guides_dict


# Generate the guides
stadium_guides = create_stadium_guides_dict()

# Convert to JSON string for HTML embedding
# (ensure_ascii=False keeps accented names and emoji as real characters)
stadium_guides_json = json.dumps(stadium_guides, ensure_ascii=False)

print(f"\n📦 Stadium guides JSON ready! ({len(stadium_guides)} stadiums)")

# ==============================================================================
# Part Z: Save and Create Download Link
# ==============================================================================

# 1. Define the filename and target path in Databricks FileStore
file_name = "stadium_guides.json"
dbfs_path = f"dbfs:/FileStore/{file_name}"

# 2. Save the JSON string to DBFS
# We use dbutils.fs.put to write directly to the distributed storage.
# The outcome is remembered so the download button is only shown for a file
# that was actually written.
save_succeeded = False
try:
    dbutils.fs.put(dbfs_path, stadium_guides_json, overwrite=True)
    save_succeeded = True
    print(f"✅ File saved successfully to: {dbfs_path}")
except Exception as e:
    print(f"❌ Error saving file: {str(e)}")

# 3. Generate a Download Button
# The '/files/' path maps to 'dbfs:/FileStore/' in the workspace URL
html_button = f"""
<div style="margin-top: 20px;">
    <h3>🎉 Processing Complete</h3>
    <p>Your stadium data is ready.</p>
    <a href="/files/{file_name}" download>
        <button style="
            background-color: #1E88E5; 
            color: white; 
            padding: 12px 24px; 
            border: none; 
            border-radius: 4px; 
            font-size: 16px; 
            cursor: pointer;
            font-weight: bold;">
            ⬇️ Download {file_name}
        </button>
    </a>
</div>
"""

# 4. Render the HTML in the notebook — only when the save worked; otherwise the
# link would point at a missing or stale file.
if save_succeeded:
    displayHTML(html_button)
else:
    print("⚠️ Download link not shown because the file was not saved.")


🚀 Generating guides for 274 stadiums...
Processing 1/274: Emirates Stadium...
Processing 2/274: Villa Park...
Processing 3/274: Dean Court...
Processing 4/274: Brentford Community Stadium...
Processing 5/274: Falmer Stadium...
Processing 6/274: Stamford Bridge...
Processing 7/274: Selhurst Park...
Processing 8/274: Goodison Park...
Processing 9/274: Craven Cottage...
Processing 10/274: Portman Road...
⏳ Waiting 3 seconds...
Processing 11/274: King Power Stadium...
Processing 12/274: Anfield...
Processing 13/274: City of Manchester Stadium...
Processing 14/274: Old Trafford...
Processing 15/274: St James' Park...
Processing 16/274: City Ground...
Processing 17/274: St Mary's Stadium...
Processing 18/274: Tottenham Hotspur Stadium...
Processing 19/274: London Stadium...
Processing 20/274: Molineux Stadium...
⏳ Waiting 3 seconds...
Processing 21/274: Mendizorrotza...
Processing 22/274: San Mamés...
Processing 23/274: Metropolitano...
Processing 24/274: Olímpic Lluís Companys...
Processing