In [0]:
import requests
import json
import os
from pyspark.sql.functions import from_json, expr, lit
from pyspark.sql.types import StructType, StructField, StringType, ArrayType

**Create a Secret Scope and Secret**

```bash
databricks secrets create-scope tfl_analytics_scope
```

```bash
databricks secrets put-secret --json '{
  "scope": "tfl_analytics_scope",
  "key": "tfl_app_key",
  "string_value": "xxx"
}'
```


In [0]:
# Read the TfL API key from the Databricks secret scope instead of hardcoding it in the notebook.
tfl_app_key = dbutils.secrets.get(
    scope="tfl_analytics_scope",
    key="tfl_app_key",
)

# BikePoint endpoint, with the key supplied through the `app_key` query parameter.
url = "https://api.tfl.gov.uk/BikePoint/?app_key=" + tfl_app_key

# TfL API names; only the BikePoint endpoint is built into `url` above.
apis = ["BikePoint", "Crowding"]

In [0]:
%sql
-- Create the catalog used by this notebook; does nothing if it already exists.
CREATE CATALOG IF NOT EXISTS robin_huebner;


In [0]:
%sql
-- Create the schema for this notebook's objects; does nothing if it already exists.
CREATE SCHEMA IF NOT EXISTS robin_huebner.tfl_analytics;
-- Make it the current schema so unqualified object names in later statements resolve here.
USE robin_huebner.tfl_analytics;
-- Create the `landing` volume that the raw API response file is written to.
CREATE VOLUME IF NOT EXISTS robin_huebner.tfl_analytics.landing;

In [0]:
# Call the TfL BikePoint endpoint. Without a timeout, `requests` waits indefinitely
# on a stalled connection, which would hang the notebook run.
response = requests.get(url, timeout=30)

# Fail fast on HTTP errors (invalid key, rate limiting, server errors) so that an
# error payload never overwrites the landing file or gets ingested downstream.
response.raise_for_status()
data = response.json()

# Persist the raw payload as one JSON file in the landing volume, replacing the previous snapshot.
dbutils.fs.put("/Volumes/robin_huebner/tfl_analytics/landing/bike_point/tfl_data.json", json.dumps(data), overwrite=True)

In [0]:
# Load the raw BikePoint snapshot from the landing volume
df_raw = spark.read.json("/Volumes/robin_huebner/tfl_analytics/landing/bike_point/tfl_data.json")

# Keys to pull out of the `additionalProperties` array of key/value structs
fields_to_extract = [
    "TerminalName",
    "Installed",
    "Locked",
    "InstallDate",
    "RemovalDate",
    "Temporary",
    "NbBikes",
    "NbEmptyDocks",
    "NbDocks",
    "NbStandardBikes",
    "NbEBikes"
]

# One SQL expression per key: take the first array entry whose `key` matches
# and expose its `value` as a column named after the key
expressions_extract = [
    f"filter(additionalProperties, x -> x.key = '{field}')[0].value as {field}"
    for field in fields_to_extract
]

# Keep every original column and add the extracted ones
df_extracted = df_raw.selectExpr("*", *expressions_extract)

# Columns to keep, mapped from source name to target (snake_case) name
fields_to_rename = {
    "id": "bikepoint_id",
    "commonName": "bikepoint_name",
    "lat": "bikepoint_latitude",
    "lon": "bikepoint_longitude",
    "TerminalName": "terminal_name",
    "Installed": "installed",
    "Locked": "locked",
    "InstallDate": "install_date",
    "RemovalDate": "removal_date",
    "Temporary": "temporary",
    "NbBikes": "bike_count",
    "NbEmptyDocks": "empty_dock_count",
    "NbDocks": "dock_count",
    "NbStandardBikes": "standard_bike_count",
    "NbEBikes": "ebike_count",
}

# Projection that both drops unlisted columns and renames the kept ones
expressions_rename = [
    f"{old_name} as {new_name}"
    for old_name, new_name in fields_to_rename.items()
]

# Apply the projection (expressions unpacked as varargs, matching the extract step above)
df_renamed = df_extracted.selectExpr(*expressions_rename)

# Add audit columns: source system, the user running the ingestion, and the load time
df = (
    df_renamed.withColumn("source_system", lit("TFL"))
    .withColumn("ingestion_user", expr("CURRENT_USER()"))
    .withColumn("ingestion_timestamp", expr("CURRENT_TIMESTAMP()"))
)

# Append to the silver table. The name is fully qualified so the write does not
# depend on a `USE` statement having been run earlier in the same session.
target_table = "robin_huebner.tfl_analytics.silver_tfl_bike_point"
df.write.mode("append").saveAsTable(target_table)

# Preview the rows. NOTE: `df` is lazy, so this runs the query again and the
# `ingestion_timestamp` shown here can differ from the value that was written.
display(df)