In [0]:
import json
import os
from datetime import datetime

import requests
from pyspark.sql.functions import explode, expr, from_json, lit, sequence, to_date
from pyspark.sql.types import StructType, StructField, StringType, ArrayType

**Create a Secret Scope and Secret**

```bash
databricks secrets create-scope tfl_analytics_scope
```

```bash
databricks secrets put-secret --json '{
  "scope": "tfl_analytics_scope",
  "key": "tfl_app_key",
  "string_value": "xxx"
}'
```


In [0]:
# --- Run configuration --------------------------------------------------------
# Catalog / schema / volume that make up the landing location.
catalog_name = "robin_huebner"
schema_name = "tfl_analytics"
volume_name = "landing"

# TfL endpoint to call and the sub-directory its payloads are written to.
api_name = "BikePoint"
directory_name = "bike_point"

# The API key is read from a Databricks secret scope instead of being hard-coded.
tfl_app_key = dbutils.secrets.get(scope="tfl_analytics_scope", key="tfl_app_key")

# NOTE: the key is embedded in the query string, so avoid printing or logging `url`.
url = f"https://api.tfl.gov.uk/{api_name}/?app_key={tfl_app_key}"

# Moment this cell ran, formatted as YYYY_MM_DD_HH_MM_SS (naive, driver-local time).
execution_timestamp = datetime.now().strftime("%Y_%m_%d_%H_%M_%S")

In [0]:
# Show the run timestamp that is used as the landing file-name prefix.
print(execution_timestamp)

In [0]:
# Make sure the schema and landing volume exist, and select the catalog/schema
# as session defaults along the way.
# Catalog creation is disabled; the catalog is presumably provisioned elsewhere — TODO confirm.
# spark.sql(f"CREATE CATALOG IF NOT EXISTS {catalog_name}")
qualified_schema = f"{catalog_name}.{schema_name}"
qualified_volume = f"{qualified_schema}.{volume_name}"

spark.sql(f"USE CATALOG {catalog_name}")
spark.sql(f"CREATE SCHEMA IF NOT EXISTS {qualified_schema}")
spark.sql(f"USE SCHEMA {schema_name}")
spark.sql(f"CREATE VOLUME IF NOT EXISTS {qualified_volume}")

In [0]:
# Download the TfL API payload and land it unchanged as a timestamped JSON file.
# The timeout keeps the cell from hanging forever on a stalled connection, and
# raise_for_status() aborts on HTTP errors so an error body is never landed as data.
response = requests.get(url, timeout=60)
response.raise_for_status()
data = response.json()

# NOTE(review): the file name embeds schema_name ("..._tfl_<schema>.json") rather than
# directory_name or api_name — confirm this naming is intended before changing it.
landing_path = (
    f"/Volumes/{catalog_name}/{schema_name}/{volume_name}/{directory_name}/"
    f"{execution_timestamp}_tfl_{schema_name}.json"
)
dbutils.fs.put(landing_path, json.dumps(data), overwrite=True)

In [0]:
%sql
-- Returns a single row whose one column holds every date from 2025-01-01
-- through 2026-12-31 as an array (one element per day).
SELECT
  sequence(
    DATE '2025-01-01',
    DATE '2026-12-31',
    INTERVAL 1 DAY
  ) AS calendar_date

In [0]:
%sql
-- Same date range as an array, exploded so each day becomes its own row.
SELECT
  explode(
    sequence(
      DATE '2025-01-01',
      DATE '2026-12-31',
      INTERVAL 1 DAY
    )
  ) AS calendar_date

In [0]:

# Build a one-column DataFrame ("date") with one row per calendar day of January 2023.
# The step has to be an interval-typed column: lit("interval 1 day") is only a string
# literal, which sequence() rejects for date bounds, so the interval is built with expr().
date_df = spark.range(1).select(
    explode(
        sequence(
            to_date(lit("2023-01-01")),
            to_date(lit("2023-01-31")),
            expr("INTERVAL 1 DAY"),
        )
    ).alias("date")
)

In [0]:
# Render the generated dates to check the result visually.
display(date_df)