In [0]:
from pyspark.sql import SparkSession
import pandas as pd
import random

# Location of the source table inside the Databricks catalog
catalog_name = "workspace"
schema_name = "default"

# Obtain the Spark session for this notebook (reuses an active one if present)
spark = SparkSession.builder.appName("Databricks_Catalog_Read").getOrCreate()

# Fully qualified table name in the form <catalog>.<schema>.geo
table_full_path = ".".join((catalog_name, schema_name, "geo"))

# Load the table as a Spark DataFrame
df_geo = spark.read.table(table_full_path)
    

In [0]:
# Preview the DataFrame loaded from the catalog table.
# NOTE(review): `display` is not defined in this notebook; presumably the
# Databricks notebook built-in for rich rendering — confirm.
display(df_geo)

In [0]:
from pyspark.sql.functions import col, struct, to_timestamp

# Fold 'latitude' and 'longitude' into a single string-typed 'coordinates' column.
# The guard skips this step when either source column is absent from df_geo.
if {"latitude", "longitude"}.issubset(df_geo.columns):
    df_geo = df_geo.withColumn(
        "coordinates",
        # Build the struct and cast it to string in one expression
        struct(col("latitude"), col("longitude")).cast("string"),
    ).drop("latitude", "longitude")

# Parse 'timestamp' with to_timestamp, then put the columns in their final order
df_geo = df_geo.withColumn("timestamp", to_timestamp(col("timestamp"))).select(
    "ind", "country", "coordinates", "timestamp"
)

# Print the result without truncating long cell values
df_geo.show(truncate=False)


In [0]:
%python
# Drop the existing table if it exists
spark.sql("DROP TABLE IF EXISTS workspace.default.df_geo")

# Save Spark DataFrame as managed delta table
df_geo.write.mode("overwrite").saveAsTable("workspace.default.df_geo")
