In [0]:
from pyspark.sql import SparkSession
import pandas as pd
import random

# Create the Spark session for this notebook, or reuse one that already exists
spark = (
    SparkSession.builder
    .appName("Databricks_Catalog_Read")
    .getOrCreate()
)

# Catalog and schema that hold the source table
catalog_name = "workspace"
schema_name = "default"

# Fully qualified three-part table name: <catalog>.<schema>.<table>
table_full_path = ".".join((catalog_name, schema_name, "user_data"))

# Load the table as a Spark DataFrame (no pandas conversion is done here)
df_user = spark.read.table(table_full_path)

In [0]:
# Show the DataFrame loaded above for a quick visual check before transforming it
# (display is not imported in this notebook; presumably supplied by the notebook runtime)
display(df_user)

In [0]:
from pyspark.sql.functions import col, struct, to_timestamp, concat_ws

# Log the incoming column names; useful when the source table's schema changes
print("Available columns:", df_user.columns)

# Build 'user_name' as "<first_name> <last_name>" and then drop the two parts.
# The guard keeps this cell re-runnable: on a second run the name parts are
# already gone and 'user_name' is already present, so the step is skipped.
if "first_name" in df_user.columns and "last_name" in df_user.columns:
    df_user = (
        df_user
        .withColumn("user_name", concat_ws(" ", col("first_name"), col("last_name")))
        .drop("first_name", "last_name")
    )

# Convert 'date_joined' to a timestamp IN PLACE.
# Previously the parsed value was written to a separate 'timestamp' column that
# the select below discarded, so 'date_joined' was saved unconverted.
# NOTE(review): with non-ANSI settings, values that to_timestamp cannot parse
# become NULL; confirm the source format is parseable without an explicit pattern.
df_user = df_user.withColumn("date_joined", to_timestamp(col("date_joined")))

# Keep only the output columns, in their final order
df_user = df_user.select("ind", "user_name", "age", "date_joined")

# Print every row untruncated so full names and timestamps are visible
df_user.show(truncate=False)

In [0]:
%python
# Remove any previous copy of the target table before writing.
# NOTE(review): the write below already uses mode("overwrite"); confirm whether
# the explicit DROP is still needed (e.g. to allow a changed schema).
spark.sql("DROP TABLE IF EXISTS workspace.default.df_user")

# Persist the Spark DataFrame as a managed table, replacing any existing contents
user_writer = df_user.write.mode("overwrite")
user_writer.saveAsTable("workspace.default.df_user")
