# Load user data

In [None]:
import pyspark.sql.functions as F
from functools import reduce

In [None]:
# Delta table locations for the user dataset; everything lives under one mount point.
delta_base_path = "/mnt/pinterest_data/delta_tables/"

# Raw (original) user table
raw_delta_user_path = f"{delta_base_path}raw/user"

# Transformed (processed) user table
transformed_delta_user_path = f"{delta_base_path}transformed/user"

In [None]:
# Read the raw user records from their Delta table into a DataFrame.
df_user = spark.read.load(raw_delta_user_path, format="delta")

# Transformations

In [None]:
# Clean the raw user records:
#   1. build a single 'user_name' column from 'first_name' and 'last_name'
#   2. drop the now-redundant 'first_name' / 'last_name' columns
#   3. parse 'date_joined' into a timestamp column
# NOTE(review): F.concat joins the two names with no separator and yields NULL
# when either input is NULL -- confirm this is the intended 'user_name' format.
transformed_df = (
    df_user
    .withColumn("user_name", F.concat(F.col("first_name"), F.col("last_name")))
    .drop("first_name", "last_name")
    # to_timestamp already returns a timestamp column, so no extra cast is needed
    .withColumn("date_joined", F.to_timestamp(F.col("date_joined")))
)

In [None]:
# show() is an action: it forces the lazily-defined transformations to run
# and prints the first rows of the result (20 by default) for a visual check.
transformed_df.show()

# Write cleaned dataframe as a delta table

In [None]:
# Persist the cleaned user data as a Delta table, replacing any previous contents.
transformed_df.write.save(
    transformed_delta_user_path,
    format="delta",
    mode="overwrite",
)