# Transform Refunds Data
1. Extract Specific portion of the string from refund_reason using split function
2. Extract Specific portion of the string from refund_reason using regexp_extract function
3. Extract Date and Time from the refund_timestamp
4. Write Transformed data to the Silver Schema

In [0]:
# Read the Bronze refunds table into a DataFrame and preview it.
refunds_df = spark.read.table("gizmobox.bronze.py_refunds")

display(refunds_df)

### 1. Extract Specific portion of the string from refund_reason using split function

In [0]:
from pyspark.sql.functions import split

# Split refund_reason on ':' once and reuse the resulting array column:
# element 0 becomes refund_reason, element 1 becomes refund_source.
reason_parts = split('refund_reason', ':')

split_refunds_df = refunds_df.select(
    'refund_id',
    'payment_id',
    'refund_timestamp',
    'refund_amount',
    reason_parts[0].alias('refund_reason'),
    reason_parts[1].alias('refund_source'),
)

display(split_refunds_df)

### 2. Extract Specific portion of the string from refund_reason using regexp_extract function

In [0]:
from pyspark.sql.functions import regexp_extract

# Capture group 1 of each pattern is the value that is kept:
#   reason_pattern - the run of non-':' characters at the start, up to the first ':'
#   source_pattern - the run of non-':' characters after the last ':', up to the end
reason_pattern = '^([^:]+):'
source_pattern = ':([^:]+)$'

# Same output columns as the split() variant; this reassigns split_refunds_df.
split_refunds_df = refunds_df.select(
    'refund_id',
    'payment_id',
    'refund_timestamp',
    'refund_amount',
    regexp_extract('refund_reason', reason_pattern, 1).alias('refund_reason'),
    regexp_extract('refund_reason', source_pattern, 1).alias('refund_source'),
)

display(split_refunds_df)

### 3. Extract Date and Time from the refund_timestamp

In [0]:
# Import only the function this cell uses. A wildcard import from
# pyspark.sql.functions rebinds Python builtins such as sum, min, max, round
# and abs to Spark column functions for every cell that runs afterwards.
from pyspark.sql.functions import date_format

# Replace refund_timestamp with two derived columns (refund_date, refund_time)
# and keep the remaining refund columns unchanged.
transformed_refunds_df = split_refunds_df.select(
    'refund_id',
    'payment_id',
    # Format as 'yyyy-MM-dd' text, then cast so refund_date is a DATE column.
    date_format('refund_timestamp', 'yyyy-MM-dd').cast('date').alias('refund_date'),
    # refund_time is left as the formatted 'HH:mm:ss' string (no cast applied).
    date_format('refund_timestamp', 'HH:mm:ss').alias('refund_time'),
    'refund_amount',
    'refund_reason',
    'refund_source',
)

display(transformed_refunds_df)

### 4. Write Transformed data to the Silver Schema

In [0]:
# Persist the transformed refunds to the Silver schema; createOrReplace()
# creates the table or replaces it if it already exists.
silver_refunds_writer = transformed_refunds_df.writeTo('gizmobox.silver.py_refunds')
silver_refunds_writer.createOrReplace()

In [0]:
%sql
-- Read back every row of the Silver refunds table to check the write.
SELECT *
FROM gizmobox.silver.py_refunds;