In [0]:
import pandas as pd
from pyspark.sql.functions import to_date, col

In [0]:
def parse_date(date):
    """Parse a single date value into a pandas ``Timestamp``.

    Parameters
    ----------
    date : Any
        The raw value to parse (typically a date string).

    Returns
    -------
    pandas.Timestamp, pandas.NaT or None
        The parsed timestamp; ``NaT`` when the value cannot be interpreted as
        a date (``errors='coerce'``); ``None`` when pandas rejects the input
        with a data-level error.
    """
    try:
        # pandas exposes `to_datetime` (there is no `pd.to_date`); the previous
        # call raised AttributeError, which the blanket `except` swallowed, so
        # every value silently came back as None.
        return pd.to_datetime(date,
                              dayfirst=True,    # Prefer day-first for ambiguous dates (not strict)
                              errors='coerce')  # NaT for values that cannot be parsed
    except (ValueError, TypeError, OverflowError):
        # Best effort for bad data only: programming errors (e.g. AttributeError)
        # must surface instead of turning the whole column into None.
        return None

In [0]:
# Spark performs the read/filter so the query is distributed across the cluster
# (Pandas would run it on a single machine).
df = (
    spark.read.table("workspace.sc_silver.propostas_realizadas")
    .select(["_fivetran_id", "data_de_validade_da_proposta"])
    .filter("data_de_validade_da_proposta IS NOT NULL")
)

# Pandas is used for the parsing step: parse_date is applied to each value individually.
pandas_df = df.toPandas()
pandas_df["data_de_validade_da_proposta"] = (
    pandas_df["data_de_validade_da_proposta"].apply(parse_date)
)

# Expose the parsed values to SQL through a temp view, then write them back
# onto the matching rows (joined on _fivetran_id).
parsed_df = spark.createDataFrame(pandas_df)
parsed_df.createOrReplaceTempView("parsed_dates")
spark.sql("""MERGE INTO workspace.sc_silver.propostas_realizadas AS target
             USING parsed_dates AS source
             ON target._fivetran_id = source._fivetran_id
             WHEN MATCHED THEN 
                UPDATE SET target.data_de_validade_da_proposta = source.data_de_validade_da_proposta""")

In [0]:
# Convert the validity-date column with to_date and rewrite the Delta table in place.
# NOTE(review): the "d-M-yyyy" pattern assumes the column holds dash-separated
# day-month-year strings — confirm against the actual data.
df = (
    spark.read.table("workspace.sc_silver.propostas_realizadas")
    .withColumn(
        "data_de_validade_da_proposta",
        to_date(col("data_de_validade_da_proposta"), "d-M-yyyy"),
    )
)

# overwriteSchema lets the overwrite replace the column's type in the table schema.
(
    df.write
    .format("delta")
    .mode("overwrite")
    .option("overwriteSchema", "true")
    .saveAsTable("workspace.sc_silver.propostas_realizadas")
)
