In [0]:
from pyspark.sql.functions import expr, concat, lit

# Column names for the demo DataFrame, listed in the same order as the
# fields of each tuple in `data`.
columns = ["AadharNumber", "FullName", "EmailAddress"]

# Three hard-coded sample rows; every field, including the Aadhaar number,
# is a string.
data = [
    ("123456789012", "John Doe", "john.doe@gmail.com"),
    ("987654321098", "Jane Smith", "jane.smith@outlook.com"),
    ("456789123456", "Alan Turing", "alan.turing@yahoo.com"),
]

# NOTE(review): `spark` is not defined in this cell; presumably it is the
# session object injected by the notebook runtime.
df = spark.createDataFrame(data, schema=columns)
df.display()

AadharNumber,FullName,EmailAddress
123456789012,John Doe,john.doe@gmail.com
987654321098,Jane Smith,jane.smith@outlook.com
456789123456,Alan Turing,alan.turing@yahoo.com


In [0]:
def _mask_first_five(column_name):
    """Build a Column that hides the first five characters of a column.

    The result is the literal "*****" followed by the part of the value that
    starts at character 6 (1-based) and runs to the end of the string.

    Args:
        column_name: Name of the string column to mask, interpolated into a
            Spark SQL expression.

    Returns:
        A Column expression producing the masked string.
    """
    remainder = expr(f"substring({column_name}, 6, length({column_name}) - 5)")
    return concat(lit("*****"), remainder)


# Add a masked copy of the Aadhaar number and of the e-mail address, then keep
# the original AadharNumber and FullName next to the two masked columns (the
# raw EmailAddress is not selected).
df_masked = (
    df.withColumn("MaskedAadharNumber", _mask_first_five("AadharNumber"))
    .withColumn("MaskedEmailAddress", _mask_first_five("EmailAddress"))
    .select("AadharNumber", "FullName", "MaskedAadharNumber", "MaskedEmailAddress")
)

df_masked.display()

AadharNumber,FullName,MaskedAadharNumber,MaskedEmailAddress
123456789012,John Doe,*****6789012,*****doe@gmail.com
987654321098,Jane Smith,*****4321098,*****smith@outlook.com
456789123456,Alan Turing,*****9123456,*****turing@yahoo.com


In [0]:
# Register the unmasked DataFrame as the temp view "YourTable" so that the
# %sql cells below can query it by name.
df.createOrReplaceTempView("YourTable")

In [0]:
%sql
-- Mask the first five characters of the Aadhaar number and of the e-mail
-- address for every row of YourTable; the rest of each value is kept as-is.
SELECT
    AadharNumber,
    FullName,
    -- SUBSTRING(col, 6) returns everything from the 6th character onward.
    CONCAT('*****', SUBSTRING(AadharNumber, 6)) AS MaskedAadharNumber,
    CONCAT('*****', SUBSTRING(EmailAddress, 6)) AS MaskedEmailAddress
FROM
    YourTable;


AadharNumber,FullName,MaskedAadharNumber,MaskedEmailAddress
123456789012,John Doe,*****6789012,*****doe@gmail.com
987654321098,Jane Smith,*****4321098,*****smith@outlook.com
456789123456,Alan Turing,*****9123456,*****turing@yahoo.com


In [0]:
%sql
-- Same masking as the previous query, but guarded by a length check: a value
-- is masked only when it is longer than five characters; anything else
-- (including NULL, whose length comparison is not true) becomes
-- 'Invalid Data'.
SELECT
    AadharNumber,
    FullName,
    IF(
        LENGTH(AadharNumber) > 5,
        CONCAT('*****', SUBSTRING(AadharNumber, 6)),
        'Invalid Data'
    ) AS MaskedAadharNumber,
    IF(
        LENGTH(EmailAddress) > 5,
        CONCAT('*****', SUBSTRING(EmailAddress, 6)),
        'Invalid Data'
    ) AS MaskedEmailAddress
FROM
    YourTable;


AadharNumber,FullName,MaskedAadharNumber,MaskedEmailAddress
123456789012,John Doe,*****6789012,*****doe@gmail.com
987654321098,Jane Smith,*****4321098,*****smith@outlook.com
456789123456,Alan Turing,*****9123456,*****turing@yahoo.com
