In [0]:
from pyspark.sql.functions import*
from pyspark.sql.types import StructType,StructField, StringType, IntegerType,LongType
from pyspark.sql.functions import row_number,rank,dense_rank
from pyspark.sql.window import Window
from pyspark.sql.functions import udf
from pyspark.sql.functions import current_date,date_format,to_date,datediff,months_between,add_months

In [0]:
# Sample rows: (bank name, city, account number).
data = [
    ("HDFC", "Chennai", 1050010123940000),
    ("CANARA", "Bangalore", 2050010123940001),
    ("INDIAN", "Hyderabad", 3050010123940002),
    ("UNION", "Mumbai", 4050010123940003),
]

# Explicit schema; the account number is declared as LongType rather than
# IntegerType (the sample values have 16 digits).
schema = (
    StructType()
    .add("Bank", StringType())
    .add("Address", StringType())
    .add("AccountNumber", LongType())
)

df = spark.createDataFrame(data=data, schema=schema)
display(df)

Bank,Address,AccountNumber
HDFC,Chennai,1050010123940000
CANARA,Bangalore,2050010123940001
INDIAN,Hyderabad,3050010123940002
UNION,Mumbai,4050010123940003


In [0]:
# Build a masked copy of the account number: a fixed run of twelve asterisks
# followed by the last four characters of the number rendered as a string.
account_text = col("AccountNumber").cast("string")
last_four = substring(account_text, -4, 4)  # negative start position counts from the end
hidden_prefix = lit('*' * 12)

star_df = df.withColumn("AccountNumberStared", concat(hidden_prefix, last_four))
display(star_df)

Bank,Address,AccountNumber,AccountNumberStared
HDFC,Chennai,1050010123940000,************0000
CANARA,Bangalore,2050010123940001,************0001
INDIAN,Hyderabad,3050010123940002,************0002
UNION,Mumbai,4050010123940003,************0003


In [0]:
# Load the sample file with the CSV reader, treating the first line as data
# (header disabled), then name the single resulting column "Col1".
# NOTE: this rebinds `df`, which an earlier cell bound to the bank DataFrame.
sample_path = "dbfs:/FileStore/sample12__1_.csv"
csv_reader = spark.read.format("csv").option("header", "false")

df = csv_reader.load(sample_path)
df1 = df.toDF("Col1")
df1.show(truncate=False)

+--------------------------------------------------------------------------+
|Col1                                                                      |
+--------------------------------------------------------------------------+
|1001|Ram|28|Java|1002|Raj|24|Database|1004|Jam|28|DotNet|1005|Kesh|25|Java|
+--------------------------------------------------------------------------+



In [0]:
from pyspark.sql import functions as f

# The pattern matches four consecutive, lazily matched groups that each end in a
# pipe; "$0" in the replacement is the whole match, so a newline is appended
# after every run of four pipe-terminated fields.
four_fields_pattern = "(.*?\\|){4}"
match_plus_newline = "$0\n"

df2 = df1.withColumn(
    'Col2',
    f.regexp_replace(f.col("Col1"), four_fields_pattern, match_plus_newline),
)
df2.select("Col2").show(truncate=False)

+--------------------------------------------------------------------------------+
|Col2                                                                            |
+--------------------------------------------------------------------------------+
|1001|Ram|28|Java|\n1002|Raj|24|Database|\n1004|Jam|28|DotNet|\n1005|Kesh|25|Java|
+--------------------------------------------------------------------------------+



In [0]:
# Column names for the pipe-separated fields of each record, in positional order.
df_header=['eno','ename','age','tech']

# Turn Col2 into one row per record (split on "\n", then explode), split each
# record on a literal pipe, and project every array position to its named column.
# - r"\|" is a raw string: split() takes a regex, so the pipe must be escaped, and
#   a plain "\|" is an invalid Python escape sequence that raises a warning.
# - enumerate() supplies each position directly instead of a df_header.index()
#   lookup per name, which also stays correct if a name ever appears twice.
df2.select(f.explode(f.split("Col2","\n")))\
    .select(f.split("col",r"\|").alias("value"))\
    .select(*[f.col("value").getItem(pos).alias(name) for pos,name in enumerate(df_header)]).show()

+----+-----+---+--------+
| eno|ename|age|    tech|
+----+-----+---+--------+
|1001|  Ram| 28|    Java|
|1002|  Raj| 24|Database|
|1004|  Jam| 28|  DotNet|
|1005| Kesh| 25|    Java|
+----+-----+---+--------+



In [0]:
# NOTE(review): this cell repeats the preceding cell's logic verbatim and
# produces the same table; consider removing one of the two.

# Column names for the pipe-separated fields of each record, in positional order.
df_header=['eno','ename','age','tech']

# One row per record (split Col2 on "\n", then explode), each record split on a
# literal pipe, then each array position selected under its header name.
# The pipe pattern is written as a raw string because "\|" in a normal string
# literal is an invalid escape sequence; positions come from enumerate() rather
# than df_header.index(), which would return the first match for repeated names.
(
    df2.select(f.explode(f.split("Col2","\n")))
    .select(f.split("col",r"\|").alias("value"))
    .select(*[f.col("value").getItem(pos).alias(name) for pos,name in enumerate(df_header)])
    .show()
)

+----+-----+---+--------+
| eno|ename|age|    tech|
+----+-----+---+--------+
|1001|  Ram| 28|    Java|
|1002|  Raj| 24|Database|
|1004|  Jam| 28|  DotNet|
|1005| Kesh| 25|    Java|
+----+-----+---+--------+



In [0]:
# Step-by-step view of the parsing pipeline: show the intermediate result after
# each stage (split into records, explode to rows, split each row into fields).

# Not used in this cell; the assignment is kept so the global stays defined
# exactly as before for any cell that relies on it.
df_header=['eno','ename','age','tech']

# Stage 1: Col2 split on "\n" into an array of record strings.
record_array = f.split("Col2","\n")
df2.select(record_array).show(truncate=False)

# Stage 2: one row per record; explode() names its output column "col".
exploded_df = df2.select(f.explode(record_array))
exploded_df.show(truncate=False)

# Stage 3: each record split on a literal pipe. r"\|" is a raw string because
# "\|" in a normal string literal is an invalid Python escape sequence.
# Records that end with a pipe yield a trailing empty element in the array.
exploded_df.select(f.split("col",r"\|").alias("value")).show(truncate=False)

+----------------------------------------------------------------------------------+
|split(Col2, \n, -1)                                                               |
+----------------------------------------------------------------------------------+
|[1001|Ram|28|Java|, 1002|Raj|24|Database|, 1004|Jam|28|DotNet|, 1005|Kesh|25|Java]|
+----------------------------------------------------------------------------------+

+---------------------+
|col                  |
+---------------------+
|1001|Ram|28|Java|    |
|1002|Raj|24|Database||
|1004|Jam|28|DotNet|  |
|1005|Kesh|25|Java    |
+---------------------+

+---------------------------+
|value                      |
+---------------------------+
|[1001, Ram, 28, Java, ]    |
|[1002, Raj, 24, Database, ]|
|[1004, Jam, 28, DotNet, ]  |
|[1005, Kesh, 25, Java]     |
+---------------------------+

