In [0]:
from pyspark.sql.types import StructType, StructField, StringType, DateType, IntegerType
import pyspark.sql.functions as f
from pyspark.sql.functions import input_file_name, col, lit

In [0]:
# OAuth 2.0 client-credentials settings handed to the ABFS driver when the
# container is mounted (passed as `extra_configs` in the mount cell).
# The id/secret values are blank and the endpoint carries a "<>" placeholder
# where the directory (tenant) id belongs; all three must be filled in.
# NOTE(review): prefer reading the id/secret from a Databricks secret scope
# rather than pasting them into the notebook.
oauth_client_id = ""
oauth_client_secret = ""
oauth_token_endpoint = "https://login.microsoftonline.com/<>/oauth2/token"

configs = {
    "fs.azure.account.auth.type": "OAuth",
    "fs.azure.account.oauth.provider.type": "org.apache.hadoop.fs.azurebfs.oauth2.ClientCredsTokenProvider",
    "fs.azure.account.oauth2.client.id": oauth_client_id,
    "fs.azure.account.oauth2.client.secret": oauth_client_secret,
    "fs.azure.account.oauth2.client.endpoint": oauth_token_endpoint,
}

In [0]:
# Mount the ADLS Gen2 container on DBFS so later cells can address it through
# /mnt/loksabhaattendance.
# dbutils.fs.mount raises when the mount point is already in use, which made
# this cell fail on every re-run of the notebook; mount only when it is absent.
attendance_mount_point = "/mnt/loksabhaattendance"

already_mounted = any(
    mount.mountPoint == attendance_mount_point for mount in dbutils.fs.mounts()
)
if not already_mounted:
    dbutils.fs.mount(
        source="abfss://container1@sanskarstorageaccount.dfs.core.windows.net",
        mount_point=attendance_mount_point,
        extra_configs=configs)

In [0]:
# Explicit schema applied to the attendance CSV files when they are read,
# so column types are not inferred from the data.
# mpsno and memberName are declared non-nullable here; note that the recorded
# printSchema output still reports them as nullable = true.
schema = (
    StructType()
    .add('mpsno', IntegerType(), False)
    .add('memberName', StringType(), False)
    .add('attendanceStatus', StringType(), True)
    .add('division', StringType(), True)
)


In [0]:
adls_path = '/mnt/loksabhaattendance/loksabha_attendance_files'

# Load every CSV below adls_path using the explicit schema; the first line of
# each file is treated as a header.
attendance_raw_df = (
    spark.read.format("csv")
    .schema(schema)
    .option("header", "true")
    .load(adls_path)
)

# The date is encoded in the source file's name (parsed below as yyyy_M_d).
# Take the LAST path segment rather than a fixed index: a hard-coded position
# silently yields the wrong segment (or null) as soon as the mount point or
# directory depth changes.
source_file_name = f.element_at(f.split(input_file_name(), '[/]'), -1)
attendance_dated_df = attendance_raw_df.withColumn(
    'date', f.split(source_file_name, '[.]').getItem(0)
)

# 'loksabha' and 'session' are not part of `schema`.
# NOTE(review): presumably Spark adds them through partition discovery from
# directory names such as loksabha=<n>/session=<n> — confirm against the
# storage layout.
attendance_df = attendance_dated_df.select(
    'mpsno',
    'memberName',
    'loksabha',
    'session',
    # to_date already yields a DateType column, so no extra cast is needed.
    f.to_date(col('date'), 'yyyy_M_d').alias('date'),
    'attendanceStatus'
)

# Quick sanity checks: resulting schema, a sample of rows, and the row count.
attendance_df.printSchema()
attendance_df.show(10, truncate=False)
print(attendance_df.count())

root
 |-- mpsno: integer (nullable = true)
 |-- memberName: string (nullable = true)
 |-- loksabha: integer (nullable = true)
 |-- session: integer (nullable = true)
 |-- date: date (nullable = true)
 |-- attendanceStatus: string (nullable = true)

+-----+-----------------------------------------+--------+-------+----------+----------------+
|mpsno|memberName                               |loksabha|session|date      |attendanceStatus|
+-----+-----------------------------------------+--------+-------+----------+----------------+
|4589 |Shri Narendra Modi                       |17      |8      |2022-02-11|NS              |
|4268 |Shri Rajnath Singh                       |17      |8      |2022-02-11|NS              |
|5021 |Shri Amit Shah                           |17      |8      |2022-02-11|NS              |
|4923 |Shri Nitin Jairam Gadkari                |17      |8      |2022-02-11|NS              |
|3979 |Shri Devaragunda Venkappa Sadananda Gowda|17      |8      |2022-02-11|NS       

In [0]:
attendance_url = '/mnt/loksabhaattendance/consolidated_attendance_file/'

# Persist the consolidated attendance data as a Delta table at attendance_url,
# replacing whatever was written there before.
# The former option("header", "true") was dropped: it is a CSV option and has
# no meaning for the Delta writer.
attendance_df.write.format('delta').mode("overwrite").save(attendance_url)