In [0]:
from pyspark.sql.functions import lit
from datetime import datetime


# Authenticate to the ADLS Gen2 account with its access key. The key is read
# from a Databricks secret scope at runtime so that it never appears in the
# notebook source or its revision history.
# NOTE(review): the scope and key names below are placeholders -- create them
# (or point them at an existing secret) before running, and rotate any account
# key that has ever been stored in plain text.
spark.conf.set(
    "fs.azure.account.key.ipldataadlsg2.dfs.core.windows.net",
    dbutils.secrets.get(scope="ipl-secrets", key="ipldataadlsg2-account-key"),
)


def list_files_in_blob(container_name, storage_account, folder_path=""):
    """List the entries found under a folder of an ADLS Gen2 container.

    The ``abfss://`` URI is assembled from the container, storage account and
    optional folder path, then listed with ``dbutils.fs.ls``.

    Returns a list with the ``path`` of every listed entry. If the listing
    fails for any reason the error is printed and an empty list is returned.
    """
    host = f"{storage_account}.dfs.core.windows.net"
    blob_path = f"abfss://{container_name}@{host}/{folder_path}"
    try:
        paths = []
        for entry in dbutils.fs.ls(blob_path):
            paths.append(entry.path)
    except Exception as e:
        # Best effort: report the problem and let the caller see "no files".
        print(f"Error: {e}")
        return []
    return paths

def read_file(blob_path):
    """Load a CSV file into a Spark DataFrame.

    The file is read with a header row and schema inference enabled.

    Returns a ``(DataFrame, file_name)`` pair, where ``file_name`` is the last
    ``/``-separated segment of ``blob_path``. If anything fails the error is
    printed and ``(None, None)`` is returned.
    """
    try:
        reader = (
            spark.read.format("csv")
            .option("header", "true")
            .option("inferSchema", "true")
        )
        frame = reader.load(blob_path)
        file_name = blob_path.rsplit("/", 1)[-1]
    except Exception as e:
        print(f"Error: {e}")
        return None, None
    return frame, file_name
   
def write_file(df, blob_base_path, source_file):
    """Write a Spark DataFrame as Parquet with audit columns added.

    Two columns are appended before writing:

    * ``ingestion_date`` -- the current local time, formatted as a
      ``%Y-%m-%d %H:%M:%S`` string.
    * ``source_file`` -- the ``source_file`` argument.

    The output location is ``<blob_base_path>/<name>_bronze`` where ``<name>``
    is the part of ``source_file`` before its first ``.``.

    Returns ``source_file`` on success. On failure the error is printed and
    ``None`` is returned, so the result can be used directly as a success
    check.
    """
    try:
        base_name = source_file.split(".")[0]
        full_path = f"{blob_base_path}/{base_name}_bronze"

        ingestion_date = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        df = df.withColumn("ingestion_date", lit(ingestion_date)) \
            .withColumn("source_file", lit(source_file))

        # Parquet carries its own schema; there is no "header" option to set.
        df.write.format("parquet").save(full_path)
        print(f"Data written to: {full_path}")
        return source_file
    except Exception as e:
        print(f"Error: {e}")
        return None

def delete_file_from_blob(blob_path):
    """Remove the file or folder at ``blob_path`` (a full abfss path).

    Prints a confirmation when the call completes, or the error message if it
    raises. Nothing is returned either way.
    """
    try:
        # The second positional argument asks dbutils for a recursive delete.
        dbutils.fs.rm(blob_path, True)
        message = f"Successfully deleted: {blob_path}"
    except Exception as e:
        message = f"Error deleting file: {e}"
    print(message)


# cleaning and transformations 
from pyspark.sql.functions import col, sum as s, when
def null_counts(df):
    """Return an aggregated DataFrame holding the null count of every column.

    For each column of ``df`` a 1/0 flag (1 where the value is null) is summed
    and the result is aliased to the original column name.
    """
    per_column = []
    for name in df.columns:
        is_null_flag = when(col(name).isNull(), 1).otherwise(0)
        per_column.append(s(is_null_flag).alias(name))
    return df.select(per_column)


def write_file_to_silver(df, source_file, storage_account="ipldataadlsg2",
                         container="ipl-silver"):
    """
    Writes a Spark DataFrame as Parquet to the silver layer, overwriting any
    existing output.

    The target is
    ``abfss://<container>@<storage_account>.dfs.core.windows.net/<name>-silver/``
    where ``<name>`` is the part of ``source_file`` before its first ``.``.
    The trailing slash makes the target a folder rather than a single file.

    Args:
        df: Spark DataFrame to write.
        source_file: Name of the originating file; used to derive the folder.
        storage_account: ADLS Gen2 account name (defaults to 'ipldataadlsg2').
        container: Target container (defaults to 'ipl-silver').

    Returns:
        True when the write completed, False when it failed (the error is
        printed rather than raised).
    """
    try:
        base_name = source_file.split(".")[0]
        full_path = (
            f"abfss://{container}@{storage_account}.dfs.core.windows.net/"
            f"{base_name}-silver/"
        )
        # Parquet carries its own schema; there is no "header" option to set.
        df.write.format("parquet").mode("overwrite").save(full_path)
        return True
    except Exception as e:
        print(f"Error: {e}")
        return False

def write_df_to_sql(df, table_name):
    """Write a Spark DataFrame to ``dbo.<table_name>`` over JDBC.

    The table is written in ``overwrite`` mode. Connection settings are read
    from the module-level names ``jdbc_url``, ``user``, ``password`` and
    ``driver``.
    NOTE(review): those names are not defined in the visible part of this
    file -- presumably an earlier notebook cell sets them; confirm.

    Returns:
        True when the write completed, False when it failed (the error is
        printed rather than raised).
    """
    try:
        df.write \
            .format("jdbc") \
            .option("url", jdbc_url) \
            .option("dbtable", f"dbo.{table_name}") \
            .option("user", user) \
            .option("password", password) \
            .option("driver", driver) \
            .mode("overwrite") \
            .save()
        return True
    except Exception as e:
        print(f"Error: {e}")
        return False