- **CSV to JSON**

In [None]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import col, udf
from pyspark.sql.types import ArrayType, StringType
import ast

# Obtain the Spark session for this job (an already-running one is reused)
spark = (
    SparkSession.builder
    .appName("TransformGenres")
    .getOrCreate()
)

# OneLake location of the source CSV
input_file = "abfss://Data@onelake.dfs.fabric.microsoft.com/Datasets.Lakehouse/Files/bookers.csv"

# Read the CSV, taking the column names from its first row
df = spark.read.option("header", "true").csv(input_file)

# Function to convert genres string to a list of strings with error handling
def convert_genres_to_list(genres_str):
    """Parse a stringified Python list of genres into a list of strings.

    The function is wrapped as a UDF declared to return
    ``ArrayType(StringType())``, so it always returns a ``list`` of ``str``
    and never raises for bad cell contents.

    Args:
        genres_str: Text such as ``"['Fiction', 'Classics']"``. ``None`` and
            other non-string values are treated as unparseable.

    Returns:
        The parsed items, each converted with ``str()``. An empty list is
        returned when the input is missing, cannot be parsed as a Python
        literal, or parses to something other than a list or tuple.
    """
    if not isinstance(genres_str, str):
        return []
    try:
        # Strip first: leading whitespace is an IndentationError on older
        # Python versions, so this keeps behaviour consistent across versions.
        parsed = ast.literal_eval(genres_str.strip())
    except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
        # TypeError: e.g. an unhashable dict key or set member in the literal.
        # MemoryError / RecursionError: pathologically large or nested input.
        return []
    # A valid literal that is not a sequence (number, bare string, dict, ...)
    # does not fit the declared array type; use the same fallback as errors.
    if not isinstance(parsed, (list, tuple)):
        return []
    return [str(item) for item in parsed]

# Wrap the parser as a Spark UDF whose result type is array<string>
convert_genres_udf = udf(
    f=convert_genres_to_list,
    returnType=ArrayType(StringType()),
)

# Replace the raw Genres text with the parsed array
df = df.withColumn("Genres", convert_genres_udf(df["Genres"]))

# Align column names with the index headers:
# _c0 -> id, Book -> Title, Avg_Rating -> Rating
df = (
    df.withColumnRenamed("_c0", "id")
    .withColumnRenamed("Book", "Title")
    .withColumnRenamed("Avg_Rating", "Rating")
)

# Force the id column to string type
df = df.withColumn("id", col("id").cast("string"))

# Print the rows without truncating cell contents, as a visual check
df.show(truncate=False)

# Write the transformed rows as JSON, replacing any previous output at this path
output_file = "abfss://Data@onelake.dfs.fabric.microsoft.com/Datasets.Lakehouse/Files/bookers.json"  # Adjust the path as needed
df.write.mode("overwrite").json(output_file)

# Read the output back and display it to confirm the JSON structure
transformed_df = spark.read.format("json").load(output_file)
transformed_df.show(truncate=False)
