# Assignment Solutions

**Expected files on Colab**: `/content/sales_details.csv`, `/content/cust_info.csv`, `/content/prd_info.csv`.


## Setup

In [None]:
from pyspark.sql import SparkSession
from pyspark.sql import functions as F

# One Spark session shared by every answer cell below.
spark = (
    SparkSession.builder
    .appName("Business-Assignment-Answers")
    .getOrCreate()
)

# Treat the first line of each CSV as a header and let Spark infer column types.
csv_reader_options = {"header": True, "inferSchema": True}
sd = spark.read.options(**csv_reader_options).csv("/content/sales_details.csv")
ci = spark.read.options(**csv_reader_options).csv("/content/cust_info.csv")
pr = spark.read.options(**csv_reader_options).csv("/content/prd_info.csv")

### 1) List every field in the Sales Details dataset with its data type to understand what the file contains.

In [None]:
# Print every sales-details column with its data type (types were inferred when the CSV was loaded).
sd.printSchema()

### 2) Find the top 5 products that appear in the most sales lines.

In [None]:
# Count sales lines per product, then rank by that count (highest first).
# Ties on count are ordered by product key so equal counts appear in a stable order.
lines_per_product = sd.groupBy("sls_prd_key").count()
ranked_products = lines_per_product.orderBy(
    F.col("count").desc(),
    F.col("sls_prd_key").asc(),
)
ranked_products.show(5, truncate=False)

### 3) Report the average selling price across all items, rounded to two decimals.

In [None]:
# Average of sls_price over all rows, rounded to 2 decimal places.
avg_price_expr = F.round(F.avg(F.col("sls_price")), 2).alias("avg_price_2dp")
sd.select(avg_price_expr).show(truncate=False)

### 4) Count the number of unique orders present in the dataset.

In [None]:
# Number of distinct order numbers in the sales details.
distinct_orders_expr = F.countDistinct(F.col("sls_ord_num")).alias("orders")
sd.select(distinct_orders_expr).show(truncate=False)

### 5) Rename the order number to a business-friendly label (order_number) and show a small sample.

In [None]:
# Expose sls_ord_num under the business-friendly name order_number and show a sample.
order_numbers = sd.select(F.col("sls_ord_num").alias("order_number"))
order_numbers.show(5, truncate=False)

### 6) Load the customer master (cust_info.csv) and preview the first 5 customers.

In [None]:
# Preview the first 5 customer rows; truncate=False prints full cell values.
ci.show(5, truncate=False)

### 7) List customer IDs and keys to confirm how customers are uniquely identified.

In [None]:
# Show the two customer identifier columns side by side.
identifier_columns = ["cst_id", "cst_key"]
ci.select(*identifier_columns).show(5, truncate=False)

### 8) Show each customer's last name in upper case.

In [None]:
# Upper-cased last name, shown next to the cst_gndr column.
upper_last_name = F.upper(F.col("cst_lastname")).alias("UpperCase_LastName")
ci.select(F.col("cst_gndr"), upper_last_name).show(5, truncate=False)

### 9) Load the product catalog (prd_info.csv) and preview to confirm it is readable.

In [None]:
# Preview the first 5 product rows; truncate=False prints full cell values.
pr.show(5, truncate=False)

### 10) Create a simple SKU by concatenating product ID and product key, and display sample SKUs.

In [None]:
# Build the SKU as prd_id immediately followed by prd_key (no separator).
# Both parts are cast to string first so concat receives string operands.
prd_id_text = F.col("prd_id").cast("string")
prd_key_text = F.col("prd_key").cast("string")
sku_expr = F.concat(prd_id_text, prd_key_text).alias("sku")
pr.select("prd_id", "prd_key", sku_expr).show(5, truncate=False)