# Sales Trends by Year

This notebook calculates total sales for each year (2019-2024) using the Order table and visualizes the results.

In [None]:
import pandas as pd
from pyspark.sql.functions import col, sum as spark_sum, year

# Configuration: identifiers that locate the source table in OneLake.
WORKSPACE_NAME = "Fabric_MAAG"
SOURCE_LAKEHOUSE_NAME = "maag_silver"
SOURCE_SCHEMA = "salesadb"
SOURCE_TABLE = "order"

# ABFSS URI of the table, assembled from the identifiers above:
#   abfss://<workspace>@<OneLake host>/<lakehouse>.Lakehouse/Tables/<schema>/<table>
SOURCE_TABLE_PATH = (
    f"abfss://{WORKSPACE_NAME}@onelake.dfs.fabric.microsoft.com"
    f"/{SOURCE_LAKEHOUSE_NAME}.Lakehouse"
    f"/Tables/{SOURCE_SCHEMA}/{SOURCE_TABLE}"
)

# Read the Order table from the lakehouse (stored in Delta format).
df = spark.read.format("delta").load(SOURCE_TABLE_PATH)

# Derive the calendar year of each order from OrderDate.
df = df.withColumn("Year", year(col("OrderDate")))

# Aggregate total sales by year, excluding 2025 and later.
# The year filter runs before the aggregation so excluded rows are never
# grouped, and the sort is the final transformation so the collected rows
# come back in ascending year order. Because "Year" is the grouping key,
# filtering first yields the same groups as filtering the aggregated result.
sales_by_year = (
    df.filter(col("Year") < 2025)
      .groupBy("Year")
      .agg(spark_sum(col("OrderTotal")).alias("TotalSales"))
      .orderBy("Year")
)

# Collect the per-year totals (one row per year) to the driver as a
# pandas DataFrame for printing and plotting.
pdf = sales_by_year.toPandas()

print("Total sales by year (excluding 2025):")
print(pdf)

# Plot sales trends: one line chart of TotalSales against Year.
import matplotlib.pyplot as plt
from matplotlib.ticker import FuncFormatter

fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(pdf["Year"], pdf["TotalSales"], marker='o')
ax.set_title("Total Sales by Year (2019-2024)")
ax.set_xlabel("Year")
ax.set_ylabel("Total Sales (Thousands)")
ax.grid(True)

# Pin the x ticks to the years actually present in the data; the automatic
# locator may otherwise place ticks at fractional "years" (e.g. 2020.5)
# when only a few years are plotted.
ax.set_xticks(pdf["Year"].tolist())

# Show y values in thousands with a thousands separator, e.g. 1,250K.
ax.yaxis.set_major_formatter(FuncFormatter(lambda x, _: f'{int(x/1000):,}K'))

fig.tight_layout()
plt.show()