In [18]:
import os
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

# The warehouse CSVs live under <parent of the current working directory>/data/warehouse.
BASE_DIR = os.path.abspath(os.path.join(os.getcwd(), ".."))
DWH_DIR = os.path.join(BASE_DIR, "data", "warehouse")

# Star schema: three dimension tables and one fact table.
DimDate = pd.read_csv(os.path.join(DWH_DIR, "DimDate.csv"))
DimClient = pd.read_csv(os.path.join(DWH_DIR, "DimClient.csv"))
DimEmployee = pd.read_csv(os.path.join(DWH_DIR, "DimEmployee.csv"))
FactSales = pd.read_csv(os.path.join(DWH_DIR, "FactSales.csv"))

# Left joins keep every fact row even when a surrogate key has no match in a
# dimension; the unmatched attributes are then NaN and are handled below.
df = (
    FactSales
    .merge(DimDate[["sk_date", "year", "month"]], on="sk_date", how="left")
    .merge(DimClient[["sk_client", "company_name"]], on="sk_client", how="left")
    .merge(DimEmployee[["sk_employee", "employee_name"]], on="sk_employee", how="left")
)

# Build the "YYYY-MM" label from integer values. A single unmatched sk_date
# turns year/month into floats, and a plain astype(str) would then produce
# labels such as "2006.0-1.0" (or "nan-nan"), so go through nullable integers
# and leave the label missing where the date is unknown.
year_num = pd.to_numeric(df["year"], errors="coerce")
month_num = pd.to_numeric(df["month"], errors="coerce")
has_period = year_num.notna() & month_num.notna()
df["year_month"] = (
    year_num.astype("Int64").astype(str)
    + "-"
    + month_num.astype("Int64").astype(str).str.zfill(2)
).where(has_period)

# Explicit placeholders for facts without a matching client / employee.
df["company_name"] = df["company_name"].fillna("Unknown Client")
df["employee_name"] = df["employee_name"].fillna("Unknown Employee")

# Non-numeric or missing amounts count as 0 in the aggregations.
df["total_amount"] = pd.to_numeric(df["total_amount"], errors="coerce").fillna(0)

df.head()


Unnamed: 0,fact_id,bk_order_id,sk_client,sk_employee,sk_date,quantity,unit_price,discount,total_amount,delivery_status,year,month,company_name,employee_name,year_month
0,1,30,,10,20060115,100.0,14.0,0.0,1400.0,Livr√©e,2006,1,Unknown Client,Anne Dodsworth,2006-01
1,2,30,,10,20060115,30.0,3.5,0.0,105.0,Livr√©e,2006,1,Unknown Client,Anne Dodsworth,2006-01
2,3,31,,4,20060120,10.0,30.0,0.0,300.0,Livr√©e,2006,1,Unknown Client,Janet Leverling,2006-01
3,4,31,,4,20060120,10.0,53.0,0.0,530.0,Livr√©e,2006,1,Unknown Client,Janet Leverling,2006-01
4,5,31,,4,20060120,10.0,3.5,0.0,35.0,Livr√©e,2006,1,Unknown Client,Janet Leverling,2006-01


In [17]:
# Collapse order lines to one row per (order, delivery status) pair so that
# each order is counted once per status.
orders = df.loc[:, ["bk_order_id", "delivery_status"]].drop_duplicates()

# Distinct order ids overall and per delivery status.
nbr_commandes = orders["bk_order_id"].nunique()
nbr_livree = orders.loc[orders["delivery_status"].eq("Livr√©e"), "bk_order_id"].nunique()
nbr_non_livree = orders.loc[orders["delivery_status"].eq("Non Livr√©e"), "bk_order_id"].nunique()

# Two-column summary table: KPI label and its value.
kpi = pd.DataFrame(
    [
        ("Nombre total de commandes", nbr_commandes),
        ("Commandes livr√©es", nbr_livree),
        ("Commandes non livr√©es", nbr_non_livree),
    ],
    columns=["KPI", "Valeur"],
)
kpi


Unnamed: 0,KPI,Valeur
0,Nombre total de commandes,870
1,Commandes livr√©es,841
2,Commandes non livr√©es,29


In [16]:
# Number of (order, status) rows per delivery status, as a two-column frame.
status_counts = (
    orders["delivery_status"]
    .value_counts()
    .reset_index()
    .set_axis(["delivery_status", "nbr_commandes"], axis=1)
)

# Pie chart of the order split by status, with a fixed colour per status.
fig = px.pie(
    status_counts,
    names="delivery_status",
    values="nbr_commandes",
    color="delivery_status",
    color_discrete_map={"Livr√©e": "green", "Non Livr√©e": "red"},
    title="Statut des commandes",
)
fig.show()


In [15]:
# One row per (order, employee, delivery status) combination.
orders_emp = df.loc[:, ["bk_order_id", "employee_name", "delivery_status"]].drop_duplicates()

# Distinct orders handled by each employee, broken down by delivery status.
emp_status = (
    orders_emp
    .groupby(["employee_name", "delivery_status"])["bk_order_id"]
    .nunique()
    .reset_index(name="nbr_commandes")
)

# Stacked bars: one bar per employee, one segment per delivery status.
fig = px.bar(
    emp_status,
    x="employee_name",
    y="nbr_commandes",
    color="delivery_status",
    color_discrete_map={"Livr√©e": "green", "Non Livr√©e": "red"},
    barmode="stack",
    title="Livraison par employ√©",
)
fig.update_layout(
    xaxis_title="Employ√©",
    yaxis_title="Nombre de commandes",
)
fig.show()


In [20]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import ipywidgets as widgets
from IPython.display import display, clear_output, Markdown
import itertools
import numpy as np

# -----------------------------
# 0) Harmonise the columns
# -----------------------------
# Employee name: the OLAP view below reads the "Employee_name" column.
# Missing values are left as NaN here and replaced by the placeholder further
# down; calling astype(str) first would turn them into the literal "nan" and
# make the later fillna a no-op.
if "Employee_name" not in df.columns:
    if "employee_name" in df.columns:
        df["Employee_name"] = df["employee_name"]
    elif {"first_name", "last_name"}.issubset(df.columns):
        full_name = (
            df["first_name"].fillna("").astype(str)
            + " "
            + df["last_name"].fillna("").astype(str)
        ).str.strip()
        # Both parts missing -> treat the whole name as missing.
        df["Employee_name"] = full_name.mask(full_name == "")
    else:
        df["Employee_name"] = "Unknown Employee"

# Client name
if "company_name" not in df.columns:
    df["company_name"] = "Unknown Client"

# Fill first, then stringify, so that missing names become the placeholder.
df["company_name"] = df["company_name"].fillna("Unknown Client").astype(str)
df["Employee_name"] = df["Employee_name"].fillna("Unknown Employee").astype(str)

# Date columns (we need year + month + year_month)
if "year" not in df.columns or "month" not in df.columns:
    # Try to build them from full_date if it exists, otherwise from sk_date,
    # which is parsed as a YYYYMMDD key.
    if "full_date" in df.columns:
        d = pd.to_datetime(df["full_date"], errors="coerce")
        df["year"] = d.dt.year
        df["month"] = d.dt.month
    elif "sk_date" in df.columns:
        d = pd.to_datetime(df["sk_date"].astype(str), format="%Y%m%d", errors="coerce")
        df["year"] = d.dt.year
        df["month"] = d.dt.month
    else:
        raise ValueError("Impossible de construire year/month (pas de full_date ni sk_date).")

# Rows without a usable year/month cannot be placed on the time axis: drop
# them, then store both columns as plain integers.
df["year"] = pd.to_numeric(df["year"], errors="coerce")
df["month"] = pd.to_numeric(df["month"], errors="coerce")
df = df.dropna(subset=["year", "month"]).copy()
df["year"] = df["year"].astype(int)
df["month"] = df["month"].astype(int)

# "YYYY-MM" label used as the monthly time axis.
df["year_month"] = df["year"].astype(str) + "-" + df["month"].astype(str).str.zfill(2)

# total_amount safety: non-numeric or missing amounts count as 0.
df["total_amount"] = pd.to_numeric(df["total_amount"], errors="coerce").fillna(0)

# -----------------------------
# 1) Widgets
# -----------------------------
# Year selector: "All Years" first, then every distinct year in ascending order.
years_sorted = df["year"].drop_duplicates().sort_values().tolist()
year_options = ["All Years", *map(str, years_sorted)]

olap_dropdown = widgets.Dropdown(
    description="Date:",
    options=year_options,
    value="All Years",
    style=dict(description_width="initial"),
)

# Client filter: 0 keeps every client, otherwise only the N largest by revenue.
topn_slider = widgets.IntSlider(
    description="Top N Clients (0=ALL):",
    min=0,
    max=50,
    step=5,
    value=0,
    style=dict(description_width="initial"),
)

# Output area the chart is rendered into.
olap_output = widgets.Output()

# -----------------------------
# 2) Update function
# -----------------------------
# Upper bound on the number of (time x client x employee) combinations that
# will be densified and plotted; larger grids are refused with a hint.
MAX_GRID_POINTS = 250_000


def update_olap(change=None):
    """Redraw the 3D OLAP scatter for the current widget selection.

    Reads the module-level ``df`` and the current values of ``olap_dropdown``
    and ``topn_slider``, then renders into ``olap_output``:

    * "All Years" -> one tick per year on the time axis,
    * a specific year -> one tick per month ("YYYY-MM") of that year.

    Every (time, client, employee) combination is materialised so that
    combinations whose summed revenue is 0 can be drawn as faint grey markers.

    Parameters
    ----------
    change : optional
        Value passed by the widget ``observe`` mechanism. It is not inspected;
        the widget values are read directly.

    Returns
    -------
    None
        Output is displayed as a side effect. Nothing is plotted when the
        selection is empty or when the grid would exceed ``MAX_GRID_POINTS``.
    """
    selected_year = olap_dropdown.value
    top_n = topn_slider.value
    all_years = selected_year == "All Years"

    with olap_output:
        clear_output(wait=True)

        # ---- Row selection
        if all_years:
            data = df.copy()
        else:
            data = df[df["year"] == int(selected_year)].copy()

        # Check emptiness before touching min()/max(): on an empty frame they
        # are NaN and int(NaN) would raise instead of showing the message.
        if data.empty:
            display(Markdown("### ‚ùå No data available for this selection."))
            return

        # ---- Time axis logic
        if all_years:
            time_col = "year"
            xlabel = "Year"
            full_timeline = [str(y) for y in range(int(data["year"].min()), int(data["year"].max()) + 1)]
            data[time_col] = data[time_col].astype(str)  # category axis
        else:
            time_col = "year_month"
            xlabel = "Month"
            full_timeline = [f"{selected_year}-{m:02d}" for m in range(1, 13)]

        # ---- Optional: Top N clients (0 = all)
        if top_n and top_n > 0:
            top_clients = (data.groupby("company_name")["total_amount"]
                             .sum()
                             .nlargest(top_n)
                             .index.tolist())
            data = data[data["company_name"].isin(top_clients)].copy()

        # ---- Build grid (densification)
        clients = sorted(data["company_name"].unique().tolist())
        employees = sorted(data["Employee_name"].unique().tolist())

        # The grid is a full cartesian product, so refuse to build it when it
        # would be too large and tell the user how to narrow the selection.
        grid_size = len(full_timeline) * len(clients) * len(employees)
        if grid_size > MAX_GRID_POINTS:
            display(Markdown(
                f"### ‚ö†Ô∏è Trop de points pour densification ({grid_size:,}).\n"
                "- Mets `Top N Clients` (ex: 10/15)\n"
                "- ou choisis une ann√©e sp√©cifique."
            ))
            return

        grid = pd.DataFrame(
            list(itertools.product(full_timeline, clients, employees)),
            columns=[time_col, "company_name", "Employee_name"]
        )

        # ---- Aggregate facts
        actuals = (data.groupby([time_col, "company_name", "Employee_name"], as_index=False)["total_amount"]
                     .sum())

        # ---- Merge grid + fill gaps
        df_dense = pd.merge(grid, actuals, on=[time_col, "company_name", "Employee_name"], how="left")
        df_dense["total_amount"] = df_dense["total_amount"].fillna(0)

        # Combinations with a strictly positive total are plotted as sales,
        # those with a total of exactly 0 as gaps.
        df_zeros = df_dense[df_dense["total_amount"] == 0]
        df_sales = df_dense[df_dense["total_amount"] > 0]

        # ---- Plot sales (marker size and colour both encode revenue)
        fig = px.scatter_3d(
            df_sales,
            x=time_col,
            y="company_name",
            z="Employee_name",
            size="total_amount",
            color="total_amount",
            opacity=0.90,
            title=f"3D OLAP: {xlabel} √ó Client √ó Employee",
            labels={
                "total_amount": "Revenue",
                time_col: xlabel,
                "company_name": "Client",
                "Employee_name": "Employee"
            }
        )

        # ---- Add gaps as grey points
        fig.add_trace(go.Scatter3d(
            x=df_zeros[time_col],
            y=df_zeros["company_name"],
            z=df_zeros["Employee_name"],
            mode="markers",
            marker=dict(size=3, color="lightgrey", opacity=0.20),
            name="No Orders (Gap)"
        ))

        # ---- Force the time axis to follow the full timeline order
        fig.update_layout(
            height=700,
            margin=dict(l=0, r=0, b=0, t=50),
            showlegend=True,
            scene=dict(
                xaxis=dict(
                    title=xlabel,
                    type="category",
                    categoryorder="array",
                    categoryarray=full_timeline,
                    tickmode="linear",
                    dtick=1
                ),
                yaxis=dict(title="Client"),
                zaxis=dict(title="Employee")
            )
        )

        fig.show()

# Observe changes: re-render whenever the value of either control changes.
for _control in (olap_dropdown, topn_slider):
    _control.observe(update_olap, names="value")

# Heading, then the controls side by side, then the chart area.
display(Markdown("### üìå 3D OLAP Analysis (Time, Client, Employee)"))
display(widgets.HBox([olap_dropdown, topn_slider]))
display(olap_output)

# First render
update_olap()


### üìå 3D OLAP Analysis (Time, Client, Employee)

HBox(children=(Dropdown(description='Date:', options=('All Years', '1996', '1997', '1998', '2006'), style=Desc‚Ä¶

Output()