In [None]:


# Import python packages
import streamlit as st
import pandas as pd

# We can also use Snowpark for our analyses!
from snowflake.snowpark.context import get_active_session

# Snowpark session used by every cell below to read and write tables.
session = get_active_session()

# Pull each source table fully into pandas so it can be explored locally.
sales = session.table("SALES").to_pandas()
purchase = session.table("PURCHASES").to_pandas()
invoice = session.table("INVOICE_PURCHASE").to_pandas()
end_inv = session.table("END_INV").to_pandas()
beg_inv = session.table("BEG_INV").to_pandas()

# Quick look at the first rows of the two inventory snapshots.
display(beg_inv.head())
display(end_inv.head())



     

In [None]:

# Compare the number of distinct values per column between the beginning and
# ending inventory tables (one row per table, one column per field).
nuniques = {"beg_inv": beg_inv.nunique(), "end_inv": end_inv.nunique()}
display(
    pd.DataFrame(nuniques).T[
        [
            "INVENTORYID",
            "STORE",
            "CITY",
            "BRAND",
            "DESCRIPTION",
            "SIZE",
            "ONHAND",
            "BEGINDATE",
            "ENDDATE",
        ]
    ]
)

# If BRAND identified a product one-to-one, the BRAND count and the
# DESCRIPTION + SIZE count would match; print both for each table.
print(
    f"BEG_INV BRAND nunique: {beg_inv.BRAND.nunique()}, desc + size nunique: {(beg_inv['DESCRIPTION'] + ' ' + beg_inv['SIZE']).nunique()}??? Might need cleaning"
)
print(
    f"end_inv Brand nunique: {end_inv.BRAND.nunique()}, desc + size nunique: {(end_inv['DESCRIPTION'] + ' ' + end_inv['SIZE']).nunique()}"
)

# Work on an explicit copy so the helper column never leaks into `beg_inv`.
beg_inv_brand = beg_inv.copy()
beg_inv_brand["DESC_SIZE"] = beg_inv_brand["DESCRIPTION"] + " " + beg_inv_brand["SIZE"]

# For every DESCRIPTION + SIZE combination, collect the distinct BRAND values.
group_desc = (
    beg_inv_brand[["BRAND", "DESC_SIZE"]].groupby("DESC_SIZE")["BRAND"].unique()
)
# Show the combinations that map to more than one BRAND. The explicit
# display() is required: a bare expression in the middle of a cell is discarded.
display(group_desc.loc[group_desc.apply(len) > 1])

display(sales.head())
# Spot-check a single brand / inventory id pair in the beginning inventory.
display(
    beg_inv.loc[
        (beg_inv["BRAND"] == 1004) & (beg_inv["INVENTORYID"] == "1_HARDERSFIELD_1004")
    ]
)
# Hypothesis (to confirm): INVENTORYID = store_city_brand and BRAND stands for
# DESCRIPTION + SIZE. If so, INVENTORYID lets us look up the on-hand quantity
# at the beginning and at the end of the period.
print(sales.CLASSIFICATION.unique())

## 1. Aggregate the data from the tables

- **Group** the `sales` DataFrame by the `"SALES_DATE"` column.  
- **Aggregate** by summing `"SALES_QUANTITY"` to get the total quantity sold per day.  
- The result is stored in a new DataFrame, `sales_quantity_price`.



In [None]:
# Group by date and sum the sales quantity to get the total quantity sold per day.
# The result is indexed by SALES_DATE with a single SALES_QUANTITY column.
sales_quantity_price = sales.groupby("SALES_DATE").agg({"SALES_QUANTITY": "sum"})

# Summary statistics of the daily totals. display() is needed because only the
# last bare expression of a cell is rendered automatically.
display(sales_quantity_price.describe())

# Rich (truncated) rendering of the daily totals themselves.
display(sales_quantity_price)



## 2. Native Snowflake ML Forecasting

Snowflake provides built-in time-series forecasting as a SQL object (`SNOWFLAKE.ML.FORECAST`) that you train and invoke entirely in SQL.

In [None]:
# Turn the SALES_DATE index back into a regular column so it is written
# to the table alongside SALES_QUANTITY.
df_to_write = sales_quantity_price.reset_index()

# Upload the pandas frame and persist it, replacing any previous version
# of the SALES_QUANTITY_PRICE table.
snowpark_df = session.create_dataframe(df_to_write)
snowpark_df.write.save_as_table("SALES_QUANTITY_PRICE", mode="overwrite")


In [None]:
-- 1) Train the model (requires CREATE SNOWFLAKE.ML.FORECAST privilege)
-- OR REPLACE keeps this cell re-runnable: a plain CREATE fails once the
-- model already exists, and retraining picks up the freshly written table.
CREATE OR REPLACE SNOWFLAKE.ML.FORECAST inventory_forecast_model (
  INPUT_DATA        => TABLE(sales_quantity_price),  -- daily totals table
  TIMESTAMP_COLNAME => 'SALES_DATE',                 -- time axis of the series
  TARGET_COLNAME    => 'SALES_QUANTITY'              -- value to forecast
);



In [None]:
-- 2) Generate a 90-day forecast
-- Materialise the output of the trained model's FORECAST method into a
-- table, replacing any earlier forecast.
CREATE OR REPLACE TABLE sales_quantity_price_forecast AS
    SELECT *
    FROM TABLE(
        inventory_forecast_model!FORECAST(FORECASTING_PERIODS => 90)
    );

In [None]:
# Load the raw and the aggregated sales tables into pandas, keeping only the
# date and quantity columns that the comparison needs.
date_qty_cols = ("SALES_DATE", "SALES_QUANTITY")

sales_df = session.table("SALES").select(*date_qty_cols).to_pandas()
sq_df = session.table("SALES_QUANTITY_PRICE").select(*date_qty_cols).to_pandas()

# Normalise both date columns to pandas datetimes so they share one dtype.
sales_df = sales_df.assign(SALES_DATE=pd.to_datetime(sales_df["SALES_DATE"]))
sq_df = sq_df.assign(SALES_DATE=pd.to_datetime(sq_df["SALES_DATE"]))



In [None]:

# Give each quantity column a distinct name before joining.
raw_sales = sales_df.rename(columns={"SALES_QUANTITY": "Raw_Sales"})
agg_sales = sq_df.rename(columns={"SALES_QUANTITY": "Agg_Sales"})

# Inner-join on the date so every raw row carries its day's aggregated total,
# then index by a column named "Date" for plotting.
merged = (
    raw_sales.merge(agg_sales, on="SALES_DATE", how="inner")
    .rename(columns={"SALES_DATE": "Date"})
    .set_index("Date")
)


In [None]:


# 4) Render with Streamlit
# Plot only every PLOT_ROW_STRIDE-th row of `merged` instead of all of them.
# NOTE(review): presumably a down-sampling step to keep the chart light;
# tune the stride here rather than editing the slice below.
PLOT_ROW_STRIDE = 32

st.title("📈 Raw vs Aggregated Sales Quantity")
st.line_chart(merged.iloc[::PLOT_ROW_STRIDE])