In [1]:
import datetime
import os
from urllib.parse import quote_plus

import numpy as np
import pandas as pd
import plotly.express as px
from sqlalchemy import create_engine

from database_utils import add_new_unl, add_new_dot, clean_unleashed, clean_dot, get_level_2

In [2]:
# database connection
#
# The password is read from the DB_PASSWORD environment variable so that no secret
# is stored in (or committed with) this notebook. The password that used to be
# hardcoded here should be rotated. The remaining settings can be overridden the
# same way and otherwise fall back to the values this notebook has always used.

db_password = os.environ.get("DB_PASSWORD")
if not db_password:
    raise RuntimeError("Set the DB_PASSWORD environment variable before running this notebook.")
db_user = os.environ.get("DB_USER", "postgres")
db_name = os.environ.get("DB_NAME", "dot")
endpoint = os.environ.get("DB_ENDPOINT", "awakedb.cre3f7yk1unp.us-west-1.rds.amazonaws.com")

# quote_plus escapes characters such as "@", ":" or "/" that would otherwise break the URL
connection_string = f"postgresql://{db_user}:{quote_plus(db_password)}@{endpoint}:5432/{db_name}"
engine = create_engine(connection_string)

In [79]:
## PASTE 2 FILES BELOW AND UPDATE ALL TABLES
## Point the two variables at the latest downloads, then run the cell.
## The helpers come from database_utils; the notes beside each call describe
## what they are expected to do (their implementations are not in this notebook).
## The calls are ordered: raw loads first, then the two cleaning steps, then level_2.

unl_download = r"C:\Users\mikej\Downloads\SalesEnquiryList - 2024-02-17T071954.084.xlsx"  ### file downloaded from Unleashed
dot_download = r"C:\Users\mikej\Downloads\SHOP Supplier Reporting - Invoice Details Excel - 2024-02-17T071913.307.xlsx"  ### file downloaded from DOT

add_new_unl(unl_download)  ## adds new direct purchases to the raw database
add_new_dot(dot_download)  ## adds new indirect purchases to the raw database

clean_unleashed()          ## cleans raw direct purchases and adds them to the clean direct db
clean_dot()                ## cleans raw indirect purchases and adds them to the clean indirect db
get_level_2()              ## combines clean direct/indirect and populates the level_2 db  --  dot invisible

In [80]:
### refresh all_sales_data.csv in the "data" directory

# everything in level_2 dated after 2020-12-31
sales_query = "SELECT * FROM level_2 WHERE date > '2020-12-31';"
level_2 = pd.read_sql(sales_query, con=engine)

# leave out rows whose market segment is 'Samples' before writing the file
level_2 = level_2.loc[level_2['market_segment'] != 'Samples']
level_2.to_csv(r"C:/Users/mikej/Desktop/cpg-sales/data/all_sales_data.csv", index=False)

In [81]:
### sanity check: a monthly bar graph should populate if all is well

# level_2 = pd.read_sql('SELECT * FROM level_2;', con = engine)
level_2['date'] = pd.to_datetime(level_2['date'])

# total USD per month (Grouper freq 'M')
monthly_usd = level_2.set_index('date').groupby(pd.Grouper(freq='M'))['usd'].sum()

monthly_fig = px.bar(
    monthly_usd,
    y='usd',
    template='plotly_white',
    labels={'usd': '$USD', 'date': ''},
    color='usd',
    width=900,
    height=400,
    text_auto=",.2s",
)
# hide the colour bar; the bars are already labelled with their values
monthly_fig.update(layout_coloraxis_showscale=False)
# last expression of the cell, so the figure is rendered
monthly_fig.update_traces(textposition='outside')

In [82]:
### sanity check: a daily bar graph (dates after 2024-01-31) should populate if all is well

# level_2 = pd.read_sql('SELECT * FROM level_2;', con = engine)
level_2['date'] = pd.to_datetime(level_2['date'])

# total USD per day for rows dated after 2024-01-31
recent_sales = level_2[level_2['date'] > '2024-01-31']
daily_usd = recent_sales.set_index('date').groupby(pd.Grouper(freq='d'))['usd'].sum()

daily_fig = px.bar(
    daily_usd,
    y='usd',
    template='plotly_white',
    labels={'usd': '$USD', 'date': ''},
    color='usd',
    color_continuous_scale='Oranges',
    width=900,
    height=400,
    text_auto=",.2s",
)
# hide the colour bar; the bars are already labelled with their values
daily_fig.update(layout_coloraxis_showscale=False)
# last expression of the cell, so the figure is rendered
daily_fig.update_traces(textposition='outside')

In [70]:
# MAKE LEVEL_2 INTO TABLEAU PREP OUTPUT

In [83]:
# Build the Tableau-ready CSV from level_2.

level_2 = pd.read_sql("SELECT * FROM level_2 WHERE date > '2020-12-31';", con=engine)

# Drop the columns the export does not carry, then relabel what is left so the
# file mimics the tableau download data output.
# NOTE(review): the relabelling is positional, so it assumes level_2 yields exactly
# these seven columns in this order once the four below are dropped — confirm
# against the table schema.
tab = level_2.drop(columns=['qty', 'cad', 'month', 'year'])
tab.columns = [
    'Invoice Date',
    'Sale Origin',
    'Market Segment',
    'Parent Customer',
    'Customer',
    'Item Full Description',
    'Dollars',
]

# add vistar retail y/n column
is_vistar_retail = tab['Parent Customer'] == 'Vistar Retail'
tab['Vistar Retail'] = np.where(is_vistar_retail, 'Yes', 'No')

tab.to_csv(r"C:\Users\mikej\Desktop\cpg-sales\data\all_sales_data_tableau.csv", index=False)

In [84]:
# Make the cleaned Unleashed data (level_1) Tableau-ready.

level_1 = pd.read_sql("SELECT * FROM unleashed_clean WHERE completed_date > '2020-12-31';", con=engine)

# Drop month/year, then relabel the remaining columns to mimic the tableau
# download data output.
# NOTE(review): the relabelling is positional, so it assumes unleashed_clean yields
# exactly these nine columns in this order after the drop — confirm against the schema.
tab = level_1.drop(columns=['month', 'year'])
tab.columns = [
    'Customer Name',
    'Invoice Date',
    'Item Full Description',
    'Quantity',
    'Sub Total',
    'Dollars',
    'Table',
    'Market Segment',
    'Parent Customer',
]

# every row in this frame is tagged with the same source-table label
tab['Table'] = 'Unleashed'

# tab.to_csv(r"C:\Users\mikej\Desktop\cpg-sales\data\level_1_tableau.csv", index=False)

In [73]:
# Scatter of order sub-totals over completion date for the customer 'DOT Foods, Inc.'.
# The raw Unleashed rows are loaded here so this cell runs on a fresh kernel; it
# previously relied on `l1` being created by a cell further down the notebook.
l1 = pd.read_sql("SELECT * FROM unleashed_raw WHERE completed_date > '2022-12-31';", con = engine)

dot_foods_orders = l1.loc[l1.customer_name == 'DOT Foods, Inc.', ['completed_date', 'sub_total']]
px.scatter(dot_foods_orders.set_index('completed_date').sort_index())

In [17]:
# total usd per parent_customer in level_1, drawn as a bar chart
px.bar(level_1.groupby('parent_customer')['usd'].sum().to_frame())

In [3]:
# Raw Unleashed rows with a completed_date after 2022-12-31; shown below for a quick look.
raw_unleashed_query = "SELECT * FROM unleashed_raw WHERE completed_date > '2022-12-31';"
l1 = pd.read_sql(raw_unleashed_query, con=engine)
l1

Unnamed: 0,order_num,order_date,req_date,completed_date,warehouse,customer_name,customer_type,product,product_group,status,quantity,sub_total
0,SO-00140851,2023-12-21,2023-12-22,2023-12-22,Complemar - PA,Amazon SC,Amazon,50ct Pouch - PB Milk Choc Bites,Finished Goods,Completed,1.0,46.65
1,SO-00140850,2023-12-21,2023-12-22,2023-12-22,Complemar - Nevada,Amazon SC,Amazon,50ct Pouch - PB Milk Choc Bites,Finished Goods,Completed,1.0,46.65
2,SO-00140849,2023-12-21,2023-12-22,2023-12-22,Complemar - PA,Amazon SC,Amazon,50ct Pouch - PB Milk Choc Bites,Finished Goods,Completed,1.0,46.65
3,SO-00140848,2023-12-21,2023-12-22,2023-12-22,Complemar - PA,Amazon SC,Amazon,50ct Pouch - PB Milk Choc Bites,Finished Goods,Completed,1.0,46.65
4,SO-00140847,2023-12-21,2023-12-22,2023-12-22,Complemar - Nevada,Amazon SC,Amazon,50ct Pouch - PB Milk Choc Bites,Finished Goods,Completed,1.0,46.65
...,...,...,...,...,...,...,...,...,...,...,...,...
49815,AMZNSCFBA-20471,2024-02-11,2024-02-12,2024-02-16,Amazon Seller Central,Amazon SC FBA,Amazon,50ct Pouch - Variety Pk Choc Bites,Finished Goods,Completed,1.0,46.65
49816,AMZNSCFBA-20470,2024-02-11,2024-02-12,2024-02-16,Amazon Seller Central,Amazon SC FBA,Amazon,50ct Pouch - Variety Pk Choc Bites,Finished Goods,Completed,1.0,46.65
49817,AMZNSCFBA-20469,2024-02-11,2024-02-12,2024-02-16,Amazon Seller Central,Amazon SC FBA,Amazon,50ct Pouch - Dark Choc Bites,Finished Goods,Completed,1.0,46.65
49818,AMZNSCFBA-20468,2024-02-11,2024-02-12,2024-02-16,Amazon Seller Central,Amazon SC FBA,Amazon,50ct Pouch - Milk Choc Bites,Finished Goods,Completed,1.0,46.65


In [None]:
# DAILY BY All (level 2)
# NOTE(review): `all_sales` is not created anywhere in the visible part of this
# notebook — confirm it is loaded (with a datetime `date` column, which the
# strftime call below needs) before this cell is run.
non_sample_sales = all_sales[all_sales.market_segment != 'Samples']
df = non_sample_sales.groupby(all_sales.date)['usd'].sum().to_frame()
df = df[df.index > '2024-01-31'].sort_index().round()

bar_all = px.bar(
    df,
    y='usd',
    template='plotly_white',
    labels={'date': '', 'usd': ''},
    height=325,
    text_auto='.2s',
)

# bar styling and hover text
bar_all.update_traces(
    hovertemplate='$%{y:.2s}<br>%{x:%Y-%m-%d}<br>',
    marker_color='#E09641',
)
bar_all.update_coloraxes(showscale=False)

# y axis: grid lines at fixed dollar steps, tick labels hidden
bar_all.update_yaxes(
    showticklabels=False,
    showgrid=True,
    tickprefix='$',
    gridcolor="#B1A999",
    tickvals=[0, 25000, 50000, 75000, 100000],
    tickfont={'color': '#5A5856', 'size': 14},
)

# x axis: one tick per bar, labelled with weekday and day of month
bar_all.update_xaxes(
    showgrid=False,
    gridcolor='gray',
    tickfont={'color': '#5A5856', 'size': 13},
    title_font={'color': '#5A5856', 'size': 15},
    tickmode='array',
    tickvals=df.index,
    ticktext=df.index.strftime('<b>%a<br>%d</b>'),
)

# last expression of the cell, so the figure is rendered
bar_all.update_layout(hoverlabel={'font_size': 18, 'font_family': "Rockwell"})

In [28]:
### daily bar graph of raw Unleashed sales (dates after 2024-01-31); should populate if all is well

l1['completed_date'] = pd.to_datetime(l1['completed_date'])
# NOTE(review): the 0.75 factor looks like a currency conversion into USD — confirm the rate and its source
l1['usd'] = l1['sub_total'] * 0.75

# total usd per completion date, rounded to whole numbers and ordered by date
l1_bar_df = l1.groupby('completed_date')['usd'].sum().to_frame()
l1_bar_df = l1_bar_df[l1_bar_df.index > '2024-01-31'].round().sort_index()

level_1_bar = px.bar(
    l1_bar_df,
    y='usd',
    template='plotly_white',
    labels={'usd': '', 'completed_date': ''},
    height=325,
    text_auto=",.2s",
)

# bar styling and hover text
level_1_bar.update_traces(
    hovertemplate='$%{y:.2s}<br>%{x:%Y-%m-%d}<br>',
    marker_color='rgb(239, 83, 80)',
)
level_1_bar.update_coloraxes(showscale=False)

# y axis: grid lines at fixed dollar steps, tick labels hidden
level_1_bar.update_yaxes(
    showticklabels=False,
    showgrid=True,
    tickprefix='$',
    gridcolor="#B1A999",
    tickvals=[0, 25000, 50000, 75000, 100000],
    tickfont={'color': '#5A5856', 'size': 14},
)

# x axis: one tick per bar, labelled with weekday and day of month
level_1_bar.update_xaxes(
    showgrid=False,
    gridcolor='gray',
    tickfont={'color': '#5A5856', 'size': 13},
    title_font={'color': '#5A5856', 'size': 15},
    tickmode='array',
    tickvals=l1_bar_df.index,
    ticktext=l1_bar_df.index.strftime('<b>%a<br>%d</b>'),
)

# last expression of the cell, so the figure is rendered
level_1_bar.update_layout(hoverlabel={'font_size': 18, 'font_family': "Rockwell"})

In [13]:
# sub_total summed per completed_date, as a one-column frame indexed by date
l1.groupby(l1['completed_date'])['sub_total'].sum().to_frame()

Unnamed: 0_level_0,sub_total
completed_date,Unnamed: 1_level_1
2023-01-03,182.61
2023-01-05,197.28
2023-01-10,1158.38
2023-01-11,785.14
2023-01-12,3009.91
...,...
2024-02-12,93.30
2024-02-13,63686.34
2024-02-14,16621.23
2024-02-15,14800.26


In [14]:
# sub_total per day and customer for rows completed after 2024-01-31
recent_l1 = l1[l1['completed_date'] > '2024-01-31'].set_index('completed_date')
recent_l1.groupby([pd.Grouper(freq='d'), 'customer_name'])['sub_total'].sum().to_frame()

Unnamed: 0_level_0,Unnamed: 1_level_0,sub_total
completed_date,customer_name,Unnamed: 2_level_1
2024-02-01,Amazon SC,147.96
2024-02-01,UNFI,159.8
2024-02-01,USA.SAMPLES.KENNEDY,0.0
2024-02-02,Amazon SC,93.3
2024-02-02,Shopify - AWAKE Energy Canada,981.45
2024-02-05,Amazon SC,375.87
2024-02-05,USA.SAMPLES.KENNEDY,0.0
2024-02-06,Amazon SC,186.61
2024-02-06,Shopify - AWAKE Energy Canada,501.37
2024-02-06,USA.SAMPLES.KENNEDY,0.0
