In [1]:
import datetime
import os
from getpass import getpass
from urllib.parse import quote_plus

import numpy as np
import pandas as pd
import plotly.express as px
import streamlit as st
from sqlalchemy import create_engine

from database_utils import add_new_unl, add_new_dot, clean_unleashed, clean_dot, get_level_2

In [2]:
# database connection
#
# Connection settings are read from environment variables so that no secret is
# stored in the notebook. The non-secret settings fall back to the values this
# notebook has always used; the password has no default and is prompted for
# when DOT_DB_PASSWORD is not set.
# NOTE: the password that used to be hard-coded in this cell should be treated
# as compromised and rotated.

db_user = os.environ.get("DOT_DB_USER", "postgres")
db_name = os.environ.get("DOT_DB_NAME", "dot")
endpoint = os.environ.get("DOT_DB_HOST", "awakedb.cre3f7yk1unp.us-west-1.rds.amazonaws.com")
db_password = os.environ.get("DOT_DB_PASSWORD") or getpass("Postgres password: ")

# User and password are URL-escaped so characters such as '@', ':' or '/' in a
# credential cannot corrupt the connection URL.
connection_string = (
    f"postgresql://{quote_plus(db_user)}:{quote_plus(db_password)}"
    f"@{endpoint}:5432/{db_name}"
)
engine = create_engine(connection_string)

In [None]:
# UPLOAD AND CLEAN DIRECT
# Feeds a downloaded Unleashed export to the database_utils helpers.
# The helper descriptions below are carried over from the original author's
# notes; their implementations are not visible in this notebook.

# Local path of the .xlsx file downloaded from Unleashed; edit for each new download.
unl_download = r"C:\Users\mikej\Downloads\SalesEnquiryList - 2024-03-30T063201.166.xlsx"
add_new_unl(unl_download)  # adds new direct purchases to the raw database
clean_unleashed()          # cleans raw direct purchases and adds them to the clean direct db

In [8]:
# UPLOAD AND CLEAN DOT
# Rebuilds the SQLAlchemy engine from connection_string (set in the database
# connection cell), so this cell requires that cell to have been run first.
engine = create_engine(connection_string)
# Local path of the .xlsx invoice-details file downloaded from DOT; edit for each new download.
dot_download = r"C:\Users\mikej\Downloads\SHOP Supplier Reporting - Invoice Details Excel - 2024-04-16T175756.609.xlsx"
# The two load steps below are commented out, so as written this cell only sets
# `engine` and `dot_download`. Uncomment them to load a new DOT download.
# add_new_dot(dot_download)  ## adds new indirect purchases to raw database
# clean_dot()                ## clean raw indirect purchases and add to clean indirect db

In [None]:
# COMBINE clean_direct & clean_indirect POSTGRES TABLES TO GET NEW TABLE level_2 (or tRUE Sales)
# get_level_2 is imported from database_utils; per the original author's note it
# combines the clean direct/indirect tables and populates the level_2 table.
# NOTE(review): the original note also said "dot invisible" — meaning unclear,
# confirm with the author.

get_level_2()

In [None]:
## Copy the Postgres tables into CSVs for local use (cells below)

In [None]:
# Build the level_1 frame (direct sales) that is later written to level_1.csv.

# Multiplier applied to sub_total to produce the usd column.
# NOTE(review): presumably a fixed CAD -> USD conversion rate — confirm.
SUB_TOTAL_TO_USD = .75

level_1 = (
    pd.read_sql("SELECT * FROM unleashed_clean WHERE completed_date > '2022-12-31';", con=engine)
    .assign(
        # parse completed_date into pandas datetimes
        completed_date=lambda frame: pd.to_datetime(frame.completed_date),
        usd=lambda frame: frame['sub_total'] * SUB_TOTAL_TO_USD,
    )
)

In [None]:
### Build the level_2 frame that is later written to level_2.csv.
# Steps: read rows dated after 2021-12-31, drop the 'Samples' market segment,
# then parse the date column into pandas datetimes.
level_2 = (
    pd.read_sql("SELECT * FROM level_2 WHERE date > '2021-12-31';", con=engine)
    .loc[lambda frame: frame.market_segment != 'Samples']
    .assign(date=lambda frame: pd.to_datetime(frame.date))
)

In [None]:
# Sanity check: show the most recent date present in each frame.
latest_direct = level_1.completed_date.max()
latest_true = level_2.date.max()
print(
    "Latest Dates",
    f"Direct: {latest_direct}",
    f"tRUE:   {latest_true}",
    sep="\n",
)

In [None]:
# Write both frames to the project's data folder as CSVs (row index omitted).
for _frame, _csv_path in (
    (level_1, r"C:/Users/mikej/Desktop/cpg-sales/data/level_1.csv"),
    (level_2, r"C:/Users/mikej/Desktop/cpg-sales/data/level_2.csv"),
):
    _frame.to_csv(_csv_path, index=False)

In [None]:
# level_1_tableau

# Tableau-facing headers, applied by position once month/year are dropped.
# NOTE(review): the rename is positional, so it assumes level_1's remaining
# columns arrive in exactly this order — confirm against unleashed_clean.
L1_TABLEAU_COLUMNS = [
    'Customer Name',
    'Invoice Date',
    'Item Full Description',
    'Quantity',
    'Sub Total',
    'Dollars',
    'Table',
    'Market Segment',
    'Parent Customer',
]

l1_tableau = (
    level_1
    .drop(columns=['month', 'year'])
    .set_axis(L1_TABLEAU_COLUMNS, axis='columns')
    .assign(Table='Unleashed')  # every row is labelled with its source system
)

In [None]:
# level_2_tableau

# Headers chosen to mimic the Tableau download output; applied by position
# after qty/cad/month/year are dropped.
# NOTE(review): the rename is positional, so it assumes level_2's remaining
# columns arrive in exactly this order — confirm against the level_2 table.
L2_TABLEAU_COLUMNS = [
    'Invoice Date',
    'Sale Origin',
    'Market Segment',
    'Parent Customer',
    'Customer',
    'Item Full Description',
    'Dollars',
]

l2_tableau = (
    level_2
    .drop(columns=['qty', 'cad', 'month', 'year'])
    .set_axis(L2_TABLEAU_COLUMNS, axis='columns')
)

# Yes/No flag marking rows whose parent customer is Vistar Retail.
is_vistar_retail = l2_tableau['Parent Customer'] == 'Vistar Retail'
l2_tableau['Vistar Retail'] = np.where(is_vistar_retail, 'Yes', 'No')

# Sales goal is 1.5x the invoiced dollars.
l2_tableau['Sales Goal'] = l2_tableau['Dollars'] * 1.5

In [None]:
# Write the Tableau-ready frames to the project's data folder (row index omitted).
for _frame, _csv_path in (
    (l2_tableau, r"C:\Users\mikej\Desktop\cpg-sales\data\level_2_tableau.csv"),
    (l1_tableau, r"C:\Users\mikej\Desktop\cpg-sales\data\level_1_tableau.csv"),
):
    _frame.to_csv(_csv_path, index=False)

In [None]:
### check Direct Sales (level_1) daily

# Total usd per completed_date, as a one-column frame indexed by completed_date.
daily_direct_usd = level_1.groupby('completed_date')['usd'].sum().to_frame()

# Keep only days after 2024-02-29, round to whole numbers, order by date.
after_cutoff = daily_direct_usd.index > '2024-02-29'
l1_bar_df = daily_direct_usd.loc[after_cutoff].round().sort_index()

level_1_bar = px.bar(
    l1_bar_df,
    y='usd',
    labels={'usd': '', 'completed_date': ''},  # blank out both axis titles
    height=325,
    text_auto=",.2s",
)
level_1_bar.update_traces(textposition='outside')  # value labels above the bars

level_1_bar.show()

In [None]:
### check tRUE Sales (level_2) daily

# Sum usd per calendar day for rows dated after 2024-02-29.
# 'D' is the canonical daily frequency alias; the lowercase 'd' used previously
# is deprecated in newer pandas releases, while 'D' works on every version.
level_2_daily_usd = (
    level_2[level_2.date > '2024-02-29']
    .set_index('date')
    .groupby(pd.Grouper(freq='D'))['usd']
    .sum()
)

level_2_daily_bar = px.bar(
    level_2_daily_usd,
    y='usd',
    labels={'usd': '$USD', 'date': ''},
    text_auto=",.2s",
    title='Daily tRUE Sales in USD',
    height=400,
)
# Last expression of the cell, so the notebook renders the figure.
level_2_daily_bar.update_traces(textposition='outside')

In [None]:
### check tRUE Sales (level_2) all years

# Sum usd per month over the whole level_2 frame.
# NOTE(review): pandas >= 2.2 warns that 'M' is deprecated in favour of 'ME';
# kept as-is because 'ME' is not accepted by older pandas — confirm the
# installed version before changing it.
level_2_monthly_usd = (
    level_2
    .set_index('date')
    .groupby(pd.Grouper(freq='M'))['usd']
    .sum()
)

level_2_monthly_bar = px.bar(
    level_2_monthly_usd,
    y='usd',
    labels={'usd': '$USD', 'date': ''},
    text_auto=",.2s",
    title='Monthly tRUE Sales in USD',
    height=400,
)
# Last expression of the cell, so the notebook renders the figure.
level_2_monthly_bar.update_traces(textposition='outside')