In [25]:
# dependencies

import datetime
import os
import warnings
from urllib.parse import quote_plus

# Silence warnings before the heavy third-party imports so import-time
# warnings are suppressed as well.
warnings.filterwarnings('ignore')

import numpy as np
import pandas as pd
import plotly.express as px
from sqlalchemy import create_engine

pd.set_option('display.max_columns', None)

In [26]:
# database connection
#
# Credentials are read from the environment so that no secret is stored in the
# notebook or its version history. DB_PASSWORD is required (a missing value
# raises KeyError immediately); the other settings fall back to the values
# this notebook has always used.
# NOTE(review): a password was previously committed in this cell in plain
# text -- it should be rotated.

db_password = os.environ["DB_PASSWORD"]
db_user = os.environ.get("DB_USER", "postgres")
db_name = os.environ.get("DB_NAME", "dot")
endpoint = os.environ.get("DB_HOST", "awakedb.cre3f7yk1unp.us-west-1.rds.amazonaws.com")

# quote_plus escapes characters such as '@', ':' or '/' in the password that
# would otherwise be parsed as URL delimiters.
connection_string = (
    f"postgresql://{db_user}:{quote_plus(db_password)}@{endpoint}:5432/{db_name}"
)
engine = create_engine(connection_string)

In [27]:
# Load both cleaned sales tables in full.
#
# Column labels are assigned BY POSITION, so each list must have exactly one
# entry per column returned by the query. Note that the two lists differ:
# usd/cad and market_segment/parent_customer appear in opposite order.
# NOTE(review): presumably each list mirrors the physical column order of its
# table -- verify against the schema before changing either one.
dot = pd.read_sql('SELECT * FROM invoice_clean;', con=engine)
unl = pd.read_sql('SELECT * FROM unleashed_clean;', con=engine)

unl.columns = [
    'month', 'year', 'customer', 'date', 'item', 'qty',
    'cad', 'usd', 'sale_origin', 'market_segment', 'parent_customer',
]
dot.columns = [
    'month', 'year', 'customer', 'date', 'item', 'qty',
    'usd', 'cad', 'sale_origin', 'parent_customer', 'market_segment',
]

In [29]:
# fix columns in dot table to match unl table

new_order = ['month', 'year', 'date', 'customer', 'item',
             'qty', 'cad', 'usd', 'sale_origin', 'market_segment',
             'parent_customer']

# Selecting with a list of labels returns the columns in exactly that order,
# replacing the earlier pop-and-reassign loop with a single non-mutating step.
# A label missing from `dot` raises KeyError; `dot` is expected to hold exactly
# these eleven columns, so nothing is dropped by the selection.
dot = dot[new_order]

Unnamed: 0,month,year,date,customer,item,qty,cad,usd,sale_origin,market_segment,parent_customer
0,February,2017,2017-02-16,A & R WHOLESALE DNU,AWAKE CHOCOLATE AWAKE CAFF MILK CHOC- 6X12PK M...,1.0,104.38,78.48,dot,Broadline Distributor,A&R Wholesale
1,September,2017,2017-09-15,A.C.C. WHOLESALE,AWAKE 1.41OZ ENERGY GRANOLA BARS 4 X 16CT MAST...,1.0,92.78,69.76,dot,Convenience,ACC Wholesale
2,August,2017,2017-08-04,A.E. WEASE INC.,CAFFEINATED CHOCOLATE BITES SINGLES- MILK,7.0,595.84,448.00,dot,Convenience,A.E. Wease
3,August,2017,2017-08-04,A.E. WEASE INC.,CAFFEINATED CHOCOLATE BITES SINGLES- CARAMEL,7.0,595.84,448.00,dot,Convenience,A.E. Wease
4,August,2017,2017-08-18,A.E. WEASE INC.,CAFFEINATED CHOCOLATE BITES SINGLES- MILK,1.0,85.12,64.00,dot,Convenience,A.E. Wease
...,...,...,...,...,...,...,...,...,...,...,...
67457,October,2023,2023-10-25,ZORO,AWAKE CHOCOLATE AWAKE CAFF DARK CHOC- 6X12PK M...,1.0,114.85,86.35,dot,Online Distributor,Zoro
67458,October,2023,2023-10-25,ZORO,CAFFEINATED CHOCOLATE BITES SINGLES DARK,1.0,189.29,142.32,dot,Online Distributor,Zoro
67459,November,2023,2023-11-07,ZORO,AWAKE CHOCOLATE AWAKE CHOC CARAMEL- 6X12PK MASTER,1.0,114.98,86.45,dot,Online Distributor,Zoro
67460,November,2023,2023-11-08,ZORO,CAFFEINATED CHOCOLATE BITES SINGLES PB CHOC,1.0,189.29,142.32,dot,Online Distributor,Zoro


In [30]:
## concat datasets

# Stack both sources, order newest-first by date, renumber the rows, remove the
# 'DOT Foods, Inc.' customer rows, then let pandas infer nullable dtypes.
# The index is renumbered BEFORE the customer filter, so the final index has
# gaps where rows were removed.
lvl2 = (
    pd.concat([dot, unl])
    .sort_values(by='date', ascending=False)
    .reset_index(drop=True)
    .loc[lambda frame: frame['customer'] != 'DOT Foods, Inc.']
    .convert_dtypes()
)

# Random spot check of three rows (unseeded, so it differs on every run).
lvl2.sample(3)

Unnamed: 0,month,year,date,customer,item,qty,cad,usd,sale_origin,market_segment,parent_customer
166591,August,2020,2020-08-31,Amazon FBM,50ct Change Maker - AWAKE Chocolate Bites USA ...,1.0,22.13,16.5975,unl,Online Distributor,Amazon
89038,February,2022,2022-02-28,Amazon SC FBA,50ct Change Maker - AWAKE Chocolate Bites USA ...,1.0,39.99,29.9925,unl,Online Distributor,Amazon
11571,October,2023,2023-10-25,Amazon SC,50ct Pouch - Dark Choc Bites,1.0,46.65,34.9875,unl,Online Distributor,Amazon


In [31]:
new_order = ['date', 'sale_origin', 'market_segment', 'parent_customer',
             'customer', 'item', 'qty', 'usd', 'cad', 'month', 'year']

# Reorder with one column selection instead of dropping and re-inserting each
# column in place: no `inplace=True`, no temporary variables left behind in the
# notebook namespace, and the cell can be re-run safely.
# A label missing from `lvl2` raises KeyError; `lvl2` is expected to hold
# exactly these eleven columns, so nothing is dropped by the selection.
lvl2 = lvl2[new_order]

lvl2.head(2)

Unnamed: 0,date,sale_origin,market_segment,parent_customer,customer,item,qty,usd,cad,month,year
0,2024-01-17,unl,Online Distributor,Amazon,Amazon SC,50ct Pouch - Dark Choc Bites,1.0,34.9875,46.65,January,2024
1,2024-01-17,unl,Online Distributor,Amazon,Amazon SC,50ct Pouch - Mint Choc Bites,1.0,34.9875,46.65,January,2024


In [32]:
##### CAREFUL!!!! REPLACES THE ENTIRE CLEAN level_2 TABLE
# if_exists='replace' discards the existing table and recreates it from lvl2.
# index=False keeps the DataFrame index out of the table.

lvl2.to_sql(
    name='level_2',
    con=engine,
    if_exists='replace',
    index=False,
)

93

In [36]:
##  ------ READ ALL DATA INTO APPLICATION

def get_data_from_csv():
    """Return the rows of the ``level_2`` table whose ``year`` is after 2020.

    Despite the name, nothing is read from a CSV file: the data comes from a
    SQL query executed through the module-level ``engine``.

    Returns:
        pandas.DataFrame: every column of ``level_2`` for the matching rows.
    """
    # The year bound is written as the quoted literal '2020'; how it is
    # compared with the `year` column is left to the database.
    query = """
            SELECT * 
            FROM level_2
            WHERE year > '2020'
            """
    return pd.read_sql(query, con=engine)
# Query the database once; `df` keeps the frame exactly as it was loaded.
df = get_data_from_csv()

### MASTER DATA ###
# Independent (deep) copy, so later edits to all_sales never touch df.
all_sales = df.copy(deep=True)
all_sales

Unnamed: 0,date,sale_origin,market_segment,parent_customer,customer,item,qty,usd,cad,month,year
0,2024-01-17,unl,Online Distributor,Amazon,Amazon SC,50ct Pouch - Dark Choc Bites,1.0,34.9875,46.65,January,2024
1,2024-01-17,unl,Online Distributor,Amazon,Amazon SC,50ct Pouch - Mint Choc Bites,1.0,34.9875,46.65,January,2024
2,2024-01-17,unl,Online Distributor,Amazon,Amazon SC,50ct Pouch - Dark Choc Bites,1.0,34.9875,46.65,January,2024
3,2024-01-17,dot,Alternate Retail,Vistar Retail,VISTAR-RETAIL CENTRAL,AWAKE CHOCOLATE AWAKE CAFF MILK CHOC- 6X12PK M...,13.0,1112.0200,1478.99,January,2024
4,2024-01-17,unl,Online Distributor,Amazon,Amazon SC,50ct Pouch - Dark Choc Bites,1.0,34.9875,46.65,January,2024
...,...,...,...,...,...,...,...,...,...,...,...
144282,2021-01-04,unl,Online Direct,Shopify,Shopify Customer - AWAKE Energy USA,Shopify Shipping cost,1.0,4.9725,6.63,January,2021
144283,2021-01-04,unl,Online Direct,Shopify,Shopify Customer - AWAKE Energy USA,Milk Chocolate Bars 12 Pack,1.0,19.8900,26.52,January,2021
144284,2021-01-04,unl,Online Direct,Shopify,Shopify Customer - AWAKE Energy USA,Shopify Shipping cost,1.0,4.9725,6.63,January,2021
144285,2021-01-04,unl,Online Direct,Shopify,Shopify Customer - AWAKE Energy USA,Shopify Shipping cost,1.0,4.9725,6.63,January,2021


In [37]:
# Total USD sales for each value of `year`.
(
    all_sales
    .groupby('year')['usd']
    .sum()
)

year
2021    5.139914e+06
2022    7.467697e+06
2023    9.614741e+06
2024    2.751878e+05
Name: usd, dtype: float64