In [1]:
# dependencies

import datetime
import os
import warnings
from urllib.parse import quote_plus

warnings.filterwarnings('ignore')

import numpy as np
import pandas as pd
import plotly.express as px
from sqlalchemy import create_engine

pd.set_option('display.max_columns', None)

In [8]:
# database connection
#
# Connection settings are read from environment variables so that no secret is
# stored in the notebook. Only the password is mandatory; the other values
# fall back to the defaults this notebook has always used.

db_password = os.environ.get("DB_PASSWORD")
if not db_password:
    raise RuntimeError("Set the DB_PASSWORD environment variable before running this cell.")
db_user = os.environ.get("DB_USER", "postgres")
db_name = os.environ.get("DB_NAME", "dot")
endpoint = os.environ.get("DB_HOST", "awakedb.cre3f7yk1unp.us-west-1.rds.amazonaws.com")

# quote_plus() escapes characters such as '@', ':' or '/' that would otherwise
# break the URL when they appear in the password.
connection_string = f"postgresql://{db_user}:{quote_plus(db_password)}@{endpoint}:5432/{db_name}"
engine = create_engine(connection_string)

In [9]:
# Load each cleaned source table and immediately give it the shared column
# names. The names are applied by position, so each list follows the column
# order of its own table (note that usd/cad and the last two columns are
# swapped between the two lists).
dot = pd.read_sql('SELECT * FROM invoice_clean;', con=engine)
dot.columns = [
    'month', 'year', 'customer', 'date', 'item', 'qty',
    'usd', 'cad', 'sale_origin', 'parent_customer', 'market_segment',
]

unl = pd.read_sql('SELECT * FROM unleashed_clean;', con=engine)
unl.columns = [
    'month', 'year', 'customer', 'date', 'item', 'qty',
    'cad', 'usd', 'sale_origin', 'market_segment', 'parent_customer',
]

In [14]:
## concat datasets

# Stack both sources, order newest-first and renumber the rows, then drop the
# 'DOT Foods, Inc.' customer rows and switch to pandas' nullable dtypes.
# The index is renumbered *before* the customer filter, so it has gaps afterwards.
lvl2 = (
    pd.concat([dot, unl])
    .sort_values(by='date', ascending=False)
    .reset_index(drop=True)
    .loc[lambda frame: frame['customer'] != 'DOT Foods, Inc.']
    .convert_dtypes()
)

# Random spot check of three rows.
lvl2.sample(3)

Unnamed: 0,month,year,customer,date,item,qty,usd,cad,sale_origin,parent_customer,market_segment
123324,May,2021,Amazon SC,2021-05-24,50ct Change Maker - AWAKE Chocolate Bites USA ...,1.0,30.1725,40.23,unl,Amazon,Online
99559,November,2021,Amazon SC FBA,2021-11-30,50ct Change Maker - AWAKE Chocolate Bites USA ...,1.0,30.93,41.24,unl,Amazon,Online
249815,October,2017,SYSCO/BOSTON,2017-10-02,AWAKE CHOCOLATE AWAKE CAFF DARK CHOC- 4X12PK M...,1.0,52.32,69.59,dot,Sysco,Broadline Distributor


In [15]:
# ORDER COLUMNS TO MATCH POSTGRES DATABASE

new_order = ['date', 'sale_origin', 'market_segment', 'parent_customer', 'customer', 'item', 'qty', 'usd', 'cad','month','year']

# Select the columns in the target order in a single step rather than
# dropping and re-inserting them one at a time in place. Any column that is
# not listed in new_order is kept, after the listed ones, in its current
# relative position.
remaining = [name for name in lvl2.columns if name not in new_order]
lvl2 = lvl2[new_order + remaining]

lvl2.head(2)

Unnamed: 0,date,sale_origin,market_segment,parent_customer,customer,item,qty,usd,cad,month,year
0,2024-01-19,dot,Alternate Retail,Vistar Retail,VISTAR-RETAIL WEST,AWAKE CHOCOLATE AWAKE CAFF DARK CHOC- 6X12PK M...,1.0,85.54,113.77,January,2024
1,2024-01-19,dot,Alternate Retail,Tropical,TROPICAL FOODS--GA,AWAKE CHOCOLATE AWAKE CHOC CARAMEL- 6X12PK MASTER,1.0,85.54,113.77,January,2024


In [16]:
##### CAREFUL!!!! REPLACES ENTIRE CLEAN LEVEL_2 DATABASE
# if_exists='replace' drops the existing `level_2` table and recreates it from
# lvl2; the DataFrame index is not written.

lvl2.to_sql(
    name='level_2',
    con=engine,
    if_exists='replace',
    index=False,
)

319

In [2]:
##  ------ READ ALL DATA INTO APPLICATION

# Default location of the sales extract. Pass another path to
# get_data_from_csv() when the file lives somewhere else.
ALL_SALES_CSV = r"C:\Users\mikej\Desktop\cpg-sales\data\all_sales_data.csv"


def get_data_from_csv(path=ALL_SALES_CSV):
    """Read the sales extract from a CSV file.

    Parameters
    ----------
    path : str or path-like, optional
        CSV file to read. Defaults to ``ALL_SALES_CSV``.

    Returns
    -------
    pandas.DataFrame
        The file contents exactly as parsed by ``pandas.read_csv``.

    Notes
    -----
    An earlier variant of this loader queried the database instead
    (``SELECT * FROM level_2 WHERE date > '2019-12-31'``); presumably the CSV
    is an export of that query -- confirm before relying on it.
    """
    return pd.read_csv(path)
# Load the extract once.
df = get_data_from_csv()

### MASTER DATA ###
# Independent (deep) copy of the loaded frame used by the cells below.
all_sales = df.copy(deep=True)

In [47]:
# invoice date cleanup
#
# Switch to pandas' nullable dtypes, parse `date` and truncate it to midnight,
# then order the rows by `usd`, largest first. normalize() already removes the
# time-of-day part, so no additional floor('D') is needed.
all_sales = (
    all_sales
    .convert_dtypes()
    .assign(date=lambda frame: pd.to_datetime(frame['date']).dt.normalize())
    .sort_values(by='usd', ascending=False)
)
all_sales.info()

<class 'pandas.core.frame.DataFrame'>
Index: 196662 entries, 141999 to 48742
Data columns (total 11 columns):
 #   Column           Non-Null Count   Dtype         
---  ------           --------------   -----         
 0   date             196662 non-null  datetime64[ns]
 1   sale_origin      196662 non-null  string        
 2   market_segment   196662 non-null  string        
 3   parent_customer  196662 non-null  string        
 4   customer         196662 non-null  string        
 5   item             196662 non-null  string        
 6   qty              196662 non-null  Float64       
 7   usd              196662 non-null  Float64       
 8   cad              196662 non-null  Float64       
 9   month            196662 non-null  string        
 10  year             196662 non-null  Int64         
dtypes: Float64(3), Int64(1), datetime64[ns](1), string(6)
memory usage: 18.8 MB


In [61]:
# user query
# The selections are built from every value present in the data, so the
# filter below keeps all rows whose year and market_segment are non-missing.

year = sorted(all_sales['year'].unique())
segment = np.array(all_sales['market_segment'].unique())

year_selected = all_sales['year'].isin(year)
segment_selected = all_sales['market_segment'].isin(segment)
df = all_sales[year_selected & segment_selected]

df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 196662 entries, 141999 to 48742
Data columns (total 11 columns):
 #   Column           Non-Null Count   Dtype         
---  ------           --------------   -----         
 0   date             196662 non-null  datetime64[ns]
 1   sale_origin      196662 non-null  string        
 2   market_segment   196662 non-null  string        
 3   parent_customer  196662 non-null  string        
 4   customer         196662 non-null  string        
 5   item             196662 non-null  string        
 6   qty              196662 non-null  Float64       
 7   usd              196662 non-null  Float64       
 8   cad              196662 non-null  Float64       
 9   month            196662 non-null  string        
 10  year             196662 non-null  Int64         
dtypes: Float64(3), Int64(1), datetime64[ns](1), string(6)
memory usage: 18.8 MB


In [62]:
# Distinct market segments present in the filtered frame.
df['market_segment'].unique()

<StringArray>
[              'Vending',               'Grocery',      'Alternate Retail',
                'Canada',                'Online',                 'Other',
           'Convenience', 'Broadline Distributor',               'Samples']
Length: 9, dtype: string

In [None]:
# Bar chart of `usd` against the frame's index, shaded by value on the
# Oranges scale, with the value printed on each bar.
# NOTE(review): `df` here is the filtered row-level frame, while the twelve
# month abbreviations passed as hovertext suggest a monthly aggregate was
# intended -- confirm which frame this figure should receive.
fig_mth_bar = px.bar(
    df,
    x=df.index,
    y='usd',
    color='usd',
    text='usd',
    hover_data=['usd'],
    labels={'date': ' ', 'usd': '<b>$USD</b>'},
    color_continuous_scale=px.colors.sequential.Oranges,
    template='plotly_white',
    opacity=.8,
    title=' ',
    height=400,
)

# Hide the colour bar; the bar labels already carry the values.
fig_mth_bar.update_coloraxes(showscale=False)

# Bold, dollar-formatted bar labels and month abbreviations as hover text.
fig_mth_bar.update_traces(
    texttemplate='<b>%{text:$,}</b>',
    hovertext=['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec'],
)
fig_mth_bar.update_layout(title_x=0.5, hovermode="x")
# fig_mth_bar.update_xaxes(tickmode='array',tickvals = df.index, ticktext=df.index.month_name())

# y-axis ticks every 250,000 starting at 0; as the last expression this also
# displays the figure.
fig_mth_bar.update_yaxes(tick0=0, dtick=250000)

In [None]:
# Render the `fig_mth_bar` figure.
fig_mth_bar.show()

In [None]:
import plotly.express as px

In [None]:
# df['month'] = pd.to_datetime(df['month'])
# df['year'] = pd.to_datetime(df['year'])

# Parse `date`, derive numeric calendar month/year columns from it, and order
# the rows chronologically by (year, month).
# assign() returns a new frame instead of writing columns into `df`, which may
# itself be a filtered selection of another frame.
df = (
    df
    .assign(date=lambda frame: pd.to_datetime(frame['date']))
    .assign(
        monthy=lambda frame: frame['date'].dt.month,
        yeary=lambda frame: frame['date'].dt.year,
    )
    .sort_values(by=['yeary', 'monthy'])
)
# df.groupby(pd.Grouper(freq='M'))['usd'].sum()


# px.bar(df,
#        # x = 'date',
#        y = 'usd')

In [None]:
# Sum `usd` per monthly bin of `date`; the result is a Series named 'usd'
# indexed by `date`.
bar_df = (
    df
    .set_index('date')
    .groupby(pd.Grouper(freq='M'))['usd']
    .sum()
)

# Bars of the monthly totals, shaded by value on the Oranges scale.
px.bar(
    bar_df,
    y='usd',
    color='usd',
    text='usd',
    hover_data=['usd'],
    labels={'date': ' ', 'usd': '<b>$USD</b>'},
    color_continuous_scale=px.colors.sequential.Oranges,
    template='plotly_white',
    opacity=.8,
    title=' ',
    height=400,
)

In [None]:
# df = pd.DataFrame(bar_df).reset_index()
# df.date = df.date.astype('category')

# One bar per monthly total, coloured by its month.
# `color` names a column of the frame being plotted: a Series taken from a
# different frame (such as the row-level `df`) does not line up with the
# monthly rows.
px.bar(bar_df.reset_index(),
       x='date',
       y='usd',
       color='date')