In [2]:
# dependencies

import warnings
warnings.filterwarnings('ignore')

import datetime
import getpass
import os
from urllib.parse import quote

import numpy as np
import pandas as pd
import plotly.express as px

from sqlalchemy import create_engine

# show every column when a DataFrame is displayed (no "..." truncation)
pd.options.display.max_columns = None

In [4]:
# database connection
#
# The password is intentionally NOT stored in the notebook. It is taken from
# the DOT_DB_PASSWORD environment variable, or prompted for interactively
# (without echo) when that variable is unset or empty.
# NOTE(review): the password that used to be hardcoded in this cell has been
# exposed to anyone with a copy of the notebook and should be rotated.
#
# The remaining settings can be overridden through DOT_DB_USER, DOT_DB_NAME
# and DOT_DB_ENDPOINT; their defaults are the values this notebook always used.

db_password = os.environ.get("DOT_DB_PASSWORD") or getpass.getpass("Postgres password: ")
db_user = os.environ.get("DOT_DB_USER", "postgres")
db_name = os.environ.get("DOT_DB_NAME", "dot")
endpoint = os.environ.get("DOT_DB_ENDPOINT", "awakedb.cre3f7yk1unp.us-west-1.rds.amazonaws.com")

# Percent-encode user and password so characters such as '@', '/' or ':'
# inside them cannot be mistaken for URL delimiters.
connection_string = (
    f"postgresql://{quote(db_user, safe='')}:{quote(db_password, safe='')}"
    f"@{endpoint}:5432/{db_name}"
)
engine = create_engine(connection_string)

In [5]:
# LOAD THE NEW EXPORT, STRIP HEADER/FOOTER NOISE, SET DTYPES, THEN EYEBALL IT

file_path = r"C:\Users\mikej\Downloads\SalesEnquiryList (99).xlsx"

# names applied positionally to the columns that remain after cleanup
column_names = [
    'order_num', 'order_date', 'req_date', 'completed_date',
    'warehouse', 'customer_name', 'customer_type', 'product',
    'product_group', 'status', 'quantity', 'sub_total',
]

# the real column headers sit on the third row of the sheet
export_sheet = pd.read_excel(file_path, header=2)
named_columns = ~export_sheet.columns.str.match('Unnamed')

new_unl_raw = (
    export_sheet
    .loc[:, named_columns]      # discard the 'Unnamed' column(s)
    .iloc[:-1]                  # final row is the vendor's Sub Total line, not a sale
    .set_axis(column_names, axis=1)
    .astype({'quantity': 'float', 'sub_total': 'float'})
)

completed_days = new_unl_raw['completed_date'].dt.date

print(f"{len(new_unl_raw)} new rows")
print(f"Dates: {completed_days.min()} thru {completed_days.max()}")
print(" ")

new_unl_raw.info()

29 new rows
Dates: 2024-01-17 thru 2024-01-17
 
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 29 entries, 0 to 28
Data columns (total 12 columns):
 #   Column          Non-Null Count  Dtype         
---  ------          --------------  -----         
 0   order_num       29 non-null     object        
 1   order_date      29 non-null     datetime64[ns]
 2   req_date        29 non-null     datetime64[ns]
 3   completed_date  29 non-null     datetime64[ns]
 4   warehouse       29 non-null     object        
 5   customer_name   29 non-null     object        
 6   customer_type   26 non-null     object        
 7   product         29 non-null     object        
 8   product_group   29 non-null     object        
 9   status          29 non-null     object        
 10  quantity        29 non-null     float64       
 11  sub_total       29 non-null     float64       
dtypes: datetime64[ns](3), float64(2), object(7)
memory usage: 2.8+ KB


In [4]:
## HAPPY WITH THE PREVIEW? APPEND IT TO THE RAW TABLE
## (writes to 'unleashed_raw'; re-running this cell appends the same rows again)

new_unl_raw.to_sql(
    name='unleashed_raw',
    con=engine,
    if_exists='append',     # keep existing rows, add the new ones
    index=False,            # the DataFrame index is not a data column
)

29

In [6]:
# READ RAW DATA FROM POSTGRES, ADD COLUMNS
#
# Builds the enriched sales frame (still named `unleashed_raw`, because the
# next cell writes that name to the 'unleashed_clean' table):
#   * usd / sale_origin columns
#   * market_segment and parent_customer looked up from customer_table.csv
#   * month / year columns derived from completed_date

# flat CAD -> USD conversion factor applied to every sub_total
CAD_TO_USD = 0.75
CUSTOMER_TABLE_PATH = r"C:\Users\mikej\Desktop\cpg-sales\data\customer_table.csv"

sales = pd.read_sql("""
                            SELECT completed_date, customer_name, product, quantity, sub_total 
                            FROM unleashed_raw
                            ;""", 
                            con = engine)

## add $ USD columns converted from CAD
sales['usd'] = sales['sub_total'] * CAD_TO_USD
## add origin of sale (dot or unleashed)
sales['sale_origin'] = 'unl'

# Read the customer lookup once; both attributes come from the same file.
customer_table = (
    pd.read_csv(CUSTOMER_TABLE_PATH,
                usecols=['customer', 'market_segment', 'parent_customer'])
    .set_index('customer')
)
# single-attribute views, kept under their previous names
segment_table = customer_table[['market_segment']]
cus_table = customer_table[['parent_customer']]

# Attach market segment and parent customer in a single left join on the
# customer name. validate='many_to_one' raises if customer_table lists a
# customer more than once, which would otherwise silently duplicate sales rows.
unleashed_raw = (
    sales
    .set_index('customer_name')
    .merge(customer_table[['market_segment', 'parent_customer']],
           how='left', left_index=True, right_index=True,
           validate='many_to_one')
    .rename_axis('customer_name')
    .reset_index()
)

# add year/month columns at the front of the frame
completed = pd.to_datetime(unleashed_raw['completed_date'])
month_col = completed.dt.month_name()
year_col = completed.dt.year
unleashed_raw.insert(0, "month", month_col)
unleashed_raw.insert(1, "year", year_col)

# store plain dates (no time component)
unleashed_raw['completed_date'] = completed.dt.date

print("  ")
print(unleashed_raw.completed_date.min())
print("thru")
print(unleashed_raw.completed_date.max())
print("  ")
print(f"Rows: {len(unleashed_raw)}")
unleashed_raw.info()

  
2018-01-05
thru
2024-01-17
  
Rows: 197182
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 197182 entries, 0 to 197181
Data columns (total 11 columns):
 #   Column           Non-Null Count   Dtype  
---  ------           --------------   -----  
 0   month            197182 non-null  object 
 1   year             197182 non-null  int32  
 2   customer_name    197182 non-null  object 
 3   completed_date   197182 non-null  object 
 4   product          197182 non-null  object 
 5   quantity         197182 non-null  float64
 6   sub_total        197182 non-null  float64
 7   usd              197182 non-null  float64
 8   sale_origin      197182 non-null  object 
 9   market_segment   195631 non-null  object 
 10  parent_customer  195631 non-null  object 
dtypes: float64(3), int32(1), object(7)
memory usage: 15.8+ MB


In [9]:
##  -- WRITE THE ENRICHED FRAME TO THE 'unleashed_clean' TABLE
##     (the existing table is replaced, not appended to)

unleashed_raw.to_sql(
    name='unleashed_clean',
    con=engine,
    if_exists='replace',
    index=False,
)

182

In [10]:
## load unleashed_clean back from Postgres and sanity-check date range / row count

clean_query = 'SELECT * FROM unleashed_clean;'
unleashed_clean = pd.read_sql(clean_query, con=engine)

completed_dates = unleashed_clean['completed_date']
print(completed_dates.min())
print(completed_dates.max())
print(f"Rows: {len(unleashed_clean)}")
unleashed_clean.head(3)

2018-01-05
2024-01-17
Rows: 197182


Unnamed: 0,month,year,customer_name,completed_date,product,quantity,sub_total,usd,sale_origin,market_segment,parent_customer
0,January,2018,1999,2018-01-19,Shopify Shipping cost,1.0,6.39,4.7925,unl,Other,1999
1,January,2018,1999,2018-01-19,Cinnamon Bun Granola Bars 16 Pack,1.0,28.74,21.555,unl,Other,1999
2,January,2018,ACME,2018-01-12,Shopify Shipping cost,1.0,6.39,4.7925,unl,Other,ACME
