In [234]:
import pandas as pd
import numpy as np
pd.set_option('display.max_colwidth', None)
pd.set_option('display.max_columns', None) 

In [235]:
# Workbook that holds every sheet read by this notebook (relative path, so it
# is resolved against the kernel's working directory).
file_path = "TushyCartonisationAnalysis_recreate.xlsx"

# Collect the sheet names. Using ExcelFile as a context manager closes the
# underlying file handle as soon as the names are read, instead of leaving
# the workbook open until the object happens to be garbage-collected.
with pd.ExcelFile(file_path) as workbook:
    excel_sheets = workbook.sheet_names

In [236]:
# Columns of the 'Raw Baja Invoices' sheet that are removed from invoice_df.
invoice_df_drop_columns = ['Reference','Entry Date', 'Entry Number', 'Customs Value Currency']
# dtype overrides: order numbers are handled as text.
invoice_df_data_types = {"Order Number":"str"}

# Load the raw invoice lines, upper-case the charge label, drop the unused
# columns, append a 1-based sequential row id and apply the dtype overrides.
invoice_df = (
    pd.read_excel(file_path, sheet_name='Raw Baja Invoices')
    .assign(Charge=lambda frame: frame['Charge'].str.upper())
    .drop(columns=invoice_df_drop_columns)
    .assign(id=lambda frame: range(1, len(frame) + 1))
    .astype(invoice_df_data_types)
)

# 'Transaction Date' is read as a day count from 1899-12-30 (presumably Excel
# serial dates - confirm against the sheet) and stored as 'YYYY-MM-DD' text.
transaction_dates = pd.to_datetime(invoice_df['Transaction Date'], unit='D', origin='1899-12-30')
invoice_df['Transaction Date'] = transaction_dates.dt.strftime('%Y-%m-%d')

In [237]:
# Show the column labels of invoice_df (the dropped columns should be gone
# and the generated 'id' column should be last).
invoice_df.columns

Index(['Invoice', 'Invoice Date', 'Order ID', 'Order Number', 'Profile',
       'Carrier', 'Transaction Date', 'Tracking Number', 'Service Type',
       'Charge', 'Package Quantity', 'Packaging Type', 'Entered Weight (LB)',
       'Billed Weight (LB)', 'Dim Length', 'Dim Width', 'Dim Height', 'Zone',
       'Origin State', 'Origin Country', 'Receiver Name', 'Receiver Company',
       'Receiver Address Line 1', 'Receiver Address Line 2', 'Receiver City',
       'Receiver State', 'Receiver Zip', 'Receiver Country', 'Customs Value',
       'Duty & Taxes', 'Bill Amount', 'id'],
      dtype='object')

In [238]:
# Check the dtype of every invoice_df column, e.g. that 'Order Number' and the
# re-formatted 'Transaction Date' are held as object (string) columns.
invoice_df.dtypes

Invoice                             int64
Invoice Date               datetime64[ns]
Order ID                            int64
Order Number                       object
Profile                            object
Carrier                            object
Transaction Date                   object
Tracking Number                    object
Service Type                       object
Charge                             object
Package Quantity                  float64
Packaging Type                     object
Entered Weight (LB)               float64
Billed Weight (LB)                float64
Dim Length                        float64
Dim Width                         float64
Dim Height                        float64
Zone                               object
Origin State                       object
Origin Country                     object
Receiver Name                      object
Receiver Company                   object
Receiver Address Line 1            object
Receiver Address Line 2           

In [239]:
# Preview the first five cleaned invoice lines.
invoice_df.head()

Unnamed: 0,Invoice,Invoice Date,Order ID,Order Number,Profile,Carrier,Transaction Date,Tracking Number,Service Type,Charge,Package Quantity,Packaging Type,Entered Weight (LB),Billed Weight (LB),Dim Length,Dim Width,Dim Height,Zone,Origin State,Origin Country,Receiver Name,Receiver Company,Receiver Address Line 1,Receiver Address Line 2,Receiver City,Receiver State,Receiver Zip,Receiver Country,Customs Value,Duty & Taxes,Bill Amount,id
0,33477,2025-03-20,576899052,#1442861,default,UPS,2025-02-11,1ZAC8375YW34252402,UPS SurePost - 1 lb or Greater,TRANSPORTATION,1.0,PKG,4.1,5.0,19.0,9.0,5.0,7,TX,US,Dana Wallace,"TUSHY, Inc",10813 NW 30TH ST STE 115,,DORAL,FL,331922147,US,0.0,0.0,9.8,1
1,33477,2025-03-20,576899052,#1442861,default,UPS,2025-02-11,1ZAC8375YW34252402,UPS SurePost - 1 lb or Greater,FUEL SURCHARGE,1.0,PKG,4.1,5.0,19.0,9.0,5.0,7,TX,US,Dana Wallace,"TUSHY, Inc",10813 NW 30TH ST STE 115,,DORAL,FL,331922147,US,0.0,0.0,1.23,2
2,33477,2025-03-20,581683521,#1446209,default,UPS,2025-02-24,1ZAC8375YW14943702,UPS SurePost - 1 lb or Greater,TRANSPORTATION,1.0,PKG,2.1,3.0,18.0,10.0,4.0,7,TX,US,,Jacob Vosper,22 DORLAND AVE,,POUGHKEEPSIE,NY,126036404,US,0.0,0.0,8.9,3
3,33477,2025-03-20,581683521,#1446209,default,UPS,2025-02-24,1ZAC8375YW14943702,UPS SurePost - 1 lb or Greater,FUEL SURCHARGE,1.0,PKG,2.1,3.0,18.0,10.0,4.0,7,TX,US,,Jacob Vosper,22 DORLAND AVE,,POUGHKEEPSIE,NY,126036404,US,0.0,0.0,1.12,4
4,33477,2025-03-20,582604555,#1446902,default,UPS,2025-02-26,1ZAC83750301909861,Ground Residential,TRANSPORTATION,1.0,PKG,0.1,18.0,23.0,21.0,8.0,7,TX,US,,Alisha Sare,277 SANTA ROSA AVE,PUSH 3 ON THE HILAVATOR OR TAKE,SAUSALITO,CA,949652036,US,0.0,0.0,16.86,5


In [240]:
# Read the 'Charge Map' sheet and upper-case its charge labels, the same
# normalisation that is applied to invoice_df['Charge'].
charge_df = (
    pd.read_excel(file_path, sheet_name='Charge Map')
    .assign(Charge=lambda sheet: sheet['Charge'].str.upper())
)

# Lookup table holding only the charge label and the category it maps to.
charge_map_df = charge_df.loc[:, ["Charge", "Map"]]

In [241]:
# Attach the 'Map' category to each invoice line through its 'Charge' label.
# The inner join keeps only the lines whose charge exists in charge_map_df.
invoice_charge_df = invoice_df.merge(charge_map_df, on='Charge', how='inner')

# An inner join discards unmapped lines without any signal, so their amounts
# would silently be missing from everything derived from invoice_charge_df.
# Report them so the 'Charge Map' sheet can be extended; the join result
# itself is left unchanged.
unmapped_charge_lines = invoice_df.loc[~invoice_df['Charge'].isin(charge_map_df['Charge'])]
if not unmapped_charge_lines.empty:
    print(
        f"WARNING: {len(unmapped_charge_lines)} invoice line(s) have a charge that is missing "
        f"from the charge map and are excluded: "
        f"{sorted(unmapped_charge_lines['Charge'].dropna().unique())}"
    )

In [242]:
# Known gap: the charge "Return To Sender - Web Request" and its map "Returns"
# are missing from this result.
# NOTE(review): presumably those lines are dropped by the inner join on 'Charge'
# because the charge has no match in the charge map - confirm against the sheet.
invoice_charge_df["Map"].unique()

array(['Freight', 'Fuel', 'Residential', 'Surcharge', 'Freight Adj',
       'Fuel Adj', 'Surcharge Adj', 'Duties/Tax'], dtype=object)

In [243]:
# Spread 'Bill Amount' over one column per charge category ('Map' value):
# each row carries its amount in the column named after its own category and
# 0 in every other category column. Work on a copy so invoice_charge_df keeps
# its original columns.
invoice_Flatten = invoice_charge_df.copy()
unique_maps = invoice_Flatten['Map'].unique()
for map_value in unique_maps:
    # Vectorised form of the row-wise apply(axis=1): keep the amount where the
    # row belongs to this category, otherwise use 0. This yields the same
    # values without calling a Python function once per row per category.
    invoice_Flatten[map_value] = invoice_Flatten['Bill Amount'].where(
        invoice_Flatten['Map'] == map_value, 0
    )

In [244]:
# Preview the flattened frame: every row should carry its 'Bill Amount' in the
# column named after its 'Map' value and 0 in the other category columns.
invoice_Flatten.head()

Unnamed: 0,Invoice,Invoice Date,Order ID,Order Number,Profile,Carrier,Transaction Date,Tracking Number,Service Type,Charge,Package Quantity,Packaging Type,Entered Weight (LB),Billed Weight (LB),Dim Length,Dim Width,Dim Height,Zone,Origin State,Origin Country,Receiver Name,Receiver Company,Receiver Address Line 1,Receiver Address Line 2,Receiver City,Receiver State,Receiver Zip,Receiver Country,Customs Value,Duty & Taxes,Bill Amount,id,Map,Freight,Fuel,Residential,Surcharge,Freight Adj,Fuel Adj,Surcharge Adj,Duties/Tax
0,33477,2025-03-20,576899052,#1442861,default,UPS,2025-02-11,1ZAC8375YW34252402,UPS SurePost - 1 lb or Greater,TRANSPORTATION,1.0,PKG,4.1,5.0,19.0,9.0,5.0,7,TX,US,Dana Wallace,"TUSHY, Inc",10813 NW 30TH ST STE 115,,DORAL,FL,331922147,US,0.0,0.0,9.8,1,Freight,9.8,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,33477,2025-03-20,576899052,#1442861,default,UPS,2025-02-11,1ZAC8375YW34252402,UPS SurePost - 1 lb or Greater,FUEL SURCHARGE,1.0,PKG,4.1,5.0,19.0,9.0,5.0,7,TX,US,Dana Wallace,"TUSHY, Inc",10813 NW 30TH ST STE 115,,DORAL,FL,331922147,US,0.0,0.0,1.23,2,Fuel,0.0,1.23,0.0,0.0,0.0,0.0,0.0,0.0
2,33477,2025-03-20,581683521,#1446209,default,UPS,2025-02-24,1ZAC8375YW14943702,UPS SurePost - 1 lb or Greater,TRANSPORTATION,1.0,PKG,2.1,3.0,18.0,10.0,4.0,7,TX,US,,Jacob Vosper,22 DORLAND AVE,,POUGHKEEPSIE,NY,126036404,US,0.0,0.0,8.9,3,Freight,8.9,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,33477,2025-03-20,581683521,#1446209,default,UPS,2025-02-24,1ZAC8375YW14943702,UPS SurePost - 1 lb or Greater,FUEL SURCHARGE,1.0,PKG,2.1,3.0,18.0,10.0,4.0,7,TX,US,,Jacob Vosper,22 DORLAND AVE,,POUGHKEEPSIE,NY,126036404,US,0.0,0.0,1.12,4,Fuel,0.0,1.12,0.0,0.0,0.0,0.0,0.0,0.0
4,33477,2025-03-20,582604555,#1446902,default,UPS,2025-02-26,1ZAC83750301909861,Ground Residential,TRANSPORTATION,1.0,PKG,0.1,18.0,23.0,21.0,8.0,7,TX,US,,Alisha Sare,277 SANTA ROSA AVE,PUSH 3 ON THE HILAVATOR OR TAKE,SAUSALITO,CA,949652036,US,0.0,0.0,16.86,5,Freight,16.86,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [245]:
# Columns of the merged frame; it has no per-category columns because the
# flattening step works on a copy.
invoice_charge_df.columns

Index(['Invoice', 'Invoice Date', 'Order ID', 'Order Number', 'Profile',
       'Carrier', 'Transaction Date', 'Tracking Number', 'Service Type',
       'Charge', 'Package Quantity', 'Packaging Type', 'Entered Weight (LB)',
       'Billed Weight (LB)', 'Dim Length', 'Dim Width', 'Dim Height', 'Zone',
       'Origin State', 'Origin Country', 'Receiver Name', 'Receiver Company',
       'Receiver Address Line 1', 'Receiver Address Line 2', 'Receiver City',
       'Receiver State', 'Receiver Zip', 'Receiver Country', 'Customs Value',
       'Duty & Taxes', 'Bill Amount', 'id', 'Map'],
      dtype='object')

In [246]:
# Columns of the flattened frame: the merged columns followed by one column
# per charge category.
invoice_Flatten.columns

Index(['Invoice', 'Invoice Date', 'Order ID', 'Order Number', 'Profile',
       'Carrier', 'Transaction Date', 'Tracking Number', 'Service Type',
       'Charge', 'Package Quantity', 'Packaging Type', 'Entered Weight (LB)',
       'Billed Weight (LB)', 'Dim Length', 'Dim Width', 'Dim Height', 'Zone',
       'Origin State', 'Origin Country', 'Receiver Name', 'Receiver Company',
       'Receiver Address Line 1', 'Receiver Address Line 2', 'Receiver City',
       'Receiver State', 'Receiver Zip', 'Receiver Country', 'Customs Value',
       'Duty & Taxes', 'Bill Amount', 'id', 'Map', 'Freight', 'Fuel',
       'Residential', 'Surcharge', 'Freight Adj', 'Fuel Adj', 'Surcharge Adj',
       'Duties/Tax'],
      dtype='object')

In [247]:
# Disabled filter (kept for reference): would keep only the rows where at least one of the package dimensions is populated.
#invoice_Flatten = invoice_Flatten[invoice_Flatten[['Dim Length', 'Dim Width', 'Dim Height']].notnull().any(axis=1)]

In [248]:
# Collapse the per-charge invoice lines into one row per unique combination of
# the key columns below (presumably one row per shipped package - confirm).
shipment_keys = [
    'Order Number', 'Tracking Number', 'Origin State', 'Transaction Date',
    'Receiver Zip', 'Carrier', 'Service Type',
]
# Category amounts are added up across the lines of a group ...
summed_charge_columns = [
    'Freight', 'Fuel', 'Residential', 'Surcharge',
    'Freight Adj', 'Fuel Adj', 'Surcharge Adj', 'Duties/Tax',
]
# ... while weights and dimensions take the largest value seen in the group.
max_measure_columns = [
    'Entered Weight (LB)', 'Billed Weight (LB)',
    'Dim Length', 'Dim Width', 'Dim Height',
]
aggregation_spec = {
    **dict.fromkeys(summed_charge_columns, 'sum'),
    **dict.fromkeys(max_measure_columns, 'max'),
}

# dropna=False keeps groups whose key contains a missing value. With the
# default (dropna=True) every invoice line with, for example, a blank
# 'Origin State' or 'Receiver Zip' is silently left out of the result and its
# amounts disappear from the totals.
grouped_df = invoice_Flatten.groupby(
    shipment_keys,
    as_index=False,
    dropna=False,
).agg(aggregation_spec)

In [249]:
# 'F+F+R': freight plus fuel plus residential amount of each grouped row.
freight_fuel_residential = (
    grouped_df['Freight']
    .add(grouped_df['Fuel'])
    .add(grouped_df['Residential'])
)
grouped_df['F+F+R'] = freight_fuel_residential

# 'Bill Amount': row-wise total over all charge category columns,
# adjustments and duties/tax included.
billed_columns = ['Freight', 'Freight Adj', 'Fuel', 'Fuel Adj', 'Residential', 'Surcharge', 'Surcharge Adj', 'Duties/Tax']
grouped_df['Bill Amount'] = grouped_df[billed_columns].sum(axis=1)

In [250]:
# Preview the grouped rows together with the derived 'F+F+R' and 'Bill Amount' totals.
grouped_df.head()

Unnamed: 0,Order Number,Tracking Number,Origin State,Transaction Date,Receiver Zip,Carrier,Service Type,Freight,Fuel,Residential,Surcharge,Freight Adj,Fuel Adj,Surcharge Adj,Duties/Tax,Entered Weight (LB),Billed Weight (LB),Dim Length,Dim Width,Dim Height,F+F+R,Bill Amount
0,#1399993,D10016185702604,CA,2025-03-04,92154-7405,OnTrac,C,8.87,0.85,0.0,0.0,0.0,0.0,0.0,0.0,6.0,6.0,,,,9.72,9.72
1,#1421715,4630320365,WA,2025-03-07,V5R 6C3,DHL Express,EXPRESS WORLDWIDE nondoc,0.0,10.9,0.0,51.29,0.0,0.0,0.0,0.0,22.48,22.0,,,,10.9,62.19
2,#1424926-C,3505633670,WA,2025-01-29,V5K 5H7,DHL Express,EXPRESS WORLDWIDE nondoc,0.0,0.0,0.0,0.0,0.0,0.0,0.0,22.68,5.26,5.0,,,,0.0,22.68
3,#1427630-C,D10016188685055,CA,2025-03-05,60647-4073,OnTrac,C,19.95,2.9,0.88,9.25,0.0,0.0,0.0,0.0,15.0,18.0,23.0,19.0,8.0,23.73,32.98
4,#1428701,D10016188686475,CA,2025-03-05,28273-4401,OnTrac,C,21.94,3.09,0.88,9.25,0.0,0.0,0.0,0.0,15.0,18.0,19.0,8.0,23.0,25.91,35.16


In [251]:
# Load the 'Flattened-All Orders' sheet; skiprows=1 skips the first row of the
# sheet, so the column headers are taken from the row after it.
Flattened_df = pd.read_excel(file_path, sheet_name='Flattened-All Orders', skiprows=1)

# Keep only the sheet rows whose order number also occurs in grouped_df.
grouped_order_list = grouped_df['Order Number'].tolist()
in_grouped_orders = Flattened_df['Order Number'].isin(grouped_order_list)
Flattened_group_data = Flattened_df[in_grouped_orders]
# Optional export of the matched rows (disabled):
#Flattened_group_data.to_csv("Flattened_group_data.csv", index=False)

In [252]:
# Count how many sheet rows exist per order number; counts above 1 mean an
# order appears on several rows of the 'Flattened-All Orders' data.
Flattened_group_data['Order Number'].value_counts()

Order Number
#1448934(1)      4
#1451227         4
#1448451         3
#1451800         3
#1449801         3
                ..
#1449105         1
#1449107         1
#1449108         1
#1449109         1
EXC-1448436-1    1
Name: count, Length: 2706, dtype: int64

In [253]:
# Spot-check: show every grouped row that belongs to the listed order number(s).
orders_to_inspect = ['#1435752']
grouped_df.loc[grouped_df['Order Number'].isin(orders_to_inspect)]

Unnamed: 0,Order Number,Tracking Number,Origin State,Transaction Date,Receiver Zip,Carrier,Service Type,Freight,Fuel,Residential,Surcharge,Freight Adj,Fuel Adj,Surcharge Adj,Duties/Tax,Entered Weight (LB),Billed Weight (LB),Dim Length,Dim Width,Dim Height,F+F+R,Bill Amount
22,#1435752,D10016188981916,CA,2025-03-06,30309-4130,OnTrac,C,47.41,5.54,0.88,9.25,0.0,0.0,0.0,0.0,15.0,47.0,19.0,8.0,23.0,53.83,63.08
23,#1435752,D10016188984671,CA,2025-03-06,30309-4130,OnTrac,C,23.84,3.28,0.88,9.25,0.0,0.0,0.0,0.0,15.0,20.0,19.0,9.0,23.0,28.0,37.25
