In [1]:
#import dependencies
from pathlib import Path

import pandas as pd

In [2]:
# Raw invoice export, resolved relative to the notebook's working directory
stats = "InvoiceStats.csv"

# Load the whole export into a DataFrame
inv_stats = pd.read_csv(filepath_or_buffer=stats)

# Preview the first five rows
inv_stats.head(5)

Unnamed: 0,Billing Group,Invoice Status,Servicing Dealer,Dealer Invoice,Requested Amount,Approved Amount,Invoice Create Date,Invoice Issue?
0,KN,Cancel,60791,V30098805,1050.15,0.0,02-26-2021,No
1,ML,Cancel,60791,V30099281,1835.3,0.0,03-18-2021,No
2,KN,Cancel,60791,V30102014,2685.0,0.0,06-03-2021,No
3,HD,Cancel,60792,V20091751,1152.3,0.0,06-09-2021,No
4,XPO,Cancel,60791,V30102750,6176.3,0.0,06-17-2021,No


## Data Cleaning

In [3]:
# Keep only the three fields the customer summary needs, with the dealer first
keep_cols = ["Servicing Dealer", "Billing Group", "Requested Amount"]
cust_stats = inv_stats.loc[:, keep_cols]

cust_stats.head()

Unnamed: 0,Servicing Dealer,Billing Group,Requested Amount
0,60791,KN,1050.15
1,60791,ML,1835.3
2,60791,KN,2685.0
3,60792,HD,1152.3
4,60791,XPO,6176.3


In [4]:
# Shorter column names used by every later cell
column_names = {
    "Servicing Dealer": "Branch",
    "Billing Group": "Customer",
    "Requested Amount": "Inv_Amt",
}
cust_stats = cust_stats.rename(columns=column_names)

cust_stats.head()

Unnamed: 0,Branch,Customer,Inv_Amt
0,60791,KN,1050.15
1,60791,ML,1835.3
2,60791,KN,2685.0
3,60792,HD,1152.3
4,60791,XPO,6176.3


In [5]:
#rename branch number to abbreviation
# Dealer number -> branch abbreviation; two dealer numbers both map to DEN.
branch_names = {60789: "DEN", 60790: "DEN", 60791: "SLC", 60792: "ABQ"}

# Scope the replacement to the Branch column only. A flat mapping is applied to
# every column of the frame, so an invoice amount that happens to equal a
# dealer number (e.g. 60791.0) would be rewritten to a branch code and the
# numeric Inv_Amt column would turn into an object column.
cust_stats = cust_stats.replace({"Branch": branch_names})

cust_stats.head()


Unnamed: 0,Branch,Customer,Inv_Amt
0,SLC,KN,1050.15
1,SLC,ML,1835.3
2,SLC,KN,2685.0
3,ABQ,HD,1152.3
4,SLC,XPO,6176.3


In [6]:
# Tally invoices per raw customer code so each code can be mapped to a full name.
# Note: the codes in this data carry a leading space (" AZ", " WM", ...).
customer_codes = cust_stats["Customer"]
customer_codes.value_counts()

 AZ     1548
 WM      902
 HD      805
 ML      375
 UA      228
 HS      188
 KH      183
 XPO     163
 ITL     160
 SC      147
 ARM     132
 KN      107
 WW       73
 TCI      63
 BZ       53
 MFD      44
 MR       42
 CO       42
 TSI      40
 OD       34
 PCS      32
 HF       32
 XL       32
 AR       29
 HDD      23
 WAY      23
 BKR      23
 COL      21
 GS       19
 IK       10
 LME       8
 FG        4
 BL        3
 PTN       2
Name: Customer, dtype: int64

In [7]:
# Raw customer code -> full customer name.
# Keys keep the leading space exactly as the codes appear in the data.
# " ITL" and " HDD" both roll up to "HD_Pro".
customer_names = {
    " AZ": "Amazon",
    " WM": "Walmart",
    " HD": "HomeDepot",
    " ML": "Medline",
    " UA": "US_Auto",
    " HS": "HD_Supply",
    " KH": "KeHE",
    " ITL": "HD_Pro",
    " SC": "SamsClub",
    " ARM": "Aramsco",
    " KN": "KuehneNagel",
    " WW": "Grainger",
    " TCI": "TireCenters",
    " BZ": "Bunzl",
    " MFD": "MattressFirm",
    " MR": "Lineage",
    " CO": "Costco",
    " TSI": "Sealy",
    " OD": "OfficeDepot",
    " PCS": "Petco",
    " HF": "HarborFreight",
    " XL": "DHL",
    " AR": "Americold",
    " HDD": "HD_Pro",
    " WAY": "Wayfair",
    " BKR": "BakerDist",
    " COL": "CompositesOne",
    " GS": "Gensco",
    " IK": "Ikea",
    " LME": "XPO_LM",
    " FG": "Ferguson",
    " BL": "BigLots",
    " PTN": "Peloton",
    " XPO": "XPO",
}

# Swap every code for its full name
cust_stats = cust_stats.replace(customer_names)

cust_stats.head()

Unnamed: 0,Branch,Customer,Inv_Amt
0,SLC,KuehneNagel,1050.15
1,SLC,Medline,1835.3
2,SLC,KuehneNagel,2685.0
3,ABQ,HomeDepot,1152.3
4,SLC,XPO,6176.3


## Customer Invoice Stats

In [25]:
#customer inv stats per branch

#Column order shared by every per-branch summary row
CUSTOMER_COLUMNS = [
    "Amazon", "Americold", "Aramsco", "BakerDist", "BigLots", "Bunzl",
    "CompositesOne", "Costco", "DHL", "Ferguson", "Gensco", "Grainger",
    "HarborFreight", "HomeDepot", "HD_Pro", "HD_Supply", "Ikea", "KeHE",
    "KuehneNagel", "Lineage", "MattressFirm", "Medline", "OfficeDepot",
    "Peloton", "Petco", "SamsClub", "Sealy", "TireCenters", "US_Auto",
    "Walmart", "Wayfair", "XPO", "XPO_LM",
]


def _customer_totals(branch_rows):
    """Return a Series of summed ``Inv_Amt`` indexed by ``Customer``.

    The amount column is selected explicitly before summing so that no other
    column can end up in the aggregation.
    """
    return branch_rows.groupby("Customer")["Inv_Amt"].sum()


def _branch_summary_row(totals, branch, customers=CUSTOMER_COLUMNS):
    """Build a one-row DataFrame of customer dollars labelled with ``branch``.

    Parameters
    ----------
    totals : pd.Series
        Invoice dollars indexed by customer name (see ``_customer_totals``).
    branch : str
        Label used as the row index, e.g. "ABQ".
    customers : sequence of str
        Columns of the result, in order. A customer with no entry in
        ``totals`` gets 0 rather than raising.

    Returns
    -------
    pd.DataFrame
        Shape (1, len(customers)), index ``[branch]``.
    """
    columns = list(customers)
    # reindex supplies 0 for customers this branch never invoiced and fixes
    # the column order, so nothing has to be typed in by hand per branch.
    row = totals.reindex(columns, fill_value=0)
    return pd.DataFrame([row.to_numpy()], index=[branch], columns=columns)


#ABQ - separate customers for ABQ only
abq_cust = cust_stats[cust_stats.Branch == "ABQ"]
#Sum of invoices per customer group
abq_cust_dollars = _customer_totals(abq_cust)
#One-row summary indexed by branch name
a_cust_dollars = _branch_summary_row(abq_cust_dollars, "ABQ")


#DEN
den_cust = cust_stats[cust_stats.Branch == "DEN"]
#Sum of invoices per customer group
den_cust_dollars = _customer_totals(den_cust)
#One-row summary indexed by branch name
d_cust_dollars = _branch_summary_row(den_cust_dollars, "DEN")


#SLC - every column, including Grainger and HarborFreight, comes from SLC rows only
slc_cust = cust_stats[cust_stats.Branch == "SLC"]
#Sum of invoices per customer group
slc_cust_dollars = _customer_totals(slc_cust)
#One-row summary indexed by branch name
s_cust_dollars = _branch_summary_row(slc_cust_dollars, "SLC")



Unnamed: 0,Amazon,Americold,Aramsco,BakerDist,BigLots,Bunzl,CompositesOne,Costco,DHL,Ferguson,...,Peloton,Petco,SamsClub,Sealy,TireCenters,US_Auto,Walmart,Wayfair,XPO,XPO_LM
ABQ,0,0,3717.8,1046.47,0,6554.66,0,20528.54,0,0,...,0,2665.39,0,0,1537.15,0,15740.43,0,0,0


In [26]:
# Stack the one-row branch summaries (ABQ, DEN, SLC) into a single table,
# one row per branch
cust_dollars = [a_cust_dollars, d_cust_dollars, s_cust_dollars]
cust_dollar_summary = pd.concat(cust_dollars, axis=0)

cust_dollar_summary

Unnamed: 0,Amazon,Americold,Aramsco,BakerDist,BigLots,Bunzl,CompositesOne,Costco,DHL,Ferguson,...,Peloton,Petco,SamsClub,Sealy,TireCenters,US_Auto,Walmart,Wayfair,XPO,XPO_LM
ABQ,0.0,0.0,3717.8,1046.47,0.0,6554.66,0.0,20528.54,0.0,0.0,...,0.0,2665.39,0.0,0.0,1537.15,0.0,15740.43,0.0,0.0,0.0
DEN,265963.89,625.35,6897.32,1395.47,403.65,0.0,0.0,0.0,0.0,1322.44,...,138.06,4600.89,27647.07,18017.79,24601.88,54944.72,206634.47,5759.1,0.0,7279.98
SLC,246507.69,23839.72,46160.43,6485.57,150.0,0.0,2850.09,0.0,17788.39,0.0,...,731.25,110.0,17037.12,0.0,0.0,10419.1,249322.71,0.0,98400.88,1207.75


In [28]:
# Name the row index "Branch" so it is labelled when the table is shown or saved
customer_summary = cust_dollar_summary.rename_axis(index="Branch")

customer_summary

Unnamed: 0_level_0,Amazon,Americold,Aramsco,BakerDist,BigLots,Bunzl,CompositesOne,Costco,DHL,Ferguson,...,Peloton,Petco,SamsClub,Sealy,TireCenters,US_Auto,Walmart,Wayfair,XPO,XPO_LM
Branch,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
ABQ,0.0,0.0,3717.8,1046.47,0.0,6554.66,0.0,20528.54,0.0,0.0,...,0.0,2665.39,0.0,0.0,1537.15,0.0,15740.43,0.0,0.0,0.0
DEN,265963.89,625.35,6897.32,1395.47,403.65,0.0,0.0,0.0,0.0,1322.44,...,138.06,4600.89,27647.07,18017.79,24601.88,54944.72,206634.47,5759.1,0.0,7279.98
SLC,246507.69,23839.72,46160.43,6485.57,150.0,0.0,2850.09,0.0,17788.39,0.0,...,731.25,110.0,17037.12,0.0,0.0,10419.1,249322.71,0.0,98400.88,1207.75


In [29]:
#create summary csv
summary_path = Path("CleanData/CustDollars.csv")
# to_csv does not create missing folders, so make sure the output directory
# exists before writing (no-op when it is already there).
summary_path.parent.mkdir(parents=True, exist_ok=True)
customer_summary.to_csv(summary_path)