In [1]:
#import dependencies
import pandas as pd

In [2]:
#create csv path
stats = "InvoiceStats.csv"

# Read the CSV into a Pandas DataFrame
inv_stats = pd.read_csv(stats)

inv_stats.head()

Unnamed: 0,Billing Group,Invoice Status,Servicing Dealer,Dealer Invoice,Requested Amount,Approved Amount,Invoice Create Date,Invoice Issue?
0,KN,Cancel,60791,V30098805,1050.15,0.0,02-26-2021,No
1,ML,Cancel,60791,V30099281,1835.3,0.0,03-18-2021,No
2,KN,Cancel,60791,V30102014,2685.0,0.0,06-03-2021,No
3,HD,Cancel,60792,V20091751,1152.3,0.0,06-09-2021,No
4,XPO,Cancel,60791,V30102750,6176.3,0.0,06-17-2021,No


In [3]:
#Reorganize columns and drop unnecessary columns

inv_stats = inv_stats[["Servicing Dealer", "Invoice Issue?", "Requested Amount"]]

inv_stats.head()

Unnamed: 0,Servicing Dealer,Invoice Issue?,Requested Amount
0,60791,No,1050.15
1,60791,No,1835.3
2,60791,No,2685.0
3,60792,No,1152.3
4,60791,No,6176.3


In [4]:
#rename columns

inv_stats = inv_stats.rename(columns={"Servicing Dealer":"Branch", "Invoice Issue?":"Inv_Issue", "Requested Amount":"Inv_Amt"})


inv_stats.dtypes

Branch         int64
Inv_Issue     object
Inv_Amt      float64
dtype: object

In [10]:
#rename branch number to abbreviation
inv_stats = inv_stats.replace({60789:"DEN", 60790:"DEN", 60791:"SLC", 60792:"ABQ"})

inv_stats.head()


Unnamed: 0,Branch,Inv_Issue,Inv_Amt
0,SLC,No,1050.15
1,SLC,No,1835.3
2,SLC,No,2685.0
3,ABQ,No,1152.3
4,SLC,No,6176.3


In [19]:
#create new dataframe with summary of total number of invoices, invoices with and without errors and percentage per branch

#branch total invoice count
branch_total = inv_stats["Branch"].value_counts()

#branch $$
branch_amt = inv_stats.groupby(["Branch"]).sum()["Inv_Amt"]

#branch inv count with errors
#filter main DF to only show "Yes" in Inv_Issue column
stats = inv_stats[inv_stats.Inv_Issue == "Yes"]
#count of invoices with issues per branch
issue = stats.groupby(["Branch"]).count()["Inv_Issue"]

#subtract inv issues from total to calculate no issues
no_issue = branch_total - issue

#calculate percentage with error
percent = issue/branch_total *100



Branch
ABQ      64563.55
DEN    1050875.96
SLC     882622.00
Name: Inv_Amt, dtype: float64

In [20]:
#new DF
stat_summary = pd.DataFrame({"Total_Inv": branch_total, "Issue": issue, "No_Issue":no_issue, "Issue_Percent":percent, "Dollar_Amt":branch_amt})

stat_summary

Unnamed: 0,Total_Inv,Issue,No_Issue,Issue_Percent,Branch_Amt
ABQ,185,101,84,54.594595,64563.55
DEN,3072,939,2133,30.566406,1050875.96
SLC,2147,1167,980,54.354914,882622.0


In [21]:
#caculate totals for each column

#total invoices
summary_total = stat_summary["Total_Inv"].sum()

#total invoices with issues
issue_total = stat_summary["Issue"].sum()

#total invoices without issues
no_issue_total = stat_summary["No_Issue"].sum()

#percentage with error
percent_total = issue_total/summary_total *100

#total $$ submitted


#create new DF to merge with stat_summary
total_summary = pd.DataFrame({"Total_Inv": [summary_total], "Issue":[issue_total], "No_Issue":[no_issue_total], "Issue_Percent":[percent_total]})

total_summary


Unnamed: 0,Total_Inv,Issue,No_Issue,Issue_Percent
0,5404,2207,3197,40.840118


In [15]:
#merge both summary DFs
summary = stat_summary.append(total_summary)

summary = summary.round({"Issue_Percent":2})

summary

Unnamed: 0,Total_Inv,Issue,No_Issue,Issue_Percent
ABQ,185,101,84,54.59
DEN,3072,939,2133,30.57
SLC,2147,1167,980,54.35
0,5404,2207,3197,40.84


In [16]:
#rename index 0 to Total
clean_summary=summary.rename({0: "Total"})

#rename axis to Branch
branch_stat_summary = clean_summary.rename_axis("Branch")

branch_stat_summary

Unnamed: 0_level_0,Total_Inv,Issue,No_Issue,Issue_Percent
Branch,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
ABQ,185,101,84,54.59
DEN,3072,939,2133,30.57
SLC,2147,1167,980,54.35
Total,5404,2207,3197,40.84


In [17]:
#create summary csv
branch_stat_summary.to_csv("CleanData/StatSummary.csv")