In [1]:
#import dependencies
from pathlib import Path

import pandas as pd

In [2]:
# Location of the raw invoice export (a path relative to the working directory)
stats = "InvoiceStats.csv"

# Load the export into a DataFrame
inv_stats = pd.read_csv(filepath_or_buffer=stats)

# Preview the first five rows
inv_stats.head(n=5)

Unnamed: 0,Billing Group,Invoice Status,Servicing Dealer,Dealer Invoice,Requested Amount,Approved Amount,Invoice Create Date,Invoice Issue?
0,HD,Raymond Paid,60791,V30096632,85.0,85.0,01-05-2021,Yes
1,AZ,Raymond Paid,60791,V30096663,120.0,120.0,01-05-2021,No
2,HD,Raymond Paid,60791,V30096723,135.13,135.13,01-05-2021,No
3,HD,Raymond Paid,60791,V30096630,159.13,159.13,01-05-2021,No
4,HD,Raymond Paid,60791,V30096631,195.23,195.23,01-05-2021,No


In [3]:
#Keep only the two columns the summary needs: the dealer and the issue flag
kept_columns = ["Servicing Dealer", "Invoice Issue?"]
inv_stats = inv_stats.loc[:, kept_columns]

inv_stats.head(n=5)

Unnamed: 0,Servicing Dealer,Invoice Issue?
0,60791,Yes
1,60791,No
2,60791,No
3,60791,No
4,60791,No


In [4]:
#Give the remaining columns short, code-friendly labels
column_labels = {
    "Servicing Dealer": "Branch",
    "Invoice Issue?": "Inv_Issue",
}
inv_stats = inv_stats.rename(columns=column_labels)

inv_stats.head(n=5)

Unnamed: 0,Branch,Inv_Issue
0,60791,Yes
1,60791,No
2,60791,No
3,60791,No
4,60791,No


In [5]:
#rename branch number to abbreviation
#dealer numbers 60789 and 60790 both map to the DEN branch
branch_codes = {60789: "DEN", 60790: "DEN", 60791: "SLC", 60792: "ABQ"}

#the nested dict limits the replacement to the Branch column; a flat dict
#would also rewrite matching values found in any other column
inv_stats = inv_stats.replace({"Branch": branch_codes})

inv_stats.head()

Unnamed: 0,Branch,Inv_Issue
0,SLC,Yes
1,SLC,No
2,SLC,No
3,SLC,No
4,SLC,No


In [6]:
#build the per-branch series used for the summary: total invoices,
#invoices with and without issues, and the percentage with issues

#branch total invoices
branch_total = inv_stats["Branch"].value_counts()

#branch inv count with issues
#flag each invoice whose Inv_Issue is "Yes" and sum the flags per branch.
#Summing a boolean flag (instead of filtering to the "Yes" rows and counting)
#keeps a branch that has no issues in the result with a count of 0; with the
#filtered approach that branch would be missing and the subtraction and
#percentage below would turn into NaN for it.
#This also avoids reusing the name `stats`, which holds the csv path.
has_issue = inv_stats["Inv_Issue"].eq("Yes")
issue = has_issue.groupby(inv_stats["Branch"]).sum()

#subtract inv issues from total to calculate no issues
no_issue = branch_total - issue

#calculate percentage with issues
percent = issue / branch_total * 100

In [7]:
#assemble the per-branch series into a single table, one column per metric
#(the series are aligned on their branch index)
summary_columns = {
    "Total_Inv": branch_total,
    "Issue": issue,
    "No_Issue": no_issue,
    "Issue_Percent": percent,
}
stat_summary = pd.DataFrame(summary_columns)

stat_summary

Unnamed: 0,Total_Inv,Issue,No_Issue,Issue_Percent
ABQ,176,97,79,55.113636
DEN,2962,916,2046,30.925051
SLC,2097,1150,947,54.840248


In [8]:
#calculate the grand totals across all branches

#total invoices
summary_total = stat_summary.loc[:, "Total_Inv"].sum()

#total invoices with issues
issue_total = stat_summary.loc[:, "Issue"].sum()

#total invoices without issues
no_issue_total = stat_summary.loc[:, "No_Issue"].sum()

#overall percentage of invoices with issues
percent_total = issue_total / summary_total * 100

#single-row DF (index 0) with the same columns as stat_summary, ready to be combined with it
totals_row = {
    "Total_Inv": summary_total,
    "Issue": issue_total,
    "No_Issue": no_issue_total,
    "Issue_Percent": percent_total,
}
total_summary = pd.DataFrame([totals_row])

total_summary


Unnamed: 0,Total_Inv,Issue,No_Issue,Issue_Percent
0,5235,2163,3072,41.318052


In [9]:
#stack the per-branch summary on top of the totals row
#pd.concat replaces DataFrame.append, which was deprecated in pandas 1.4 and
#removed in pandas 2.0; the row labels (branches, then 0 for the totals row)
#are kept exactly as before
summary = pd.concat([stat_summary, total_summary])

#show percentages with two decimals
summary = summary.round({"Issue_Percent": 2})

summary

Unnamed: 0,Total_Inv,Issue,No_Issue,Issue_Percent
ABQ,176,97,79,55.11
DEN,2962,916,2046,30.93
SLC,2097,1150,947,54.84
0,5235,2163,3072,41.32


In [10]:
#the totals row carries the row label 0; relabel it "Total"
clean_summary = summary.rename(index={0: "Total"})

#name the index "Branch"
branch_stat_summary = clean_summary.rename_axis(index="Branch")

branch_stat_summary

Unnamed: 0_level_0,Total_Inv,Issue,No_Issue,Issue_Percent
Branch,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
ABQ,176,97,79,55.11
DEN,2962,916,2046,30.93
SLC,2097,1150,947,54.84
Total,5235,2163,3072,41.32


In [11]:
#create summary csv
summary_path = Path("CleanData/StatSummary.csv")

#to_csv does not create missing folders, so make sure the output folder exists
summary_path.parent.mkdir(parents=True, exist_ok=True)

branch_stat_summary.to_csv(summary_path)