In [1]:
#import dependencies
from pathlib import Path

import pandas as pd

In [2]:
# Relative path of the raw invoice export
stats = "InvoiceStats.csv"

# Load the export into a DataFrame
inv_stats = pd.read_csv(filepath_or_buffer=stats)

# Preview the first rows
inv_stats.head(5)

Unnamed: 0,Billing Group,Invoice Status,Servicing Dealer,Dealer Invoice,Requested Amount,Approved Amount,Invoice Create Date,Invoice Issue?
0,KN,Cancel,60791,V30098805,1050.15,0.0,02-26-2021,No
1,ML,Cancel,60791,V30099281,1835.3,0.0,03-18-2021,No
2,KN,Cancel,60791,V30102014,2685.0,0.0,06-03-2021,No
3,HD,Cancel,60792,V20091751,1152.3,0.0,06-09-2021,No
4,XPO,Cancel,60791,V30102750,6176.3,0.0,06-17-2021,No


## Invoice Stats per Branch - Total Inv Count, Count of Errors Vs. No Errors


In [3]:
# Keep only the three fields the branch summary needs, in this order
branch_summary_fields = ["Servicing Dealer", "Invoice Issue?", "Requested Amount"]
inv_stats = inv_stats.loc[:, branch_summary_fields]

inv_stats.head(5)

Unnamed: 0,Servicing Dealer,Invoice Issue?,Requested Amount
0,60791,No,1050.15
1,60791,No,1835.3
2,60791,No,2685.0
3,60792,No,1152.3
4,60791,No,6176.3


In [4]:
# Give the columns short labels for the rest of the analysis
short_labels = {
    "Servicing Dealer": "Branch",
    "Invoice Issue?": "Inv_Issue",
    "Requested Amount": "Inv_Amt",
}
inv_stats = inv_stats.rename(columns=short_labels)

# Show the resulting column dtypes
inv_stats.dtypes

Branch         int64
Inv_Issue     object
Inv_Amt      float64
dtype: object

In [5]:
# Translate dealer codes into branch abbreviations (60789 and 60790 both map to DEN).
# The mapping is scoped to the Branch column: a frame-wide replace would also
# rewrite any Inv_Amt value that happens to equal a dealer code (e.g. 60791.0).
inv_stats = inv_stats.replace(
    {"Branch": {60789: "DEN", 60790: "DEN", 60791: "SLC", 60792: "ABQ"}}
)

inv_stats.head()


Unnamed: 0,Branch,Inv_Issue,Inv_Amt
0,SLC,No,1050.15
1,SLC,No,1835.3
2,SLC,No,2685.0
3,ABQ,No,1152.3
4,SLC,No,6176.3


In [6]:
# Build the per-branch series that feed the summary table: total invoice count,
# requested dollars, invoices with / without issues, and the issue percentage.

# Total invoice count per branch
branch_total = inv_stats["Branch"].value_counts()

# Requested dollars per branch. The column is selected before summing so the
# text column (Inv_Issue) is never aggregated.
branch_amt = inv_stats.groupby("Branch")["Inv_Amt"].sum()

# Rows flagged with an invoice issue. A dedicated name is used so the CSV path
# held in `stats` is not rebound to a DataFrame.
issue_rows = inv_stats[inv_stats["Inv_Issue"] == "Yes"]

# Count of flagged invoices per branch. A branch with no flagged invoices is
# absent from the groupby result, which would turn its No_Issue and
# Issue_Percent into NaN; reindexing against every branch fills those with 0.
issue = (
    issue_rows.groupby("Branch")["Inv_Issue"]
    .count()
    .reindex(branch_amt.index, fill_value=0)
)

# Invoices without issues = total minus flagged
no_issue = branch_total - issue

# Percentage of invoices with an issue
percent = issue / branch_total * 100



In [7]:
# Combine the per-branch series into one summary table (one row per branch)
branch_series = {
    "Total_Inv": branch_total,
    "Issue": issue,
    "No_Issue": no_issue,
    "Issue_Percent": percent,
    "Dollar_Amt": branch_amt,
}
stat_summary = pd.DataFrame(branch_series)

stat_summary

Unnamed: 0,Total_Inv,Issue,No_Issue,Issue_Percent,Dollar_Amt
ABQ,185,101,84,54.594595,64563.55
DEN,3072,939,2133,30.566406,1050875.96
SLC,2147,1167,980,54.354914,882622.0


In [8]:
# Calculate grand totals for each summary column

# Column sums, in order: total invoices, invoices with issues,
# invoices without issues, total dollars submitted
summary_total, issue_total, no_issue_total, total_dollars = (
    stat_summary[column].sum()
    for column in ("Total_Inv", "Issue", "No_Issue", "Dollar_Amt")
)

# Overall percentage with an issue, computed from the totals
# (not by summing the per-branch percentages)
percent_total = issue_total / summary_total * 100

# One-row frame with the same columns as stat_summary, ready to be merged with it
totals_row = {
    "Total_Inv": [summary_total],
    "Issue": [issue_total],
    "No_Issue": [no_issue_total],
    "Issue_Percent": [percent_total],
    "Dollar_Amt": [total_dollars],
}
total_summary = pd.DataFrame(totals_row)

total_summary


Unnamed: 0,Total_Inv,Issue,No_Issue,Issue_Percent,Dollar_Amt
0,5404,2207,3197,40.840118,1998061.51


In [9]:
# Stack the per-branch rows on top of the one-row totals frame.
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0; pd.concat
# is the supported equivalent. Each frame keeps its own index labels, so the
# totals row still carries its default label 0.
summary = pd.concat([stat_summary, total_summary])

# Round the percentage column to two decimals
summary = summary.round({"Issue_Percent": 2})

summary

Unnamed: 0,Total_Inv,Issue,No_Issue,Issue_Percent,Dollar_Amt
ABQ,185,101,84,54.59,64563.55
DEN,3072,939,2133,30.57,1050875.96
SLC,2147,1167,980,54.35,882622.0
0,5404,2207,3197,40.84,1998061.51


In [10]:
# Relabel the totals row (index label 0) as "Total"
clean_summary = summary.rename(index={0: "Total"})

# Name the index "Branch"
branch_stat_summary = clean_summary.rename_axis(index="Branch")

branch_stat_summary

Unnamed: 0_level_0,Total_Inv,Issue,No_Issue,Issue_Percent,Dollar_Amt
Branch,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
ABQ,185,101,84,54.59,64563.55
DEN,3072,939,2133,30.57,1050875.96
SLC,2147,1167,980,54.35,882622.0
Total,5404,2207,3197,40.84,1998061.51


In [11]:
# Write the branch summary to CSV. to_csv does not create missing folders,
# so make sure the output directory exists first (no-op when it already does).
summary_csv_path = Path("CleanData/StatSummary.csv")
summary_csv_path.parent.mkdir(parents=True, exist_ok=True)
branch_stat_summary.to_csv(summary_csv_path)

## Invoice Financial Stats per Branch and Month

In [12]:
# Relative path of the raw invoice export
stats = "InvoiceStats.csv"

# Load the export into a fresh DataFrame for the financial analysis and preview it
financial_stats = pd.read_csv(filepath_or_buffer=stats)
financial_stats.head(5)

Unnamed: 0,Billing Group,Invoice Status,Servicing Dealer,Dealer Invoice,Requested Amount,Approved Amount,Invoice Create Date,Invoice Issue?
0,KN,Cancel,60791,V30098805,1050.15,0.0,02-26-2021,No
1,ML,Cancel,60791,V30099281,1835.3,0.0,03-18-2021,No
2,KN,Cancel,60791,V30102014,2685.0,0.0,06-03-2021,No
3,HD,Cancel,60792,V20091751,1152.3,0.0,06-09-2021,No
4,XPO,Cancel,60791,V30102750,6176.3,0.0,06-17-2021,No


In [13]:
# Keep only the dealer, requested amount and creation date, in this order
financial_fields = ["Servicing Dealer", "Requested Amount", "Invoice Create Date"]
financial_stats = financial_stats.loc[:, financial_fields]

financial_stats.head(5)

Unnamed: 0,Servicing Dealer,Requested Amount,Invoice Create Date
0,60791,1050.15,02-26-2021
1,60791,1835.3,03-18-2021
2,60791,2685.0,06-03-2021
3,60792,1152.3,06-09-2021
4,60791,6176.3,06-17-2021


In [15]:
# Give the columns short labels
financial_labels = {
    "Servicing Dealer": "Branch",
    "Requested Amount": "Inv_Amt",
    "Invoice Create Date": "Inv_Date",
}
financial_stats = financial_stats.rename(columns=financial_labels)

financial_stats.head(5)

Unnamed: 0,Branch,Inv_Amt,Inv_Date
0,60791,1050.15,02-26-2021
1,60791,1835.3,03-18-2021
2,60791,2685.0,06-03-2021
3,60792,1152.3,06-09-2021
4,60791,6176.3,06-17-2021


In [16]:
# Change branch dealer codes to abbreviations (60789 and 60790 both map to DEN).
# The mapping is scoped to the Branch column: a frame-wide replace would also
# rewrite any Inv_Amt value that happens to equal a dealer code (e.g. 60791.0).
financial_stats = financial_stats.replace(
    {"Branch": {60789: "DEN", 60790: "DEN", 60791: "SLC", 60792: "ABQ"}}
)

financial_stats.head()

Unnamed: 0,Branch,Inv_Amt,Inv_Date
0,SLC,1050.15,02-26-2021
1,SLC,1835.3,03-18-2021
2,SLC,2685.0,06-03-2021
3,ABQ,1152.3,06-09-2021
4,SLC,6176.3,06-17-2021


In [17]:
# Parse the Inv_Date strings into pandas datetimes and store them back
parsed_dates = pd.to_datetime(financial_stats["Inv_Date"])
financial_stats["Inv_Date"] = parsed_dates

financial_stats.head(5)

Unnamed: 0,Branch,Inv_Amt,Inv_Date
0,SLC,1050.15,2021-02-26
1,SLC,1835.3,2021-03-18
2,SLC,2685.0,2021-06-03
3,ABQ,1152.3,2021-06-09
4,SLC,6176.3,2021-06-17


In [18]:
# Add a Month column holding the month number taken from Inv_Date
invoice_month = financial_stats["Inv_Date"].dt.month
financial_stats["Month"] = invoice_month
financial_stats.head(5)

Unnamed: 0,Branch,Inv_Amt,Inv_Date,Month
0,SLC,1050.15,2021-02-26,2
1,SLC,1835.3,2021-03-18,3
2,SLC,2685.0,2021-06-03,6
3,ABQ,1152.3,2021-06-09,6
4,SLC,6176.3,2021-06-17,6


In [19]:
# Swap each month number for its English month name
month_names = [
    "January", "February", "March", "April", "May", "June",
    "July", "August", "September", "October", "November", "December",
]
# Lookup of month number -> name: {1: "January", ..., 12: "December"}
month_lookup = dict(enumerate(month_names, start=1))
financial_stats["Month"] = financial_stats["Month"].replace(month_lookup)

financial_stats.head(5)

Unnamed: 0,Branch,Inv_Amt,Inv_Date,Month
0,SLC,1050.15,2021-02-26,February
1,SLC,1835.3,2021-03-18,March
2,SLC,2685.0,2021-06-03,June
3,ABQ,1152.3,2021-06-09,June
4,SLC,6176.3,2021-06-17,June


In [20]:
# New frame without the Inv_Date column (branch, amount and month name only)
monthly_fields = ["Branch", "Inv_Amt", "Month"]
branch_financial = financial_stats.loc[:, monthly_fields]

branch_financial.head(5)

Unnamed: 0,Branch,Inv_Amt,Month
0,SLC,1050.15,February
1,SLC,1835.3,March
2,SLC,2685.0,June
3,ABQ,1152.3,June
4,SLC,6176.3,June


In [34]:
#branch $$ per month

# Calendar order for the output columns (groupby sorts month names alphabetically)
MONTH_ORDER = [
    "January", "February", "March", "April", "May", "June",
    "July", "August", "September", "October", "November", "December",
]


def summarize_branch_months(invoices, branch):
    """Summarise one branch's requested dollars by calendar month.

    Parameters
    ----------
    invoices : pandas.DataFrame
        Frame with "Branch", "Inv_Amt" and "Month" (month name) columns.
    branch : str
        Branch abbreviation to filter on; also used as the output row label.

    Returns
    -------
    tuple
        (branch_rows, month_totals, dollars_row) where ``branch_rows`` is the
        filtered frame, ``month_totals`` is the Inv_Amt sum per month name, and
        ``dollars_row`` is a one-row frame labelled ``branch`` with one column
        per month in calendar order.
    """
    branch_rows = invoices[invoices["Branch"] == branch]

    # Select the column before summing; passing the column name to sum()
    # would bind it to the numeric_only parameter rather than select a column.
    month_totals = branch_rows.groupby("Month")["Inv_Amt"].sum()

    # A month with no invoices for this branch is reported as 0.0 instead of
    # raising when the month label is missing.
    ordered_totals = month_totals.reindex(MONTH_ORDER, fill_value=0.0)

    dollars_row = pd.DataFrame(
        [ordered_totals.to_numpy()], columns=MONTH_ORDER, index=[branch]
    )
    return branch_rows, month_totals, dollars_row


#ABQ
abq_monthly, abq_monthly_dollars, abq_dollars = summarize_branch_months(branch_financial, "ABQ")

#DEN
den_monthly, den_monthly_dollars, den_dollars = summarize_branch_months(branch_financial, "DEN")

#SLC
slc_monthly, slc_monthly_dollars, slc_dollars = summarize_branch_months(branch_financial, "SLC")



Unnamed: 0,January,February,March,April,May,June,July,August,September,October,November,December
ABQ,4254.4,6598.89,1677.89,6300.75,2202.53,3762.18,6953.55,4699.63,5114.05,13331.84,6775.89,2891.95


In [35]:
# Stack the three one-row branch frames into a single branch-by-month table
monthly = [abq_dollars, den_dollars, slc_dollars]
monthly_summary = pd.concat(objs=monthly, axis=0)

monthly_summary

Unnamed: 0,January,February,March,April,May,June,July,August,September,October,November,December
ABQ,4254.4,6598.89,1677.89,6300.75,2202.53,3762.18,6953.55,4699.63,5114.05,13331.84,6775.89,2891.95
DEN,94960.72,70850.77,83968.0,102253.32,60787.47,65215.26,108339.64,94014.83,106473.01,121543.66,79749.09,62720.19
SLC,83072.58,46567.95,76024.56,89303.13,39745.15,69909.7,51973.22,76697.73,100196.56,92508.79,100287.15,56335.48
