In [1]:
import pandas as pd
import numpy as np
from pathlib import Path

In [2]:
# Location of the customer master workbook, relative to the current working directory.
src_file = Path.cwd().joinpath('data', 'raw', 'customer_master.xlsx')

In [3]:
# Read the 'customers' sheet of the workbook.
# zip_code is forced to str rather than letting pandas infer a numeric dtype;
# an integer column would drop any leading zeros from the codes.
df = pd.read_excel(src_file, sheet_name = 'customers', dtype={'zip_code': 'str'})

In [4]:
# Preview the first rows to confirm the sheet loaded with the expected columns
df.head()

Unnamed: 0,company_name,channel,zip_code,city,state,account_num,total_sales
0,Universal Technology Vision,retail,22910,Charlottesville,VA,AH5590,1257912
1,East Design Hill,retail,66546,Wakarusa,KS,OL0453,1158564
2,Studio Pacific Galaxy,retail,79698,Abilene,TX,YR6861,1663488
3,Galaxy Building,retail,85275,Mesa,AZ,AS3124,1193560
4,Resource Innovation Future,retail,97013,Canby,OR,DK1362,958040


In [5]:
# Check column dtypes and null counts. zip_code should be reported as object
# (string) because of the dtype override used when the sheet was read.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 50 entries, 0 to 49
Data columns (total 7 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   company_name  50 non-null     object
 1   channel       50 non-null     object
 2   zip_code      50 non-null     object
 3   city          50 non-null     object
 4   state         50 non-null     object
 5   account_num   50 non-null     object
 6   total_sales   50 non-null     int64 
dtypes: int64(1), object(6)
memory usage: 2.9+ KB


In [6]:
# Summary statistics for the numeric columns (describe() skips object columns by default)
df.describe()

Unnamed: 0,total_sales
count,50.0
mean,2529873.0
std,2482702.0
min,746216.0
25%,1115702.0
50%,1328859.0
75%,1705738.0
max,9121596.0


In [7]:
# Summarize the text (object) columns: count, number of unique values,
# most frequent value and how often it occurs
df.describe(include='object')

Unnamed: 0,company_name,channel,zip_code,city,state,account_num
count,50,50,50,50,50,50
unique,50,3,50,48,31,50
top,Universal Technology Vision,retail,22910,Dawson,VA,AH5590
freq,1,38,1,2,4,1


In [8]:
# Number of customer rows per sales channel
df['channel'].value_counts()

channel
retail      38
reseller    10
partner      2
Name: count, dtype: int64

In [9]:
# Grand total of total_sales across all rows
df['total_sales'].sum()

126493662

In [10]:
# Keep the grand total; it is the denominator of the commission-rate calculation
last_year_sales = df['total_sales'].sum()


In [11]:
# Show the total with thousands separators and no decimal places
print(f"{last_year_sales:,.0f}")

126,493,662


In [12]:
# Target for the total commission paid out across all customers
commission_target = 1_000_000
# Flat rate which, applied to total sales, yields exactly the target amount
effective_rate = commission_target / last_year_sales
# Display the rate as a percentage with two decimal places
print(f"{effective_rate:,.2%}")

0.79%


In [13]:
# Re-display the effective commission rate (same statement as in the cell that computes it)
print(f"{effective_rate:,.2%}")

0.79%


In [14]:
# How much commission would each customer account for at that rate?

In [15]:
# Per-customer commission: each row's sales scaled by the flat effective rate.
df['commission'] = df['total_sales'].mul(effective_rate)

In [16]:
# Preview the frame to confirm the commission column was added
df.head()

Unnamed: 0,company_name,channel,zip_code,city,state,account_num,total_sales,commission
0,Universal Technology Vision,retail,22910,Charlottesville,VA,AH5590,1257912,9944.466625
1,East Design Hill,retail,66546,Wakarusa,KS,OL0453,1158564,9159.06759
2,Studio Pacific Galaxy,retail,79698,Abilene,TX,YR6861,1663488,13150.761656
3,Galaxy Building,retail,85275,Mesa,AZ,AS3124,1193560,9435.729673
4,Resource Innovation Future,retail,97013,Canby,OR,DK1362,958040,7573.818204


In [17]:
# Round every commission to a whole number (Series.round defaults to 0 decimals).
df['commission'] = df['commission'].round()

In [18]:
# Show only the first ten rows with the rounded commissions instead of
# dumping the whole frame into the notebook output.
df.head(10)

Unnamed: 0,company_name,channel,zip_code,city,state,account_num,total_sales,commission
0,Universal Technology Vision,retail,22910,Charlottesville,VA,AH5590,1257912,9944.0
1,East Design Hill,retail,66546,Wakarusa,KS,OL0453,1158564,9159.0
2,Studio Pacific Galaxy,retail,79698,Abilene,TX,YR6861,1663488,13151.0
3,Galaxy Building,retail,85275,Mesa,AZ,AS3124,1193560,9436.0
4,Resource Innovation Future,retail,97013,Canby,OR,DK1362,958040,7574.0
5,Internet Hill Systems,retail,74360,Picher,OK,KK6153,970886,7675.0
6,Pacific Hill Application,retail,49862,Munising,MI,MS1866,1271136,10049.0
7,Net Electronic,retail,42631,Marshes Siding,KY,WA1826,1101414,8707.0
8,Software Bell Technology,retail,45342,Miamisburg,OH,XJ1430,942044,7447.0
9,Innovation Net,retail,20390,Washington,DC,NS1312,1010872,7991.0


In [19]:
# Sanity check: the commissions should total roughly commission_target.
# Each row was rounded individually, so the sum can drift slightly from the target.
df['commission'].sum()

1000004.0

In [20]:
# Average commission per customer row
df['commission'].mean()

20000.08

In [21]:
# Distribution of the per-customer commissions, with each statistic rounded to a whole number.
df['commission'].describe().round(decimals=0)

count       50.0
mean     20000.0
std      19627.0
min       5899.0
25%       8820.0
50%      10506.0
75%      13485.0
max      72111.0
Name: commission, dtype: float64