In [1]:
import pandas as pd
import numpy as np
from pathlib import Path

In [2]:
# Workbook holding the raw customer and sales-rep sheets, resolved from the working directory
src_file = Path.cwd().joinpath('data', 'raw', 'customer_master.xlsx')

In [3]:
# Location of the state -> census region lookup table (CSV)
# NOTE(review): the original cell was annotated "version 2" - presumably a revision of this URL; confirm
url = 'https://talkpython.fm/us-census-bureau-regions-and-divisions.csv'

# Read both sheets from the source workbook; zip_code is kept as text instead of
# being parsed as a number
df_customers = pd.read_excel(src_file, sheet_name='customers', dtype={'zip_code': 'str'})
df_sales_rep = pd.read_excel(src_file, sheet_name='sales')

# Only two columns are needed - the state abbreviation and its region.
# Select them by name rather than by position so a reordered CSV raises an error
# instead of silently loading the wrong columns.
states = pd.read_csv(url, usecols=['State Code', 'Region'])

# Destination workbook for the combined customer / rep data
output_file = Path.cwd() / 'data' / 'processed' / 'customer_rep_data.xlsx'

In [4]:
# Preview the first rows of the customer sheet
df_customers.head()

Unnamed: 0,company_name,channel,zip_code,city,state,account_num,total_sales
0,Universal Technology Vision,retail,22910,Charlottesville,VA,AH5590,1257912
1,East Design Hill,retail,66546,Wakarusa,KS,OL0453,1158564
2,Studio Pacific Galaxy,retail,79698,Abilene,TX,YR6861,1663488
3,Galaxy Building,retail,85275,Mesa,AZ,AS3124,1193560
4,Resource Innovation Future,retail,97013,Canby,OR,DK1362,958040


In [5]:
# Display the sales-rep sheet (one row per rep, with the region they cover)
df_sales_rep

Unnamed: 0,first_name,last_name,region,tenure
0,Shannon,Muniz,NorthEast,5.6
1,Leonard,Malcolm,West,3.8
2,Mona,Sutton,Midwest,5.4
3,Mickey,Tyner,South,0.7


In [6]:
# Check column dtypes and non-null counts for the customer data
df_customers.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 50 entries, 0 to 49
Data columns (total 7 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   company_name  50 non-null     object
 1   channel       50 non-null     object
 2   zip_code      50 non-null     object
 3   city          50 non-null     object
 4   state         50 non-null     object
 5   account_num   50 non-null     object
 6   total_sales   50 non-null     int64 
dtypes: int64(1), object(6)
memory usage: 2.9+ KB


In [7]:
# Check column dtypes and non-null counts for the sales-rep data
df_sales_rep.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4 entries, 0 to 3
Data columns (total 4 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   first_name  4 non-null      object 
 1   last_name   4 non-null      object 
 2   region      4 non-null      object 
 3   tenure      4 non-null      float64
dtypes: float64(1), object(3)
memory usage: 260.0+ bytes


In [8]:
# Preview the state-code -> region lookup table
states.head()

Unnamed: 0,State Code,Region
0,AK,West
1,AL,South
2,AR,South
3,AZ,West
4,CA,West


In [9]:
# Count lookup rows per region; also shows how the region labels are spelled here
states['Region'].value_counts()

Region
South        17
West         13
Midwest      12
Northeast     9
Name: count, dtype: int64

In [10]:
# Region labels as spelled in the sales-rep sheet, for comparison with the lookup table
df_sales_rep['region'].value_counts()

region
NorthEast    1
West         1
Midwest      1
South        1
Name: count, dtype: int64

In [11]:
# Normalize the region labels in both tables to upper case so they compare equal
for _frame, _col in ((states, 'Region'), (df_sales_rep, 'region')):
    _frame[_col] = _frame[_col].str.upper()

In [12]:
# Re-check the sales-rep region labels after upper-casing
df_sales_rep['region'].value_counts()

region
NORTHEAST    1
WEST         1
MIDWEST      1
SOUTH        1
Name: count, dtype: int64

In [13]:
# Re-check the lookup region labels after upper-casing
states['Region'].value_counts()

Region
SOUTH        17
WEST         13
MIDWEST      12
NORTHEAST     9
Name: count, dtype: int64

In [14]:
# Attach a census region to each customer by matching its state abbreviation
# against the lookup table. The default inner join keeps only customers whose
# state appears in `states`.
# validate='many_to_one' makes pandas raise MergeError if a State Code occurs
# more than once in `states`; without it, such a duplicate would silently
# duplicate customer rows and inflate every total computed from them.
customer_region = pd.merge(df_customers, states,
                           left_on='state', right_on='State Code',
                           validate='many_to_one')
customer_region.head()

Unnamed: 0,company_name,channel,zip_code,city,state,account_num,total_sales,State Code,Region
0,Universal Technology Vision,retail,22910,Charlottesville,VA,AH5590,1257912,VA,SOUTH
1,Contract Electronics Industries,retail,24153,Salem,VA,GG0303,1035050,VA,SOUTH
2,Star Interactive,retail,22153,Springfield,VA,UM2244,1541486,VA,SOUTH
3,Vision People Solutions,retail,24557,Gretna,VA,WL5283,1299450,VA,SOUTH
4,East Design Hill,retail,66546,Wakarusa,KS,OL0453,1158564,KS,MIDWEST


In [15]:
# 'State Code' repeats the customer's own 'state' column after the merge, so remove it
customer_region = customer_region.drop('State Code', axis='columns')

In [16]:
# (rows, columns) of the customer/region table - compare the row count with the customer sheet
customer_region.shape

(50, 8)

In [17]:
# Preview the customer/region table now that 'State Code' has been dropped
customer_region.head()

Unnamed: 0,company_name,channel,zip_code,city,state,account_num,total_sales,Region
0,Universal Technology Vision,retail,22910,Charlottesville,VA,AH5590,1257912,SOUTH
1,Contract Electronics Industries,retail,24153,Salem,VA,GG0303,1035050,SOUTH
2,Star Interactive,retail,22153,Springfield,VA,UM2244,1541486,SOUTH
3,Vision People Solutions,retail,24557,Gretna,VA,WL5283,1299450,SOUTH
4,East Design Hill,retail,66546,Wakarusa,KS,OL0453,1158564,MIDWEST


In [18]:
# Attach the sales rep that covers each customer's region.
# how='left' keeps every customer row even when its Region has no matching rep
# (a plain inner join would drop those customers instead).
# validate='many_to_one' makes pandas raise MergeError if the same region is
# listed for more than one rep, which would otherwise duplicate customer rows.
customer_rep = pd.merge(customer_region, df_sales_rep,
                        left_on='Region', right_on='region',
                        how='left', validate='many_to_one')
# We don't need two region columns - keep 'Region' and drop the rep-side 'region'
customer_rep = customer_rep.drop(columns=['region'])

In [19]:
# Display the merged customer/rep table
# NOTE(review): this renders the whole frame; customer_rep.head() would keep the output short
customer_rep

Unnamed: 0,company_name,channel,zip_code,city,state,account_num,total_sales,Region,first_name,last_name,tenure
0,Universal Technology Vision,retail,22910,Charlottesville,VA,AH5590,1257912,SOUTH,Mickey,Tyner,0.7
1,Contract Electronics Industries,retail,24153,Salem,VA,GG0303,1035050,SOUTH,Mickey,Tyner,0.7
2,Star Interactive,retail,22153,Springfield,VA,UM2244,1541486,SOUTH,Mickey,Tyner,0.7
3,Vision People Solutions,retail,24557,Gretna,VA,WL5283,1299450,SOUTH,Mickey,Tyner,0.7
4,East Design Hill,retail,66546,Wakarusa,KS,OL0453,1158564,MIDWEST,Mona,Sutton,5.4
5,Hardware Adventure Universal,retail,67118,Norwich,KS,GA3939,1163380,MIDWEST,Mona,Sutton,5.4
6,Solutions Universal,reseller,66212,Shawnee Mission,KS,SA4443,6796068,MIDWEST,Mona,Sutton,5.4
7,Studio Pacific Galaxy,retail,79698,Abilene,TX,YR6861,1663488,SOUTH,Mickey,Tyner,0.7
8,Virtual Vision Data,retail,77501,Pasadena,TX,YA6348,1440886,SOUTH,Mickey,Tyner,0.7
9,Galaxy Building,retail,85275,Mesa,AZ,AS3124,1193560,WEST,Leonard,Malcolm,3.8


In [20]:
# (rows, columns) of the merged customer/rep table
customer_rep.shape

(50, 11)

In [21]:
# Flat commission rate (0.79% of sales) applied to every account
commission_rate = 0.0079
# Commission per customer, rounded to the nearest whole unit
customer_rep['commission'] = (customer_rep['total_sales'] * commission_rate).round()

In [22]:
# Preview the table with the new 'commission' column
customer_rep.head()

Unnamed: 0,company_name,channel,zip_code,city,state,account_num,total_sales,Region,first_name,last_name,tenure,commission
0,Universal Technology Vision,retail,22910,Charlottesville,VA,AH5590,1257912,SOUTH,Mickey,Tyner,0.7,9938.0
1,Contract Electronics Industries,retail,24153,Salem,VA,GG0303,1035050,SOUTH,Mickey,Tyner,0.7,8177.0
2,Star Interactive,retail,22153,Springfield,VA,UM2244,1541486,SOUTH,Mickey,Tyner,0.7,12178.0
3,Vision People Solutions,retail,24557,Gretna,VA,WL5283,1299450,SOUTH,Mickey,Tyner,0.7,10266.0
4,East Design Hill,retail,66546,Wakarusa,KS,OL0453,1158564,MIDWEST,Mona,Sutton,5.4,9153.0


In [23]:
# Total commission across all customers
customer_rep['commission'].sum()

999304.0

In [24]:
# Total commission per region - the totals are clearly uneven across regions
customer_rep.groupby('Region')[['commission']].sum()

Unnamed: 0_level_0,commission
Region,Unnamed: 1_level_1
MIDWEST,441929.0
NORTHEAST,101517.0
SOUTH,353870.0
WEST,101988.0


In [25]:
# Total and average commission per region
customer_rep.groupby('Region')[['commission']].agg(['sum', 'mean'])

Unnamed: 0_level_0,commission,commission
Unnamed: 0_level_1,sum,mean
Region,Unnamed: 1_level_2,Unnamed: 2_level_2
MIDWEST,441929.0,29461.933333
NORTHEAST,101517.0,16919.5
SOUTH,353870.0,14744.583333
WEST,101988.0,20397.6


In [26]:
# Total and average commission for each region/channel combination, rounded to whole numbers
(
    customer_rep
    .groupby(['Region', 'channel'])[['commission']]
    .agg(['sum', 'mean'])
    .round()
)

Unnamed: 0_level_0,Unnamed: 1_level_0,commission,commission
Unnamed: 0_level_1,Unnamed: 1_level_1,sum,mean
Region,channel,Unnamed: 2_level_2,Unnamed: 3_level_2
MIDWEST,reseller,356021.0,59337.0
MIDWEST,retail,85908.0,9545.0
NORTHEAST,reseller,49158.0,49158.0
NORTHEAST,retail,52359.0,10472.0
SOUTH,partner,55321.0,27660.0
SOUTH,reseller,107176.0,53588.0
SOUTH,retail,191373.0,9569.0
WEST,reseller,62042.0,62042.0
WEST,retail,39946.0,9986.0


In [27]:
# Total and average commission per sales channel, rounded to whole numbers
(
    customer_rep
    .groupby('channel')[['commission']]
    .agg(['sum', 'mean'])
    .round()
)

Unnamed: 0_level_0,commission,commission
Unnamed: 0_level_1,sum,mean
channel,Unnamed: 1_level_2,Unnamed: 2_level_2
partner,55321.0,27660.0
reseller,574397.0,57440.0
retail,369586.0,9726.0


In [28]:
# Per channel: total and average commission plus the number of companies
(
    customer_rep
    .groupby('channel')
    .agg({
        'commission': ['sum', 'mean'],
        'company_name': ['count'],
    })
    .round()
)

Unnamed: 0_level_0,commission,commission,company_name
Unnamed: 0_level_1,sum,mean,count
channel,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
partner,55321.0,27660.0,2
reseller,574397.0,57440.0,10
retail,369586.0,9726.0,38


In [29]:
# Region x channel matrix of summed commission; empty combinations show 0 and
# margins=True adds the 'All' row/column of totals
customer_rep.pivot_table(
    values=['commission'],
    index=['Region'],
    columns=['channel'],
    aggfunc=['sum'],
    fill_value=0,
    margins=True,
)

Unnamed: 0_level_0,sum,sum,sum,sum
Unnamed: 0_level_1,commission,commission,commission,commission
channel,partner,reseller,retail,All
Region,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3
MIDWEST,0,356021,85908,441929.0
NORTHEAST,0,49158,52359,101517.0
SOUTH,55321,107176,191373,353870.0
WEST,0,62042,39946,101988.0
All,55321,574397,369586,999304.0


In [30]:
# Final look at the combined table before it is written out
customer_rep.head()

Unnamed: 0,company_name,channel,zip_code,city,state,account_num,total_sales,Region,first_name,last_name,tenure,commission
0,Universal Technology Vision,retail,22910,Charlottesville,VA,AH5590,1257912,SOUTH,Mickey,Tyner,0.7,9938.0
1,Contract Electronics Industries,retail,24153,Salem,VA,GG0303,1035050,SOUTH,Mickey,Tyner,0.7,8177.0
2,Star Interactive,retail,22153,Springfield,VA,UM2244,1541486,SOUTH,Mickey,Tyner,0.7,12178.0
3,Vision People Solutions,retail,24557,Gretna,VA,WL5283,1299450,SOUTH,Mickey,Tyner,0.7,10266.0
4,East Design Hill,retail,66546,Wakarusa,KS,OL0453,1158564,MIDWEST,Mona,Sutton,5.4,9153.0


In [31]:
# Save the file

In [32]:
# Save the combined customer/rep/commission table to the data/processed folder.
# Create the target folder first so the export does not fail when it is missing
# (e.g. on a fresh checkout); exist_ok=True makes re-running the cell harmless.
output_file.parent.mkdir(parents=True, exist_ok=True)
# index=False leaves the DataFrame's row index out of the workbook
customer_rep.to_excel(output_file, index=False)