In [1]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler,OneHotEncoder
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import io

## Load the customer sales dataset

In [2]:
# Read the customer sales CSV into a DataFrame.
# BillPostalCode is forced to pandas' "string" dtype instead of being inferred
# (presumably so postal codes are not parsed as numbers -- confirm).
file_path = "data/Customer_sales.csv"
postal_code_dtype = {"BillPostalCode": "string"}
df_customer_sales = pd.read_csv(file_path, dtype=postal_code_dtype)

# Show the loaded frame as the cell output
df_customer_sales

Unnamed: 0,CustomerID,BillPostalCode,BillStateProvCode,BillCountryCode,LOB,Market,ChannelOrderID,OrderDate,Quantity,UnitPrice,Total,SKU,Title,Brand
0,10012,49649,MI,US,Salvage,eBay,4468548,2021-04-29,1,25.00,25.00,883562,Can-Am Commander 1000 XT 14 Fender Left Front ...,Can-Am
1,10012,49649,MI,US,Salvage,eBay,4468548,2021-04-29,1,45.00,45.00,983570,Can-Am Commander 800R XT 12 Fender Right Front...,Can-Am
2,10012,49649,MI,US,Salvage,eBay,4468639,2021-04-29,1,31.99,31.99,1219950,Can-Am Commander 1000 XT 14 Fender Flare Left ...,Can-Am
3,10012,49649,MI,US,Salvage,eBay,4471261,2021-04-30,1,19.99,19.99,1054150,Can-Am Commander 1000 XT 11 Lateral Panel Righ...,Can-Am
4,24012,68713,NE,US,Salvage,Website,4758999,2021-09-07,1,69.99,69.99,1302547,Polaris Sportsman 90 02 Footrest Left 0450616 ...,Polaris
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
238353,375213012,68701,NE,US,Salvage,Website,5623912,2023-01-25,1,25.00,25.00,Custom,1 tire from rack,Other-Brand
238354,375214012,70094,LA,US,Salvage,eBay,5623899,2023-01-25,1,60.00,60.00,1412157,Honda Foreman 400 FW 98 Recoil Starter 28400-H...,Honda
238355,375215012,77990,TX,US,Salvage,eBay,5623884,2023-01-25,1,16.99,16.99,1485347,Suzuki Vinson 500 03 Battery Positive Cable 33...,Suzuki
238356,375216012,42437,KY,US,NOS-Rebuilt-Parts,Website,5623898,2023-01-25,1,24.94,24.94,PSN-3022897,Polaris General RZR ACE Ranger 900 1000 XP Tur...,Polaris


## Pivot and output brand totals by Customer

In [3]:
# Distinct Brand values, in order of first appearance; these are the
# categories the brand pivot turns into columns.
df_customer_sales.loc[:, 'Brand'].unique()

array(['Can-Am', 'Polaris', 'Honda', 'Yamaha', 'Kawasaki', 'Other-Brand',
       'Suzuki', 'John Deere', 'Arctic Cat', 'Kubota', 'Bobcat'],
      dtype=object)

In [4]:

# Keep only the customer key, the category to pivot on, and the amount to sum
df_customer_sales_brand_data = df_customer_sales.loc[:, ['CustomerID', 'Brand', 'Total']]

# Sum Total for every (CustomerID, Brand) pair
df_group_by_brand = df_customer_sales_brand_data.groupby(['CustomerID', 'Brand']).sum()

# Move Brand from the row index into the columns; (customer, brand) pairs
# with no rows come out of unstack() as NaN and are replaced with 0
df_group_by_brand_pivot = df_group_by_brand.unstack().fillna(0)

# Collapse the two-level column index into plain ('Total', <brand>) tuples
df_group_by_brand_pivot.columns = df_group_by_brand_pivot.columns.to_flat_index()

# (brand value, output column name) pairs.
# NOTE(review): 'ArticCat_Tot' (sic) is reproduced exactly because it is the
# column name this notebook emits -- confirm no consumer depends on it before
# correcting the spelling.
brand_column_labels = [
    ('Arctic Cat', 'ArticCat_Tot'),
    ('Bobcat', "Bobcat_Tot"),
    ('Can-Am', 'Can-Am_Tot'),
    ('Honda', 'Honda_Tot'),
    ('John Deere', 'John-Deere_Tot'),
    ('Kawasaki', 'Kawasaki_Tot'),
    ('Kubota', 'Kubota_Tot'),
    ('Other-Brand', 'Other-Brand_Tot'),
    ('Polaris', 'Polaris_Tot'),
    ('Suzuki', 'Suzuki_Tot'),
    ('Yamaha', 'Yamaha_Tot'),
]

# Key each output name by its flattened ('Total', <brand>) column label.
# rename() leaves any label that is not in this mapping unchanged.
column_names = {('Total', brand): label for brand, label in brand_column_labels}

df_group_by_brand_pivot = df_group_by_brand_pivot.rename(columns=column_names)

# Show the per-customer brand totals as the cell output
df_group_by_brand_pivot

Unnamed: 0_level_0,ArticCat_Tot,Bobcat_Tot,Can-Am_Tot,Honda_Tot,John-Deere_Tot,Kawasaki_Tot,Kubota_Tot,Other-Brand_Tot,Polaris_Tot,Suzuki_Tot,Yamaha_Tot
CustomerID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
10012,0.0,0.0,121.98,0.00,0.00,0.00,0.0,0.00,0.00,0.00,0.00
24012,0.0,0.0,590.97,139.99,0.00,0.00,0.0,0.00,402.68,0.00,18.93
36012,0.0,0.0,0.00,0.00,0.00,124.99,0.0,19.99,123.85,0.00,0.00
48012,0.0,0.0,499.93,1595.25,49.99,669.89,0.0,0.00,2629.71,44.99,1525.10
49012,0.0,0.0,0.00,0.00,0.00,655.94,0.0,0.00,1664.53,199.99,0.00
...,...,...,...,...,...,...,...,...,...,...,...
375213012,0.0,0.0,0.00,0.00,0.00,0.00,0.0,25.00,0.00,0.00,0.00
375214012,0.0,0.0,0.00,60.00,0.00,0.00,0.0,0.00,0.00,0.00,0.00
375215012,0.0,0.0,0.00,0.00,0.00,0.00,0.0,0.00,0.00,16.99,0.00
375216012,0.0,0.0,0.00,0.00,0.00,0.00,0.0,0.00,24.94,0.00,0.00


In [5]:
# Write the per-customer brand totals to CSV (to_csv writes the index by
# default, so the CustomerID index is included)
brand_totals_csv = 'data/Customer_brands.csv'
df_group_by_brand_pivot.to_csv(brand_totals_csv)

## Pivot and output LOB totals by Customer

In [6]:
# Keep only the customer key, the category to pivot on, and the amount to sum
df_customer_sales_lob_data = df_customer_sales[['CustomerID', 'LOB', 'Total']]

# Sum Total for every (CustomerID, LOB) pair
df_group_by_lob = df_customer_sales_lob_data.groupby(['CustomerID', 'LOB']).sum()

# Move LOB from the row index into the columns. (customer, LOB) pairs with no
# rows come out of unstack() as NaN and are replaced with 0. fillna() returns
# a new frame here rather than mutating in place, so re-running the cell
# always starts from df_group_by_lob.
df_group_by_lob_pivot = df_group_by_lob.unstack().fillna(0)

# Collapse the two-level column index into plain ('Total', <LOB>) tuples
df_group_by_lob_pivot.columns = df_group_by_lob_pivot.columns.to_flat_index()

# Output column name for each flattened ('Total', <LOB>) label.
# rename() leaves any label that is not in this mapping unchanged.
column_names = {
    ('Total', 'Misc-LOB'): 'LOB_Misc_Tot',
    ('Total', 'NOS-Rebuilt-Parts'): "LOB_NOS-Rebuilt-Part_Tot",
    ('Total', 'Rebuilt-Engines'): 'LOB_Rebuilt-Engines-Tot',
    ('Total', 'Salvage'): 'LOB_Salvage_Tot',
}

df_group_by_lob_pivot = df_group_by_lob_pivot.rename(columns=column_names)

# Show the per-customer LOB totals as the cell output
df_group_by_lob_pivot

Unnamed: 0_level_0,LOB_Misc_Tot,LOB_NOS-Rebuilt-Part_Tot,LOB_Rebuilt-Engines-Tot,LOB_Salvage_Tot
CustomerID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
10012,0.0,0.00,0.0,121.98
24012,0.0,0.00,0.0,1152.57
36012,0.0,14.94,0.0,253.89
48012,0.0,119.98,0.0,6894.88
49012,0.0,49.99,0.0,2470.47
...,...,...,...,...
375213012,0.0,0.00,0.0,25.00
375214012,0.0,0.00,0.0,60.00
375215012,0.0,0.00,0.0,16.99
375216012,0.0,24.94,0.0,0.00


In [7]:
# Write the per-customer LOB totals to CSV (to_csv writes the index by
# default, so the CustomerID index is included)
lob_totals_csv = 'data/Customer_lob.csv'
df_group_by_lob_pivot.to_csv(lob_totals_csv)

## Pivot and output Market totals by Customer

In [8]:
# Keep only the customer key, the category to pivot on, and the amount to sum
df_customer_sales_market_data = df_customer_sales[['CustomerID', 'Market', 'Total']]

# Sum Total for every (CustomerID, Market) pair
df_group_by_market = df_customer_sales_market_data.groupby(['CustomerID', 'Market']).sum()

# Move Market from the row index into the columns. (customer, market) pairs
# with no rows come out of unstack() as NaN and are replaced with 0. fillna()
# returns a new frame here rather than mutating in place, so re-running the
# cell always starts from df_group_by_market.
df_group_by_market_pivot = df_group_by_market.unstack().fillna(0)

# Collapse the two-level column index into plain ('Total', <market>) tuples
df_group_by_market_pivot.columns = df_group_by_market_pivot.columns.to_flat_index()

# Output column name for each flattened ('Total', <market>) label.
# rename() leaves any label that is not in this mapping unchanged.
column_names = {
    ('Total', 'Counter'): 'Mkt-Counter_Tot',
    ('Total', 'Other-Market'): "Mkt-Other_Tot",
    ('Total', 'Website'): 'Mkt-Website_Tot',
    ('Total', 'eBay'): 'Mkt-eBay_Tot',
}

df_group_by_market_pivot = df_group_by_market_pivot.rename(columns=column_names)

# Show the per-customer market totals as the cell output
df_group_by_market_pivot

Unnamed: 0_level_0,Mkt-Counter_Tot,Mkt-Other_Tot,Mkt-Website_Tot,Mkt-eBay_Tot
CustomerID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
10012,0.0,0.0,0.00,121.98
24012,0.0,0.0,1152.57,0.00
36012,0.0,0.0,0.00,268.83
48012,0.0,0.0,0.00,7014.86
49012,0.0,0.0,1496.84,1023.62
...,...,...,...,...
375213012,0.0,0.0,25.00,0.00
375214012,0.0,0.0,0.00,60.00
375215012,0.0,0.0,0.00,16.99
375216012,0.0,0.0,24.94,0.00


In [9]:
# Write the per-customer market totals to CSV (to_csv writes the index by
# default, so the CustomerID index is included)
market_totals_csv = 'data/Customer_market.csv'
df_group_by_market_pivot.to_csv(market_totals_csv)