In [10]:
%config InlineBackend.figure_format = 'retina'
%load_ext watermark
%watermark

Last updated: 2024-07-16T17:57:07.246067-04:00

Python implementation: CPython
Python version       : 3.12.2
IPython version      : 8.22.2

Compiler    : MSC v.1937 64 bit (AMD64)
OS          : Windows
Release     : 11
Machine     : AMD64
Processor   : Intel64 Family 6 Model 167 Stepping 1, GenuineIntel
CPU cores   : 16
Architecture: 64bit



In [11]:
# Libraries 

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

import os
import warnings

warnings.filterwarnings('ignore')

In [12]:
# Data 

CustomerData = pd.read_excel('DATA/CustomersData.xlsx')
Discount_Coupon = pd.read_csv('DATA/Discount_Coupon.csv')
Marketing_Spend = pd.read_csv('DATA/Marketing_Spend.csv')
Online_Sales = pd.read_csv('DATA/Online_Sales.csv')
Tax_amount = pd.read_excel('DATA/Tax_amount.xlsx')

In [13]:
CustomerData.head()

Unnamed: 0,CustomerID,Gender,Location,Tenure_Months
0,17850,M,Chicago,12
1,13047,M,California,43
2,12583,M,Chicago,33
3,13748,F,California,30
4,15100,M,California,49


In [14]:
Discount_Coupon.head()

Unnamed: 0,Month,Product_Category,Coupon_Code,Discount_pct
0,Jan,Apparel,SALE10,10
1,Feb,Apparel,SALE20,20
2,Mar,Apparel,SALE30,30
3,Jan,Nest-USA,ELEC10,10
4,Feb,Nest-USA,ELEC20,20


In [15]:
Marketing_Spend.head()

Unnamed: 0,Date,Offline_Spend,Online_Spend
0,1/1/2019,4500,2424.5
1,1/2/2019,4500,3480.36
2,1/3/2019,4500,1576.38
3,1/4/2019,4500,2928.55
4,1/5/2019,4500,4055.3


In [16]:
Online_Sales.head()

Unnamed: 0,CustomerID,Transaction_ID,Transaction_Date,Product_SKU,Product_Description,Product_Category,Quantity,Avg_Price,Delivery_Charges,Coupon_Status
0,17850,16679,1/1/2019,GGOENEBJ079499,Nest Learning Thermostat 3rd Gen-USA - Stainle...,Nest-USA,1,153.71,6.5,Used
1,17850,16680,1/1/2019,GGOENEBJ079499,Nest Learning Thermostat 3rd Gen-USA - Stainle...,Nest-USA,1,153.71,6.5,Used
2,17850,16681,1/1/2019,GGOEGFKQ020399,Google Laptop and Cell Phone Stickers,Office,1,2.05,6.5,Used
3,17850,16682,1/1/2019,GGOEGAAB010516,Google Men's 100% Cotton Short Sleeve Hero Tee...,Apparel,5,17.53,6.5,Not Used
4,17850,16682,1/1/2019,GGOEGBJL013999,Google Canvas Tote Natural/Navy,Bags,1,16.5,6.5,Used


In [17]:
Tax_amount.head()

Unnamed: 0,Product_Category,GST
0,Nest-USA,0.1
1,Office,0.1
2,Apparel,0.18
3,Bags,0.18
4,Drinkware,0.18


# Business Questions

### 1. Calculate Invoice amount or sale_amount or revenue for each transaction and item level 
$$ Invoice Value =[\frac{Quantity*Avg\_price}{(1-Dicount\_pct)}*(1+GST)]+Delivery\_Charges$$

In [18]:
# Including the GST information

data_tmp = pd.merge(Online_Sales, Tax_amount, how='left', on='Product_Category')
data_tmp.head()

Unnamed: 0,CustomerID,Transaction_ID,Transaction_Date,Product_SKU,Product_Description,Product_Category,Quantity,Avg_Price,Delivery_Charges,Coupon_Status,GST
0,17850,16679,1/1/2019,GGOENEBJ079499,Nest Learning Thermostat 3rd Gen-USA - Stainle...,Nest-USA,1,153.71,6.5,Used,0.1
1,17850,16680,1/1/2019,GGOENEBJ079499,Nest Learning Thermostat 3rd Gen-USA - Stainle...,Nest-USA,1,153.71,6.5,Used,0.1
2,17850,16681,1/1/2019,GGOEGFKQ020399,Google Laptop and Cell Phone Stickers,Office,1,2.05,6.5,Used,0.1
3,17850,16682,1/1/2019,GGOEGAAB010516,Google Men's 100% Cotton Short Sleeve Hero Tee...,Apparel,5,17.53,6.5,Not Used,0.18
4,17850,16682,1/1/2019,GGOEGBJL013999,Google Canvas Tote Natural/Navy,Bags,1,16.5,6.5,Used,0.18


In [19]:
# Including discount_pct information
## ajusting the month name
data_tmp['Transaction_Date2'] = pd.to_datetime(data_tmp['Transaction_Date'])
data_tmp['Month'] = data_tmp['Transaction_Date2'].dt.strftime('%b')

## Merging the tables
data_tmp = pd.merge(data_tmp, Discount_Coupon,how='left', on='Month')

In [20]:
# Creating a new column to ajust the discount
# I will apply the discount if, just if, the coupon_status == Used.

data_tmp['Discount_pct_ajus'] = data_tmp.apply(lambda x: x['Discount_pct'] if x['Coupon_Status'] == 'Used' else 0, axis=1)

In [21]:
data_tmp

Unnamed: 0,CustomerID,Transaction_ID,Transaction_Date,Product_SKU,Product_Description,Product_Category_x,Quantity,Avg_Price,Delivery_Charges,Coupon_Status,GST,Transaction_Date2,Month,Product_Category_y,Coupon_Code,Discount_pct,Discount_pct_ajus
0,17850,16679,1/1/2019,GGOENEBJ079499,Nest Learning Thermostat 3rd Gen-USA - Stainle...,Nest-USA,1,153.71,6.50,Used,0.1,2019-01-01,Jan,Apparel,SALE10,10,10
1,17850,16679,1/1/2019,GGOENEBJ079499,Nest Learning Thermostat 3rd Gen-USA - Stainle...,Nest-USA,1,153.71,6.50,Used,0.1,2019-01-01,Jan,Nest-USA,ELEC10,10,10
2,17850,16679,1/1/2019,GGOENEBJ079499,Nest Learning Thermostat 3rd Gen-USA - Stainle...,Nest-USA,1,153.71,6.50,Used,0.1,2019-01-01,Jan,Office,OFF10,10,10
3,17850,16679,1/1/2019,GGOENEBJ079499,Nest Learning Thermostat 3rd Gen-USA - Stainle...,Nest-USA,1,153.71,6.50,Used,0.1,2019-01-01,Jan,Drinkware,EXTRA10,10,10
4,17850,16679,1/1/2019,GGOENEBJ079499,Nest Learning Thermostat 3rd Gen-USA - Stainle...,Nest-USA,1,153.71,6.50,Used,0.1,2019-01-01,Jan,Lifestyle,EXTRA10,10,10
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
899703,14600,48497,12/31/2019,GGOENEBQ079099,Nest Protect Smoke + CO White Battery Alarm-USA,Nest-USA,4,80.52,19.99,Clicked,0.1,2019-12-31,Dec,Housewares,HOU30,30,0
899704,14600,48497,12/31/2019,GGOENEBQ079099,Nest Protect Smoke + CO White Battery Alarm-USA,Nest-USA,4,80.52,19.99,Clicked,0.1,2019-12-31,Dec,Accessories,ACC30,30,0
899705,14600,48497,12/31/2019,GGOENEBQ079099,Nest Protect Smoke + CO White Battery Alarm-USA,Nest-USA,4,80.52,19.99,Clicked,0.1,2019-12-31,Dec,Gift Cards,GC30,30,0
899706,14600,48497,12/31/2019,GGOENEBQ079099,Nest Protect Smoke + CO White Battery Alarm-USA,Nest-USA,4,80.52,19.99,Clicked,0.1,2019-12-31,Dec,Notebooks & Journals,NJ30,30,0


In [22]:
data_tmp['Invoice'] = (data_tmp['Quantity'] * data_tmp['Avg_Price'] / (1 - data_tmp['Discount_pct_ajus']/100)*(1+data_tmp['GST']) + data_tmp['Delivery_Charges'])

In [23]:
data_tmp['Invoice'].describe()

count    899708.000000
mean        117.113874
std         199.128769
min           4.861600
25%          22.182693
50%          51.469982
75%         169.625000
max       13803.511875
Name: Invoice, dtype: float64

In [29]:
print(f'The total invoice amount is ${data_tmp["Invoice"].sum():,.2f}')

The total invoice amount is $105,368,289.54


In [25]:
### The invoice transation for each product

data_tmp.groupby('Product_Description')['Invoice'].agg('sum')

Product_Description
1 oz Hand Sanitizer                                          129794.925497
20 oz Stainless Steel Insulated Tumbler                      217554.737513
22 oz Android Bottle                                          80810.048250
22 oz YouTube Bottle Infuser                                 115901.023958
23 oz Wide Mouth Sport Bottle                                104212.274114
                                                                 ...      
YouTube Women's Short Sleeve Tri-blend Badge Tee Charcoal     61233.319285
YouTube Women's Short Sleeve Tri-blend Badge Tee Grey          4813.694257
YouTube Womens 3/4 Sleeve Baseball Raglan White/Black          6221.531839
YouTube Wool Heather Cap Heather/Black                        22393.587167
YouTube Youth Short Sleeve Tee Red                           110459.015267
Name: Invoice, Length: 404, dtype: float64

In [26]:
data_tmp.groupby('Product_Category_x')['Invoice'].agg('sum')

Product_Category_x
Accessories             1.969958e+05
Android                 2.025391e+04
Apparel                 1.591476e+07
Backpacks               1.935527e+05
Bags                    3.707706e+06
Bottles                 1.967111e+05
Drinkware               5.197631e+06
Fun                     1.608967e+05
Gift Cards              3.708630e+05
Google                  2.426284e+05
Headgear                1.202264e+06
Housewares              1.278679e+05
Lifestyle               2.353122e+06
More Bags               7.133084e+04
Nest                    1.042330e+07
Nest-Canada             1.480478e+06
Nest-USA                5.365489e+07
Notebooks & Journals    2.306335e+06
Office                  7.317975e+06
Waze                    2.287333e+05
Name: Invoice, dtype: float64

## 2. Perform Detailed exploratory analysis  

### 2.1. Understanding how many customers acquired every month  