In [1]:
import snowflake.connector
import pandas as pd
import seaborn as sns
import plotly.express as px
import warnings
import numpy as np
import matplotlib.pyplot as plt
import os
usr = os.environ['svc_user']
pwd = os.environ['svc_pwd']

warnings.filterwarnings('ignore')

In [2]:
def sf_connect():
    con = snowflake.connector.connect(  
        account = 'chewy.us-east-1',
        user = usr,
        password=pwd,
        autocommit = False,
        database= 'EDLDB',
        warehouse = 'IT_WH') #EDLDB_DSE_DEV
    return con

In [3]:
def exec_qry_batch(qry,numofdf=1):
    con = sf_connect()
    cur = con.cursor()
    try:
        batches = []
        cur.execute(qry,timeout=1200)
        for df in cur.fetch_pandas_batches():
            if((len(batches)<=numofdf) and (df.shape[0]>0)):
                batches.append(df)
            else:
                break
        return batches   
    except Exception as e:
        print(e)
    finally:
        cur.close()

In [4]:
def exec_qry_all(qry):
    con = sf_connect()
    cur = con.cursor()
    try:
        cur.execute(qry,timeout=1200)
        return cur.fetch_pandas_all()   
    except Exception as e:
        print(e)
    finally:
        cur.close()

---

In [12]:
qry = """WITH CUSTOMERS AS (
  
  SELECT
  CUSTOMER_ID,
  CASE WHEN ORDERS_L3M > 0 THEN 1 ELSE 0 END AS ACTIVE_3M, 
  ORDERS_L3M,
  ORDERS_L6M,
  ORDERS_L9M,
  ORDERS_L12M,
  FIRST_ORDER_DATE,
  LAST_ORDER_DATE,
  AUTOSHIP_STATUS,
  REGISTRATION_DATE
  FROM CDM.CUSTOMER_AGGREGATE
  
  WHERE REGISTRATION_DATE BETWEEN '2021-01-01' AND '2021-09-01'
  
),

ORDERS AS (
  
  SELECT
  CA.CUSTOMER_ID,
  CA.REGISTRATION_DATE,
  CA.FIRST_ORDER_DATE,
  OL.ORDER_ID,
  OL.PRODUCT_ID,
  PD.CATEGORY_LEVEL1,
  PD.CATEGORY_LEVEL2,
  PD.CATEGORY_LEVEL3,
  PD.MERCH_CLASSIFICATION1,
  PD.MERCH_CLASSIFICATION2,
  CAST(OL.ORDER_PLACED_DTTM AS DATE) AS ORDER_PLACED_DATE,
  OL.ORDER_LINE_TOTAL_PRICE AS ORDER_PRICE,
  CASE WHEN OL.ORDER_AUTO_REORDER_FLAG = 'TRUE' then 'Autoship' else 'Non_Autoship' end as ORDER_TYPE,
  ROW_NUMBER() OVER (PARTITION BY CA.CUSTOMER_ID, 
                     CASE WHEN OL.ORDER_AUTO_REORDER_FLAG = 'TRUE' then 'Autoship' else 'Non_Autoship' END ORDER BY OL.ORDER_PLACED_DTTM ASC) AS AS_RANKING,
  DENSE_RANK() OVER (PARTITION BY CA.CUSTOMER_ID ORDER BY OL.ORDER_PLACED_DTTM ASC) AS ORDER_RANKING
  
  FROM CUSTOMERS CA
  
  LEFT JOIN ECOM.ORDER_LINE OL
  ON CA.CUSTOMER_ID = OL.CUSTOMER_ID 
  AND OL.ORDER_LINE_TOTAL_PRICE != 0
  AND OL.ORDER_LINE_SHIPPED_DTTM IS NOT NULL
  AND OL.ORDER_STATUS NOT IN ('X', 'P', 'J')
  
  LEFT JOIN PDM.PRODUCT PD
  ON OL.PRODUCT_ID = PD.PRODUCT_ID

),

FIRST_AS AS (

  SELECT DISTINCT
  CUSTOMER_ID,
  ORDER_ID AS FIRST_AS_ORDER_ID,
  CAST(ORDER_PLACED_DATE AS DATE) AS FIRST_AS_ORDER_DATE
  
  FROM ORDERS
  WHERE AS_RANKING = 1
  AND ORDER_TYPE = 'Autoship'
),

MC1_DATES AS (
  SELECT
  CUSTOMER_ID,
  
  MIN(CASE WHEN MERCH_CLASSIFICATION1 = 'Consumables' THEN ORDER_PLACED_DATE ELSE NULL END) AS FIRST_CONSUMABLES_DATE,
  MIN(CASE WHEN MERCH_CLASSIFICATION1 = 'Healthcare' THEN ORDER_PLACED_DATE ELSE NULL END) AS FIRST_HEALTHCARE_DATE,
  MIN(CASE WHEN MERCH_CLASSIFICATION1 = 'Hard Goods' THEN ORDER_PLACED_DATE ELSE NULL END) AS FIRST_HARDGOODS_DATE,
  MIN(CASE WHEN MERCH_CLASSIFICATION1 = 'Specialty' THEN ORDER_PLACED_DATE ELSE NULL END) AS FIRST_SPECIALITY_DATE,
  
  MAX(CASE WHEN ORDER_RANKING = 1 AND MERCH_CLASSIFICATION1 = 'Consumables' THEN 1 ELSE 0 END) FIRST_ORDER_CONSUMABLES,
  MAX(CASE WHEN ORDER_RANKING = 1 AND MERCH_CLASSIFICATION1 = 'Healthcare' THEN 1 ELSE 0 END) FIRST_ORDER_HEALTHCARE,
  MAX(CASE WHEN ORDER_RANKING = 1 AND MERCH_CLASSIFICATION1 = 'Hard Goods' THEN 1 ELSE 0 END) FIRST_ORDER_HARDGOODS,
  MAX(CASE WHEN ORDER_RANKING = 1 AND MERCH_CLASSIFICATION1 = 'Specialty' THEN 1 ELSE 0 END) FIRST_ORDER_SPECIALTY,
  
  MAX(CASE WHEN ORDER_RANKING = 1 AND MERCH_CLASSIFICATION2 = 'Apparel & People Products' THEN 1 ELSE 0 END) FIRST_ORDER_APPAREL,
  MAX(CASE WHEN ORDER_RANKING = 1 AND MERCH_CLASSIFICATION2 = 'Leashes, Collars & Harnesses' THEN 1 ELSE 0 END) FIRST_ORDER_LEASH,
  MAX(CASE WHEN ORDER_RANKING = 1 AND MERCH_CLASSIFICATION2 = 'Core Treats' THEN 1 ELSE 0 END) FIRST_ORDER_TREAT,
  MAX(CASE WHEN ORDER_RANKING = 1 AND MERCH_CLASSIFICATION2 = 'Toys' THEN 1 ELSE 0 END) FIRST_ORDER_TOY,
  MAX(CASE WHEN ORDER_RANKING = 1 AND MERCH_CLASSIFICATION2 = 'Beds & Furniture' THEN 1 ELSE 0 END) FIRST_ORDER_BED,
  MAX(CASE WHEN ORDER_RANKING = 1 AND MERCH_CLASSIFICATION2 = 'Litter' THEN 1 ELSE 0 END) FIRST_ORDER_LITTER,
  MAX(CASE WHEN ORDER_RANKING = 1 AND MERCH_CLASSIFICATION2 = 'Core Food' THEN 1 ELSE 0 END) FIRST_ORDER_FOOD,
  MAX(CASE WHEN ORDER_RANKING = 1 AND MERCH_CLASSIFICATION2 = 'Beauty & Grooming' THEN 1 ELSE 0 END) FIRST_ORDER_BEAUTY,
  MAX(CASE WHEN ORDER_RANKING = 1 AND MERCH_CLASSIFICATION2 = 'Containment & Gear' THEN 1 ELSE 0 END) FIRST_ORDER_CONTAINMENT,
  MAX(CASE WHEN ORDER_RANKING = 1 AND MERCH_CLASSIFICATION2 = 'Premium' THEN 1 ELSE 0 END) FIRST_ORDER_PREMIUM,
  MAX(CASE WHEN ORDER_RANKING = 1 AND MERCH_CLASSIFICATION2 = 'Bowls & Feeders' THEN 1 ELSE 0 END) FIRST_ORDER_BOWL,
  MAX(CASE WHEN ORDER_RANKING = 1 AND MERCH_CLASSIFICATION2 = 'Personalized' THEN 1 ELSE 0 END) FIRST_ORDER_PERSONALIZED,
  MAX(CASE WHEN ORDER_RANKING = 1 AND MERCH_CLASSIFICATION2 = 'Waste Management' THEN 1 ELSE 0 END) FIRST_ORDER_WASTE,
  MAX(CASE WHEN ORDER_RANKING = 1 AND MERCH_CLASSIFICATION2 = 'Perishable' THEN 1 ELSE 0 END) FIRST_ORDER_PERISHABLE,
  
  MAX(CASE WHEN MERCH_CLASSIFICATION1 = 'Consumables' THEN 1 ELSE 0 END) AS CURR_HAS_CONSUMABLES,
  MAX(CASE WHEN MERCH_CLASSIFICATION1 = 'Healthcare' THEN 1 ELSE 0 END) AS CURR_HAS_HEALTHCARE,
  MAX(CASE WHEN MERCH_CLASSIFICATION1 = 'Hard Goods' THEN 1 ELSE 0 END) AS CURR_HAS_HARDGOODS,
  MAX(CASE WHEN MERCH_CLASSIFICATION1 = 'Specialty' THEN 1 ELSE 0 END) AS CURR_HAS_SPECIALTY,
  
  COUNT(DISTINCT CASE WHEN ORDER_RANKING = 1 THEN MERCH_CLASSIFICATION1 ELSE NULL END) AS FIRST_ORDER_NUMBER_OF_MC1,
  COUNT(DISTINCT MERCH_CLASSIFICATION1) AS CURRENT_NUMBER_OF_MC1,
  
  SUM(CASE WHEN ORDER_RANKING = 1 THEN ORDER_PRICE ELSE 0 END) AS FIRST_ORDER_AMOUNT,
  SUM(CASE WHEN DATEDIFF('day',FIRST_ORDER_DATE,ORDER_PLACED_DATE) BETWEEN 0 AND 30 THEN ORDER_PRICE ELSE 0 END) AS ORDER_AMOUNT_30D,
  SUM(CASE WHEN DATEDIFF('day',FIRST_ORDER_DATE,ORDER_PLACED_DATE) BETWEEN 30 AND 60 THEN ORDER_PRICE ELSE 0 END) AS ORDER_AMOUNT_60D,
  SUM(CASE WHEN DATEDIFF('day',FIRST_ORDER_DATE,ORDER_PLACED_DATE) BETWEEN 60 AND 90 THEN ORDER_PRICE ELSE 0 END) AS ORDER_AMOUNT_90D,
  SUM(CASE WHEN DATEDIFF('day',FIRST_ORDER_DATE,ORDER_PLACED_DATE) BETWEEN 90 AND 120 THEN ORDER_PRICE ELSE 0 END) AS ORDER_AMOUNT_120D,
  SUM(CASE WHEN DATEDIFF('day',FIRST_ORDER_DATE,ORDER_PLACED_DATE) BETWEEN 120 AND 150 THEN ORDER_PRICE ELSE 0 END) AS ORDER_AMOUNT_160D,
  SUM(CASE WHEN DATEDIFF('day',FIRST_ORDER_DATE,ORDER_PLACED_DATE) BETWEEN 150 AND 180 THEN ORDER_PRICE ELSE 0 END) AS ORDER_AMOUNT_180D,
  SUM(CASE WHEN DATEDIFF('day',FIRST_ORDER_DATE,ORDER_PLACED_DATE) BETWEEN 180 AND 210 THEN ORDER_PRICE ELSE 0 END) AS ORDER_AMOUNT_210D,
  SUM(CASE WHEN DATEDIFF('day',FIRST_ORDER_DATE,ORDER_PLACED_DATE) BETWEEN 210 AND 240 THEN ORDER_PRICE ELSE 0 END) AS ORDER_AMOUNT_240D,
  SUM(CASE WHEN DATEDIFF('day',FIRST_ORDER_DATE,ORDER_PLACED_DATE) BETWEEN 240 AND 270 THEN ORDER_PRICE ELSE 0 END) AS ORDER_AMOUNT_270D,
  SUM(CASE WHEN DATEDIFF('day',FIRST_ORDER_DATE,ORDER_PLACED_DATE) BETWEEN 270 AND 300 THEN ORDER_PRICE ELSE 0 END) AS ORDER_AMOUNT_300D,
  SUM(CASE WHEN DATEDIFF('day',FIRST_ORDER_DATE,ORDER_PLACED_DATE) BETWEEN 300 AND 330 THEN ORDER_PRICE ELSE 0 END) AS ORDER_AMOUNT_330D,
  SUM(CASE WHEN DATEDIFF('day',FIRST_ORDER_DATE,ORDER_PLACED_DATE) BETWEEN 330 AND 360 THEN ORDER_PRICE ELSE 0 END) AS ORDER_AMOUNT_360D
  
  FROM ORDERS
  
  GROUP BY 1
),

FO_CUST AS (
  SELECT
  CA.CUSTOMER_ID,
  ACTIVE_3M,
  ORDERS_L3M,
  ORDERS_L6M,
  ORDERS_L9M,
  ORDERS_L12M,
  FIRST_ORDER_DATE,
  LAST_ORDER_DATE,
  AUTOSHIP_STATUS,
  REGISTRATION_DATE,
  DATEDIFF('day',FIRST_ORDER_DATE,LAST_ORDER_DATE) AS DAYS_TO_LAST_ORDER,
  CASE WHEN DATEDIFF('day',FIRST_ORDER_DATE,LAST_ORDER_DATE) > 30 THEN 1 ELSE 0 END AS ACTIVE_30D,
  CASE WHEN DATEDIFF('day',FIRST_ORDER_DATE,LAST_ORDER_DATE) > 60 THEN 1 ELSE 0 END AS ACTIVE_60D,
  CASE WHEN DATEDIFF('day',FIRST_ORDER_DATE,LAST_ORDER_DATE) > 90 THEN 1 ELSE 0 END AS ACTIVE_90D,
  CASE WHEN DATEDIFF('day',FIRST_ORDER_DATE,LAST_ORDER_DATE) > 120 THEN 1 ELSE 0 END AS ACTIVE_120D,
  CASE WHEN DATEDIFF('day',FIRST_ORDER_DATE,LAST_ORDER_DATE) > 150 THEN 1 ELSE 0 END AS ACTIVE_150D,
  CASE WHEN DATEDIFF('day',FIRST_ORDER_DATE,LAST_ORDER_DATE) > 180 THEN 1 ELSE 0 END AS ACTIVE_180D,
  CASE WHEN DATEDIFF('day',FIRST_ORDER_DATE,LAST_ORDER_DATE) > 210 THEN 1 ELSE 0 END AS ACTIVE_210D,
  CASE WHEN DATEDIFF('day',FIRST_ORDER_DATE,LAST_ORDER_DATE) > 240 THEN 1 ELSE 0 END AS ACTIVE_240D,
  CASE WHEN DATEDIFF('day',FIRST_ORDER_DATE,LAST_ORDER_DATE) > 270 THEN 1 ELSE 0 END AS ACTIVE_270D,
  CASE WHEN DATEDIFF('day',FIRST_ORDER_DATE,LAST_ORDER_DATE) > 300 THEN 1 ELSE 0 END AS ACTIVE_300D,
  CASE WHEN DATEDIFF('day',FIRST_ORDER_DATE,LAST_ORDER_DATE) > 330 THEN 1 ELSE 0 END AS ACTIVE_330D,
  CASE WHEN DATEDIFF('day',FIRST_ORDER_DATE,LAST_ORDER_DATE) > 360 THEN 1 ELSE 0 END AS ACTIVE_360D,
  DATEDIFF('day',LAST_ORDER_DATE,CURRENT_DATE) AS DAYS_SINCE_LAST_ORDER,
  FIRST_AS_ORDER_ID,
  FIRST_AS_ORDER_DATE,
  FIRST_CONSUMABLES_DATE,
  FIRST_HEALTHCARE_DATE,
  FIRST_HARDGOODS_DATE,
  FIRST_SPECIALITY_DATE,
  FIRST_ORDER_CONSUMABLES,
  FIRST_ORDER_HEALTHCARE,
  FIRST_ORDER_HARDGOODS,
  FIRST_ORDER_SPECIALTY,
  CURR_HAS_CONSUMABLES,
  CURR_HAS_HEALTHCARE,
  CURR_HAS_HARDGOODS,
  CURR_HAS_SPECIALTY,
  FIRST_ORDER_NUMBER_OF_MC1,
  CURRENT_NUMBER_OF_MC1,
  FIRST_ORDER_APPAREL,
  FIRST_ORDER_LEASH,
  FIRST_ORDER_TREAT,
  FIRST_ORDER_TOY,
  FIRST_ORDER_BED,
  FIRST_ORDER_LITTER,
  FIRST_ORDER_FOOD,
  FIRST_ORDER_BEAUTY,
  FIRST_ORDER_CONTAINMENT,
  FIRST_ORDER_PREMIUM,
  FIRST_ORDER_BOWL,
  FIRST_ORDER_PERSONALIZED,
  FIRST_ORDER_WASTE,
  FIRST_ORDER_PERISHABLE,
  CONCAT(CASE WHEN FIRST_ORDER_APPAREL = 1 THEN 'APPAREL-' ELSE '' END,
         CASE WHEN FIRST_ORDER_LEASH = 1 THEN 'LEASH-' ELSE '' END,
         CASE WHEN FIRST_ORDER_TREAT = 1 THEN 'TREAT-' ELSE '' END,
         CASE WHEN FIRST_ORDER_TOY = 1 THEN 'TOY-' ELSE '' END,
         CASE WHEN FIRST_ORDER_BED = 1 THEN 'BED-' ELSE '' END,
         CASE WHEN FIRST_ORDER_FOOD = 1 THEN 'FOOD-' ELSE '' END,
         CASE WHEN FIRST_ORDER_BEAUTY = 1 THEN 'BEAUTY-' ELSE '' END,
         CASE WHEN FIRST_ORDER_CONTAINMENT = 1 THEN 'CONTAINMENT-' ELSE '' END,
         CASE WHEN FIRST_ORDER_PREMIUM = 1 THEN 'PREMIUM-' ELSE '' END,
         CASE WHEN FIRST_ORDER_BOWL = 1 THEN 'BOWL-' ELSE '' END,
         CASE WHEN FIRST_ORDER_PERSONALIZED = 1 THEN 'PERSONALIZED-' ELSE '' END,
         CASE WHEN FIRST_ORDER_WASTE = 1 THEN 'WASTE-' ELSE '' END,
         CASE WHEN FIRST_ORDER_LITTER = 1 THEN 'LITTER-' ELSE '' END,
         CASE WHEN FIRST_ORDER_PERISHABLE = 1 THEN 'PERISHABLE-' ELSE '' END
        ) AS FIRST_ORDER_MC2,
  FIRST_ORDER_AMOUNT,
  ORDER_AMOUNT_30D,
  ORDER_AMOUNT_60D,
  ORDER_AMOUNT_90D,
  ORDER_AMOUNT_120D,
  ORDER_AMOUNT_160D,
  ORDER_AMOUNT_180D,
  ORDER_AMOUNT_210D,
  ORDER_AMOUNT_240D,
  ORDER_AMOUNT_270D,
  ORDER_AMOUNT_300D,
  ORDER_AMOUNT_330D,
  ORDER_AMOUNT_360D

  FROM CUSTOMERS CA

  LEFT JOIN FIRST_AS FA
  ON CA.CUSTOMER_ID = FA.CUSTOMER_ID

  LEFT JOIN MC1_DATES MD
  ON CA.CUSTOMER_ID = MD.CUSTOMER_ID
  
  WHERE FIRST_ORDER_CONSUMABLES = 1 AND FIRST_ORDER_NUMBER_OF_MC1 = 1
)

SELECT 
*
FROM FO_CUST
QUALIFY ROW_NUMBER() OVER (ORDER BY RANDOM()) <= 100000 """

In [13]:
df = exec_qry_all(qry)
df.head()

Unnamed: 0,CUSTOMER_ID,ACTIVE_3M,ORDERS_L3M,ORDERS_L6M,ORDERS_L9M,ORDERS_L12M,FIRST_ORDER_DATE,LAST_ORDER_DATE,AUTOSHIP_STATUS,REGISTRATION_DATE,...,ORDER_AMOUNT_90D,ORDER_AMOUNT_120D,ORDER_AMOUNT_160D,ORDER_AMOUNT_180D,ORDER_AMOUNT_210D,ORDER_AMOUNT_240D,ORDER_AMOUNT_270D,ORDER_AMOUNT_300D,ORDER_AMOUNT_330D,ORDER_AMOUNT_360D
0,148304322,1,2.0,4.0,4.0,4.0,2021-10-13,2022-01-05,True,2021-05-28,...,4.15,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,150430078,0,0.0,0.0,2.0,2.0,2021-06-12,2021-06-14,False,2021-06-12,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,147365697,0,0.0,1.0,2.0,2.0,2021-08-02,2021-09-21,False,2021-05-20,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,162935059,0,0.0,1.0,1.0,1.0,2021-08-31,2021-08-31,True,2021-08-30,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,136419913,0,0.0,0.0,0.0,1.0,2021-02-27,2021-02-27,False,2021-02-27,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


---
What does the attrition, monthly spend and % of customers look like at MC2 level?

In [370]:
food = df[(df['FIRST_ORDER_FOOD'] == 1)&(df['FIRST_ORDER_TREAT'] == 0)&(df['FIRST_ORDER_LITTER'] == 0)&(df['FIRST_ORDER_PREMIUM'] == 0)&(df['FIRST_ORDER_PERISHABLE'] == 0)]
treat_food = df[(df['FIRST_ORDER_FOOD'] == 1)&(df['FIRST_ORDER_TREAT'] == 1)&(df['FIRST_ORDER_LITTER'] == 0)&(df['FIRST_ORDER_PREMIUM'] == 0)&(df['FIRST_ORDER_PERISHABLE'] == 0)]
treat = df[(df['FIRST_ORDER_FOOD'] == 0)&(df['FIRST_ORDER_TREAT'] == 1)&(df['FIRST_ORDER_LITTER'] == 0)&(df['FIRST_ORDER_PREMIUM'] == 0)&(df['FIRST_ORDER_PERISHABLE'] == 0)]
litter = df[(df['FIRST_ORDER_FOOD'] == 0)&(df['FIRST_ORDER_TREAT'] == 0)&(df['FIRST_ORDER_LITTER'] == 1)&(df['FIRST_ORDER_PREMIUM'] == 0)&(df['FIRST_ORDER_PERISHABLE'] == 0)]
food_litter = df[(df['FIRST_ORDER_FOOD'] == 1)&(df['FIRST_ORDER_TREAT'] == 0)&(df['FIRST_ORDER_LITTER'] == 1)&(df['FIRST_ORDER_PREMIUM'] == 0)&(df['FIRST_ORDER_PERISHABLE'] == 0)]
premium = df[(df['FIRST_ORDER_FOOD'] == 0)&(df['FIRST_ORDER_TREAT'] == 0)&(df['FIRST_ORDER_LITTER'] == 0)&(df['FIRST_ORDER_PREMIUM'] == 1)&(df['FIRST_ORDER_PERISHABLE'] == 0)]
food_premium = df[(df['FIRST_ORDER_FOOD'] == 1)&(df['FIRST_ORDER_TREAT'] == 0)&(df['FIRST_ORDER_LITTER'] == 0)&(df['FIRST_ORDER_PREMIUM'] == 1)&(df['FIRST_ORDER_PERISHABLE'] == 0)]
treat_food_litter = df[(df['FIRST_ORDER_FOOD'] == 1)&(df['FIRST_ORDER_TREAT'] == 1)&(df['FIRST_ORDER_LITTER'] == 1)&(df['FIRST_ORDER_PREMIUM'] == 0)&(df['FIRST_ORDER_PERISHABLE'] == 0)]

col = ['% of customers','% Attrition', '0-30D spend','30-60D spend','60-90D spend','90-120D spend']
mc2_name = ['CORE FOODS','CORE TREAT-FOODS','CORE TREAT','LITTER','CORE FOOD-LITTER','PREMIUM','CORE FOODS-PREMIUM','CORE-TREAT-FOODS-LITTER']

In [371]:
attr = df['ACTIVE_3M'].value_counts(normalize = True).mul(100).round(2)[0]
thirty = df['ORDER_AMOUNT_30D'].mean().round(2)
sixty = df['ORDER_AMOUNT_60D'].mean().round(2)
ninety = df['ORDER_AMOUNT_90D'].mean().round(2)
onetwenty = df['ORDER_AMOUNT_120D'].mean().round(2)

pd.DataFrame(np.array([['N/A',attr,thirty,sixty,ninety,onetwenty]]),columns = col, index = ['First Order Consumables'])

Unnamed: 0,% of customers,% Attrition,0-30D spend,30-60D spend,60-90D spend,90-120D spend
First Order Consumables,,57.21,82.43,29.98,27.98,26.01


In [385]:
attrition1, first_spend1,second_spend1,third_spend1,fourth_spend1, per_cus1 = [],[],[],[],[],[]
attrition1.append(food['ACTIVE_3M'].value_counts(normalize = True).mul(100).round(2)[0])
attrition1.append(treat_food['ACTIVE_3M'].value_counts(normalize = True).mul(100).round(2)[0])
attrition1.append(treat['ACTIVE_3M'].value_counts(normalize = True).mul(100).round(2)[0])
attrition1.append(litter['ACTIVE_3M'].value_counts(normalize = True).mul(100).round(2)[0])
attrition1.append(food_litter['ACTIVE_3M'].value_counts(normalize = True).mul(100).round(2)[0])
attrition1.append(premium['ACTIVE_3M'].value_counts(normalize = True).mul(100).round(2)[0])
attrition1.append(food_premium['ACTIVE_3M'].value_counts(normalize = True).mul(100).round(2)[0])
attrition1.append(treat_food_litter['ACTIVE_3M'].value_counts(normalize = True).mul(100).round(2)[0])

first_spend1.append(food['ORDER_AMOUNT_30D'].mean().round(2))
first_spend1.append(treat_food['ORDER_AMOUNT_30D'].mean().round(2))
first_spend1.append(treat['ORDER_AMOUNT_30D'].mean().round(2))
first_spend1.append(litter['ORDER_AMOUNT_30D'].mean().round(2))
first_spend1.append(food_litter['ORDER_AMOUNT_30D'].mean().round(2))
first_spend1.append(premium['ORDER_AMOUNT_30D'].mean().round(2))
first_spend1.append(food_premium['ORDER_AMOUNT_30D'].mean().round(2))
first_spend1.append(treat_food_litter['ORDER_AMOUNT_30D'].mean().round(2))

second_spend1.append(food['ORDER_AMOUNT_60D'].mean().round(2))
second_spend1.append(treat_food['ORDER_AMOUNT_60D'].mean().round(2))
second_spend1.append(treat['ORDER_AMOUNT_60D'].mean().round(2))
second_spend1.append(litter['ORDER_AMOUNT_60D'].mean().round(2))
second_spend1.append(food_litter['ORDER_AMOUNT_60D'].mean().round(2))
second_spend1.append(premium['ORDER_AMOUNT_60D'].mean().round(2))
second_spend1.append(food_premium['ORDER_AMOUNT_60D'].mean().round(2))
second_spend1.append(treat_food_litter['ORDER_AMOUNT_60D'].mean().round(2))

third_spend1.append(food['ORDER_AMOUNT_90D'].mean().round(2))
third_spend1.append(treat_food['ORDER_AMOUNT_90D'].mean().round(2))
third_spend1.append(treat['ORDER_AMOUNT_90D'].mean().round(2))
third_spend1.append(litter['ORDER_AMOUNT_90D'].mean().round(2))
third_spend1.append(food_litter['ORDER_AMOUNT_90D'].mean().round(2))
third_spend1.append(premium['ORDER_AMOUNT_90D'].mean().round(2))
third_spend1.append(food_premium['ORDER_AMOUNT_90D'].mean().round(2))
third_spend1.append(treat_food_litter['ORDER_AMOUNT_90D'].mean().round(2))

fourth_spend1.append(food['ORDER_AMOUNT_120D'].mean().round(2))
fourth_spend1.append(treat_food['ORDER_AMOUNT_120D'].mean().round(2))
fourth_spend1.append(treat['ORDER_AMOUNT_120D'].mean().round(2))
fourth_spend1.append(litter['ORDER_AMOUNT_120D'].mean().round(2))
fourth_spend1.append(food_litter['ORDER_AMOUNT_120D'].mean().round(2))
fourth_spend1.append(premium['ORDER_AMOUNT_120D'].mean().round(2))
fourth_spend1.append(food_premium['ORDER_AMOUNT_120D'].mean().round(2))
fourth_spend1.append(treat_food_litter['ORDER_AMOUNT_120D'].mean().round(2))

per_cus1.append(np.round(len(food['CUSTOMER_ID'])/len(df['CUSTOMER_ID'])*100,2))
per_cus1.append(np.round(len(treat_food['CUSTOMER_ID'])/len(df['CUSTOMER_ID'])*100,2))
per_cus1.append(np.round(len(treat['CUSTOMER_ID'])/len(df['CUSTOMER_ID'])*100,2))
per_cus1.append(np.round(len(litter['CUSTOMER_ID'])/len(df['CUSTOMER_ID'])*100,2))
per_cus1.append(np.round(len(food_litter['CUSTOMER_ID'])/len(df['CUSTOMER_ID'])*100,2))
per_cus1.append(np.round(len(premium['CUSTOMER_ID'])/len(df['CUSTOMER_ID'])*100,2))
per_cus1.append(np.round(len(food_premium['CUSTOMER_ID'])/len(df['CUSTOMER_ID'])*100,2))
per_cus1.append(np.round(len(treat_food_litter['CUSTOMER_ID'])/len(df['CUSTOMER_ID'])*100,2))

pd.DataFrame(list(zip(per_cus1,attrition1,first_spend1,second_spend1,third_spend1,fourth_spend1)),columns = col, index = mc2_name)


Unnamed: 0,% of customers,% Attrition,0-30D spend,30-60D spend,60-90D spend,90-120D spend
CORE FOODS,52.4,55.96,81.82,30.83,29.25,26.88
CORE TREAT-FOODS,14.04,52.99,91.14,34.48,31.54,29.72
CORE TREAT,9.17,66.85,59.42,18.82,17.0,16.57
LITTER,7.61,67.52,57.02,14.79,14.0,13.14
CORE FOOD-LITTER,6.3,50.65,93.83,34.09,31.24,28.96
PREMIUM,4.49,60.32,100.23,36.96,34.46,31.35
CORE FOODS-PREMIUM,1.08,55.14,127.6,45.89,40.49,37.68
CORE-TREAT-FOODS-LITTER,1.99,51.59,101.7,37.17,34.57,31.93


In [329]:
# print(mc2)
# attrition, first_spend,second_spend,third_spend,fourth_spend, per_cus = [],[],[],[],[],[]
# for x in mc2:
#     #print(x)
#     attrition.append(df[df[x] == 1]['ACTIVE_3M'].value_counts(normalize = True).mul(100).round(2)[0])
#     first_spend.append(df[df[x] == 1]['ORDER_AMOUNT_30D'].mean().round(2))
#     second_spend.append(df[df[x] == 1]['ORDER_AMOUNT_60D'].mean().round(2))
#     third_spend.append(df[df[x] == 1]['ORDER_AMOUNT_90D'].mean().round(2))
#     fourth_spend.append(df[df[x] == 1]['ORDER_AMOUNT_120D'].mean().round(2))
#     per_cus.append(np.round(len(df[df[x] == 1]['CUSTOMER_ID'])/len(df['CUSTOMER_ID'])*100,2))

# pd.DataFrame(list(zip(per_cus,attrition,first_spend,second_spend,third_spend,fourth_spend)),columns = col, index = mc2_names)   

---

Are they existing AS users? If yes, then how do they interact with the website vs non-AS users?

In [386]:
col_AS = ['% AS','% Attrition AS', '% Attrition Non-AS']
autoship = df[df['AUTOSHIP_STATUS'] == True]
non_autoship = df[df['AUTOSHIP_STATUS'] == False]

AShip = df['AUTOSHIP_STATUS'].value_counts(normalize = True).mul(100).round(1)[1]
AS_attr = autoship['ACTIVE_3M'].value_counts(normalize = True).mul(100).round(1)[0]
NON_AS_attr = non_autoship['ACTIVE_3M'].value_counts(normalize = True).mul(100).round(1)[0]

pd.DataFrame(np.array([[AShip,AS_attr,NON_AS_attr]]),columns = col_AS, index = ['First Order Consumables']) 

Unnamed: 0,% AS,% Attrition AS,% Attrition Non-AS
First Order Consumables,28.5,13.6,74.6


In [390]:
autoship_per, attrition_as, non_attrition_as = [], [], []

autoship_per.append(food['AUTOSHIP_STATUS'].value_counts(normalize = True).mul(100).round(1)[1])
autoship_per.append(treat_food['AUTOSHIP_STATUS'].value_counts(normalize = True).mul(100).round(1)[1])
autoship_per.append(treat['AUTOSHIP_STATUS'].value_counts(normalize = True).mul(100).round(1)[1])
autoship_per.append(litter['AUTOSHIP_STATUS'].value_counts(normalize = True).mul(100).round(1)[1])
autoship_per.append(food_litter['AUTOSHIP_STATUS'].value_counts(normalize = True).mul(100).round(1)[1])
autoship_per.append(premium['AUTOSHIP_STATUS'].value_counts(normalize = True).mul(100).round(1)[1])
autoship_per.append(food_premium['AUTOSHIP_STATUS'].value_counts(normalize = True).mul(100).round(1)[1])
autoship_per.append(treat_food_litter['AUTOSHIP_STATUS'].value_counts(normalize = True).mul(100).round(1)[1])

attrition_as.append(food[food['AUTOSHIP_STATUS'] == True]['ACTIVE_3M'].value_counts(normalize = True).mul(100).round(1)[0])
attrition_as.append(treat_food[treat_food['AUTOSHIP_STATUS'] == True]['ACTIVE_3M'].value_counts(normalize = True).mul(100).round(1)[0])
attrition_as.append(treat[treat['AUTOSHIP_STATUS'] == True]['ACTIVE_3M'].value_counts(normalize = True).mul(100).round(1)[0])
attrition_as.append(litter[litter['AUTOSHIP_STATUS'] == True]['ACTIVE_3M'].value_counts(normalize = True).mul(100).round(1)[0])
attrition_as.append(food_litter[food_litter['AUTOSHIP_STATUS'] == True]['ACTIVE_3M'].value_counts(normalize = True).mul(100).round(1)[0])
attrition_as.append(premium[premium['AUTOSHIP_STATUS'] == True]['ACTIVE_3M'].value_counts(normalize = True).mul(100).round(1)[0])
attrition_as.append(food_premium[food_premium['AUTOSHIP_STATUS'] == True]['ACTIVE_3M'].value_counts(normalize = True).mul(100).round(1)[0])
attrition_as.append(treat_food_litter[treat_food_litter['AUTOSHIP_STATUS'] == True]['ACTIVE_3M'].value_counts(normalize = True).mul(100).round(1)[0])

non_attrition_as.append(food[food['AUTOSHIP_STATUS'] == False]['ACTIVE_3M'].value_counts(normalize = True).mul(100).round(1)[0])
non_attrition_as.append(treat_food[treat_food['AUTOSHIP_STATUS'] == False]['ACTIVE_3M'].value_counts(normalize = True).mul(100).round(1)[0])
non_attrition_as.append(treat[treat['AUTOSHIP_STATUS'] == False]['ACTIVE_3M'].value_counts(normalize = True).mul(100).round(1)[0])
non_attrition_as.append(litter[litter['AUTOSHIP_STATUS'] == False]['ACTIVE_3M'].value_counts(normalize = True).mul(100).round(1)[0])
non_attrition_as.append(food_litter[food_litter['AUTOSHIP_STATUS'] == False]['ACTIVE_3M'].value_counts(normalize = True).mul(100).round(1)[0])
non_attrition_as.append(premium[premium['AUTOSHIP_STATUS'] == False]['ACTIVE_3M'].value_counts(normalize = True).mul(100).round(1)[0])
non_attrition_as.append(food_premium[food_premium['AUTOSHIP_STATUS'] == False]['ACTIVE_3M'].value_counts(normalize = True).mul(100).round(1)[0])
non_attrition_as.append(treat_food_litter[treat_food_litter['AUTOSHIP_STATUS'] == False]['ACTIVE_3M'].value_counts(normalize = True).mul(100).round(1)[0])

pd.DataFrame(list(zip(autoship_per,attrition_as,non_attrition_as)),columns = col_AS, index = mc2_name) 

Unnamed: 0,% AS,% Attrition AS,% Attrition Non-AS
CORE FOODS,30.9,13.4,74.9
CORE TREAT-FOODS,31.2,12.5,71.3
CORE TREAT,16.8,19.5,76.4
LITTER,19.2,14.8,80.1
CORE FOOD-LITTER,31.8,12.9,68.3
PREMIUM,26.9,14.1,77.3
CORE FOODS-PREMIUM,27.8,11.3,72.0
CORE-TREAT-FOODS-LITTER,31.0,14.9,68.1


In [339]:
# autoship_per, attrition_as, non_attrition_as = [], [], []
# for x in mc2:
#     autoship_per.append(df[df[x] == 1]['AUTOSHIP_STATUS'].value_counts(normalize = True).mul(100).round(2)[1])
#     attrition_as.append(autoship[autoship[x] == 1]['ACTIVE_3M'].value_counts(normalize = True).mul(100).round(2)[0])
#     non_attrition_as.append(non_autoship[non_autoship[x] == 1]['ACTIVE_3M'].value_counts(normalize = True).mul(100).round(2)[0])

# pd.DataFrame(list(zip(autoship_per,attrition_as,non_attrition_as)),columns = col_AS, index = mc2_names) 

---

In [166]:
qry2 = """WITH FIRST_WEB_VISIT_AFTER_FO AS (
  SELECT
  CUSTOMER_ID,
  COUNT(DISTINCT UNIQUE_VISIT_ID) AS FIRST_VISIT_SESSIONS_AFTER_FO,
  COUNT(CASE WHEN EVENT_ACTION = 'impression' THEN UNIQUE_VISIT_ID ELSE NULL END) AS FIRST_VISIT_IMPRESSION_AFTER_FO,
  COUNT(CASE WHEN EVENT_ACTION like '%click%' THEN UNIQUE_VISIT_ID ELSE NULL END) AS FIRST_VISIT_CLICK_AFTER_FO,
  MIN(DAYS_SINCE_FIRST_ORDER) AS DAYS_TO_FIRST_SESSION_AFTER_FO,
  COUNT(CASE WHEN lower(EVENT_ACTION) = 'detail' AND lower(EVENT_CATEGORY) = 'eec' THEN UNIQUE_VISIT_ID ELSE NULL END) AS FIRST_VISIT_PDP,
  COUNT(CASE WHEN EVENT_ACTION = 'impression' AND lower(EVENT_LABEL) in ('search-results','browse','brand-page','deals') THEN UNIQUE_VISIT_ID ELSE NULL END) AS FIRST_VISIT_PLP,
  COUNT(CASE WHEN PAGE_PATH = '/' THEN UNIQUE_VISIT_ID ELSE NULL END) AS FIRST_VISIT_HP,
  MIN(CASE WHEN lower(EVENT_ACTION) = 'detail' AND lower(EVENT_CATEGORY) = 'eec' THEN HIT_NUMBER ELSE NULL END) AS MIN_PDP_HIT_NUMBER,
  MIN(CASE WHEN EVENT_ACTION = 'impression' AND lower(EVENT_LABEL) in ('search-results','browse','brand-page','deals') THEN HIT_NUMBER ELSE NULL END) AS MIN_PLP_HIT_NUMBER,
  MIN(CASE WHEN PAGE_PATH = '/' THEN HIT_NUMBER ELSE NULL END) AS MIN_HP_HIT_NUMBER,
  MAX(CASE WHEN TOTALS:transactionRevenue > 0 THEN 1 ELSE 0 END) AS FIRST_VISIT_PURCHASED,
  MAX(CASE WHEN CHANNEL_GROUPING = 'Email' THEN 1 ELSE 0 END) AS FIRST_EMAIL,
  MAX(CASE WHEN CHANNEL_GROUPING = 'Direct' THEN 1 ELSE 0 END) AS FIRST_DIRECT,
  MAX(CASE WHEN CHANNEL_GROUPING = 'PLA' THEN 1 ELSE 0 END) AS FIRST_PLA,
  MAX(CASE WHEN CHANNEL_GROUPING = 'Organic Search' THEN 1 ELSE 0 END) AS FIRST_OS,
  MAX(CASE WHEN CHANNEL_GROUPING = 'Affiliates' THEN 1 ELSE 0 END) AS FIRST_AFF,
  MAX(CASE WHEN CHANNEL_GROUPING = 'Chewy Paid Search' THEN 1 ELSE 0 END) AS FIRST_CPS,
  MAX(CASE WHEN CHANNEL_GROUPING = 'Display' THEN 1 ELSE 0 END) AS FIRST_DIS,
  MAX(CASE WHEN CHANNEL_GROUPING = 'Referral' THEN 1 ELSE 0 END) AS FIRST_REF,
  MAX(CASE WHEN CHANNEL_GROUPING = 'Social' THEN 1 ELSE 0 END) AS FIRST_SOC
 
  FROM (
    SELECT
    *,
    DENSE_RANK() OVER (PARTITION BY CUSTOMER_ID ORDER BY GA_SESSIONS_DATE ASC) AS RANK,
    DATEDIFF('day',FIRST_ORDER_DATE, GA_SESSIONS_DATE) AS DAYS_SINCE_FIRST_ORDER
 
    FROM ECOM_SANDBOX.MC1_SESSIONS_SV
    WHERE GA_SESSIONS_DATE > FIRST_ORDER_DATE
    )
  WHERE RANK = 1
  GROUP BY 1
),
 
SESSIONS AS (
  SELECT
  CUSTOMER_ID,
  COUNT(DISTINCT CASE WHEN GA_SESSIONS_DATE <= FIRST_ORDER_DATE THEN UNIQUE_VISIT_ID ELSE NULL END ) AS SESSION_BEFORE_FO,
  COUNT(DISTINCT CASE WHEN GA_SESSIONS_DATE BETWEEN FIRST_ORDER_DATE AND DATEADD('day',30,FIRST_ORDER_DATE) THEN UNIQUE_VISIT_ID ELSE NULL END ) AS SESSION_AFTER_FO_30D,
  COUNT(DISTINCT CASE WHEN GA_SESSIONS_DATE BETWEEN DATEADD('day',30,FIRST_ORDER_DATE) AND DATEADD('day',60,FIRST_ORDER_DATE) THEN UNIQUE_VISIT_ID ELSE NULL END ) AS SESSION_AFTER_FO_60D,
  COUNT(DISTINCT CASE WHEN GA_SESSIONS_DATE BETWEEN DATEADD('day',60,FIRST_ORDER_DATE) AND DATEADD('day',90,FIRST_ORDER_DATE) THEN UNIQUE_VISIT_ID ELSE NULL END ) AS SESSION_AFTER_FO_90D,
  COUNT(DISTINCT CASE WHEN GA_SESSIONS_DATE BETWEEN DATEADD('day',90,FIRST_ORDER_DATE) AND DATEADD('day',120,FIRST_ORDER_DATE) THEN UNIQUE_VISIT_ID ELSE NULL END ) AS SESSION_AFTER_FO_120D,
  COUNT(DISTINCT CASE WHEN GA_SESSIONS_DATE BETWEEN DATEADD('day',120,FIRST_ORDER_DATE) AND DATEADD('day',150,FIRST_ORDER_DATE) THEN UNIQUE_VISIT_ID ELSE NULL END ) AS SESSION_AFTER_FO_150D,
  COUNT(DISTINCT CASE WHEN GA_SESSIONS_DATE BETWEEN DATEADD('day',150,FIRST_ORDER_DATE) AND DATEADD('day',180,FIRST_ORDER_DATE) THEN UNIQUE_VISIT_ID ELSE NULL END ) AS SESSION_AFTER_FO_180D,
 
  COUNT(CASE WHEN GA_SESSIONS_DATE <= FIRST_ORDER_DATE AND EVENT_ACTION = 'impression' THEN UNIQUE_VISIT_ID ELSE NULL END ) AS IMPRESSION_BEFORE_FO,
  COUNT(CASE WHEN GA_SESSIONS_DATE BETWEEN FIRST_ORDER_DATE AND DATEADD('day',30,FIRST_ORDER_DATE) AND EVENT_ACTION = 'impression' THEN UNIQUE_VISIT_ID ELSE NULL END ) AS IMPRESSION_AFTER_FO_30D,
  COUNT(CASE WHEN GA_SESSIONS_DATE BETWEEN DATEADD('day',30,FIRST_ORDER_DATE) AND DATEADD('day',60,FIRST_ORDER_DATE) AND EVENT_ACTION = 'impression' THEN UNIQUE_VISIT_ID ELSE NULL END ) AS IMPRESSION_AFTER_FO_60D,
  COUNT(CASE WHEN GA_SESSIONS_DATE BETWEEN DATEADD('day',60,FIRST_ORDER_DATE) AND DATEADD('day',90,FIRST_ORDER_DATE) AND EVENT_ACTION = 'impression' THEN UNIQUE_VISIT_ID ELSE NULL END ) AS IMPRESSION_AFTER_FO_90D,
  COUNT(CASE WHEN GA_SESSIONS_DATE BETWEEN DATEADD('day',90,FIRST_ORDER_DATE) AND DATEADD('day',120,FIRST_ORDER_DATE) AND EVENT_ACTION = 'impression' THEN UNIQUE_VISIT_ID ELSE NULL END ) AS IMPRESSION_AFTER_FO_120D,
  COUNT(CASE WHEN GA_SESSIONS_DATE BETWEEN DATEADD('day',120,FIRST_ORDER_DATE) AND DATEADD('day',150,FIRST_ORDER_DATE) AND EVENT_ACTION = 'impression' THEN UNIQUE_VISIT_ID ELSE NULL END ) AS IMPRESSION_AFTER_FO_150D,
  COUNT(CASE WHEN GA_SESSIONS_DATE BETWEEN DATEADD('day',150,FIRST_ORDER_DATE) AND DATEADD('day',180,FIRST_ORDER_DATE) AND EVENT_ACTION = 'impression' THEN UNIQUE_VISIT_ID ELSE NULL END ) AS IMPRESSION_AFTER_FO_180D,
 
  COUNT(CASE WHEN GA_SESSIONS_DATE <= FIRST_ORDER_DATE AND EVENT_ACTION like '%click%' THEN UNIQUE_VISIT_ID ELSE NULL END ) AS CLICK_BEFORE_FO,
  COUNT(CASE WHEN GA_SESSIONS_DATE BETWEEN FIRST_ORDER_DATE AND DATEADD('day',30,FIRST_ORDER_DATE) AND EVENT_ACTION like '%click%' THEN UNIQUE_VISIT_ID ELSE NULL END ) AS CLICK_AFTER_FO_30D,
  COUNT(CASE WHEN GA_SESSIONS_DATE BETWEEN DATEADD('day',30,FIRST_ORDER_DATE) AND DATEADD('day',60,FIRST_ORDER_DATE) AND EVENT_ACTION like '%click%' THEN UNIQUE_VISIT_ID ELSE NULL END ) AS CLICK_AFTER_FO_60D,
  COUNT(CASE WHEN GA_SESSIONS_DATE BETWEEN DATEADD('day',60,FIRST_ORDER_DATE) AND DATEADD('day',90,FIRST_ORDER_DATE) AND EVENT_ACTION like '%click%' THEN UNIQUE_VISIT_ID ELSE NULL END ) AS CLICK_AFTER_FO_90D,
  COUNT(CASE WHEN GA_SESSIONS_DATE BETWEEN DATEADD('day',90,FIRST_ORDER_DATE) AND DATEADD('day',120,FIRST_ORDER_DATE) AND EVENT_ACTION like '%click%' THEN UNIQUE_VISIT_ID ELSE NULL END ) AS CLICK_AFTER_FO_120D,
  COUNT(CASE WHEN GA_SESSIONS_DATE BETWEEN DATEADD('day',120,FIRST_ORDER_DATE) AND DATEADD('day',150,FIRST_ORDER_DATE) AND EVENT_ACTION like '%click%' THEN UNIQUE_VISIT_ID ELSE NULL END ) AS CLICK_AFTER_FO_150D,
  COUNT(CASE WHEN GA_SESSIONS_DATE BETWEEN DATEADD('day',150,FIRST_ORDER_DATE) AND DATEADD('day',180,FIRST_ORDER_DATE) AND EVENT_ACTION like '%click%' THEN UNIQUE_VISIT_ID ELSE NULL END ) AS CLICK_AFTER_FO_180D,
 
  COUNT(CASE WHEN GA_SESSIONS_DATE <= FIRST_ORDER_DATE AND lower(EVENT_ACTION) = 'detail' AND lower(EVENT_CATEGORY) = 'eec' THEN UNIQUE_VISIT_ID ELSE NULL END ) AS PDP_BEFORE_FO,
  COUNT(CASE WHEN GA_SESSIONS_DATE BETWEEN FIRST_ORDER_DATE AND DATEADD('day',30,FIRST_ORDER_DATE) AND lower(EVENT_ACTION) = 'detail' AND lower(EVENT_CATEGORY) = 'eec' THEN UNIQUE_VISIT_ID ELSE NULL END ) AS PDP_AFTER_FO_30D,
  COUNT(CASE WHEN GA_SESSIONS_DATE BETWEEN DATEADD('day',30,FIRST_ORDER_DATE) AND DATEADD('day',60,FIRST_ORDER_DATE) AND lower(EVENT_ACTION) = 'detail' AND lower(EVENT_CATEGORY) = 'eec' THEN UNIQUE_VISIT_ID ELSE NULL END ) AS PDP_AFTER_FO_60D,
  COUNT(CASE WHEN GA_SESSIONS_DATE BETWEEN DATEADD('day',60,FIRST_ORDER_DATE) AND DATEADD('day',90,FIRST_ORDER_DATE) AND lower(EVENT_ACTION) = 'detail' AND lower(EVENT_CATEGORY) = 'eec' THEN UNIQUE_VISIT_ID ELSE NULL END ) AS PDP_AFTER_FO_90D,
  COUNT(CASE WHEN GA_SESSIONS_DATE BETWEEN DATEADD('day',90,FIRST_ORDER_DATE) AND DATEADD('day',120,FIRST_ORDER_DATE) AND lower(EVENT_ACTION) = 'detail' AND lower(EVENT_CATEGORY) = 'eec' THEN UNIQUE_VISIT_ID ELSE NULL END ) AS PDP_AFTER_FO_120D,
  COUNT(CASE WHEN GA_SESSIONS_DATE BETWEEN DATEADD('day',120,FIRST_ORDER_DATE) AND DATEADD('day',150,FIRST_ORDER_DATE) AND lower(EVENT_ACTION) = 'detail' AND lower(EVENT_CATEGORY) = 'eec' THEN UNIQUE_VISIT_ID ELSE NULL END ) AS PDP_AFTER_FO_150D,
  COUNT(CASE WHEN GA_SESSIONS_DATE BETWEEN DATEADD('day',150,FIRST_ORDER_DATE) AND DATEADD('day',180,FIRST_ORDER_DATE) AND lower(EVENT_ACTION) = 'detail' AND lower(EVENT_CATEGORY) = 'eec' THEN UNIQUE_VISIT_ID ELSE NULL END ) AS PDP_AFTER_FO_180D,
 
  COUNT(CASE WHEN GA_SESSIONS_DATE <= FIRST_ORDER_DATE AND EVENT_ACTION = 'impression' AND lower(EVENT_LABEL) in ('search-results','browse','brand-page','deals') THEN UNIQUE_VISIT_ID ELSE NULL END ) AS PLP_BEFORE_FO,
  COUNT(CASE WHEN GA_SESSIONS_DATE BETWEEN FIRST_ORDER_DATE AND DATEADD('day',30,FIRST_ORDER_DATE) AND EVENT_ACTION = 'impression' AND lower(EVENT_LABEL) in ('search-results','browse','brand-page','deals') THEN UNIQUE_VISIT_ID ELSE NULL END ) AS PLP_AFTER_FO_30D,
  COUNT(CASE WHEN GA_SESSIONS_DATE BETWEEN DATEADD('day',30,FIRST_ORDER_DATE) AND DATEADD('day',60,FIRST_ORDER_DATE) AND EVENT_ACTION = 'impression' AND lower(EVENT_LABEL) in ('search-results','browse','brand-page','deals') THEN UNIQUE_VISIT_ID ELSE NULL END ) AS PLP_AFTER_FO_60D,
  COUNT(CASE WHEN GA_SESSIONS_DATE BETWEEN DATEADD('day',60,FIRST_ORDER_DATE) AND DATEADD('day',90,FIRST_ORDER_DATE) AND EVENT_ACTION = 'impression' AND lower(EVENT_LABEL) in ('search-results','browse','brand-page','deals') THEN UNIQUE_VISIT_ID ELSE NULL END ) AS PLP_AFTER_FO_90D,
  COUNT(CASE WHEN GA_SESSIONS_DATE BETWEEN DATEADD('day',90,FIRST_ORDER_DATE) AND DATEADD('day',120,FIRST_ORDER_DATE) AND EVENT_ACTION = 'impression' AND lower(EVENT_LABEL) in ('search-results','browse','brand-page','deals') THEN UNIQUE_VISIT_ID ELSE NULL END ) AS PLP_AFTER_FO_120D,
  COUNT(CASE WHEN GA_SESSIONS_DATE BETWEEN DATEADD('day',120,FIRST_ORDER_DATE) AND DATEADD('day',150,FIRST_ORDER_DATE) AND EVENT_ACTION = 'impression' AND lower(EVENT_LABEL) in ('search-results','browse','brand-page','deals') THEN UNIQUE_VISIT_ID ELSE NULL END ) AS PLP_AFTER_FO_150D,
  COUNT(CASE WHEN GA_SESSIONS_DATE BETWEEN DATEADD('day',150,FIRST_ORDER_DATE) AND DATEADD('day',180,FIRST_ORDER_DATE) AND EVENT_ACTION = 'impression' AND lower(EVENT_LABEL) in ('search-results','browse','brand-page','deals') THEN UNIQUE_VISIT_ID ELSE NULL END ) AS PLP_AFTER_FO_180D,
 
  COUNT(CASE WHEN GA_SESSIONS_DATE <= FIRST_ORDER_DATE AND PAGE_PATH = '/' THEN UNIQUE_VISIT_ID ELSE NULL END ) AS HP_BEFORE_FO,
  COUNT(CASE WHEN GA_SESSIONS_DATE BETWEEN FIRST_ORDER_DATE AND DATEADD('day',30,FIRST_ORDER_DATE) AND PAGE_PATH = '/' THEN UNIQUE_VISIT_ID ELSE NULL END ) AS HP_AFTER_FO_30D,
  COUNT(CASE WHEN GA_SESSIONS_DATE BETWEEN DATEADD('day',30,FIRST_ORDER_DATE) AND DATEADD('day',60,FIRST_ORDER_DATE) AND PAGE_PATH = '/' THEN UNIQUE_VISIT_ID ELSE NULL END ) AS HP_AFTER_FO_60D,
  COUNT(CASE WHEN GA_SESSIONS_DATE BETWEEN DATEADD('day',60,FIRST_ORDER_DATE) AND DATEADD('day',90,FIRST_ORDER_DATE) AND PAGE_PATH = '/' THEN UNIQUE_VISIT_ID ELSE NULL END ) AS HP_AFTER_FO_90D,
  COUNT(CASE WHEN GA_SESSIONS_DATE BETWEEN DATEADD('day',90,FIRST_ORDER_DATE) AND DATEADD('day',120,FIRST_ORDER_DATE) AND PAGE_PATH = '/' THEN UNIQUE_VISIT_ID ELSE NULL END ) AS HP_AFTER_FO_120D,
  COUNT(CASE WHEN GA_SESSIONS_DATE BETWEEN DATEADD('day',120,FIRST_ORDER_DATE) AND DATEADD('day',150,FIRST_ORDER_DATE) AND PAGE_PATH = '/' THEN UNIQUE_VISIT_ID ELSE NULL END ) AS HP_AFTER_FO_150D,
  COUNT(CASE WHEN GA_SESSIONS_DATE BETWEEN DATEADD('day',150,FIRST_ORDER_DATE) AND DATEADD('day',180,FIRST_ORDER_DATE) AND PAGE_PATH = '/' THEN UNIQUE_VISIT_ID ELSE NULL END ) AS HP_AFTER_FO_180D,

  COUNT(CASE WHEN GA_SESSIONS_DATE <= FIRST_ORDER_DATE AND lower(EVENT_ACTION) like '%add%to%cart%' THEN UNIQUE_VISIT_ID ELSE NULL END ) AS ATC_BEFORE_FO,
  COUNT(CASE WHEN GA_SESSIONS_DATE BETWEEN FIRST_ORDER_DATE AND DATEADD('day',30,FIRST_ORDER_DATE) AND lower(EVENT_ACTION) like '%add%to%cart%' THEN UNIQUE_VISIT_ID ELSE NULL END ) AS ATC_AFTER_FO_30D,
  COUNT(CASE WHEN GA_SESSIONS_DATE BETWEEN DATEADD('day',30,FIRST_ORDER_DATE) AND DATEADD('day',60,FIRST_ORDER_DATE) AND lower(EVENT_ACTION) like '%add%to%cart%' THEN UNIQUE_VISIT_ID ELSE NULL END ) AS ATC_AFTER_FO_60D,
  COUNT(CASE WHEN GA_SESSIONS_DATE BETWEEN DATEADD('day',60,FIRST_ORDER_DATE) AND DATEADD('day',90,FIRST_ORDER_DATE) AND lower(EVENT_ACTION) like '%add%to%cart%' THEN UNIQUE_VISIT_ID ELSE NULL END ) AS ATC_AFTER_FO_90D,
  COUNT(CASE WHEN GA_SESSIONS_DATE BETWEEN DATEADD('day',90,FIRST_ORDER_DATE) AND DATEADD('day',120,FIRST_ORDER_DATE) AND lower(EVENT_ACTION) like '%add%to%cart%' THEN UNIQUE_VISIT_ID ELSE NULL END ) AS ATC_AFTER_FO_120D,
  COUNT(CASE WHEN GA_SESSIONS_DATE BETWEEN DATEADD('day',120,FIRST_ORDER_DATE) AND DATEADD('day',150,FIRST_ORDER_DATE) AND lower(EVENT_ACTION) like '%add%to%cart%' THEN UNIQUE_VISIT_ID ELSE NULL END ) AS ATC_AFTER_FO_150D,
  COUNT(CASE WHEN GA_SESSIONS_DATE BETWEEN DATEADD('day',150,FIRST_ORDER_DATE) AND DATEADD('day',180,FIRST_ORDER_DATE) AND lower(EVENT_ACTION) like '%add%to%cart%' THEN UNIQUE_VISIT_ID ELSE NULL END ) AS ATC_AFTER_FO_180D
 
  FROM ECOM_SANDBOX.MC1_SESSIONS_SV
  GROUP BY 1
)
 
SELECT
A.CUSTOMER_ID,
A.ACTIVE_3M,
A.FIRST_ORDER_MC2,
CURRENT_NUMBER_OF_MC1,
FIRST_ORDER_APPAREL,
FIRST_ORDER_LEASH,
FIRST_ORDER_TREAT,
FIRST_ORDER_TOY,
FIRST_ORDER_BED,
FIRST_ORDER_LITTER,
FIRST_ORDER_FOOD,
FIRST_ORDER_BEAUTY,
FIRST_ORDER_CONTAINMENT,
FIRST_ORDER_PREMIUM,
FIRST_ORDER_BOWL,
FIRST_ORDER_PERSONALIZED,
FIRST_ORDER_WASTE,
FIRST_ORDER_PERISHABLE,
FIRST_VISIT_SESSIONS_AFTER_FO,
FIRST_VISIT_IMPRESSION_AFTER_FO,
FIRST_VISIT_CLICK_AFTER_FO,
DAYS_TO_FIRST_SESSION_AFTER_FO,
FIRST_VISIT_PDP,
FIRST_VISIT_PLP,
FIRST_VISIT_HP,
MIN_PDP_HIT_NUMBER,
MIN_PLP_HIT_NUMBER,
MIN_HP_HIT_NUMBER,
FIRST_VISIT_PURCHASED,
FIRST_EMAIL,
FIRST_DIRECT,
FIRST_PLA,
FIRST_OS,
FIRST_AFF,
FIRST_CPS,
FIRST_DIS,
FIRST_REF,
FIRST_SOC,
COALESCE(SESSION_BEFORE_FO,0) AS SESSION_BEFORE_FO,
COALESCE(SESSION_AFTER_FO_30D,0) AS SESSION_AFTER_FO_30D,
COALESCE(SESSION_AFTER_FO_60D,0) AS SESSION_AFTER_FO_60D,
COALESCE(SESSION_AFTER_FO_90D,0) AS SESSION_AFTER_FO_90D,
COALESCE(SESSION_AFTER_FO_120D,0) AS SESSION_AFTER_FO_120D,
COALESCE(SESSION_AFTER_FO_150D,0) AS SESSION_AFTER_FO_150D,
COALESCE(SESSION_AFTER_FO_180D,0) AS SESSION_AFTER_FO_180D,
COALESCE(IMPRESSION_BEFORE_FO,0) AS IMPRESSION_BEFORE_FO,
COALESCE(IMPRESSION_AFTER_FO_30D,0) AS IMPRESSION_AFTER_FO_30D,
COALESCE(IMPRESSION_AFTER_FO_60D,0) AS IMPRESSION_AFTER_FO_60D,
COALESCE(IMPRESSION_AFTER_FO_90D,0) AS IMPRESSION_AFTER_FO_90D,
COALESCE(IMPRESSION_AFTER_FO_120D,0) AS IMPRESSION_AFTER_FO_120D,
COALESCE(IMPRESSION_AFTER_FO_150D,0) AS IMPRESSION_AFTER_FO_150D,
COALESCE(IMPRESSION_AFTER_FO_180D,0) AS IMPRESSION_AFTER_FO_180D,
COALESCE(CLICK_BEFORE_FO,0) AS CLICK_BEFORE_FO,
COALESCE(CLICK_AFTER_FO_30D,0) AS CLICK_AFTER_FO_30D,
COALESCE(CLICK_AFTER_FO_60D,0) AS CLICK_AFTER_FO_60D,
COALESCE(CLICK_AFTER_FO_90D,0) AS CLICK_AFTER_FO_90D,
COALESCE(CLICK_AFTER_FO_120D,0) AS CLICK_AFTER_FO_120D,
COALESCE(CLICK_AFTER_FO_150D,0) AS CLICK_AFTER_FO_150D,
COALESCE(CLICK_AFTER_FO_180D,0) AS CLICK_AFTER_FO_180D,
COALESCE(PDP_BEFORE_FO,0) AS PDP_BEFORE_FO,
COALESCE(PDP_AFTER_FO_30D,0) AS PDP_AFTER_FO_30D,
COALESCE(PDP_AFTER_FO_60D,0) AS PDP_AFTER_FO_60D,
COALESCE(PDP_AFTER_FO_90D,0) AS PDP_AFTER_FO_90D,
COALESCE(PDP_AFTER_FO_120D,0) AS PDP_AFTER_FO_120D,
COALESCE(PDP_AFTER_FO_150D,0) AS PDP_AFTER_FO_150D,
COALESCE(PDP_AFTER_FO_180D,0) AS PDP_AFTER_FO_180D,
COALESCE(PLP_BEFORE_FO,0) AS PLP_BEFORE_FO,
COALESCE(PLP_AFTER_FO_30D,0) AS PLP_AFTER_FO_30D,
COALESCE(PLP_AFTER_FO_60D,0) AS PLP_AFTER_FO_60D,
COALESCE(PLP_AFTER_FO_90D,0) AS PLP_AFTER_FO_90D,
COALESCE(PLP_AFTER_FO_120D,0) AS PLP_AFTER_FO_120D,
COALESCE(PLP_AFTER_FO_150D,0) AS PLP_AFTER_FO_150D,
COALESCE(PLP_AFTER_FO_180D,0) AS PLP_AFTER_FO_180D,
COALESCE(HP_BEFORE_FO,0) AS HP_BEFORE_FO,
COALESCE(HP_AFTER_FO_30D,0) AS HP_AFTER_FO_30D,
COALESCE(HP_AFTER_FO_60D,0) AS HP_AFTER_FO_60D,
COALESCE(HP_AFTER_FO_90D,0) AS HP_AFTER_FO_90D,
COALESCE(HP_AFTER_FO_120D,0) AS HP_AFTER_FO_120D,
COALESCE(HP_AFTER_FO_150D,0) AS HP_AFTER_FO_150D,
COALESCE(HP_AFTER_FO_180D,0) AS HP_AFTER_FO_180D,
COALESCE(ATC_BEFORE_FO,0) AS ATC_BEFORE_FO,
COALESCE(ATC_AFTER_FO_30D,0) AS ATC_AFTER_FO_30D,
COALESCE(ATC_AFTER_FO_60D,0) AS ATC_AFTER_FO_60D,
COALESCE(ATC_AFTER_FO_90D,0) AS ATC_AFTER_FO_90D,
COALESCE(ATC_AFTER_FO_120D,0) AS ATC_AFTER_FO_120D,
COALESCE(ATC_AFTER_FO_150D,0) AS ATC_AFTER_FO_150D,
COALESCE(ATC_AFTER_FO_180D,0) AS ATC_AFTER_FO_180D
 
FROM ECOM_SANDBOX.MC1_ANALYSIS_SV A
LEFT JOIN FIRST_WEB_VISIT_AFTER_FO B
ON A.CUSTOMER_ID = B.CUSTOMER_ID
 
LEFT JOIN SESSIONS C
ON A.CUSTOMER_ID = C.CUSTOMER_ID"""

In [167]:
data_interaction = exec_qry_all(qry2)
data_interaction.head()

Unnamed: 0,CUSTOMER_ID,ACTIVE_3M,FIRST_ORDER_MC2,CURRENT_NUMBER_OF_MC1,FIRST_ORDER_APPAREL,FIRST_ORDER_LEASH,FIRST_ORDER_TREAT,FIRST_ORDER_TOY,FIRST_ORDER_BED,FIRST_ORDER_LITTER,...,HP_AFTER_FO_120D,HP_AFTER_FO_150D,HP_AFTER_FO_180D,ATC_BEFORE_FO,ATC_AFTER_FO_30D,ATC_AFTER_FO_60D,ATC_AFTER_FO_90D,ATC_AFTER_FO_120D,ATC_AFTER_FO_150D,ATC_AFTER_FO_180D
0,141557459,1,FOOD-,1,0,0,0,0,0,0,...,0,0,0,2,2,0,0,0,0,0
1,138749777,0,FOOD-LITTER-,1,0,0,0,0,0,1,...,0,0,0,3,3,0,0,0,0,0
2,153648897,0,TREAT-,1,0,0,1,0,0,0,...,0,0,0,1,1,0,11,0,0,0
3,146157029,1,TREAT-FOOD-,3,0,0,1,0,0,0,...,0,0,0,7,7,0,0,0,0,0
4,145169893,0,FOOD-,2,0,0,0,0,0,0,...,0,39,62,2,3,0,0,0,1,1


In [374]:
food_int = data_interaction[(data_interaction['FIRST_ORDER_FOOD'] == 1)&(data_interaction['FIRST_ORDER_TREAT'] == 0)&(data_interaction['FIRST_ORDER_LITTER'] == 0)&(data_interaction['FIRST_ORDER_PREMIUM'] == 0)&(data_interaction['FIRST_ORDER_PERISHABLE'] == 0)]
treat_food_int = data_interaction[(data_interaction['FIRST_ORDER_FOOD'] == 1)&(data_interaction['FIRST_ORDER_TREAT'] == 1)&(data_interaction['FIRST_ORDER_LITTER'] == 0)&(data_interaction['FIRST_ORDER_PREMIUM'] == 0)&(data_interaction['FIRST_ORDER_PERISHABLE'] == 0)]
treat_int = data_interaction[(data_interaction['FIRST_ORDER_FOOD'] == 0)&(data_interaction['FIRST_ORDER_TREAT'] == 1)&(data_interaction['FIRST_ORDER_LITTER'] == 0)&(data_interaction['FIRST_ORDER_PREMIUM'] == 0)&(data_interaction['FIRST_ORDER_PERISHABLE'] == 0)]
litter_int = data_interaction[(data_interaction['FIRST_ORDER_FOOD'] == 0)&(data_interaction['FIRST_ORDER_TREAT'] == 0)&(data_interaction['FIRST_ORDER_LITTER'] == 1)&(data_interaction['FIRST_ORDER_PREMIUM'] == 0)&(data_interaction['FIRST_ORDER_PERISHABLE'] == 0)]
food_litter_int = data_interaction[(data_interaction['FIRST_ORDER_FOOD'] == 1)&(data_interaction['FIRST_ORDER_TREAT'] == 0)&(data_interaction['FIRST_ORDER_LITTER'] == 1)&(data_interaction['FIRST_ORDER_PREMIUM'] == 0)&(data_interaction['FIRST_ORDER_PERISHABLE'] == 0)]
premium_int = data_interaction[(data_interaction['FIRST_ORDER_FOOD'] == 0)&(data_interaction['FIRST_ORDER_TREAT'] == 0)&(data_interaction['FIRST_ORDER_LITTER'] == 0)&(data_interaction['FIRST_ORDER_PREMIUM'] == 1)&(data_interaction['FIRST_ORDER_PERISHABLE'] == 0)]
food_premium_int = data_interaction[(data_interaction['FIRST_ORDER_FOOD'] == 1)&(data_interaction['FIRST_ORDER_TREAT'] == 0)&(data_interaction['FIRST_ORDER_LITTER'] == 0)&(data_interaction['FIRST_ORDER_PREMIUM'] == 1)&(data_interaction['FIRST_ORDER_PERISHABLE'] == 0)]
treat_food_litter_int = data_interaction[(data_interaction['FIRST_ORDER_FOOD'] == 1)&(data_interaction['FIRST_ORDER_TREAT'] == 1)&(data_interaction['FIRST_ORDER_LITTER'] == 1)&(data_interaction['FIRST_ORDER_PREMIUM'] == 0)&(data_interaction['FIRST_ORDER_PERISHABLE'] == 0)]

col_interact = ['Days to first session after FO','First purchase rate after FO','First PDP hits post FO','First PLP hits post FO','First HP hits post FO','Min PDP hit number','Min PLP hit number','Min HP hit number']

---
Which pages do they interact with for customers that are active on site?

In [375]:
days_fs = np.round(data_interaction['DAYS_TO_FIRST_SESSION_AFTER_FO'].mean(),2)
first_purchase = data_interaction['FIRST_VISIT_PURCHASED'].value_counts(normalize = True).mul(100).round(2)[1]
first_time_pdp = np.round(data_interaction['FIRST_VISIT_PDP'].mean(),2)
first_time_plp = np.round(data_interaction['FIRST_VISIT_PLP'].mean(),2)
first_time_hp = np.round(data_interaction['FIRST_VISIT_HP'].mean(),2)
min_pdp = np.round(data_interaction['MIN_PDP_HIT_NUMBER'].mean(),2)
min_plp = np.round(data_interaction['MIN_PLP_HIT_NUMBER'].mean(),2)
min_hp = np.round(data_interaction['MIN_HP_HIT_NUMBER'].mean(),2)

pd.DataFrame(np.array([[days_fs,first_purchase,first_time_pdp,first_time_plp,first_time_hp,min_pdp,min_plp,min_hp]]),columns = col_interact, index = ['First Order Consumables']) 

Unnamed: 0,Days to first session after FO,First purchase rate after FO,First PDP hits post FO,First PLP hits post FO,First HP hits post FO,Min PDP hit number,Min PLP hit number,Min HP hit number
First Order Consumables,35.73,19.17,1.56,2.12,14.01,47.81,34.33,14.82


In [396]:
days, purchase, first_pdp, first_plp, first_hp, pdp_min, plp_min, hp_min = [],[],[],[],[],[],[],[]

days.append(np.round(food_int['DAYS_TO_FIRST_SESSION_AFTER_FO'].mean(),2))
days.append(np.round(treat_food_int['DAYS_TO_FIRST_SESSION_AFTER_FO'].mean(),2))
days.append(np.round(treat_int['DAYS_TO_FIRST_SESSION_AFTER_FO'].mean(),2))
days.append(np.round(litter_int['DAYS_TO_FIRST_SESSION_AFTER_FO'].mean(),2))
days.append(np.round(food_litter_int['DAYS_TO_FIRST_SESSION_AFTER_FO'].mean(),2))
days.append(np.round(premium_int['DAYS_TO_FIRST_SESSION_AFTER_FO'].mean(),2))
days.append(np.round(food_premium_int['DAYS_TO_FIRST_SESSION_AFTER_FO'].mean(),2))
days.append(np.round(treat_food_litter_int['DAYS_TO_FIRST_SESSION_AFTER_FO'].mean(),2))

purchase.append(food_int['FIRST_VISIT_PURCHASED'].value_counts(normalize = True).mul(100).round(2)[1])
purchase.append(treat_food_int['FIRST_VISIT_PURCHASED'].value_counts(normalize = True).mul(100).round(2)[1])
purchase.append(treat_int['FIRST_VISIT_PURCHASED'].value_counts(normalize = True).mul(100).round(2)[1])
purchase.append(litter_int['FIRST_VISIT_PURCHASED'].value_counts(normalize = True).mul(100).round(2)[1])
purchase.append(food_litter_int['FIRST_VISIT_PURCHASED'].value_counts(normalize = True).mul(100).round(2)[1])
purchase.append(premium_int['FIRST_VISIT_PURCHASED'].value_counts(normalize = True).mul(100).round(2)[1])
purchase.append(food_premium_int['FIRST_VISIT_PURCHASED'].value_counts(normalize = True).mul(100).round(2)[1])
purchase.append(treat_food_litter_int['FIRST_VISIT_PURCHASED'].value_counts(normalize = True).mul(100).round(2)[1])

first_pdp.append(np.round(food_int['FIRST_VISIT_PDP'].mean(),2))
first_pdp.append(np.round(treat_food_int['FIRST_VISIT_PDP'].mean(),2))
first_pdp.append(np.round(treat_int['FIRST_VISIT_PDP'].mean(),2))
first_pdp.append(np.round(litter_int['FIRST_VISIT_PDP'].mean(),2))
first_pdp.append(np.round(food_litter_int['FIRST_VISIT_PDP'].mean(),2))
first_pdp.append(np.round(premium_int['FIRST_VISIT_PDP'].mean(),2))
first_pdp.append(np.round(food_premium_int['FIRST_VISIT_PDP'].mean(),2))
first_pdp.append(np.round(treat_food_litter_int['FIRST_VISIT_PDP'].mean(),2))

first_plp.append(np.round(food_int['FIRST_VISIT_PLP'].mean(),2))
first_plp.append(np.round(treat_food_int['FIRST_VISIT_PLP'].mean(),2))
first_plp.append(np.round(treat_int['FIRST_VISIT_PLP'].mean(),2))
first_plp.append(np.round(litter_int['FIRST_VISIT_PLP'].mean(),2))
first_plp.append(np.round(food_litter_int['FIRST_VISIT_PLP'].mean(),2))
first_plp.append(np.round(premium_int['FIRST_VISIT_PLP'].mean(),2))
first_plp.append(np.round(food_premium_int['FIRST_VISIT_PLP'].mean(),2))
first_plp.append(np.round(treat_food_litter_int['FIRST_VISIT_PLP'].mean(),2))

first_hp.append(np.round(food_int['FIRST_VISIT_HP'].mean(),2))
first_hp.append(np.round(treat_food_int['FIRST_VISIT_HP'].mean(),2))
first_hp.append(np.round(treat_int['FIRST_VISIT_HP'].mean(),2))
first_hp.append(np.round(litter_int['FIRST_VISIT_HP'].mean(),2))
first_hp.append(np.round(food_litter_int['FIRST_VISIT_HP'].mean(),2))
first_hp.append(np.round(premium_int['FIRST_VISIT_HP'].mean(),2))
first_hp.append(np.round(food_premium_int['FIRST_VISIT_HP'].mean(),2))
first_hp.append(np.round(treat_food_litter_int['FIRST_VISIT_HP'].mean(),2))

pdp_min.append(np.round(food_int['MIN_PDP_HIT_NUMBER'].mean(),2))
pdp_min.append(np.round(treat_food_int['MIN_PDP_HIT_NUMBER'].mean(),2))
pdp_min.append(np.round(treat_int['MIN_PDP_HIT_NUMBER'].mean(),2))
pdp_min.append(np.round(litter_int['MIN_PDP_HIT_NUMBER'].mean(),2))
pdp_min.append(np.round(food_litter_int['MIN_PDP_HIT_NUMBER'].mean(),2))
pdp_min.append(np.round(premium_int['MIN_PDP_HIT_NUMBER'].mean(),2))
pdp_min.append(np.round(food_premium_int['MIN_PDP_HIT_NUMBER'].mean(),2))
pdp_min.append(np.round(treat_food_litter_int['MIN_PDP_HIT_NUMBER'].mean(),2))

plp_min.append(np.round(food_int['MIN_PLP_HIT_NUMBER'].mean(),2))
plp_min.append(np.round(treat_food_int['MIN_PLP_HIT_NUMBER'].mean(),2))
plp_min.append(np.round(treat_int['MIN_PLP_HIT_NUMBER'].mean(),2))
plp_min.append(np.round(litter_int['MIN_PLP_HIT_NUMBER'].mean(),2))
plp_min.append(np.round(food_litter_int['MIN_PLP_HIT_NUMBER'].mean(),2))
plp_min.append(np.round(premium_int['MIN_PLP_HIT_NUMBER'].mean(),2))
plp_min.append(np.round(food_premium_int['MIN_PLP_HIT_NUMBER'].mean(),2))
plp_min.append(np.round(treat_food_litter_int['MIN_PLP_HIT_NUMBER'].mean(),2))

hp_min.append(np.round(food_int['MIN_HP_HIT_NUMBER'].mean(),2))
hp_min.append(np.round(treat_food_int['MIN_HP_HIT_NUMBER'].mean(),2))
hp_min.append(np.round(treat_int['MIN_HP_HIT_NUMBER'].mean(),2))
hp_min.append(np.round(litter_int['MIN_HP_HIT_NUMBER'].mean(),2))
hp_min.append(np.round(food_litter_int['MIN_HP_HIT_NUMBER'].mean(),2))
hp_min.append(np.round(premium_int['MIN_HP_HIT_NUMBER'].mean(),2))
hp_min.append(np.round(food_premium_int['MIN_HP_HIT_NUMBER'].mean(),2))
hp_min.append(np.round(treat_food_litter_int['MIN_HP_HIT_NUMBER'].mean(),2))

pd.DataFrame(list(zip(days,purchase,first_pdp,first_plp,first_hp,pdp_min,plp_min,hp_min)),columns = col_interact, index = mc2_name) 

Unnamed: 0,Days to first session after FO,First purchase rate after FO,First PDP hits post FO,First PLP hits post FO,First HP hits post FO,Min PDP hit number,Min PLP hit number,Min HP hit number
CORE FOODS,35.61,18.49,1.35,1.86,13.64,47.93,33.52,13.11
CORE TREAT-FOODS,35.6,19.51,1.77,2.52,15.13,52.21,35.78,17.08
CORE TREAT,36.34,22.56,1.89,2.41,13.61,43.47,31.67,19.51
LITTER,38.57,19.3,1.58,1.94,11.32,38.41,31.84,18.56
CORE FOOD-LITTER,37.0,19.55,1.85,2.62,16.19,56.28,39.86,13.81
PREMIUM,33.4,18.16,1.53,1.92,13.38,43.15,31.92,11.64
CORE FOODS-PREMIUM,36.3,20.86,2.33,3.39,17.06,46.02,33.86,18.43
CORE-TREAT-FOODS-LITTER,34.46,17.73,1.79,2.56,15.83,54.63,44.3,20.99


In [377]:
# data_active = data_interaction
# days, purchase, first_pdp, first_plp, first_hp, pdp_min, plp_min, hp_min = [],[],[],[],[],[],[],[]
# for x in mc2:
#     days.append(np.round(data_active[data_active[x] == 1]['DAYS_TO_FIRST_SESSION_AFTER_FO'].mean(),2))
#     purchase.append(data_active[data_active[x] == 1]['FIRST_VISIT_PURCHASED'].value_counts(normalize = True).mul(100).round(2)[1])
#     first_pdp.append(np.round(data_active[data_active[x] == 1]['FIRST_VISIT_PDP'].mean(),2))
#     first_plp.append(np.round(data_active[data_active[x] == 1]['FIRST_VISIT_PLP'].mean(),2))
#     first_hp.append(np.round(data_active[data_active[x] == 1]['FIRST_VISIT_HP'].mean(),2))
#     pdp_min.append(np.round(data_active[data_active[x] == 1]['MIN_PDP_HIT_NUMBER'].mean(),2))
#     plp_min.append(np.round(data_active[data_active[x] == 1]['MIN_PLP_HIT_NUMBER'].mean(),2))
#     hp_min.append(np.round(data_active[data_active[x] == 1]['MIN_HP_HIT_NUMBER'].mean(),2))

# pd.DataFrame(list(zip(days,purchase,first_pdp,first_plp,first_hp,pdp_min,plp_min,hp_min)),columns = col_interact, index = mc2_names) 

---
Do the customers come from a specific traffic source?

In [378]:
sources = ['FIRST_EMAIL','FIRST_DIRECT','FIRST_PLA','FIRST_OS','FIRST_CPS','FIRST_AFF']
source_name = ['Email','Direct','PLA','Organic Search','Chewy Paid Search','Affiliates']

email = np.round(data_interaction['FIRST_EMAIL'].sum()/len(data_interaction)*100,2)
direct = np.round(data_interaction['FIRST_DIRECT'].sum()/len(data_interaction)*100,2)
pla = np.round(data_interaction['FIRST_PLA'].sum()/len(data_interaction)*100,2)
orgsearch = np.round(data_interaction['FIRST_OS'].sum()/len(data_interaction)*100,2)
cps = np.round(data_interaction['FIRST_CPS'].sum()/len(data_interaction)*100,2)
aff = np.round(data_interaction['FIRST_AFF'].sum()/len(data_interaction)*100,2)

pd.DataFrame(np.array([[email,direct,pla,orgsearch,cps,aff]]),columns = source_name, index = ['First Order Consumables'])

Unnamed: 0,Email,Direct,PLA,Organic Search,Chewy Paid Search,Affiliates
First Order Consumables,32.51,24.13,7.17,6.93,7.45,4.62


In [399]:
e,d,p,o,c,a = [],[],[],[],[],[]

e.append(np.round(food_int['FIRST_EMAIL'].sum()/len(food_int)*100,2))
e.append(np.round(treat_food_int['FIRST_EMAIL'].sum()/len(treat_food_int)*100,2))
e.append(np.round(treat_int['FIRST_EMAIL'].sum()/len(treat_int)*100,2))
e.append(np.round(litter_int['FIRST_EMAIL'].sum()/len(litter_int)*100,2))
e.append(np.round(food_litter_int['FIRST_EMAIL'].sum()/len(food_litter_int)*100,2))
e.append(np.round(premium_int['FIRST_EMAIL'].sum()/len(premium_int)*100,2))
e.append(np.round(food_premium_int['FIRST_EMAIL'].sum()/len(food_premium_int)*100,2))
e.append(np.round(treat_food_litter_int['FIRST_EMAIL'].sum()/len(treat_food_litter_int)*100,2))

d.append(np.round(food_int['FIRST_DIRECT'].sum()/len(food_int)*100,2))
d.append(np.round(treat_food_int['FIRST_DIRECT'].sum()/len(treat_food_int)*100,2))
d.append(np.round(treat_int['FIRST_DIRECT'].sum()/len(treat_int)*100,2))
d.append(np.round(litter_int['FIRST_DIRECT'].sum()/len(litter_int)*100,2))
d.append(np.round(food_litter_int['FIRST_DIRECT'].sum()/len(food_litter_int)*100,2))
d.append(np.round(premium_int['FIRST_DIRECT'].sum()/len(premium_int)*100,2))
d.append(np.round(food_premium_int['FIRST_DIRECT'].sum()/len(food_premium_int)*100,2))
d.append(np.round(treat_food_litter_int['FIRST_DIRECT'].sum()/len(treat_food_litter_int)*100,2))

p.append(np.round(food_int['FIRST_PLA'].sum()/len(food_int)*100,2))
p.append(np.round(treat_food_int['FIRST_PLA'].sum()/len(treat_food_int)*100,2))
p.append(np.round(treat_int['FIRST_PLA'].sum()/len(treat_int)*100,2))
p.append(np.round(litter_int['FIRST_PLA'].sum()/len(litter_int)*100,2))
p.append(np.round(food_litter_int['FIRST_PLA'].sum()/len(food_litter_int)*100,2))
p.append(np.round(premium_int['FIRST_PLA'].sum()/len(premium_int)*100,2))
p.append(np.round(food_premium_int['FIRST_PLA'].sum()/len(food_premium_int)*100,2))
p.append(np.round(treat_food_litter_int['FIRST_PLA'].sum()/len(treat_food_litter_int)*100,2))

o.append(np.round(food_int['FIRST_OS'].sum()/len(food_int)*100,2))
o.append(np.round(treat_food_int['FIRST_OS'].sum()/len(treat_food_int)*100,2))
o.append(np.round(treat_int['FIRST_OS'].sum()/len(treat_int)*100,2))
o.append(np.round(litter_int['FIRST_OS'].sum()/len(litter_int)*100,2))
o.append(np.round(food_litter_int['FIRST_OS'].sum()/len(food_litter_int)*100,2))
o.append(np.round(premium_int['FIRST_OS'].sum()/len(premium_int)*100,2))
o.append(np.round(food_premium_int['FIRST_OS'].sum()/len(food_premium_int)*100,2))
o.append(np.round(treat_food_litter_int['FIRST_OS'].sum()/len(treat_food_litter_int)*100,2))

a.append(np.round(food_int['FIRST_AFF'].sum()/len(food_int)*100,2))
a.append(np.round(treat_food_int['FIRST_AFF'].sum()/len(treat_food_int)*100,2))
a.append(np.round(treat_int['FIRST_AFF'].sum()/len(treat_int)*100,2))
a.append(np.round(litter_int['FIRST_AFF'].sum()/len(litter_int)*100,2))
a.append(np.round(food_litter_int['FIRST_AFF'].sum()/len(food_litter_int)*100,2))
a.append(np.round(premium_int['FIRST_AFF'].sum()/len(premium_int)*100,2))
a.append(np.round(food_premium_int['FIRST_AFF'].sum()/len(food_premium_int)*100,2))
a.append(np.round(treat_food_litter_int['FIRST_AFF'].sum()/len(treat_food_litter_int)*100,2))

c.append(np.round(food_int['FIRST_CPS'].sum()/len(food_int)*100,2))
c.append(np.round(treat_food_int['FIRST_CPS'].sum()/len(treat_food_int)*100,2))
c.append(np.round(treat_int['FIRST_CPS'].sum()/len(treat_int)*100,2))
c.append(np.round(litter_int['FIRST_CPS'].sum()/len(litter_int)*100,2))
c.append(np.round(food_litter_int['FIRST_CPS'].sum()/len(food_litter_int)*100,2))
c.append(np.round(premium_int['FIRST_CPS'].sum()/len(premium_int)*100,2))
c.append(np.round(food_premium_int['FIRST_CPS'].sum()/len(food_premium_int)*100,2))
c.append(np.round(treat_food_litter_int['FIRST_CPS'].sum()/len(treat_food_litter_int)*100,2))

pd.DataFrame(list(zip(e,d,p,o,c,a)),columns = source_name, index = mc2_name)

Unnamed: 0,Email,Direct,PLA,Organic Search,Chewy Paid Search,Affiliates
CORE FOODS,32.81,24.91,6.89,6.84,7.49,4.39
CORE TREAT-FOODS,33.8,26.02,6.11,6.75,8.13,5.23
CORE TREAT,30.39,18.6,8.66,6.54,6.99,3.92
LITTER,30.43,19.3,10.74,6.3,5.85,4.06
CORE FOOD-LITTER,33.51,26.4,5.3,7.62,8.73,4.46
PREMIUM,30.64,24.66,7.3,8.08,5.45,5.85
CORE FOODS-PREMIUM,27.36,25.17,8.77,10.3,6.48,8.01
CORE-TREAT-FOODS-LITTER,34.65,25.27,4.85,7.35,10.68,3.75


In [361]:
# e,d,p,o,c,a,di,r,s = [],[],[],[],[],[],[],[],[]
# for x in mc2:
#     e.append(np.round(data_interaction[data_interaction[x] == 1]['FIRST_EMAIL'].sum()/len(data_interaction[data_interaction[x] == 1])*100,2))
#     d.append(np.round(data_interaction[data_interaction[x] == 1]['FIRST_DIRECT'].sum()/len(data_interaction[data_interaction[x] == 1])*100,2))
#     p.append(np.round(data_interaction[data_interaction[x] == 1]['FIRST_PLA'].sum()/len(data_interaction[data_interaction[x] == 1])*100,2))
#     o.append(np.round(data_interaction[data_interaction[x] == 1]['FIRST_OS'].sum()/len(data_interaction[data_interaction[x] == 1])*100,2))
#     c.append(np.round(data_interaction[data_interaction[x] == 1]['FIRST_CPS'].sum()/len(data_interaction[data_interaction[x] == 1])*100,2))
#     a.append(np.round(data_interaction[data_interaction[x] == 1]['FIRST_AFF'].sum()/len(data_interaction[data_interaction[x] == 1])*100,2))
#     di.append(np.round(data_interaction[data_interaction[x] == 1]['FIRST_DIS'].sum()/len(data_interaction[data_interaction[x] == 1])*100,2))
#     r.append(np.round(data_interaction[data_interaction[x] == 1]['FIRST_REF'].sum()/len(data_interaction[data_interaction[x] == 1])*100,2))
#     s.append(np.round(data_interaction[data_interaction[x] == 1]['FIRST_SOC'].sum()/len(data_interaction[data_interaction[x] == 1])*100,2))

# pd.DataFrame(list(zip(e,d,p,o,c,a,di,r,s)),columns = source_name, index = mc2_names) 

---

In [272]:
qry3 = """WITH ORDERS AS (
  
  SELECT
  CA.CUSTOMER_ID,
  CA.REGISTRATION_DATE,
  CA.FIRST_ORDER_DATE,
  OL.ORDER_ID,
  OL.PRODUCT_ID,
  PD.CATEGORY_LEVEL1,
  PD.CATEGORY_LEVEL2,
  PD.CATEGORY_LEVEL3,
  PD.MERCH_CLASSIFICATION1,
  PD.MERCH_CLASSIFICATION2,
  CAST(OL.ORDER_PLACED_DTTM AS DATE) AS ORDER_PLACED_DATE,
  OL.ORDER_LINE_TOTAL_PRICE AS ORDER_PRICE,
  CASE WHEN OL.ORDER_AUTO_REORDER_FLAG = 'TRUE' then 'Autoship' else 'Non_Autoship' end as ORDER_TYPE,
  ROW_NUMBER() OVER (PARTITION BY CA.CUSTOMER_ID, 
                     CASE WHEN OL.ORDER_AUTO_REORDER_FLAG = 'TRUE' then 'Autoship' else 'Non_Autoship' END ORDER BY OL.ORDER_PLACED_DTTM ASC) AS AS_RANKING,
  DENSE_RANK() OVER (PARTITION BY CA.CUSTOMER_ID ORDER BY OL.ORDER_PLACED_DTTM ASC) AS ORDER_RANKING,
  CASE WHEN (OL.PROMOTION_USED_FLAG = 'TRUE' and OL.ORDER_AUTO_REORDER_FLAG = 'FALSE') then 1 else 0 end as PROMO_USED
  
  FROM ECOM_SANDBOX.MC1_ANALYSIS_SV CA
  
  LEFT JOIN ECOM.ORDER_LINE OL
  ON CA.CUSTOMER_ID = OL.CUSTOMER_ID 
  AND OL.ORDER_LINE_TOTAL_PRICE != 0
  AND OL.ORDER_LINE_SHIPPED_DTTM IS NOT NULL
  AND OL.ORDER_STATUS NOT IN ('X', 'P', 'J')
  
  LEFT JOIN PDM.PRODUCT PD
  ON OL.PRODUCT_ID = PD.PRODUCT_ID

),

FIRST_ORDER AS (
  SELECT 
  CUSTOMER_ID,
  MAX(CASE WHEN PROMO_USED = 1 THEN 1 ELSE 0 END) AS FIRST_ORDER_PROMOTION_USED
  FROM ORDERS 
  WHERE ORDER_RANKING = 1
  GROUP BY 1
),

NEXT_ORDER AS (
  SELECT 
  CUSTOMER_ID,
  MAX(CASE WHEN PROMO_USED = 1 THEN 1 ELSE 0 END) AS AFTER_ORDER_PROMOTION_USED
  FROM ORDERS 
  WHERE ORDER_RANKING <> 1
  GROUP BY 1
)

SELECT 
A.CUSTOMER_ID,
A.ACTIVE_3M,
A.FIRST_ORDER_MC2,
CURRENT_NUMBER_OF_MC1,
FIRST_ORDER_APPAREL,
FIRST_ORDER_LEASH,
FIRST_ORDER_TREAT,
FIRST_ORDER_TOY,
FIRST_ORDER_BED,
FIRST_ORDER_LITTER,
FIRST_ORDER_FOOD,
FIRST_ORDER_BEAUTY,
FIRST_ORDER_CONTAINMENT,
FIRST_ORDER_PREMIUM,
FIRST_ORDER_BOWL,
FIRST_ORDER_PERSONALIZED,
FIRST_ORDER_WASTE,
FIRST_ORDER_PERISHABLE,
FIRST_ORDER_PROMOTION_USED,
AFTER_ORDER_PROMOTION_USED

FROM ECOM_SANDBOX.MC1_ANALYSIS_SV A
LEFT JOIN FIRST_ORDER B
ON A.CUSTOMER_ID = B.CUSTOMER_ID

LEFT JOIN NEXT_ORDER C
ON A.CUSTOMER_ID = C.CUSTOMER_ID"""

In [273]:
promo_data = exec_qry_all(qry3)
promo_data.head()

Unnamed: 0,CUSTOMER_ID,ACTIVE_3M,FIRST_ORDER_MC2,CURRENT_NUMBER_OF_MC1,FIRST_ORDER_APPAREL,FIRST_ORDER_LEASH,FIRST_ORDER_TREAT,FIRST_ORDER_TOY,FIRST_ORDER_BED,FIRST_ORDER_LITTER,FIRST_ORDER_FOOD,FIRST_ORDER_BEAUTY,FIRST_ORDER_CONTAINMENT,FIRST_ORDER_PREMIUM,FIRST_ORDER_BOWL,FIRST_ORDER_PERSONALIZED,FIRST_ORDER_WASTE,FIRST_ORDER_PERISHABLE,FIRST_ORDER_PROMOTION_USED,AFTER_ORDER_PROMOTION_USED
0,137382677,1,FOOD-LITTER-,2,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,1.0
1,129837711,0,FOOD-,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0.0
2,141452980,1,TREAT-,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0.0
3,132910298,1,FOOD-,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0.0
4,156813854,0,FOOD-,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,


In [380]:
food_promo = promo_data[(promo_data['FIRST_ORDER_FOOD'] == 1)&(promo_data['FIRST_ORDER_TREAT'] == 0)&(promo_data['FIRST_ORDER_LITTER'] == 0)&(promo_data['FIRST_ORDER_PREMIUM'] == 0)&(promo_data['FIRST_ORDER_PERISHABLE'] == 0)]
treat_food_promo = promo_data[(promo_data['FIRST_ORDER_FOOD'] == 1)&(promo_data['FIRST_ORDER_TREAT'] == 1)&(promo_data['FIRST_ORDER_LITTER'] == 0)&(promo_data['FIRST_ORDER_PREMIUM'] == 0)&(promo_data['FIRST_ORDER_PERISHABLE'] == 0)]
treat_promo = promo_data[(promo_data['FIRST_ORDER_FOOD'] == 0)&(promo_data['FIRST_ORDER_TREAT'] == 1)&(promo_data['FIRST_ORDER_LITTER'] == 0)&(promo_data['FIRST_ORDER_PREMIUM'] == 0)&(promo_data['FIRST_ORDER_PERISHABLE'] == 0)]
litter_promo = promo_data[(promo_data['FIRST_ORDER_FOOD'] == 0)&(promo_data['FIRST_ORDER_TREAT'] == 0)&(promo_data['FIRST_ORDER_LITTER'] == 1)&(promo_data['FIRST_ORDER_PREMIUM'] == 0)&(promo_data['FIRST_ORDER_PERISHABLE'] == 0)]
food_litter_promo = promo_data[(promo_data['FIRST_ORDER_FOOD'] == 1)&(promo_data['FIRST_ORDER_TREAT'] == 0)&(promo_data['FIRST_ORDER_LITTER'] == 1)&(promo_data['FIRST_ORDER_PREMIUM'] == 0)&(promo_data['FIRST_ORDER_PERISHABLE'] == 0)]
premium_promo = promo_data[(promo_data['FIRST_ORDER_FOOD'] == 0)&(promo_data['FIRST_ORDER_TREAT'] == 0)&(promo_data['FIRST_ORDER_LITTER'] == 0)&(promo_data['FIRST_ORDER_PREMIUM'] == 1)&(promo_data['FIRST_ORDER_PERISHABLE'] == 0)]
food_premium_promo = promo_data[(promo_data['FIRST_ORDER_FOOD'] == 1)&(promo_data['FIRST_ORDER_TREAT'] == 0)&(promo_data['FIRST_ORDER_LITTER'] == 0)&(promo_data['FIRST_ORDER_PREMIUM'] == 1)&(promo_data['FIRST_ORDER_PERISHABLE'] == 0)]
treat_food_litter_promo = promo_data[(promo_data['FIRST_ORDER_FOOD'] == 1)&(promo_data['FIRST_ORDER_TREAT'] == 1)&(promo_data['FIRST_ORDER_LITTER'] == 1)&(promo_data['FIRST_ORDER_PREMIUM'] == 0)&(promo_data['FIRST_ORDER_PERISHABLE'] == 0)]

col_promo = ['% Promo FO', '% Attrition Promo', '% Attrition Non-Promo']

---
Are they heavy promotions users? 

In [381]:
first_promo = promo_data['FIRST_ORDER_PROMOTION_USED'].value_counts(normalize = True).mul(100).round(2)[1]
promo_attr = promo_data[promo_data['FIRST_ORDER_PROMOTION_USED'] == 1]['ACTIVE_3M'].value_counts(normalize = True).mul(100).round(2)[0]
non_promo_attr = promo_data[promo_data['FIRST_ORDER_PROMOTION_USED'] == 0]['ACTIVE_3M'].value_counts(normalize = True).mul(100).round(2)[0]

pd.DataFrame(np.array([[first_promo,promo_attr,non_promo_attr]]),columns = col_promo, index = ['First Order Consumables']) 

Unnamed: 0,% Promo FO,% Attrition Promo,% Attrition Non-Promo
First Order Consumables,13.71,67.92,55.71


In [393]:
promo_per,attrition_promo,non_attrition_promo = [], [], []

promo_per.append(food_promo['FIRST_ORDER_PROMOTION_USED'].value_counts(normalize = True).mul(100).round(2)[1])
promo_per.append(treat_food_promo['FIRST_ORDER_PROMOTION_USED'].value_counts(normalize = True).mul(100).round(2)[1])
promo_per.append(treat_promo['FIRST_ORDER_PROMOTION_USED'].value_counts(normalize = True).mul(100).round(2)[1])
promo_per.append(litter_promo['FIRST_ORDER_PROMOTION_USED'].value_counts(normalize = True).mul(100).round(2)[1])
promo_per.append(food_litter_promo['FIRST_ORDER_PROMOTION_USED'].value_counts(normalize = True).mul(100).round(2)[1])
promo_per.append(premium_promo['FIRST_ORDER_PROMOTION_USED'].value_counts(normalize = True).mul(100).round(2)[1])
promo_per.append(food_premium_promo['FIRST_ORDER_PROMOTION_USED'].value_counts(normalize = True).mul(100).round(2)[1])
promo_per.append(treat_food_litter_promo['FIRST_ORDER_PROMOTION_USED'].value_counts(normalize = True).mul(100).round(2)[1])

attrition_promo.append(food_promo[food_promo['FIRST_ORDER_PROMOTION_USED'] == 1]['ACTIVE_3M'].value_counts(normalize = True).mul(100).round(2)[0])
attrition_promo.append(treat_food_promo[treat_food_promo['FIRST_ORDER_PROMOTION_USED'] == 1]['ACTIVE_3M'].value_counts(normalize = True).mul(100).round(2)[0])
attrition_promo.append(treat_promo[treat_promo['FIRST_ORDER_PROMOTION_USED'] == 1]['ACTIVE_3M'].value_counts(normalize = True).mul(100).round(2)[0])
attrition_promo.append(litter_promo[litter_promo['FIRST_ORDER_PROMOTION_USED'] == 1]['ACTIVE_3M'].value_counts(normalize = True).mul(100).round(2)[0])
attrition_promo.append(food_litter_promo[food_litter_promo['FIRST_ORDER_PROMOTION_USED'] == 1]['ACTIVE_3M'].value_counts(normalize = True).mul(100).round(2)[0])
attrition_promo.append(premium_promo[premium_promo['FIRST_ORDER_PROMOTION_USED'] == 1]['ACTIVE_3M'].value_counts(normalize = True).mul(100).round(2)[0])
attrition_promo.append(food_premium_promo[food_premium_promo['FIRST_ORDER_PROMOTION_USED'] == 1]['ACTIVE_3M'].value_counts(normalize = True).mul(100).round(2)[0])
attrition_promo.append(treat_food_litter_promo[treat_food_litter_promo['FIRST_ORDER_PROMOTION_USED'] == 1]['ACTIVE_3M'].value_counts(normalize = True).mul(100).round(2)[0])

non_attrition_promo.append(food_promo[food_promo['FIRST_ORDER_PROMOTION_USED'] == 0]['ACTIVE_3M'].value_counts(normalize = True).mul(100).round(2)[0])
non_attrition_promo.append(treat_food_promo[treat_food_promo['FIRST_ORDER_PROMOTION_USED'] == 0]['ACTIVE_3M'].value_counts(normalize = True).mul(100).round(2)[0])
non_attrition_promo.append(treat_promo[treat_promo['FIRST_ORDER_PROMOTION_USED'] == 0]['ACTIVE_3M'].value_counts(normalize = True).mul(100).round(2)[0])
non_attrition_promo.append(litter_promo[litter_promo['FIRST_ORDER_PROMOTION_USED'] == 0]['ACTIVE_3M'].value_counts(normalize = True).mul(100).round(2)[0])
non_attrition_promo.append(food_litter_promo[food_litter_promo['FIRST_ORDER_PROMOTION_USED'] == 0]['ACTIVE_3M'].value_counts(normalize = True).mul(100).round(2)[0])
non_attrition_promo.append(premium_promo[premium_promo['FIRST_ORDER_PROMOTION_USED'] == 0]['ACTIVE_3M'].value_counts(normalize = True).mul(100).round(2)[0])
non_attrition_promo.append(food_premium_promo[food_premium_promo['FIRST_ORDER_PROMOTION_USED'] == 0]['ACTIVE_3M'].value_counts(normalize = True).mul(100).round(2)[0])
non_attrition_promo.append(treat_food_litter_promo[treat_food_litter_promo['FIRST_ORDER_PROMOTION_USED'] == 0]['ACTIVE_3M'].value_counts(normalize = True).mul(100).round(2)[0])

pd.DataFrame(list(zip(promo_per,attrition_promo,non_attrition_promo)),columns = col_promo, index = mc2_name) 

Unnamed: 0,% Promo FO,% Attrition Promo,% Attrition Non-Promo
CORE FOODS,12.65,69.0,53.94
CORE TREAT-FOODS,19.69,65.71,51.53
CORE TREAT,12.49,71.79,67.19
LITTER,8.6,77.79,66.55
CORE FOOD-LITTER,16.53,62.82,48.92
PREMIUM,9.48,74.12,59.27
CORE FOODS-PREMIUM,17.45,65.03,53.46
CORE-TREAT-FOODS-LITTER,21.21,59.71,49.67


In [366]:
# promo_per,attrition_promo,non_attrition_promo = [], [], []
# for x in mc2:
#     promo_per.append(promo_data[promo_data[x] == 1]['FIRST_ORDER_PROMOTION_USED'].value_counts(normalize = True).mul(100).round(2)[1])
#     attrition_promo.append(promo_data[(promo_data['FIRST_ORDER_PROMOTION_USED'] == 1)&(promo_data[x] == 1)]['ACTIVE_3M'].value_counts(normalize = True).mul(100).round(2)[0])
#     non_attrition_promo.append(promo_data[(promo_data['FIRST_ORDER_PROMOTION_USED'] == 0)&(promo_data[x] == 1)]['ACTIVE_3M'].value_counts(normalize = True).mul(100).round(2)[0])

# pd.DataFrame(list(zip(promo_per,attrition_promo,non_attrition_promo)),columns = col_promo, index = mc2_names) 

---

---

---