# Analyze Customer Segments

## Setup

In [None]:
# Import python packages
import streamlit as st
import pandas as pd

# Snowpark is available for the analyses too: fetch the currently active
# Snowpark session. The cells below issue their SQL through `session`.
from snowflake.snowpark.context import get_active_session
session = get_active_session()


In [None]:
from snowflake.snowpark.version import VERSION

# One query returns a single row: (connected user, Snowflake server version).
snowflake_environment = session.sql('SELECT current_user(), current_version()').collect()
snowpark_version = VERSION

# Report the connection context. User and server version come from the row
# fetched above; role, database, schema and warehouse come from the session.
print('\nConnection Established with the following parameters:')
print(f'User                        : {snowflake_environment[0][0]}')
print(f'Role                        : {session.get_current_role()}')
print(f'Database                    : {session.get_current_database()}')
print(f'Schema                      : {session.get_current_schema()}')
print(f'Warehouse                   : {session.get_current_warehouse()}')
print(f'Snowflake version           : {snowflake_environment[0][1]}')
# Only the first three components of the Snowpark version are shown.
print(f'Snowpark for Python version : {snowpark_version[0]}.{snowpark_version[1]}.{snowpark_version[2]}')

## Segmentation Statistics

In [None]:
# Segment sizes: join each customer to its node id, then to that node's
# metrics row, and count customers per segment (largest segment first).
print('Number of customers per segment')
# The query has no placeholders, so it is a plain string rather than an
# f-string; a literal brace added later would otherwise be parsed as a field.
# The result is left as the cell's last expression so the notebook renders it.
session.sql('''
    SELECT customer_metrics.customer_segment AS customer_segment, COUNT(*) AS cnt
    FROM customers
    JOIN customer_node_mapping ON customer_node_mapping.customerId=customers.customerId
    JOIN customer_metrics ON customer_node_mapping.nodeId=customer_metrics.nodeId
    GROUP BY customer_segment
    ORDER BY cnt DESC
''').to_pandas()

## Customer Demographics

In [None]:
# Load every customer row together with the segment of the node it maps to.
print('Customers')
# Plain string (not an f-string): the query contains no placeholders.
customer_df = session.sql('''
    SELECT customer_metrics.customer_segment AS customer_segment, customers.*
    FROM customers
    JOIN customer_node_mapping ON customer_node_mapping.customerId=customers.customerId
    JOIN customer_metrics ON customer_node_mapping.nodeId=customer_metrics.nodeId
    ORDER BY customer_metrics.customer_segment DESC
''').to_pandas()

# Store the segment as a string label; the purchase frame is cast the same way
# before both are matched against `valid_segments`.
# NOTE(review): presumably this also makes plots treat segments as discrete
# categories rather than a numeric axis -- confirm.
customer_df['CUSTOMER_SEGMENT'] = customer_df['CUSTOMER_SEGMENT'].astype(str)
# Last expression of the cell, so the notebook displays the frame.
customer_df

In [None]:
# Segments that contain more than one customer. The count is the number of
# non-null CUSTOMERID values per segment; single-customer segments are dropped.
valid_segments = (
    customer_df.groupby('CUSTOMER_SEGMENT')['CUSTOMERID']
    .count()
    .loc[lambda sizes: sizes > 1]
    .index
    .tolist()
)

In [None]:
import plotly.express as px

# Box plot of AGE per segment, restricted to the segments in `valid_segments`.
fig = px.box(
    customer_df[customer_df['CUSTOMER_SEGMENT'].isin(valid_segments)],
    x='CUSTOMER_SEGMENT',
    y='AGE',
)
# The title is a plain string: it has no placeholders, so no f-prefix is needed.
fig.update_layout(
    title_text="Cohort Age Distributions",
    font_size=10,
    height=500,
)
# Last expression of the cell, so the notebook renders the figure.
fig

## Purchase Behavior

In [None]:
# Most purchased products per segment.
# Innermost query: purchase count per (segment, product name, product type).
# Middle query:    rank those counts within each segment, highest count first.
# Outer query:     keep ranks 1-5. RANK() gives ties the same rank, so a
#                  segment can return more than five rows.
# Plain string (not an f-string): the query contains no placeholders.
purchase_by_segment_df = session.sql('''
    SELECT customer_segment, prodname, producttypename, cnt, rank
    FROM (
        SELECT customer_segment, prodname, producttypename, cnt, 
            RANK() OVER (PARTITION BY customer_segment ORDER BY cnt DESC) AS rank
        FROM (
            SELECT customer_segment, prodname, producttypename, COUNT(*) AS cnt
            FROM CUSTOMER_PURCHASES
            GROUP BY customer_segment, prodname, producttypename
        )
    ) 
    WHERE Rank <=5
''').to_pandas()

# Cast the segment to a string label so it can be compared with the entries of
# `valid_segments`, then keep only the rows belonging to those segments.
purchase_by_segment_df['CUSTOMER_SEGMENT'] = purchase_by_segment_df['CUSTOMER_SEGMENT'].astype(str)
purchase_by_segment_df = purchase_by_segment_df[
    purchase_by_segment_df['CUSTOMER_SEGMENT'].isin(valid_segments)
]
# Last expression of the cell, so the notebook displays the frame.
purchase_by_segment_df

In [None]:
# Bar chart of purchase counts by product type, one facet row per segment.
fig = px.bar(
    purchase_by_segment_df,
    x="PRODUCTTYPENAME",
    y="CNT",
    facet_row="CUSTOMER_SEGMENT",
)

# The title is a plain string: it has no placeholders, so no f-prefix is needed.
fig.update_layout(
    title_text="Top Products by Segment",
    font_size=10,
    height=1000,
    width=1200,
)
# Last expression of the cell, so the notebook renders the figure.
fig