# Sankey

In [12]:
import pandas as pd
import plotly.graph_objects as go

# Load the per-customer funnel data. The sample printed below shows one row per
# customer with a 0/1 flag column for every stage.
data = pd.read_excel('CE tunnel sample changed.xlsx', sheet_name='Sheet1')
print(data.head())

# Funnel stages in the order a customer moves through them
stages = ['ReceiveEmail', 'OpenEmail', 'ClickLink', 'ClickDemo', 'DidDemo', 'ClickSalesPage', 'Purchase', 'Retained']

# Node colors per stage as [color for "No", color for "Yes"]
stage_colors = {
    'ReceiveEmail': ['#D3D3D3', '#1E90FF'],  # Light Gray, Dodger Blue
    'OpenEmail': ['#D3D3D3', '#00BFFF'],  # Light Gray, Deep Sky Blue
    'ClickLink': ['#FFCC99', '#FF4500'],  # Faded Orange, Orange Red
    'ClickDemo': ['#FFFF99', '#FFD700'],  # Light Yellow, Gold
    'DidDemo': ['#D8BFD8', '#800080'],  # Thistle (light purple), Purple
    'ClickSalesPage': ['#FFB6C1', '#FF1493'],  # Light Pink, Deep Pink
    'Purchase': ['#90EE90', '#008000'],  # Light Green, Green
    'Retained': ['#ADD8E6', '#008B8B']  # Light Blue, Dark Cyan
}

# Every stage contributes two consecutive nodes: "No" at 2*k and "Yes" at 2*k + 1.
# The tuple position therefore doubles as the flag value (0 -> No, 1 -> Yes).
stage_indices = {stage: (2 * pos, 2 * pos + 1) for pos, stage in enumerate(stages)}
labels = [f"{stage} - {answer}" for stage in stages for answer in ("No", "Yes")]
node_colors = [color for stage in stages for color in stage_colors[stage]]

# Link colors
yes_color = "#A0D6B4"  # Soft green for Yes -> Yes paths
no_color = "#F4A6A6"   # Soft red for every path that involves a "No"


def _as_flag(raw_value, stage):
    """Return a stage flag as the plain integer 0 (No) or 1 (Yes).

    Args:
        raw_value: The grouped cell value for ``stage``. It may arrive as an
            int, a bool, or a float (pandas stores an integer column as float
            as soon as it contains a missing value).
        stage: Column name, used only to build the error message.

    Returns:
        0 or 1, usable as an index into the (No, Yes) tuples in ``stage_indices``.

    Raises:
        ValueError: If ``raw_value`` is not numerically equal to 0 or 1.
            Without this check a value such as -1 would silently select the
            "Yes" node through negative tuple indexing.
    """
    try:
        flag = int(raw_value)
    except (TypeError, ValueError):
        flag = None
    if flag not in (0, 1) or flag != raw_value:
        raise ValueError(f"Column {stage!r} must contain only 0/1 flags, found {raw_value!r}")
    return flag


# Sankey link lists, filled in parallel (one entry per observed transition)
source = []
target = []
value = []
link_colors = []

# Count transitions between each pair of consecutive stages and create links
for current_stage, next_stage in zip(stages, stages[1:]):
    # One row per observed (current flag, next flag) combination. groupby drops
    # rows whose flag is missing in either column (default dropna=True).
    transition_counts = data.groupby([current_stage, next_stage]).size().reset_index(name='count')

    # itertuples yields the columns in order (current, next, count) without the
    # whole-row dtype upcast that iterrows performs.
    for from_raw, to_raw, count in transition_counts.itertuples(index=False, name=None):
        from_flag = _as_flag(from_raw, current_stage)
        to_flag = _as_flag(to_raw, next_stage)

        source.append(stage_indices[current_stage][from_flag])
        target.append(stage_indices[next_stage][to_flag])
        value.append(count)

        # Only a Yes -> Yes transition gets the "yes" color
        link_colors.append(yes_color if from_flag and to_flag else no_color)

# Create Sankey diagram
fig = go.Figure(go.Sankey(
    node=dict(
        pad=15,
        thickness=20,
        line=dict(color="black", width=0.5),
        label=labels,  # "<stage> - No" / "<stage> - Yes" per stage
        color=node_colors  # One color per node, same order as labels
    ),
    link=dict(
        source=source,
        target=target,
        value=value,
        color=link_colors  # Green for Yes -> Yes, red otherwise
    )
))

# Update layout
fig.update_layout(
    title_text="User Flow Through Stages (Yes/No)",
    font_size=10
)

fig.show()


   CustomerID  ReceiveEmail  OpenEmail  ClickLink  ClickDemo  DidDemo  \
0           1             1          1          1          1        1   
1           2             1          0          0          0        0   
2           3             1          1          1          0        0   
3           4             1          1          1          1        1   
4           5             1          1          0          0        0   

   ClickSalesPage  Purchase  Retained  
0               1         1         1  
1               0         0         0  
2               0         0         0  
3               1         0         0  
4               0         0         0  


# Totals for each stage

In [13]:
import pandas as pd
import plotly.graph_objects as go

# Load the stage totals; the workbook must provide a 'Stage' and a 'Total' column
data = pd.read_excel('CE_tunnel_totals.xlsx', sheet_name='StageTotals')

# Stage names and their totals, in the row order of the sheet
stages = data['Stage'].tolist()
totals = data['Total'].tolist()

# Node labels are the stage names themselves
labels = stages

# Base palette for the nodes. It is cycled so that every stage gets a color
# even when the sheet lists a different number of stages than the palette has
# entries; with exactly eight stages the result equals the palette.
stage_palette = ['#1E90FF', '#00BFFF', '#FF4500', '#FFD700', '#800080', '#FF1493', '#008000', '#008B8B']
node_colors = [stage_palette[pos % len(stage_palette)] for pos in range(len(stages))]

# Chain the stages: node i links to node i + 1, and each link carries the
# total of the stage it flows into.
source = list(range(len(stages) - 1))
target = list(range(1, len(stages)))
value = totals[1:]

# Create Sankey diagram
fig = go.Figure(go.Sankey(
    node=dict(
        pad=15,
        thickness=20,
        line=dict(color="black", width=0.5),
        label=labels,
        color=node_colors  # One color per stage
    ),
    link=dict(
        source=source,
        target=target,
        value=value,
        color="#A0D6B4"  # Uniform color for links
    )
))

# Update layout
fig.update_layout(
    title_text="Total User Flow Through Stages",
    font_size=10
)

fig.show()
