In [5]:
import pandas as pd
import plotly.graph_objects as go

# Load the customer-journey export. Each stage column is compared against the
# string "Yes" below; any other value is treated as "stage not completed".
df = pd.read_excel('CE tunnel sample dataset.xlsx')

# Stages in the customer journey, listed in funnel order
stages = ["ReceiveEmail", "OpenEmail", "ClickLink", "ClickDemo", "ClickSalesPage", "Purchase", "Retained"]

# Transform data into source-target-value format
total_customers = len(df)

# Boolean frame: True where the customer completed the stage
completed = df[stages].eq("Yes")

# Walk each customer's own path through the funnel and count the hops between
# consecutive *completed* stages. A customer who skipped a stage contributes a
# single link that jumps over it (e.g. ReceiveEmail -> ClickLink).
# Counting every (earlier, later) pair where both are "Yes" would instead add
# the same customer to A->B, B->C and A->C at once, so the flow through each
# node would be larger than the number of customers who actually reached it.
transition_counts = {}
for stage_flags in completed.itertuples(index=False, name=None):
    path = [stage for stage, done in zip(stages, stage_flags) if done]
    for source, target in zip(path, path[1:]):
        transition_counts[(source, target)] = transition_counts.get((source, target), 0) + 1

# Emit the observed forward transitions in funnel order so the resulting table
# is stable from run to run.
flows = []
for i, source in enumerate(stages):
    for target in stages[i + 1:]:
        count = transition_counts.get((source, target), 0)
        if count > 0:
            # Share of all customers in the dataset that took this link
            percentage = (count / total_customers) * 100
            flows.append({"Source": source, "Target": target, "Value": count, "Percentage": percentage})

# Explicit columns keep the frame usable (sankey_data["Source"], ...) even when
# no transition was observed and `flows` is empty.
sankey_data = pd.DataFrame(flows, columns=["Source", "Target", "Value", "Percentage"])

# Create the list of node labels: every stage that appears in at least one link.
# The labels are listed in the order given by `stages` instead of iterating a
# bare set: set iteration order for strings differs between interpreter runs
# (hash randomisation), which would change the node indices - and therefore how
# the diagram may be drawn - every time the kernel is restarted.
used_labels = set(sankey_data["Source"]) | set(sankey_data["Target"])
unique_labels = [stage for stage in stages if stage in used_labels]

# Map labels to their node index
label_to_index = {label: position for position, label in enumerate(unique_labels)}

# Prepare source, target, and value for Sankey graph (links refer to nodes by index)
sankey_source = [label_to_index[source] for source in sankey_data["Source"]]
sankey_target = [label_to_index[target] for target in sankey_data["Target"]]
sankey_value = sankey_data["Value"]
sankey_percentage = sankey_data["Percentage"]

# Create link labels with percentages, e.g. "A → B: 7 (70.0%)".
# Zipping the columns avoids building a Series per row as iterrows() does.
link_labels = [
    f"{source} → {target}: {value} ({percentage:.1f}%)"
    for source, target, value, percentage in zip(
        sankey_data["Source"],
        sankey_data["Target"],
        sankey_data["Value"],
        sankey_data["Percentage"],
    )
]

# Node settings: one node per label, with fixed padding/thickness and a thin
# black outline.
node_spec = {
    "label": unique_labels,
    "pad": 15,
    "thickness": 20,
    "line": {"color": "black", "width": 0.5},
}

# Link settings: source/target are node indices, value is the link weight, and
# label carries the "count (percentage)" text built for each link.
link_spec = {
    "source": sankey_source,
    "target": sankey_target,
    "value": sankey_value,
    "label": link_labels,
}

# Assemble the Sankey trace and wrap it in a figure
sankey_trace = go.Sankey(node=node_spec, link=link_spec)
fig = go.Figure(data=[sankey_trace])

# Title the figure, set the font size, then render it
fig.update_layout(
    title_text="Customer Journey Sankey Diagram (Including Skipped Stages)",
    font_size=10,
)
fig.show()


In [23]:
import pandas as pd
import plotly.graph_objects as go

# Inline sample dataset: ten customers, one "Yes"/"No" column per journey stage
data = {
    "CustomerID": list(range(1, 11)),
    "ReceiveEmail": ["Yes"] * 10,
    "OpenEmail": ["Yes", "No", "Yes", "Yes", "Yes", "Yes", "Yes", "No", "Yes", "Yes"],
    "ClickLink": ["Yes", "No", "Yes", "Yes", "No", "Yes", "Yes", "No", "Yes", "Yes"],
    "ClickDemo": ["Yes", "Yes", "No", "Yes", "No", "No", "Yes", "No", "Yes", "Yes"],
    "DidDemo": ["Yes", "No", "No", "Yes", "No", "Yes", "No", "No", "Yes", "Yes"],
    "ClickSalesPage": ["Yes", "No", "No", "Yes", "No", "Yes", "No", "No", "Yes", "No"],
    "Purchase": ["Yes", "Yes", "No", "No", "No", "Yes", "Yes", "No", "Yes", "No"],
    "Retained": ["Yes", "No", "No", "Yes", "No", "No", "No", "No", "Yes", "No"],
}
df = pd.DataFrame(data)

# Journey stages in funnel order; links are built between neighbouring entries
stages = [
    "ReceiveEmail",
    "OpenEmail",
    "ClickLink",
    "ClickDemo",
    "DidDemo",
    "ClickSalesPage",
    "Purchase",
    "Retained",
]

# Build one source-target-value record per pair of adjacent stages
total_customers = len(df)
reached = df[stages] == "Yes"  # True where the customer completed the stage

flows = []
for source, target in zip(stages, stages[1:]):
    # Customers marked "Yes" for both the stage and the one right after it
    count = int((reached[source] & reached[target]).sum())
    # Expressed as a share of every customer in the dataset
    percentage = (count / total_customers) * 100
    flows.append(dict(Source=source, Target=target, Value=count, Percentage=percentage))

sankey_data = pd.DataFrame(flows)

# Create the list of node labels: every stage that appears in at least one link.
# The labels are listed in the order given by `stages` instead of iterating a
# bare set: set iteration order for strings differs between interpreter runs
# (hash randomisation), which would change the node indices - and therefore how
# the diagram may be drawn - every time the kernel is restarted.
used_labels = set(sankey_data["Source"]) | set(sankey_data["Target"])
unique_labels = [stage for stage in stages if stage in used_labels]

# Map labels to their node index
label_to_index = {label: position for position, label in enumerate(unique_labels)}

# Prepare source, target, and value for Sankey graph (links refer to nodes by index)
sankey_source = [label_to_index[source] for source in sankey_data["Source"]]
sankey_target = [label_to_index[target] for target in sankey_data["Target"]]
sankey_value = sankey_data["Value"]
sankey_percentage = sankey_data["Percentage"]

# Create link labels with percentages, e.g. "A → B: 7 (70.0%)".
# Zipping the columns avoids building a Series per row as iterrows() does.
link_labels = [
    f"{source} → {target}: {value} ({percentage:.1f}%)"
    for source, target, value, percentage in zip(
        sankey_data["Source"],
        sankey_data["Target"],
        sankey_data["Value"],
        sankey_data["Percentage"],
    )
]

# Node settings: one node per label, with fixed padding/thickness and a thin
# black outline.
node_spec = {
    "label": unique_labels,
    "pad": 15,
    "thickness": 20,
    "line": {"color": "black", "width": 0.5},
}

# Link settings: source/target are node indices, value is the link weight, and
# label carries the "count (percentage)" text built for each link.
link_spec = {
    "source": sankey_source,
    "target": sankey_target,
    "value": sankey_value,
    "label": link_labels,
}

# Assemble the Sankey trace and wrap it in a figure
sankey_trace = go.Sankey(node=node_spec, link=link_spec)
fig = go.Figure(data=[sankey_trace])

# Title the figure, set the font size, then render it
fig.update_layout(
    title_text="Customer Journey Sankey Diagram with Percentages",
    font_size=10,
)
fig.show()
