<img width="8%" alt="Plotly.png" src="https://raw.githubusercontent.com/jupyter-naas/awesome-notebooks/master/.github/assets/logos/Plotly.png" style="border-radius: 15%">

# Plotly - Follow leads by cohorts

**Tags:** #plotly #html #csv #image #growth #analytics #cohorts

**Author:** [Florent Ravenel](https://www.linkedin.com/in/florent-ravenel/)

**Description:** This notebook creates a stacked bar chart that tracks, week by week, the number of contacts reached in each cohort.

## Input

### Import libraries

In [None]:
import plotly.graph_objects as go
from naas_drivers import gsheet
import pandas as pd
import os
from datetime import date, datetime, timedelta
import naas_data_product

### Setup variables
**Inputs**
- `entity_dir`: Entity directory.
- `entity_name`: Entity name.
- `input_dir`: Path of the directory to retrieve file from.
- `input_file`: Name of the file to be retrieved.
- `spreadsheet_url`: Google Sheets spreadsheet URL.
- `sheet_name`: Google Sheets sheet name.
- `title`: Graph title.
- `linkedin_url`: Entity LinkedIn URL used as graph source.

**Outputs**
- `output_dir`: Path to the directory where the output files will be saved.
- `output_file`: Base name shared by the output files (the `.html` and `.png` extensions are appended).

In [None]:
# Inputs
# Every entity setting is loaded from the same store directory, so build its path once.
entity_store_path = os.path.join(naas_data_product.OUTPUTS_PATH, "entities", "0")
entity_dir = pload(entity_store_path, "entity_dir")
entity_name = pload(entity_store_path, "entity_name")
input_dir = os.path.join(entity_dir, "growth-engine", date.today().isoformat())
input_file = "growth"
spreadsheet_url = pload(entity_store_path, "abi_spreadsheet")
sheet_name = "GROWTH"
title = "Contacts Reached"
linkedin_url = pload(entity_store_path, "linkedin_url")

# Outputs
# Results are written to the dated "growth-engine" folder of the entity.
output_dir = os.path.join(entity_dir, "growth-engine", date.today().isoformat())
output_file = "growth_trend"

## Model

### Set outputs

In [None]:
# Derive the HTML and PNG destinations from the shared output directory and base name
html_output, image_output = (
    os.path.join(output_dir, f"{output_file}.html"),
    os.path.join(output_dir, f"{output_file}.png"),
)

### Get DB growth

In [None]:
# Load the stored growth table; when pload returns None, read the Google Sheet instead
df_input = pload(input_dir, input_file)
if df_input is None:
    df_input = gsheet.connect(spreadsheet_url).get(sheet_name=sheet_name)

# Keep only the rows that belong to the current entity
if len(df_input) > 0:
    df_input = df_input.loc[df_input["ENTITY"] == entity_name]

# Display result
print("Input data:", len(df_input))
df_input.head(1)

### Create trend dataframe

In [None]:
def get_trend(
    df_init,
    col_label,
    col_group,
    col_value,
    agg_value,
    entity_name,
    current_week=None,
):
    """Build the weekly cohort trend table used by the stacked bar chart.

    Rows of ``df_init`` are aggregated per (``col_label``, ``col_group``) pair,
    laid out on a complete week x week grid (missing pairs count as 0),
    restricted to the five most recent weeks and decorated with display
    labels, colors and hover text.

    Parameters
    ----------
    df_init : pandas.DataFrame
        Source rows. Must contain ``col_label``, ``col_group``, ``col_value``
        and a ``LAST_INTERACTION_DATE`` column whose values are strings
        starting with a ``YYYY-MM-DD`` date. The date of the LAST row is used
        as the first day of the generated week range, so the frame is
        presumably ordered most recent first -- TODO confirm with the data
        producer.
    col_label : str
        Column holding the week label of each row. Values must use the
        ``W<week>-<year>`` format (``strftime("W%W-%Y")``) to match the grid.
    col_group : str
        Column holding the cohort of each row: a week label in the same
        format, or ``"NEW"`` for the current week's own cohort.
    col_value : str
        Column to aggregate.
    agg_value : str or callable
        Aggregation passed to ``DataFrame.agg`` for ``col_value``.
    entity_name : str
        Value written in the ``ENTITY`` column of the result.
    current_week : str, optional
        Label of the current week. When omitted, the notebook-level ``TW``
        constant is used (defined outside this cell).

    Returns
    -------
    pandas.DataFrame
        Columns ``ENTITY``, ``LABEL``, ``LABEL_ORDER``, ``LABEL_D``, ``GROUP``,
        ``GROUP_ORDER``, ``COLOR``, ``VALUE``, ``TEXT``, sorted by
        ``LABEL_ORDER`` then ``GROUP_ORDER`` (ascending).

    Raises
    ------
    IndexError
        If ``df_init`` has no rows (the last-row date lookup fails).
    """
    if current_week is None:
        # Backward-compatible default: rely on the constant provided by the notebook.
        current_week = TW

    # Aggregate the source rows per (week, cohort) and normalise the column names
    df = (
        df_init.groupby([col_label, col_group], as_index=False)
        .agg({col_value: agg_value})
        .rename(columns={col_label: "LABEL", col_group: "GROUP", col_value: "VALUE"})
    )

    def get_weeks(df):
        """Return the ordered, de-duplicated week labels from the last row's date to today."""
        start_date = datetime.strptime(
            df.loc[df.index[-1], "LAST_INTERACTION_DATE"][:10], "%Y-%m-%d"
        )
        days = pd.date_range(start_date, date.today(), freq="D")
        # dict.fromkeys removes duplicates while keeping chronological order
        return list(dict.fromkeys(day.strftime("W%W-%Y") for day in days))

    weeks = get_weeks(df_init)

    # Full week x cohort grid, built in one call rather than by growing a
    # DataFrame with pd.concat inside a loop.
    dfs = pd.DataFrame(
        [(label, group) for label in weeks for group in weeks],
        columns=["LABEL", "GROUP"],
    )
    # The current week's own cohort is reported as "NEW"
    is_current = (dfs["GROUP"] == current_week) & (dfs["LABEL"] == current_week)
    dfs.loc[is_current, "GROUP"] = "NEW"

    # Attach the aggregated values to the grid; missing pairs become 0
    df = pd.merge(dfs, df, how="left", on=["LABEL", "GROUP"]).fillna(
        {"GROUP": "NA", "VALUE": 0}
    )

    # Sort keys: "W<ww>-<yyyy>" -> "<yyyy><ww>"; "NEW" always ranks last
    df["LABEL_ORDER"] = df["LABEL"].str[-4:] + df["LABEL"].str[1:3]
    df["GROUP_ORDER"] = df["GROUP"].str[-4:] + df["GROUP"].str[1:3]
    df.loc[df["GROUP"] == "NEW", "GROUP_ORDER"] = "999999"
    df = df.sort_values(by=["LABEL_ORDER", "GROUP_ORDER"], ascending=[False, False])

    # Display labels and colors for the five most recent weeks
    labels_d = [
        "This week",
        "Last week",
        "2 weeks ago",
        "3 weeks ago",
        "4 weeks ago",
    ]
    colors_d = [
        "#48DD82",
        "#FFFDA2",
        "#FFAB76",
        "#FF6362",
        "#484549",
    ]
    # Most recent week first, thanks to the descending sort above
    labels = df["LABEL"].unique().tolist()
    # zip stops at five entries; older weeks get no label and fall back to black below
    new_labels = dict(zip(labels, labels_d))
    colors = dict(zip(labels, colors_d))
    if labels:
        colors["NEW"] = colors_d[0]
    df["LABEL_D"] = df["LABEL"].map(new_labels)
    df["COLOR"] = df["GROUP"].map(colors).fillna("#000000")

    # Keep the five displayed weeks; copy so the assignments below act on an
    # independent frame (no SettingWithCopyWarning).
    df = df[df["LABEL_D"].isin(labels_d)].copy()

    # Fold every cohort older than the displayed weeks into a single bucket
    is_older = df["COLOR"] == "#000000"
    df.loc[is_older, "GROUP"] = "> 4 weeks ago"
    df.loc[is_older, "GROUP_ORDER"] = "0"

    # Re-aggregate (the folded cohorts now share one GROUP) and order for plotting
    to_group = [
        "LABEL",
        "LABEL_ORDER",
        "LABEL_D",
        "GROUP",
        "GROUP_ORDER",
        "COLOR",
    ]
    df = (
        df.groupby(to_group, as_index=False)
        .agg({"VALUE": "sum"})
        .sort_values(by=["LABEL_ORDER", "GROUP_ORDER"], ascending=[True, True])
    )

    # Plotly: hovertext
    df["TEXT"] = (
        "<b><span style='font-size: 14px;'>"
        + df["GROUP"].astype(str)
        + ": "
        + df["VALUE"].astype(int).astype(str)
        + "</span></b>"
    )
    df.insert(loc=0, column="ENTITY", value=entity_name)
    return df.reset_index(drop=True)

# Count the profiles per (SCENARIO week, COHORT) for the current entity
df_trend = get_trend(
    df_init=df_input,
    col_label="SCENARIO",
    col_group="COHORT",
    col_value="PROFILE_URL",
    agg_value="count",
    entity_name=entity_name,
)
# Uncomment to inspect the result:
# df_trend

### Create title and logo

In [None]:
# Weekly totals, ordered from the oldest week to the most recent one
df = (
    df_trend.groupby(["LABEL", "LABEL_ORDER"], as_index=False)
    .agg({"VALUE": "sum"})
    .sort_values(by="LABEL_ORDER")
)

# Most recent week and the one before it (0 when the week is not available)
total = df["VALUE"].iloc[-1] if len(df) >= 1 else 0
total_n1 = df["VALUE"].iloc[-2] if len(df) >= 2 else 0

# Absolute and relative variation versus the previous week
varv = total - total_n1
varp = varv / total_n1 if total_n1 != 0 else 0

# Human-readable figures: a space as thousands separator, explicit "+" sign
total_d = "{:,.0f}".format(total).replace(",", " ")
varv_d = "{:,.0f}".format(varv).replace(",", " ")
varp_d = "{:,.0%}".format(varp).replace(",", " ")
if varv >= 0:
    varv_d, varp_d = f"+{varv_d}", f"+{varp_d}"
title_full = f"<b><span style='font-size: 20px;'>{title}</span></b><br><span style='font-size: 18px;'>{TW}: {total_d} | {varv_d} ({varp_d}) vs last week</span>"

# Logo: pick the arrow matching the direction of the variation.
# NOTE(review): varv is an absolute difference, not a ratio; the -0.2 threshold
# looks like it was meant for varp -- confirm the intent before changing it.
logo = arrow_up if varv > 0 else (arrow_right if varv > -0.2 else arrow_down)
print("Title:", title_full)

### Create stacked vertical bar chart

In [None]:
def get_week_number(week):
    """Return the week number contained in a ``W<week>-<year>`` label as an int."""
    after_prefix = week.split("W")[1]
    number_text, _, _ = after_prefix.partition("-")
    return int(number_text)

def create_barchart(
    df,
    label,
    groups,
    value,
    title,
    logo,
):
    """Draw, display and return a stacked vertical bar chart.

    One bar trace is added per distinct value of ``df[groups]``. Each trace
    takes its x values from ``df[label]``, its heights from ``df[value]``, its
    colors from the ``COLOR`` column and its hover text from the ``TEXT``
    column. The "> 4 weeks ago" and "NEW" groups are drawn but left out of
    the legend.

    ``logo`` is used as the source of a layout image placed next to the title.
    The source annotation reads ``linkedin_url`` from the enclosing notebook
    scope. The figure is shown (mode bar hidden) before being returned.
    """
    legend_hidden = ("> 4 weeks ago", "NEW")
    fig = go.Figure()

    # One stacked trace per group
    for group_name in df[groups].unique():
        group_rows = df[df[groups] == group_name]
        fig.add_trace(
            go.Bar(
                x=group_rows[label],
                y=group_rows[value],
                name=group_name,
                marker={"color": group_rows["COLOR"]},
                hoverinfo="text",
                hovertext=group_rows["TEXT"],
                showlegend=group_name not in legend_hidden,
            )
        )

    # Trend logo, anchored above the plotting area on the left
    fig.add_layout_image(
        {
            "source": logo,
            "xref": "paper",
            "yref": "paper",
            "x": 0.01,
            "y": 1.06,
            "sizex": 0.12,
            "sizey": 0.12,
            "xanchor": "right",
            "yanchor": "bottom",
        }
    )

    # Horizontal legend, displayed below the graph
    fig.update_layout(
        legend={
            "orientation": "h",
            "x": 0.25,
            "y": -0.2,
            "xanchor": "left",
            "yanchor": "bottom",
        }
    )

    # Source / creation date footnote
    fig.add_annotation(
        text=f"<i>Source: <a href='{linkedin_url}'>{linkedin_url}</a> / Created at: {date.today().isoformat()}</i>",
        font={"family": "Arial", "color": "black"},
        x=0,
        y=-0.35,
        xref="paper",
        yref="paper",
        xanchor="left",
        yanchor="bottom",
        arrowcolor="white",
    )

    # Global layout: title, white background, fixed size, stacked bars
    fig.update_layout(
        title=title,
        title_x=0.09,
        title_font={"family": "Arial", "color": "black"},
        paper_bgcolor="#ffffff",
        plot_bgcolor="#ffffff",
        width=1200,
        height=600,
        margin_pad=10,
        margin_b=120,
        barmode="stack",
        bargap=0.1,  # gap between bars of adjacent location coordinates.
        bargroupgap=0.1,  # gap between bars of the same location coordinate.
    )
    fig.show(config={"displayModeBar": False})
    return fig

# Render the trend: one bar per displayed week, stacked by cohort
fig = create_barchart(
    df_trend,
    label="LABEL_D",
    groups="GROUP",
    value="VALUE",
    title=title_full,
    logo=logo,
)

## Output

### Save data

In [None]:
# Persist the trend table: df_trend is handed to pdump together with the output
# directory and the output base name.
# NOTE(review): pdump is not imported in the visible cells -- presumably provided
# by `import naas_data_product`; confirm.
pdump(output_dir, df_trend, output_file)

### Save and share your graph as HTML


In [None]:
# Write the figure to the HTML output path
fig.write_html(html_output)

# Register the HTML file as a naas asset and keep the value returned by naas.asset.add
# NOTE(review): `naas` is not imported in the visible cells -- confirm it is in scope.
html_link = naas.asset.add(html_output, override_prod=True, params={"inline": True})

# -> Uncomment the line below to remove your asset
# naas.asset.delete(html_output)

### Save and share your graph as an image


In [None]:
# Write the figure to the PNG output path
fig.write_image(image_output)

# Register the PNG file as a naas asset and keep the value returned by naas.asset.add
# NOTE(review): `naas` is not imported in the visible cells -- confirm it is in scope.
image_link = naas.asset.add(image_output, override_prod=True, params={"inline": True})

# -> Uncomment the line below to remove your asset
# naas.asset.delete(image_output)