In [2]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

In [3]:
df = pd.read_excel("data\Telco_customer_churn_adapted_v2.xlsx")
df.sample(5)

Unnamed: 0,Customer ID,Tenure Months,Location,Device Class,Games Product,Music Product,Education Product,Call Center,Video Product,Use MyApp,Payment Method,Monthly Purchase (Thou. IDR),Churn Label,Longitude,Latitude,CLTV (Predicted Thou. IDR)
2518,2518,72,Jakarta,High End,Yes,No,Yes,Yes,Yes,No,Debit,127.66,No,106.816666,-6.2,5631.6
5364,5364,12,Jakarta,High End,Yes,Yes,Yes,No,No,No,Pulsa,109.98,No,106.816666,-6.2,3248.7
5963,5963,5,Jakarta,High End,No,No,No,No,Yes,No,Pulsa,104.26,No,106.816666,-6.2,4408.3
1785,1785,35,Jakarta,Mid End,No,No,No,No,Yes,No,Digital Wallet,71.5,Yes,106.816666,-6.2,2886.0
6380,6380,14,Jakarta,High End,No,No,No,No,No,Yes,Pulsa,101.53,No,106.816666,-6.2,4104.1


# Statistik Umum

### Valuasi

In [4]:
# Distribusi Monthly Purchase
fig = px.histogram(df, x="CLTV (Predicted Thou. IDR)", nbins=20)
fig.show()

### Jumlah Customer

In [5]:
def pie(df, names, hover_variable):
    data = df[[names, "Customer ID"]].groupby(names).count().reset_index()
    hover_data = df[[names, hover_variable]].groupby(names).mean().reset_index()
    data[hover_variable] = round(hover_data[hover_variable], 3)

    fig = px.pie(data, values="Customer ID", names=names,
                title=f'Jumlah Customer Menurut {names}',
                hover_data=[hover_variable], labels={'Customer ID':'Jumlah Customer'})
    fig.update_traces(textposition='inside', textinfo='percent+label')
    
    return fig

fig = pie(df, names = "Location", hover_variable = "Monthly Purchase (Thou. IDR)")
fig.show()

### Tenure

In [6]:
def tenure_months_distribution(y, agg):
    mapper = {
        0: "Less than 1 year",
        1: "1-2 years",
        2: "2-3 years",
        3: "3-4 years",
        4: "4-5 years",
        5: "5-6 years",
        6: "6-7 years"
    }

    if y == "Count":
        temp = pd.DataFrame((df["Tenure Months"] // 12).replace(mapper).value_counts()).reset_index()
        temp.columns = ["Tenure Months", "Count"]
    elif y != "Count":
        temp = df[["Tenure Months", y]]
        temp["Tenure Months"] = (temp["Tenure Months"] // 12).replace(mapper)

        if agg == "Sum":
            temp = temp.groupby("Tenure Months").sum().reset_index()
        elif agg == "Mean":
            temp = temp.groupby("Tenure Months").mean().reset_index()
        else:
            temp = temp.groupby("Tenure Months").median().reset_index()
            
    fig = px.bar(temp, y="Tenure Months", x = y,
                category_orders={"Tenure Months": list(mapper.values())},
                title = "Tenure Months Distribution")
    return fig

tenure_months_distribution(y="Count", agg="Median")

In [None]:
def payment_method(y, agg="Mean", hue=None):
    if y == "Count":
        temp = df["Payment Method"].value_counts().reset_index()
        temp.columns = ["Payment Method", "Count"]
    elif y != "Count":
        if "Monthly" in y:
            y = "Monthly Purchase (Thou. IDR)"
        else:
            y = "CLTV (Predicted Thou. IDR)"
        temp = df[["Payment Method", y]].groupby("Payment Method").sum().reset_index()

        if agg == "Sum":
            temp = temp.groupby("Payment Method").sum().reset_index()
        elif agg == "Mean":
            temp = temp.groupby("Payment Method").mean().reset_index()
        else:
            temp = temp.groupby("Payment Method").median().reset_index()

    if hue:
        fig = px.bar(temp, y=y, x = "Payment Method", color=hue,
                    title = "Payment Method Distribution")
    else:
        fig = px.bar(temp, y=y, x = "Payment Method",
                    title = "Payment Method Distribution")
    fig.update_layout(margin=dict(l=0, r=0, t=30, b=0), height=400)
    return fig
    
temp = df[["Payment Method", "Location", "Monthly Purchase (Thou. IDR)"]]
temp = temp.groupby(["Payment Method", "Location"]).mean().reset_index()

fig = px.bar(temp, y=y, x = "Payment Method", color="Location",
             title = "Payment Method Distribution", barmode="group")
fig.show()

payment_method(y="Count", agg="Sum", hue="Location")

In [None]:
temp = df[["Payment Method", "Location", "Monthly Purchase (Thou. IDR)"]]
temp = temp.groupby(["Payment Method", "Location"]).mean().reset_index()

fig = px.bar(temp, y=y, x = "Payment Method", color="Location",
             title = "Payment Method Distribution", barmode="group")
fig.show()

In [None]:
def payment_method(df, y="Count", agg="Mean", hue=None):
    if y == "Count":
        temp = df["Payment Method"].value_counts().reset_index()
        temp.columns = ["Payment Method", "Count"]
        if hue:
            temp = df[["Payment Method", hue]].groupby(["Payment Method", hue]).size().reset_index()
            temp.columns = ["Payment Method", hue, "Count"]
    else:
        if "Monthly" in y:
            y = "Monthly Purchase (Thou. IDR)"
        else:
            y = "CLTV (Predicted Thou. IDR)"
        if hue:
            temp = df[["Payment Method", hue, y]].groupby(["Payment Method", hue]).agg(agg.lower()).reset_index()
        else:
            temp = df[["Payment Method", y]].groupby("Payment Method").agg(agg.lower()).reset_index()

    if hue:
        fig = px.bar(temp, x="Payment Method", y=y, color=hue, title="Payment Method Distribution", barmode="group")
    else:
        fig = px.bar(temp, x="Payment Method", y=y, title="Payment Method Distribution")

    fig.update_layout(margin=dict(l=0, r=0, t=30, b=0), height=400)
    return fig

payment_method(df, y="Monthly Purchase (Predicted Thou. IDR)", agg="Sum")


In [None]:
def plot_distribution(column, nbins=20):
    if df[column].dtype != "object":
        fig = px.histogram(df, x=column, nbins=nbins)
        desc = pd.DataFrame(round(df[column].describe(), 2)).transpose().reset_index(drop=True)
    else:
        temp = df[column].value_counts().reset_index()
        temp.columns = [column, "Count"]
        fig = px.bar(temp, x=column, y="Count")
        desc = pd.DataFrame(df[column].describe()).transpose().reset_index(drop=True)
    
    fig.update_layout(margin=dict(l=0, r=0, t=0, b=0), height=300)

    return fig, desc

plot_distribution("Location")

(Figure({
     'data': [{'alignmentgroup': 'True',
               'hovertemplate': 'Location=%{x}<br>Count=%{y}<extra></extra>',
               'legendgroup': '',
               'marker': {'color': '#636efa', 'pattern': {'shape': ''}},
               'name': '',
               'offsetgroup': '',
               'orientation': 'v',
               'showlegend': False,
               'textposition': 'auto',
               'type': 'bar',
               'x': array(['Jakarta', 'Bandung'], dtype=object),
               'xaxis': 'x',
               'y': array([5031, 2012], dtype=int64),
               'yaxis': 'y'}],
     'layout': {'barmode': 'relative',
                'height': 300,
                'legend': {'tracegroupgap': 0},
                'margin': {'b': 0, 'l': 0, 'r': 0, 't': 0},
                'template': '...',
                'xaxis': {'anchor': 'y', 'domain': [0.0, 1.0], 'title': {'text': 'Location'}},
                'yaxis': {'anchor': 'x', 'domain': [0.0, 1.0], 'title': {'te

In [10]:
products = ["Games Product", "Music Product", "Education Product", "Video Product", "Call Center", "Use MyApp"]

In [13]:
def product(product=products[0], y="Count", hue=None):
    if y == "Count":
        temp = df[product].value_counts().reset_index()
        temp.columns = [product, "Count"]
        if hue:
            temp = df[[product, hue]].groupby([product, hue]).size().reset_index()
            temp.columns = [product, hue, "Count"]

        print(temp)
    
    else:
        if "Monthly" in y:
            y = "Monthly Purchase (Thou. IDR)"
        else:
            y = "CLTV (Predicted Thou. IDR)"

    if hue:
        fig = px.bar(temp, x=product, y=y, color=hue, title="Payment Method Distribution", barmode="group")
    else:
        fig = px.bar(temp, x=product, y=y, title="Payment Method Distribution")

    fig.update_layout(legend=dict(orientation="h"), margin=dict(l=0, r=0, t=30, b=0), height=420)
    return fig

In [15]:
product(products[0], hue="Location")

         Games Product Location  Count
0                   No  Bandung    996
1                   No  Jakarta   2502
2  No internet service  Bandung    435
3  No internet service  Jakarta   1091
4                  Yes  Bandung    581
5                  Yes  Jakarta   1438
