Key metrics for a growing startup:
* MRR (monthly recurring revenue)
* Churn (how many customers leave)
* LTV (lifetime value)
* CPA (cost per acquisition) -> not yet sure how to calculate it

In [None]:
import pandas as pd
import numpy as np
import locale # for currency
import plotly.graph_objs as go
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot

# Initialise plotly's notebook renderer.
# Note: connected=True makes the notebook load plotly.js from the online CDN
# instead of embedding the library in the notebook output.
init_notebook_mode(connected=True)

# Apply US English conventions to every locale category; locale.currency()
# below relies on this to format monetary amounts.
locale.setlocale(locale.LC_ALL, 'en_US.UTF-8')

In [None]:
# using sample data from https://github.com/matplotlib/matplotlib/blob/master/lib/matplotlib/mpl-data/sample_data/percent_bachelors_degrees_women_usa.csv
# NOTE(review): the URL above names a matplotlib sample CSV, while the path
# below is a payments file from which later cells read the 'Amount' and
# 'Customer ID' columns -- confirm which data source is actually intended.
file_name = "../data/payments.csv"

In [None]:
# Read the payments CSV. pandas decodes the file as UTF-8 by default; if the
# bytes are not valid UTF-8, retry once as Latin-1 (which accepts any byte).
# Only the decoding failure triggers the retry: other problems such as a
# missing file or a malformed CSV propagate immediately instead of being
# hidden behind a second, equally failing read.
try:
    df = pd.read_csv(file_name)
except UnicodeDecodeError:
    df = pd.read_csv(file_name, encoding='latin-1')

In [None]:
# Lower-cased header names that are accepted as the user-id column and as the
# timestamp column respectively.
user_columns = ['user', 'client', 'userid', 'clientid', 'user_id', 'client_id', 'client.email', 'customer id']
timestamp_columns = ['timestamp', 'state.openTimestamp', 'datetime', 'created (utc)']

# Both stay empty strings when no header matches.
user_id_column = ""
timestamp_column = ""

# Compare every header case-insensitively against the candidate lists.
# When several headers match the same list, the last one wins.
for column in df.columns:
    normalized = column.lower()
    if normalized in user_columns:
        user_id_column = column
        continue
    if normalized in timestamp_columns:
        timestamp_column = column

print(f'Identified "{user_id_column}" as user col, "{timestamp_column}" as timestamp col')

In [None]:
# Parse the detected timestamp column; values that cannot be parsed become
# NaT because of errors='coerce'.
parsed_timestamps = pd.to_datetime(df[timestamp_column], errors='coerce')
df['native_timestamp'] = parsed_timestamps

# Year-month label (e.g. '2020-03') used as the grouping key further down.
df['month'] = parsed_timestamps.dt.strftime('%Y-%m')

## MRR

In [None]:
# Total 'Amount' per month. The column is selected *before* aggregating so
# that only it is summed; summing every column first fails on recent pandas
# versions because the datetime column does not support sum().
df_monthly = df.groupby('month')[['Amount']].sum()

# Keep the most recent 12 months for a clean plot (the slice previously took
# 30 rows, contradicting both this comment and the variable name).
last_12 = df_monthly.iloc[-12:]

In [None]:
# Summary statistics of the monthly revenue in the selected window.
description = last_12['Amount'].describe()

# Number of months covered, plus the months holding the lowest / highest revenue.
months = int(description['count'])
min_month = last_12['Amount'].idxmin()
max_month = last_12['Amount'].idxmax()

# Currency-formatted strings (with thousands grouping) for the report below.
mean, min_mrr, max_mrr = (
    locale.currency(description[stat], grouping=True)
    for stat in ('mean', 'min', 'max')
)

# Human-readable one-sentence summary.
print(f'Over the past {months} months, the average MRR was {mean} \
based on the data inputted. It varied between {min_mrr} in {min_month} \
and {max_mrr} in {max_month}.')

In [None]:
# Line trace of monthly revenue over the selected window.
mrr_trace = go.Scatter(
    x=last_12.index,
    y=last_12['Amount'],
    mode='lines',
    marker_color='rgb(227, 0, 6)',
)
fig = go.Figure(data=mrr_trace)

# Titles, padding, typography and a transparent plot background.
# NOTE(review): the font family string ends with a CSS-style ';' -- confirm
# the renderer accepts it rather than discarding the whole font list.
fig.update_layout(
    title=f'MRR over the past {months} months',
    xaxis_title='Month',
    yaxis_title='Revenue ($)',
    margin=dict(pad=10),
    font=dict(
        family="-apple-system, BlinkMacSystemFont, 'Segoe UI', 'PingFang SC', 'Hiragino Sans GB', 'Microsoft YaHei', 'Helvetica Neue', Helvetica, Arial, sans-serif, 'Apple Color Emoji', 'Segoe UI Emoji', 'Segoe UI Symbol';",
        size=12,
        color="#7f7f7f",
    ),
    plot_bgcolor='rgba(0,0,0,0)',
)

# Same light-grey grid on both axes.
grid_style = dict(showgrid=True, gridwidth=1, gridcolor='#EEEEEE')
fig.update_xaxes(**grid_style)
fig.update_yaxes(**grid_style)

fig.show()

In [None]:
# Serialise the figure built above to a JSON string and print it.
output_json = fig.to_json()

print(output_json)

---
## Churn

**Monthly Churn Rate Example** from [clevertap](https://clevertap.com/blog/churn-rate/)


Users at start of month: 2,000

New users added that month: 400

Users lost at the end of month: 366

Monthly churn rate: 366 / (2,000 + 400) = 366/2,400 ≈ 15.25%

In [None]:
# Sorted list of the distinct month labels present in the data
# (rows without a month label are not represented).
months = df.groupby('month').size().index.tolist()

#month = []
#churn = []

# go over every month
#for i in range(len(months)):
    # get the index of each month
#    current_month = months[i]
#    previous_month = months[i-1]
    
    # get users in each month
#    users_prev_month = df[df['month'] == previous_month]['Customer ID'].unique()
#    users_current_month = df[df['month'] == current_month]['Customer ID'].unique()
    
    # find users lost
    # users from last month which are not in current month
#    diff = np.isin(users_prev_month, users_current_month)
#    churned = np.unique(diff, return_counts=True)[1][0]
    
    # calculate churn rate
#    churn_rate = round((churned/len(users_current_month))*100,2)
    
    # append to list
#    month.append(current_month)
#    churn.append(churn)

# create dataframe
#churn_df = pd.DataFrame({'month': month, 'churn': churn})

In [None]:
# Monthly churn rate, following the worked example in the Churn section:
#   churn = users lost / (users at start of month + new users that month)
# "Users at start" are the customers seen in the previous month, "new users"
# are customers seen this month but not in the previous one, and "users lost"
# are previous-month customers that do not appear this month.
#
# Fixes compared with the earlier version of this cell:
#   * the first month is skipped, because it has no previous month (indexing
#     months[i-1] with i == 0 silently compared it against the LAST month);
#   * lost users are counted with a set difference; the np.unique-based count
#     reported every retained user as churned whenever nobody had churned;
#   * the denominator now matches the documented formula;
#   * customers per month are collected once instead of re-filtering the
#     whole dataframe on every iteration.
# NOTE(review): consecutive entries of `months` are assumed to be adjacent
# calendar months; a month without any rows is simply absent from the list.
month = []
churn = []

# month label -> set of customer ids active in that month (missing ids dropped)
users_by_month = {
    label: set(ids.dropna())
    for label, ids in df.groupby('month')['Customer ID']
}

# walk over consecutive (previous, current) month pairs
for previous_month, current_month in zip(months, months[1:]):
    users_prev_month = users_by_month.get(previous_month, set())
    users_current_month = users_by_month.get(current_month, set())

    # users from last month which are not in the current month
    churned = len(users_prev_month - users_current_month)

    # users at start of month + new users added during the month
    user_base = len(users_prev_month | users_current_month)

    # churn rate in percent; guard against an empty user base
    churn_rate = round(churned / user_base * 100, 2) if user_base else 0.0

    month.append(current_month)
    churn.append(churn_rate)

In [None]:
# Display the list of monthly churn rates (in percent) computed above.
churn

In [None]:
# Tabulate the churn results: one row per month with its churn rate.
churn_df = pd.DataFrame(dict(month=month, churn=churn))

In [None]:
# Preview the first rows of the churn table.
churn_df.head()

In [None]:
# Line chart of the churn rate per month, using the same red as the MRR plot.
churn_trace = go.Scatter(
    x=churn_df['month'],
    y=churn_df['churn'],
    mode='lines',
    marker_color='rgb(227, 0, 6)',
)
fig = go.Figure(data=churn_trace)

In [None]:
# Render the churn figure created in the previous cell.
fig.show()