## what P/M fit looks like 
from https://andrewchen.co/zero-to-productmarket-fit-presentation/

Consumer products:
    * Usage 3 out of every 7 days
    * Organic growth of 100s of signups/day
    * 30% users are active the day after signup
    * Clear path to 100,000 users
    
SaaS products:
    * 5% conversion rate from free-to-paid
    * 3X CPA to LTV ratio
    * <2% monthly churn rate
    * Clear path to $100k MRR

In [None]:
import pandas as pd
import plotly.graph_objs as go
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot

# stay offline for plotly
# NOTE(review): per the plotly.offline docs, connected=True loads plotly.js
# from a CDN instead of embedding it in the notebook, so rendering still needs
# network access -- confirm that is what "offline" is meant to allow here.
init_notebook_mode(connected=True)

In [None]:
# Path of the CSV file that the rest of the notebook analyses.
# NOTE(review): an earlier comment here cited matplotlib's
# percent_bachelors_degrees_women_usa.csv sample data, which does not match
# this payments file -- confirm where payments.csv comes from.
file_name = "../data/payments.csv"

In [None]:
# Read the CSV with pandas' default encoding first. Only when the bytes cannot
# be decoded is the read retried as latin-1; any other failure (missing file,
# malformed CSV, keyboard interrupt) now surfaces immediately instead of being
# swallowed and retried.
try:
    df = pd.read_csv(file_name)
except UnicodeDecodeError:
    df = pd.read_csv(file_name, encoding='latin-1')

In [None]:
# Header names (compared case-insensitively) that are accepted as the user
# identifier column and as the event timestamp column.
user_columns = ['user', 'client', 'userid', 'clientid', 'user_id', 'client_id', 'client.email', 'customer id']
timestamp_columns = ['timestamp', 'state.openTimestamp', 'datetime', 'created (utc)']

# Lower-case the candidates as well: each header is lower-cased before the
# lookup, so a mixed-case entry such as 'state.openTimestamp' could otherwise
# never match.
_user_keys = {candidate.lower() for candidate in user_columns}
_timestamp_keys = {candidate.lower() for candidate in timestamp_columns}

# Empty string means "not found".
user_id_column = ""
timestamp_column = ""

# No early exit: when several headers qualify, the last one wins.
for column in df.columns:
    lowered = column.lower()
    if lowered in _user_keys:
        user_id_column = column
    elif lowered in _timestamp_keys:
        timestamp_column = column

print(f'Identified "{user_id_column}" as user col, "{timestamp_column}" as timestamp col')

In [None]:
# Parse the detected timestamp column; errors='coerce' is passed so values
# that cannot be parsed do not raise.
df['native_timestamp'] = pd.to_datetime(df[timestamp_column], errors='coerce')

# Derive string bucket keys from the parsed timestamp:
#   month     -> 'YYYY-MM'
#   year_week -> 'YYYY-WW' using strftime's %U week number
#   day       -> 'YYYY-MM-DD'
for bucket_name, bucket_format in (
    ('month', '%Y-%m'),
    ('year_week', '%Y-%U'),
    ('day', '%Y-%m-%d'),
):
    df[bucket_name] = df['native_timestamp'].dt.strftime(bucket_format)

In [None]:
# Render pandas' summary statistics for the frame as this cell's output.
df.describe()

### Usage 3 out of every 7 days

In [None]:
# Count records per (user, week): the result is indexed by user and
# 'year_week' and has a single 'id' column holding the number of non-null
# `id` values in that group.
# The user column is the one detected earlier in the notebook rather than a
# hard-coded 'Customer ID', so files that use another accepted header still
# work. Selecting 'id' before counting avoids counting every other column
# only to discard the result.
# NOTE(review): this counts records, not distinct active days; if the
# "3 out of every 7 days" definition is wanted, count unique 'day' values
# per group instead.
weekly_usage = df.groupby([user_id_column, 'year_week'])[['id']].count()

In [None]:
# Collapse the user level: for every week, take the mean of the per-user counts.
avg_weekly_usage = weekly_usage.groupby(level='year_week').mean()

In [None]:
# Line chart of the mean per-user count for each week.
fig = go.Figure(
    data=go.Scatter(
        x=avg_weekly_usage.index,
        y=avg_weekly_usage['id'],
        mode='lines',
        marker_color='rgb(227, 0, 6)',
    )
)

fig.update_layout(
    title='Average weekly usage per user',
    # The x values are the 'year_week' keys, so the axis shows weeks, not months.
    xaxis_title='Week',
    yaxis_title='Average weekly usage',
    margin=dict(pad=10),
    font=dict(
        family="-apple-system, BlinkMacSystemFont, 'Segoe UI', 'PingFang SC', 'Hiragino Sans GB', 'Microsoft YaHei', 'Helvetica Neue', Helvetica, Arial, sans-serif, 'Apple Color Emoji', 'Segoe UI Emoji', 'Segoe UI Symbol';",
        size=12,
        color="#7f7f7f",
    ),
    # Transparent plot area so only the light grid lines show behind the trace.
    plot_bgcolor='rgba(0,0,0,0)',
)

fig.update_xaxes(showgrid=True, gridwidth=1, gridcolor='#EEEEEE')
fig.update_yaxes(showgrid=True, gridwidth=1, gridcolor='#EEEEEE')

fig.show()

### Organic growth of 100s of signups/day

In [None]:
# Bind a second name to the same DataFrame object.
# NOTE: no copy is made here, so in-place changes and column assignments made
# through `retention` are also visible through `df`.
retention = df

In [None]:
# Give every row the earliest 'day' seen for its user (the user's first
# recorded activity), stored as 'signup_day'.
# transform('min') broadcasts the per-user minimum back onto each row, so the
# frame no longer has to be re-indexed by user and reset afterwards. That
# round-trip mutated the shared frame in place and left it indexed by user
# whenever the middle step failed, which broke re-running the cell.
# The user column is the one detected earlier in the notebook rather than a
# hard-coded 'Customer ID'.
retention['signup_day'] = retention.groupby(user_id_column)['day'].transform('min')

In [None]:
# Number of distinct users whose first recorded day is each signup day.
# Every record of a user carries that user's 'signup_day', so counting rows
# would count all records of the cohort rather than the users who signed up;
# nunique() counts each user once.
# The result column keeps the name 'id' because the cells below read
# daily_signups['id'].
daily_signups = (
    retention.groupby('signup_day')[user_id_column]
    .nunique()
    .to_frame('id')
)

In [None]:
# Line chart of the per-day signup counts.
fig = go.Figure(
    data=go.Scatter(
        x=daily_signups.index,
        y=daily_signups['id'],
        mode='lines',
        marker_color='rgb(227, 0, 6)',
    )
)

fig.update_layout(
    title='Daily new users',
    # Axis titles describe this chart: the x values are the 'signup_day' keys
    # and the y values are the per-day counts (the previous labels were left
    # over from the weekly-usage chart).
    xaxis_title='Signup day',
    yaxis_title='New users',
    margin=dict(pad=10),
    font=dict(
        family="-apple-system, BlinkMacSystemFont, 'Segoe UI', 'PingFang SC', 'Hiragino Sans GB', 'Microsoft YaHei', 'Helvetica Neue', Helvetica, Arial, sans-serif, 'Apple Color Emoji', 'Segoe UI Emoji', 'Segoe UI Symbol';",
        size=12,
        color="#7f7f7f",
    ),
    # Transparent plot area so only the light grid lines show behind the trace.
    plot_bgcolor='rgba(0,0,0,0)',
)

fig.update_xaxes(showgrid=True, gridwidth=1, gridcolor='#EEEEEE')
fig.update_yaxes(showgrid=True, gridwidth=1, gridcolor='#EEEEEE')

fig.show()

In [None]:
# Display the per-signup-day table as this cell's output.
daily_signups

In [None]:
# Convert the index labels to timestamps and store them next to the counts.
# Despite its name, the 'week' column holds one parsed timestamp per row,
# not a week number.
signup_dates = pd.to_datetime(daily_signups.index)
daily_signups['week'] = signup_dates

In [None]:
# get year day month
# NOTE(review): neither `df_clients` nor a 'native_datetime' column is created
# anywhere in the visible part of this notebook (the cells above build `df`
# with 'native_timestamp'), so this cell raises NameError unless `df_clients`
# is defined elsewhere -- confirm whether it is a leftover from another notebook.
df_clients['day'] = df_clients['native_datetime'].dt.strftime('%Y-%m-%d')

In [None]:
# Show only the signup days whose count is at least 100.
daily_signups.loc[daily_signups['id'].ge(100)]

### 30% users are active the day after signup