In [None]:
!pip freeze | grep retentioneering

In [None]:
### -e git+https://github.com/retentioneering/retentioneering-tools@be0b2f76cd09aaba6909392d0248c111099031a4#egg=retentioneering

In [None]:
import re
import json
import requests
import stripe
import pandas as pd
from retentioneering import init_config

## Config

In [None]:
# Analysis window as 'YYYY-MM-DD' strings. A falsy value makes the SETUP
# section fall back to "30 days ago" (start) / "now" (end).
START_DATE = '2019-06-19'
END_DATE = '2019-07-19'

# Placeholder only: fill in locally and never commit a real secret key.
STRIPE_API_KEY = "sk_live_<>"

# Maximum number of events collected by query(); None means no cap.
LIMIT = None

# Only the first entry of each list is handed to init_config() further down.
POSITIVE_EVENTS = [
    'invoice.payment_succeeded',
    'charge.succeeded',
    'payment_intent.succeeded',
]
NEGATIVE_EVENTS = ['churn']

# An event of one of these types adds its customer to the cohort in query().
COHORT_INITIATION_EVENTS = [
    'customer.created',
    'payment_method.attached',
    'payment_intent.created',
    'customer.subscription.created',
]

# Explicit customer ids to analyse; None means "derive them from Stripe".
CUSTOMER_IDS = None

EXCLUDE_EVENT_TYPES = [
    # Not related to a customer
    'plan.created',
    'product.created',
    'reporting.report_type.updated',
    # Related to a payment method that is not attached to a customer
    'balance.available',
    # https://stripe.com/docs/sources/best-practices#source-creation
    'source.chargeable',
]

# Every key/value pair listed here must match the event, otherwise it is skipped.
INCLUDE_EVENTS_CONDITIONS = [
    {'livemode': True},  # live mode only
]

## SETUP

In [None]:
stripe.api_key = STRIPE_API_KEY

In [None]:
import datetime

def get_time_delta_timestamp(days):
    """Return the Unix timestamp of "now shifted by ``days`` days" as a string.

    The shifted local date/time is printed as a side effect so the notebook
    shows which moment the timestamp corresponds to.

    Args:
        days: Offset in days relative to the current local time; negative
            values point to the past.

    Returns:
        str: Whole seconds since the Unix epoch, as a decimal string.
    """
    date = datetime.datetime.now() + datetime.timedelta(days=days)
    print(date)
    # strftime('%s') is a non-standard platform extension (it fails on
    # Windows). timestamp() is portable and, for a naive datetime, interprets
    # the value as local time.
    return str(int(date.timestamp()))

# Reference points in time. Each call also prints the resolved date, so the
# labels printed here act as captions for that output.
print("Today")
today_timestamp = get_time_delta_timestamp(0)
print('Last month start date')
last_30_days_timestamp = get_time_delta_timestamp(-30)
print('Last week start date')
# NOTE(review): last_7_days_timestamp is not referenced in the visible part of
# this notebook.
last_7_days_timestamp = get_time_delta_timestamp(-7)

In [None]:
# Resolve the analysis window to Unix timestamps (decimal strings).
# datetime.timestamp() replaces strftime('%s'), which is a non-standard
# platform extension and fails on Windows. For these naive datetimes both
# interpret the value as local time.
#
# NOTE: an explicit date resolves to 00:00 local time of that day, so with the
# `lte` filters used further down, events later on END_DATE itself fall
# outside the window.
if START_DATE:
    START_DATE_TIMESTAMP = str(int(datetime.datetime.strptime(START_DATE, '%Y-%m-%d').timestamp()))
else:
    START_DATE_TIMESTAMP = last_30_days_timestamp

if END_DATE:
    END_DATE_TIMESTAMP = str(int(datetime.datetime.strptime(END_DATE, '%Y-%m-%d').timestamp()))
else:
    END_DATE_TIMESTAMP = today_timestamp

## UTILS

In [None]:
def get_customer_id_from_object(value):
    """Find the first Stripe customer id (``cus_...``) among the string values of ``value``.

    ``value`` is serialised with ``json.dumps`` and the resulting text is
    scanned for a quoted string that starts with ``cus_``. Only JSON *values*
    are considered: the string must be followed by ``,``, ``}``, ``]`` or the
    end of the text, so object keys (which are followed by ``:``) never match.

    Args:
        value: Any JSON-serialisable object (dict, list, str, ...).

    Returns:
        The matched id without the surrounding quotes, or ``None`` when no
        such string is present.
    """
    serialized = json.dumps(value)
    # The previous pattern required a trailing comma, so an id that was the
    # last value of an object or array (e.g. {"customer": "cus_1"}) was missed.
    match = re.search(
        r'"(cus_[^"]+)"(?=\s*(?:[,}\]]|$))',
        serialized,
        re.IGNORECASE,
    )
    # Explicit "no match" check instead of a bare except around .group().
    if match is None:
        return None
    return match.group(1)

## Get customers created in the selected date range

In [None]:
# Customers created inside the configured window, requested 100 per page.
customers = stripe.Customer.list(
    created={'gte': START_DATE_TIMESTAMP, 'lte': END_DATE_TIMESTAMP},
    limit=100,
)
# auto_paging_iter() is consumed completely, so the list holds every customer
# it yields.
cohort_customers = list(customers.auto_paging_iter())

In [None]:
# Fall back to the ids of the customers fetched above when no explicit list
# was configured.
if not CUSTOMER_IDS:
    CUSTOMER_IDS = {customer['id'] for customer in cohort_customers}

In [None]:
print(f"Set CUSTOMER_IDS ({len(CUSTOMER_IDS)} items)", CUSTOMER_IDS)

## Get stripe events

#### All event types with description https://stripe.com/docs/api/events/types

In [None]:
def query(
    customer_ids=CUSTOMER_IDS, initiation_events=COHORT_INITIATION_EVENTS,
    exclude_event_types=EXCLUDE_EVENT_TYPES,
    include_event_conditions=INCLUDE_EVENTS_CONDITIONS,
    limit=LIMIT
):
    """Collect Stripe events for the cohort and return them as a DataFrame.

    Events created between START_DATE_TIMESTAMP and END_DATE_TIMESTAMP are
    listed and filtered in this order:

    1. event types in ``exclude_event_types`` are skipped;
    2. events that fail any key/value pair of ``include_event_conditions``
       are skipped;
    3. events for which no customer id can be determined are skipped;
    4. an event whose type is in ``initiation_events`` adds its customer to
       ``customer_ids``;
    5. events of customers that are not in ``customer_ids`` are skipped.

    Args:
        customer_ids: Set of customer ids to keep. It is mutated in place
            when initiation events are seen. Because the default is the
            module-level CUSTOMER_IDS object, ids added here are also visible
            to other code that holds that same set. A falsy value is replaced
            by a fresh empty set.
        initiation_events: Event types that enrol a customer into the cohort.
        exclude_event_types: Event types to drop unconditionally.
        include_event_conditions: Iterable of dicts; each key/value pair must
            equal the corresponding top-level event field.
        limit: Maximum number of rows to collect; falsy means unlimited.

    Returns:
        pandas.DataFrame with one row per kept event and the columns
        ``event_name``, ``event_timestamp``, ``user_pseudo_id``, ``livemod``
        and ``api_version``.
    """
    if not customer_ids:
        customer_ids = set()
    events = stripe.Event.list(
        limit=100,
        created={'gte': START_DATE_TIMESTAMP, 'lte': END_DATE_TIMESTAMP}
    )
    result = []
    for event in events.auto_paging_iter():
        # Only appended rows count towards the limit.
        if limit and len(result) >= limit:
            print("Limit reached")
            break
        try:
            event_id = event['id']
            event_type = event['type']

            if event_type in exclude_event_types:
                print(f"Skip event {event_type} {event_id}")
                continue

            # First (key, value) requirement the event does not satisfy, if any.
            failed_condition = next(
                (
                    (key, value)
                    for condition in include_event_conditions
                    for key, value in condition.items()
                    if event.get(key) != value
                ),
                None,
            )
            if failed_condition is not None:
                key, value = failed_condition
                print(f"Skip event {event_type} {event_id} not in condition", f"{key} == {value}")
                continue

            data_object = event['data']['object']
            customer_id = data_object.get('customer')
            if not customer_id:
                if data_object['object'] == 'customer':
                    # The event payload is the customer itself.
                    customer_id = data_object['id']
                else:
                    customer_id = get_customer_id_from_object(data_object)

            # Without this guard an initiation event with no resolvable
            # customer would add None to the set, and every later
            # customer-less event would then be accepted with a None user id.
            if not customer_id:
                print(f"Skip event {event_type} {event_id} no customer id")
                continue

            if event_type in initiation_events:
                customer_ids.add(customer_id)
            if customer_id not in customer_ids:
                print(f"Skip {event_type} {event_id} customer {customer_id} not in the list")
                continue

            result.append(
                {
                    'event_name': event_type,
                    'event_timestamp': event['created'],
                    'user_pseudo_id': customer_id,
                    # 'livemod' (sic) is kept as-is: it is an output column name.
                    'livemod': event['livemode'],
                    'api_version': event['api_version'],
                }
            )
        except KeyError as e:
            # Events missing an expected field are reported and skipped.
            print(f"{repr(e)}", event)
    print(f"Total customers {len(customer_ids)}")
    return pd.DataFrame(result)

In [None]:
data_full = query()

In [None]:
len(data_full)

In [None]:
data_full

## Get stripe logs

In [None]:
# Query-string fragments for the dashboard logs endpoint. They are
# concatenated as-is, so every fragment after COUNT must start with '&'.
COUNT = '?count=1000'
SUCCESS = '&success=false'
DASHBOARD = '&dashboard=false'  # made by API
OFFSET = ''  # e.g. '&offset=100'
CREATED = (
    f'&created[gte]={START_DATE_TIMESTAMP}&created[lte]={END_DATE_TIMESTAMP}'
    if START_DATE_TIMESTAMP and END_DATE_TIMESTAMP
    else ''
)
REST_ARGS = '&direction[]=connect_out&direction[]=self'

LOGS_URL = ''.join([
    'https://dashboard.stripe.com/ajax/logs',
    COUNT, SUCCESS, OFFSET, DASHBOARD, CREATED, REST_ARGS,
])

# Values sent as request headers by query_logs(); the '<>' parts are
# placeholders to fill in locally.
CSRF_TOKEN = '<>'
STRIPE_ACCOUNT = 'acct_<>'
STRIPE_LIVEMODE = 'true'
STRIPE_VERSION = '2019-05-16'

In [None]:
COOKIE = '<>'

In [None]:
def query_logs(
    url=LOGS_URL,
    csrf_token=CSRF_TOKEN,
    cookie=COOKIE,
    stripe_account=STRIPE_ACCOUNT,
    stripe_livemode=STRIPE_LIVEMODE,
    stripe_version=STRIPE_VERSION,
    customer_ids=CUSTOMER_IDS,
    timeout=60
):
    """Fetch request logs from ``url`` and return the cohort's entries as a DataFrame.

    A single GET request is made; the response is expected to be JSON with
    the keys ``data``, ``total_count`` and ``has_more``. Only the entries in
    that one response are processed. ``has_more`` is printed but not followed.

    Args:
        url: Full logs URL including the query string.
        csrf_token: Value for the ``x-stripe-csrf-token`` header.
        cookie: Value for the ``cookie`` header.
        stripe_account: Value for the ``stripe-account`` header.
        stripe_livemode: Value for the ``stripe-livemode`` header.
        stripe_version: Value for the ``stripe-version`` header.
        customer_ids: Container of customer ids to keep.
        timeout: Seconds to wait for the server before giving up, so a stalled
            connection cannot hang the notebook forever.

    Returns:
        pandas.DataFrame with the columns ``event_name`` (formatted as
        "<status> <method> <url>"), ``event_timestamp`` and ``user_pseudo_id``.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If no response arrives within ``timeout`` seconds.
    """
    response = requests.get(
        url,
        headers={
            'x-stripe-csrf-token': csrf_token,
            'cookie': cookie,
            'stripe-account': stripe_account,
            'stripe-livemode': stripe_livemode,
            'stripe-version': stripe_version
        },
        timeout=timeout,
    )
    # Fail with a clear HTTP error (e.g. expired cookie) instead of an opaque
    # JSON decoding error or KeyError further down.
    response.raise_for_status()
    logs = response.json()
    print("Logs", len(logs['data']))
    print("Logs total count", logs['total_count'])
    print("Logs has more", logs['has_more'])

    result = []
    for log in logs['data']:
        customer_id = log.get('customer')
        request_id = log['id']
        if not customer_id:
            # No explicit customer field: look for an id anywhere in the entry.
            customer_id = get_customer_id_from_object(log)

        event_name = f"{log['response']['status']} {log['request']['method']} {log['request']['url']}"

        if not customer_id:
            print(f"Skip log {event_name} {request_id} no customer id")
            continue
        if customer_id not in customer_ids:
            print(f"Skip {event_name} {request_id} customer {customer_id} not in the list")
            continue

        result.append(
            {
                'event_name': event_name,
                'event_timestamp': log['created'],
                'user_pseudo_id': customer_id,
            }
        )
    return pd.DataFrame(result)

In [None]:
logs_data_full = query_logs()

In [None]:
len(logs_data_full)

In [None]:
logs_data_full

## Concat data

In [None]:
data_full = pd.concat([data_full, logs_data_full], sort=True)

In [None]:
print("Full data")
data_full

## 1. Work with single trajectories

### User with longest billing history this month:

In [None]:
# Set user_id to a specific customer id to inspect that customer; leave it as
# None to pick the customer with the largest number of recorded events.
user_id = None
user_id = user_id or data_full['user_pseudo_id'].value_counts().idxmax()
print("Customer ID", user_id)

# Keep only the rows that belong to the selected customer.
data = data_full[data_full['user_pseudo_id'] == user_id]
print("All customer's events")
data

In [None]:
#cus_FSjyDDxGFjxmNa lots of failed charges # cus_FRbJeQ9BDebfl8 with successed

In [None]:
print("Positive", POSITIVE_EVENTS[0])
print("Negative", NEGATIVE_EVENTS[0])

In [None]:
init_config(
    experiments_folder='experiments', 
     
    index_col='user_pseudo_id',
    event_col='event_name', 
    event_time_col='event_timestamp', 
    
    positive_target_event=POSITIVE_EVENTS[0], 
    negative_target_event=NEGATIVE_EVENTS[0], 
    
    pos_target_definition={},
    neg_target_definition={},
)

In [None]:
data = data.retention.prepare()

In [None]:
desc_table = data.retention.get_step_matrix(max_steps=30)

In [None]:
data.retention.plot_graph(thresh=0.05, width=800, height=800)

## 2. Basic instruments to work with trajectories set

In [None]:
# Configure retentioneering: which columns identify the user, the event and
# its time, and which events are the positive / negative targets.
init_config(
    experiments_folder='experiments', # folder for saving experiment results: graph visualization, heatmaps, etc.
    index_col='user_pseudo_id', # column by which we split users / sessions / whatever
    event_col='event_name', # column that describes event
    event_time_col='event_timestamp', # column that describes timestamp of event
    
    # the first entry of POSITIVE_EVENTS / NEGATIVE_EVENTS is used as the target event
    positive_target_event=POSITIVE_EVENTS[0], # name of positive target event
    negative_target_event=NEGATIVE_EVENTS[0], # name of negative target event
    
    pos_target_definition={ # how to define positive event; left empty here (original note: empty marks as "passed" everyone who was not "lost" -- TODO confirm against the retentioneering docs)
        #'event_list': POSITIVE_EVENTS
    },
    neg_target_definition={ # how to define negative event; left empty here
    },
)

In [None]:
# Hand-picked customers to compare.
user_set = ['cus_FSjyDDxGFjxmNa', 'cus_FRbJeQ9BDebfl8']
# Vectorised membership test. Unlike a row-wise apply(axis=1), it also yields
# a proper boolean mask when data_full is empty.
data = data_full[data_full['user_pseudo_id'].isin(user_set)]

In [None]:
data = data.retention.prepare()

In [None]:
desc_table = data.retention.get_step_matrix(max_steps=30)

In [None]:
# create group filter based on target events
diff_filter = data.retention.create_filter()

# calculate difference table between two groups
diff_table = data.retention.get_step_matrix_difference(diff_filter, max_steps=30)

In [None]:
data.retention.plot_graph(thresh=0.05, width=800, height=800)

## 3. Inspect all trajectories for a larger group of users

In [None]:
# Configure retentioneering: which columns identify the user, the event and
# its time, and which events are the positive / negative targets.
init_config(
    experiments_folder='experiments', # folder for saving experiment results: graph visualization, heatmaps, etc.
    index_col='user_pseudo_id', # column by which we split users / sessions / whatever
    event_col='event_name', # column that describes event
    event_time_col='event_timestamp', # column that describes timestamp of event
    
    # the first entry of POSITIVE_EVENTS / NEGATIVE_EVENTS is used as the target event
    positive_target_event=POSITIVE_EVENTS[0], # name of positive target event
    negative_target_event=NEGATIVE_EVENTS[0], # name of negative target event
    
    pos_target_definition={ # how to define positive event; left empty here (original note: empty marks as "passed" everyone who was not "lost" -- TODO confirm against the retentioneering docs)
        
    },
    neg_target_definition={ # how to define negative event
        # original note: empty means everyone who is not positive will be "lost" -- TODO confirm against the retentioneering docs
    },
)

In [None]:
data = data_full # [data_full['event_timestamp'] < '2019-07-17 11:00:00']

In [None]:
data = data.retention.prepare()

In [None]:
# create group filter based on target events
diff_filter = data.retention.create_filter()

# calculate difference table between two groups
diff_table = data.retention.get_step_matrix_difference(diff_filter, max_steps=30)

In [None]:
data.retention.learn_tsne(perplexity=3, plot_type='targets', refit=True);

#### Churn копится в группах пользователей с определёнными событиями в платёжной истории

In [None]:
data.retention.get_clusters(n_clusters=6, plot_type='cluster_tsne', refit_cluster=True);

In [None]:
data.retention.get_clusters(plot_type='cluster_pie',plot_cnt=6);

#### 6 классов пользователей, 3 из которых платили

#### 10% после invoice payment failed отваливаются