In [None]:
# standard snippets I use a lot

# for auto-reloading extensions - helpful if you're writing and testing a package
%reload_ext autoreload
%autoreload 2

# for inline plotting in python using matplotlib
%matplotlib inline
import matplotlib.pyplot as plt

# for easier plots - also makes matplotlib plots look nicer by default
import seaborn as sns

# set up for using plotly offline without an API key - great for interactive plots
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot

import plotly.graph_objs as go
import plotly.figure_factory as ff
import plotly.tools as tools
init_notebook_mode(connected=True)

# for numerical work
import pandas as pd
import numpy as np

import pymongo

import datetime
import json
import math

from pandas.io.json import json_normalize
from pymongo import MongoClient

import pickle

from sklearn.neighbors import KernelDensity

In [None]:
# Read the MongoDB credentials (a JSON document) from disk.
with open('../user_aggregation_pipeline//creds.json') as json_data:
    creds = json.loads(json_data.read())

# Open the shared database client using the connection string from the credentials.
client = MongoClient(creds['connection_string'])

In [None]:
def density1D(x, name, y=None, xaxis='x', yaxis='y'):
    """Build a line trace of a Gaussian kernel density estimate of ``x``.

    Parameters
    ----------
    x : array-like of numbers (numpy array, pandas Series, list)
        The sample to estimate the density of.
    name : str
        Trace name.
    y : ignored
        Accepted only so this function can be called with the same keyword
        arguments as the plotly trace constructors (facet_grid passes ``y=``).
    xaxis, yaxis : str
        Plotly axis ids the trace is attached to.

    Returns
    -------
    go.Scatter, or an empty dict when ``x`` has fewer than two distinct
    values (no density can be estimated from a single point).
    """
    # Work on a flat numpy array: a pandas Series has no ``reshape`` and an
    # ndarray has no ``.values``, so mixing the two APIs raised AttributeError.
    values = np.asarray(x).ravel()

    if len(set(values.tolist())) < 2:
        return {}

    values = values.astype(float)
    low, high = values.min(), values.max()

    # create the kernel density estimate (scikit-learn expects 2D input);
    # the bandwidth grows with the log of the sample size
    kde = KernelDensity(kernel='gaussian', bandwidth=math.log(values.size)).fit(values.reshape(-1, 1))

    # evaluate the estimate on ~1000 evenly spaced points across the observed range
    grid = np.arange(start=low, stop=high, step=(high - low)/1000)

    # score_samples returns log-densities, so exponentiate to get densities
    density_estimate = np.exp(kde.score_samples(grid.reshape(-1, 1))).flatten()

    return go.Scatter({'x': grid, 'y': density_estimate, 'name': name, 'xaxis': xaxis, 'yaxis': yaxis, 'mode': 'lines'})

def get_events_by_category(client, eventCategory):
    """Load every event of one category that has a non-null ``metadata.email``.

    Queries ``client['production']['eventCollection']``, flattens the returned
    documents into a DataFrame (nested keys become dotted column names) and adds
    a ``date`` column holding the first 10 characters of ``str(created)``.
    """
    # only events in this category that carry an email
    query = {
            'eventCategory': eventCategory,
            'metadata.email': {'$ne': None}}
    collection = client['production']['eventCollection']
    records = list(collection.find(query))

    # flatten the documents into a table
    events = pd.DataFrame(json_normalize(records))

    # day-level key derived from the creation timestamp
    events['date'] = events['created'].map(lambda created: str(created)[0:10])

    return events

def facet_grid(df, x, y, col_category, row_category, plot_method, plot_args={}, xaxis_args={}, yaxis_args={}, group=None, shared_x=False):
    """Build a plotly figure with one subplot per (row_category, col_category) pair.

    Parameters
    ----------
    df : pandas.DataFrame
        Source data.
    x, y : str
        Column names handed to ``plot_method`` as ``x`` and ``y``.
    col_category, row_category : str
        Columns whose distinct values define the grid columns and rows.
    plot_method : callable
        Called as ``plot_method(x=..., y=..., name=..., xaxis=..., yaxis=...)``;
        must return a trace that supports item assignment (e.g. ``go.Scatter``,
        ``go.Bar`` or ``density1D``).
    plot_args : dict
        Properties set on every trace after it is created.
    xaxis_args, yaxis_args : dict
        Merged over the computed layout of every x / y axis (they win on conflict).
    group : unused
        Kept for backward compatibility.
    shared_x : bool
        When true, every subplot in a grid column uses the min/max of ``x``
        across that whole column as its x range; otherwise plotly autoranges.

    Returns
    -------
    go.Figure
    """
    # NOTE: the dict defaults above are only read, never mutated, so sharing
    # them between calls is safe.

    # Rows are reverse-sorted because y domains grow upward from 0: the first
    # label in sort order therefore ends up in the top row.
    column_labels = sorted(df[col_category].unique())
    row_labels = sorted(df[row_category].unique(), reverse=True)

    # setup the main containers
    traces = []
    layout = {'annotations': [], 'showlegend': False}

    # nothing to facet on (e.g. empty frame): return an empty figure instead of
    # dividing by zero below
    if not column_labels or not row_labels:
        return go.Figure(data=traces, layout=layout)

    # fraction of the paper taken by each grid cell
    col_step = 1/len(column_labels)
    row_step = 1/len(row_labels)

    # the shared x range only depends on the column, so compute it once per column
    column_ranges = {}
    if shared_x:
        for column in column_labels:
            column_x = df.loc[df[col_category] == column, x]
            column_ranges[column] = [column_x.min(), column_x.max()]

    # subplot counter used to name the axes (x1/y1, x2/y2, ...)
    plot_index = 1

    for row_i, row in enumerate(row_labels):

        # leave 20% of the cell height as padding above and below
        row_domain_start = row_i*row_step + (row_step*0.20)
        row_domain_end = (row_i+1)*row_step - (row_step*0.20)

        for col_i, column in enumerate(column_labels):

            # leave 10% of the cell width as padding on each side
            col_domain_start = col_i*col_step + (col_step*0.10)
            col_domain_end = (col_i+1)*col_step - (col_step*0.10)

            in_cell = (df[row_category] == row) & (df[col_category] == column)
            this_df = df[in_cell].sort_values(by=x)

            xaxis = 'x'+str(plot_index)
            yaxis = 'y'+str(plot_index)

            xaxis_layout_name = 'xaxis'+str(plot_index)
            yaxis_layout_name = 'yaxis'+str(plot_index)

            # str() so that non-string category values (numbers, booleans) also work
            trace = plot_method(
                x=this_df[x],
                y=this_df[y],
                name='{} {}'.format(str(column).title(), str(row).title()),
                xaxis=xaxis,
                yaxis=yaxis
            )

            for key, value in plot_args.items():
                trace[key] = value

            traces.append(trace)

            # Only set 'range' when sharing x; leaving it out lets plotly
            # autorange ('auto' is not a valid value for an axis range).
            xaxis_layout = {'anchor': yaxis, 'domain': [col_domain_start, col_domain_end]}
            if shared_x:
                xaxis_layout['range'] = column_ranges[column]
            layout[xaxis_layout_name] = {**xaxis_layout, **xaxis_args}

            layout[yaxis_layout_name] = {**{'anchor': xaxis, 'domain': [row_domain_start, row_domain_end]}, **yaxis_args}

            # add the row labels (once per row, left of the first column)
            if col_i == 0:
                layout['annotations'].append({
                    'font': {'size': 12},
                    'showarrow': False,
                    'text': str(row).upper(),
                    'x': 0,
                    'xanchor': 'left',
                    'xref': 'paper',
                    'y': row_domain_start + (row_domain_end-row_domain_start)/2,
                    'yanchor': 'center',
                    'yref': 'paper',
                    'xshift': -20})

            # add the column labels (once per column, along the top of the paper)
            if row_i == 0:
                layout['annotations'].append({
                    'font': {'size': 12},
                    'showarrow': False,
                    'text': str(column).upper(),
                    'x': col_domain_start + (col_domain_end-col_domain_start)/2,
                    'xanchor': 'center',
                    'xref': 'paper',
                    'y': 1,
                    'yanchor': 'bottom',
                    'yref': 'paper'})

            plot_index += 1

    fig = go.Figure(data=traces, layout=layout)

    return fig


def plot_series(client, eventCategory):
    """Render the standard set of charts for one event category.

    Loads the category's events via ``get_events_by_category`` and displays, inline:

    1. events per day, one line per eventAction;
    2. the distribution of those daily counts per eventAction;
    3. the distribution of per-user event counts, for actions with more than
       10 distinct users (skipped when no action qualifies);
    4. a facet grid of the 5 most active users per eventAction / eventLabel;
    5. a facet grid of daily counts per eventAction / eventLabel.

    Parameters
    ----------
    client : MongoClient-like object (indexed as client[db][collection])
    eventCategory : str
    """
    # read the data from the database (adds the day-level 'date' column)
    data = get_events_by_category(client, eventCategory)

    # --- time series: events per day and action ---
    daily = data.groupby(['date','eventAction'], as_index=False)['created'].count()
    actions = list(daily['eventAction'].unique())
    daily_by_action = [daily[daily['eventAction'] == action] for action in actions]
    trace_names = ['{} {} Per Day'.format(eventCategory.title(), action.title()) for action in actions]

    # 'lines' (plural) is the valid scatter mode
    traces = [go.Scatter(x=rows['date'], y=rows['created'], name=trace_name, mode='lines')
              for rows, trace_name in zip(daily_by_action, trace_names)]

    layout = go.Layout(title='{} Per Day'.format(eventCategory.title()))
    fig = go.Figure(data=traces, layout=layout)

    # --- distribution of the daily counts, one curve per action ---
    dist_data = [rows['created'].values for rows in daily_by_action]
    dist_fig = ff.create_distplot(hist_data=dist_data, group_labels=trace_names, show_hist=False)

    # --- distribution of per-user counts ---
    users = data.groupby(['metadata.email','eventAction'], as_index=False)['created'].count()

    # only plot actions performed by more than 10 distinct users
    users_per_action = users.groupby('eventAction', as_index=False)['metadata.email'].count()
    plot_actions = list(users_per_action[users_per_action['metadata.email'] > 10]['eventAction'])

    user_dist_fig = None
    if plot_actions:  # create_distplot cannot handle an empty list of datasets
        user_dist_data = [users[users.eventAction == this_action]['created'].values for this_action in plot_actions]
        user_dist_fig = ff.create_distplot(hist_data=user_dist_data, group_labels=plot_actions, show_hist=False)

    # --- top users per action / label ---
    bar_users = data.groupby(['metadata.email','eventAction','eventLabel'], as_index=False)['created'].count()
    # keep the 5 users with the most events in each (action, label) cell so the
    # data matches the figure title
    top_users = (bar_users
                 .sort_values(by=['eventAction','eventLabel','created'], ascending=[True, True, False])
                 .groupby(['eventAction','eventLabel'])
                 .head(5))
    bar_grid_fig = facet_grid(df=top_users, x='created', y='metadata.email',
                              col_category='eventAction', row_category='eventLabel',
                              plot_method=go.Bar, plot_args={'orientation': 'h'},
                              yaxis_args={'showticklabels': False})
    bar_grid_fig['layout']['title'] = 'Top 5 Users by eventAction and eventLabel'

    # --- daily counts per action / label ---
    daily_action_label = data.groupby(['date','eventAction','eventLabel'], as_index=False)['created'].count()
    time_grid_fig = facet_grid(df=daily_action_label, x='date', y='created',
                               col_category='eventAction', row_category='eventLabel',
                               plot_method=go.Scatter, plot_args={'mode': 'lines'})
    time_grid_fig['layout']['title'] = 'Time Series of Events'

    # plot in jupyter notebook
    iplot(fig)
    iplot(dist_fig)
    if user_dist_fig is not None:
        iplot(user_dist_fig)
    iplot(bar_grid_fig)
    iplot(time_grid_fig)

In [None]:
# Load every 'interac' event that carries an email.
interac = get_events_by_category(client, 'interac')

# Events per day for each (eventAction, eventLabel) pair.
counts = (interac
          .groupby(['date','eventAction', 'eventLabel'], as_index=False)['_id']
          .count()
          .rename(index=str, columns={'_id': 'count'}))

time_fig = facet_grid(df=counts, x='date', y='count', col_category='eventAction', row_category='eventLabel', plot_method=go.Scatter, plot_args={'mode': 'lines'})

# Events per user for each (eventAction, eventLabel) pair.
user_counts = (interac
               .groupby(['metadata.email','eventAction', 'eventLabel'], as_index=False)['_id']
               .count()
               .rename(index=str, columns={'_id': 'count'}))

# Density of the per-user counts. This previously read from `counts` (the
# per-day table), which did not match the name of the figure; density1D only
# uses x, y is passed because facet_grid requires a column name.
user_counts_hist = facet_grid(df=user_counts, y='metadata.email', x='count', col_category='eventAction', row_category='eventLabel', plot_method=density1D, shared_x=False)

# The 5 users with the most events in each (eventAction, eventLabel) cell.
user_counts_top = (user_counts
                   .sort_values(by=['eventAction','eventLabel','count'], ascending=[True, True, False])
                   .groupby(['eventAction','eventLabel'])
                   .head(5))

# Tick labels are hidden on the y axis, which carries the email addresses.
user_fig = facet_grid(df=user_counts_top, y='metadata.email', x='count', col_category='eventAction', row_category='eventLabel', plot_method=go.Bar, plot_args={'orientation': 'h'}, yaxis_args={'showticklabels': False})

# Same canvas size for every grid, then display them inline.
for grid_fig in (user_counts_hist, time_fig, user_fig):
    grid_fig['layout']['height'] = 1600
    grid_fig['layout']['width'] = 1600
    iplot(grid_fig)

In [None]:
# Render the plot_series charts for the 'buy' event category.
plot_series(client, 'buy')

In [None]:
# Render the plot_series charts for the 'interac' event category.
plot_series(client, 'interac')

In [None]:
# Render the plot_series charts for the 'trade' event category.
plot_series(client, 'trade')

In [None]:
# Render the plot_series charts for the 'user-flow' event category.
plot_series(client, 'user-flow')

In [None]:
# Render the plot_series charts for the 'user-interaction' event category.
plot_series(client, 'user-interaction')

In [None]:
# 'created' value of the newest event: sort by 'created' descending and take the first document.
# Raises IndexError if the collection is empty.
list(client['production']['eventCollection'].find().sort([('created',-1)]).limit(1))[0]['created']

In [None]:
# Current local date and time (naive datetime) — presumably shown to compare
# against the newest event's 'created' value; TODO confirm or drop this cell.
datetime.datetime.today()

In [None]:
# Current local date and time; without a tz argument this gives the same kind
# of naive local datetime as datetime.datetime.today().
datetime.datetime.now()

In [None]:
from pymongo import MongoClient
from pandas.io.json import json_normalize

# Read the MongoDB credentials (a JSON document) from disk.
# NOTE(review): this path differs from the one used when `creds` is first loaded
# near the top of the notebook — confirm which location is the current one.
with open('../creds/creds.json') as json_data:
    creds = json.loads(json_data.read())

def get_user_events(user_email):
    """Return every event recorded for one user, newest first.

    Opens a dedicated MongoDB connection from the module-level ``creds``,
    reads all documents in ``production.eventCollection`` whose
    ``metadata.email`` equals ``user_email`` and closes the connection again.

    Parameters
    ----------
    user_email : str

    Returns
    -------
    pandas.DataFrame
        Flattened events (nested keys become dotted column names) sorted by
        ``created`` descending. Empty, with no columns, when the user has no events.
    """
    # set up a database connection with credentials
    client = MongoClient(creds['connection_string'])

    try:
        # get the full event history of this user
        events = list(client['production']['eventCollection'].find({
                'metadata.email': user_email}))
    finally:
        # one client is created per call, so release its sockets when done
        client.close()

    df = pd.DataFrame(json_normalize(events))

    # no events -> no 'created' column to sort on
    if df.empty:
        return df

    return df.sort_values(by='created', ascending=False)

def display_all_user_events(user_email, max_rows=1000):
    """Display the key columns of every event recorded for one user.

    Parameters
    ----------
    user_email : str
        Passed to ``get_user_events``.
    max_rows : int
        Value of pandas' ``display.max_rows`` while the table is rendered.
    """
    df = get_user_events(user_email)

    columns = ['created','eventCategory','eventAction','eventLabel','metadata.amount','value','metadata.ip']

    with pd.option_context('display.max_rows', max_rows):

        # reindex instead of df[columns]: a column is only present when at least
        # one of the user's events has that key, and a missing one should show
        # up as an empty column rather than raise KeyError
        display(df.reindex(columns=columns))

# NOTE(review): a personal email address is hardcoded here and the rendered table
# includes the 'metadata.ip' column — consider reading the address from input/config
# and clearing this cell's output before sharing the notebook.
display_all_user_events('david9074@gmail.com')

In [None]:
# Event history of a single user; `ev` is reused by the cells below.
# NOTE(review): hardcoded personal email address — consider moving it out of the notebook.
ev = get_user_events('bioviral@gmail.com')

def plot_user_events(ev):
    """Show one time-series figure per (eventCategory, eventAction) found in ``ev``.

    Each figure has one line per eventLabel plus an 'all' line with the per-day
    total, and every figure uses the same x range (first to last date in ``ev``).

    Side effect: adds a 'date' column (first 10 characters of ``str(created)``)
    to ``ev`` in place.
    """
    # day-level key, written onto the caller's frame
    ev['date'] = ev.created.map(lambda created: str(created)[0:10])

    # events per day / category / action / label
    per_day = ev.groupby(['date','eventCategory','eventAction','eventLabel'], as_index=False)['_id'].count()
    gb = per_day.rename(index=str, columns={'_id': 'count'})

    # common x range so the figures can be compared with each other
    date_range = [gb.date.min(), gb.date.max()]

    for category in gb.eventCategory.unique():
        in_category = gb.eventCategory == category

        for action in gb.eventAction.unique():

            this_df = gb[in_category & (gb.eventAction == action)]

            # this category never had this action
            if this_df.shape[0] == 0:
                continue

            # one line per label
            traces = []
            for label in this_df.eventLabel.unique():
                label_rows = this_df[this_df.eventLabel == label]
                traces.append(go.Scatter(x=label_rows['date'],
                                         y=label_rows['count'],
                                         name=label))

            # plus the total over all labels
            totals = this_df.groupby('date', as_index=False)['count'].sum().sort_values(by='date')
            traces.append(go.Scatter(x=totals['date'],
                                     y=totals['count'],
                                     name='all'))

            layout = go.Layout({'title': (category+" "+action).title(), 'xaxis': {'range': date_range}})

            iplot(go.Figure(data=traces, layout=layout))
                
# Render the per-category/action figures; this call also adds a 'date' column to `ev` in place.
plot_user_events(ev)

In [None]:
# Total number of events for this user per (category, action, label),
# ordered by count within each category/action.
event_counts = (
    ev.groupby(['eventCategory','eventAction','eventLabel'], as_index=False)['_id']
    .count()
    .rename(index=str, columns={'_id': 'count'})
    .sort_values(by=['eventCategory','eventAction','count'])
)

# One facet row per category/action combination.
event_counts['event_type'] = event_counts['eventCategory']+"_"+event_counts['eventAction']

# Horizontal bars: one bar per eventLabel, length = count.
# (An earlier, commented-out variant built the same chart with this notebook's
# own facet_grid: columns = eventCategory, rows = eventAction, go.Bar with
# orientation 'h', hidden y tick labels, 1600 x 1600.)
event_counts_fig = ff.create_facet_grid(
    event_counts,
    x='count',
    y='eventLabel',
    facet_row='event_type',
    trace_type='bar',
    orientation='h',
    height=1600,
    width=600,
)

iplot(event_counts_fig)

In [None]:
# Events for this user per day and (category, action, label).
# The 'date' key is derived here rather than relying on another cell having
# added it to `ev`, so this cell also works on a freshly loaded `ev`.
event_counts = (
    ev.assign(date=ev['created'].apply(lambda created: str(created)[0:10]))
    .groupby(['date','eventCategory','eventAction','eventLabel'], as_index=False)['_id']
    .count()
    .rename(columns={'_id': 'count'})
)

# One facet row per category/action combination.
event_counts['event_type'] = event_counts['eventCategory']+"_"+event_counts['eventAction']

# Plot the aggregated table: the raw `ev` frame has neither a 'count' nor an
# 'event_type' column, so passing it to create_facet_grid could not work.
# NOTE(review): with 'date' in the grouping there are several bars per
# eventLabel (one per day) — confirm whether a date facet/axis was intended.
event_counts_fig = ff.create_facet_grid(df=event_counts,
                                        y='eventLabel',
                                        x='count',
                                        facet_row='event_type',
                                        trace_type='bar',
                                        width=600,
                                        height=1600,
                                        orientation='h')

iplot(event_counts_fig)