In [1]:
import os 
import sys

In [2]:
os.getcwd()

'/home/owner/Documents/Github_projects/Tinder_dash_demo/Notebooks'

In [3]:
sys.path.append('/home/owner/Documents/Github_projects/Tinder_dash_demo/Scripts/')
sys.path.append('/home/owner/Documents/Github_projects/Tinder_dash_demo/')

In [4]:
import dash_table

In [5]:
import dash
import pandas as pd
import numpy as np
import json
from Scripts import message_df_fx as msg_fx


In [6]:
import plotly.graph_objs as go
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
init_notebook_mode(connected=True)


In [7]:
data_path = '/home/owner/Documents/Github_projects/Tinder_dash_demo/Data/data.json'

In [8]:
with open(data_path, "rb") as inp: 
    data = json.load(inp)

In [9]:
list_of_dfs = [msg_fx.get_msg_df(msg_dict) for msg_dict in data["Messages"]]
all_msg_df = pd.concat(list_of_dfs, axis=0)


In [10]:
all_msg_df['date'] = all_msg_df['sent_date'].dt.date

In [11]:
all_msg_df['date2'] = all_msg_df['sent_date'].apply(msg_fx.flatten_date)

In [12]:
flag_col = ['explicit_word_in_msg', 'funny_word_in_msg', 'question_mark_in_msg', 'question_word_in_msg',
            "exclamation_mark_in_msg"]

In [13]:
    dt_gb = all_msg_df.groupby('date')
    n_msg_over_time = dt_gb.apply(len)

In [14]:
n_msg_over_time.head()

date
2014-03-26    4
2014-03-27    3
2014-04-02    1
2014-04-07    5
2014-06-07    1
dtype: int64

In [15]:
def create_word_per_message_graph():
    dt_gb = all_msg_df.groupby('date')
    n_msg_over_time = dt_gb.apply(len)
    total_trace = go.Scatter(
        x=n_msg_over_time.index, 
        y=n_msg_over_time.values,
        name="Total Number of Messages"
        )
    print(n_msg_over_time.shape)

    def create_plots(flag_over_time, flag_name):
        trace = go.Scatter(
            x=flag_over_time.index,
            y=flag_over_time,
            name= flag_name
        )
        return(trace)
    traces = [create_plots(dt_gb[flag].sum(), flag) for flag in flag_col]
    traces.insert(0,total_trace)

    layout = dict(title = 'Number of Message Types over Time',
                  xaxis = dict(title='Date'),
                  yaxis = dict(title='Number of Messages')
                  )
    fig = go.Figure(data= traces, layout=layout)
    return(fig)


In [16]:
fig1 = create_word_per_message_graph()

(214,)


In [17]:
iplot(fig1)

## Dash example of a table 

In [18]:
df = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/solar.csv')

app = dash.Dash(__name__)

app.layout = dash_table.DataTable(
    id='table',
    columns=[{"name": i, "id": i} for i in df.columns],
    data=df.to_dict("rows"),
)


In [19]:
df.head()

Unnamed: 0,State,Number of Solar Plants,Installed Capacity (MW),Average MW Per Plant,Generation (GWh)
0,California,289,4395,15.3,10826
1,Arizona,48,1078,22.5,2550
2,Nevada,11,238,21.6,557
3,New Mexico,33,261,7.9,590
4,Colorado,20,118,5.9,235


## Apply table knowledge

In [20]:
from Scripts import usage_analysis_fx as usage


In [43]:
with open(data_path, "rb") as inp: 
    data = json.load(inp)

In [44]:
usage_df = pd.DataFrame(data["Usage"])
usage_df['total_swipes'] = usage_df['swipes_likes'] + usage_df['swipes_passes']

In [40]:
usage_df.dtypes

app_opens            int64
matches              int64
messages_received    int64
messages_sent        int64
swipes_likes         int64
swipes_passes        int64
dtype: object

In [22]:
max_usage = usage.gather_max_usage(usage_df)

In [23]:
max_usage

Unnamed: 0,metric,date,max of index
0,app_opens,2017-03-06,348
1,matches,2015-09-25,10
2,messages_received,2015-09-25,26
3,messages_sent,2015-09-25,36
4,swipes_likes,2015-09-26,153
5,swipes_passes,2016-06-12,87


In [24]:
trace_tbl = go.Table(header=dict(values=max_usage.columns), 
                     cells=dict(values=max_usage.values.T),
                     name="Max Usage Metrics")
data = [trace_tbl]

In [25]:
iplot(data)

In [26]:
derived_metrics = usage.gather_usage_stats(usage_df)


In [27]:
derived_metrics = pd.Series(derived_metrics)

In [28]:
derived_metrics

like_to_pass_ratio           3.73
n_avg_msg_rec_per_match      3.13
n_avg_msg_sent_per_match     3.87
swipes/app_open              4.21
swipes_per_act_day          24.28
swipes_per_tot_cal_day       11.0
total_swipes                16196
dtype: object

In [29]:
trace_tbl = go.Table(header=dict(values=derived_metrics.index), 
                     cells=dict(values=derived_metrics.values),
                     name="Derived Usage Metrics")
data = [trace_tbl]
layout = dict(width=1300, height=300)
fig = dict(data=data, layout=layout)

In [30]:
iplot(fig)

In [31]:
usage_df2 = usage_df.copy()
usage_df2.index = pd.to_datetime(usage_df.index, format="%Y-%m-%d")
usage_df2.index = pd.Series(usage_df2.index).apply(msg_fx.flatten_date)

# Create aggregated time series data and plots
funcs = {"mean": np.mean, "max": np.max}  # , "std":np.std}
for usage_col in usage_df2.columns:
    gb_dt = pd.Series(usage_df2[usage_col]).groupby(usage_df2.index)
    agg_series = {usage_col + " " + func_names: gb_dt.apply(func) for func_names, func in funcs.items()}

In [32]:
traces = []
for usage_col in usage_df2.columns:
    gb_dt = pd.Series(usage_df2[usage_col]).groupby(usage_df2.index)
    temp_traces = []
    for func_names, func in funcs.items():
        temp_series = gb_dt.apply(func)
        temp_name = usage_col + " " + func_names
        temp_traces.append(create_plots(temp_series, temp_name))
    traces.append(temp_traces)

NameError: name 'create_plots' is not defined

In [None]:
len(traces)

In [33]:
for trace2 in traces:
    iplot(trace2)

In [35]:
def create_plots(flag_over_time, flag_name):
    trace = go.Scatter(
            x=flag_over_time.index,
            y=flag_over_time,
            name= flag_name
        )
    return(trace)


## Usage Plots

In [36]:
usage_df.head()

Unnamed: 0,app_opens,matches,messages_received,messages_sent,swipes_likes,swipes_passes
2014-11-22,3,0,0,0,32,0
2014-11-23,1,0,0,0,39,0
2014-11-24,1,0,0,0,3,0
2014-11-25,4,2,0,0,46,0
2014-11-26,19,1,0,0,67,4


In [87]:
from calendar import monthrange

In [60]:
usage_df.index = pd.to_datetime(usage_df.index)

In [61]:
usage_gb = usage_df.groupby(pd.Grouper(freq='M'))

In [63]:
sum_per_month = usage_gb.sum()

In [64]:
max_per_month = usage_gb.max()

In [86]:
max_per_month.head()

Unnamed: 0,app_opens,matches,messages_received,messages_sent,swipes_likes,swipes_passes,total_swipes
2014-11-30,19.0,2.0,0.0,0.0,67.0,4.0,71.0
2014-12-31,25.0,4.0,0.0,0.0,96.0,3.0,98.0
2015-01-31,6.0,2.0,0.0,0.0,68.0,2.0,69.0
2015-02-28,5.0,1.0,0.0,0.0,85.0,1.0,85.0
2015-03-31,5.0,1.0,0.0,0.0,67.0,4.0,67.0


In [75]:
days_0 = usage_gb.get_group(name='2015-03-31')

In [105]:
days_0

Unnamed: 0,app_opens,matches,messages_received,messages_sent,swipes_likes,swipes_passes,total_swipes
2015-03-05,1,1,0,0,0,0,0
2015-03-06,2,0,0,0,67,0,67
2015-03-15,2,0,0,0,37,0,37
2015-03-23,1,0,0,0,13,0,13
2015-03-24,1,0,0,0,55,4,59
2015-03-25,4,0,0,0,0,0,0
2015-03-28,3,0,0,0,47,1,48
2015-03-29,5,0,0,0,0,0,0


In [149]:
# Not working 3/30
def calc_sum_0(month_gb):
    tmp = month_gb.apply(lambda col: np.where(col == 0, 1, 0))
#     print(tmp)
    sum_tmp = tmp.sum()
    return(sum_tmp)

In [None]:
# Not working 3/30
tmp2 = usage_gb.apply(calc_sum_0)

**Working workflow **

In [151]:
calc_sum_0 = lambda month_gb: sum(month_gb.apply(lambda col: np.where(col == 0, 1, 0)))

In [152]:
days_0.apply(calc_sum_0)

app_opens            0
matches              7
messages_received    8
messages_sent        8
swipes_likes         3
swipes_passes        6
total_swipes         3
dtype: int64

In [153]:
sums  = []
for k,v in usage_gb:
    print(k)
    sums.append(v.apply(calc_sum_0))

2014-11-30 00:00:00
2014-12-31 00:00:00
2015-01-31 00:00:00
2015-02-28 00:00:00
2015-03-31 00:00:00
2015-04-30 00:00:00
2015-05-31 00:00:00
2015-06-30 00:00:00
2015-07-31 00:00:00
2015-08-31 00:00:00
2015-09-30 00:00:00
2015-10-31 00:00:00
2015-11-30 00:00:00
2015-12-31 00:00:00
2016-01-31 00:00:00
2016-02-29 00:00:00
2016-03-31 00:00:00
2016-04-30 00:00:00
2016-05-31 00:00:00
2016-06-30 00:00:00
2016-07-31 00:00:00
2016-08-31 00:00:00
2016-09-30 00:00:00
2016-10-31 00:00:00
2016-11-30 00:00:00
2016-12-31 00:00:00
2017-01-31 00:00:00
2017-02-28 00:00:00
2017-03-31 00:00:00
2017-04-30 00:00:00
2017-05-31 00:00:00
2017-06-30 00:00:00
2017-07-31 00:00:00
2017-08-31 00:00:00
2017-09-30 00:00:00
2017-10-31 00:00:00
2017-11-30 00:00:00
2017-12-31 00:00:00
2018-01-31 00:00:00
2018-02-28 00:00:00
2018-03-31 00:00:00
2018-04-30 00:00:00
2018-05-31 00:00:00
2018-06-30 00:00:00
2018-07-31 00:00:00
2018-08-31 00:00:00
2018-09-30 00:00:00
2018-10-31 00:00:00
2018-11-30 00:00:00
2018-12-31 00:00:00


In [154]:
totals = pd.concat(sums,axis=1)

In [155]:
totals= totals.transpose()

In [156]:
totals.index = usage_gb.groups.keys()

In [157]:
totals.shape

(50, 7)

In [158]:
totals.head()

Unnamed: 0,app_opens,matches,messages_received,messages_sent,swipes_likes,swipes_passes,total_swipes
2014-11-30,0.0,6.0,9.0,9.0,0.0,7.0,0.0
2014-12-31,0.0,12.0,16.0,16.0,3.0,10.0,3.0
2015-01-31,0.0,15.0,21.0,21.0,12.0,18.0,12.0
2015-02-28,0.0,8.0,10.0,10.0,5.0,9.0,5.0
2015-03-31,0.0,7.0,8.0,8.0,3.0,6.0,3.0


In [162]:
# Not working 3/30 -- wish it were 
usage_gb[list(usage_df.columns)].apply(calc_sum_0)

TypeError: unsupported operand type(s) for +: 'int' and 'str'

In [None]:
monthrange(name1.month)

In [90]:
usage_gb.groups

{Timestamp('2014-11-30 00:00:00', freq='M'): 9,
 Timestamp('2014-12-31 00:00:00', freq='M'): 25,
 Timestamp('2015-01-31 00:00:00', freq='M'): 46,
 Timestamp('2015-02-28 00:00:00', freq='M'): 56,
 Timestamp('2015-03-31 00:00:00', freq='M'): 64,
 Timestamp('2015-04-30 00:00:00', freq='M'): 67,
 Timestamp('2015-05-31 00:00:00', freq='M'): 73,
 Timestamp('2015-06-30 00:00:00', freq='M'): 79,
 Timestamp('2015-07-31 00:00:00', freq='M'): 87,
 Timestamp('2015-08-31 00:00:00', freq='M'): 95,
 Timestamp('2015-09-30 00:00:00', freq='M'): 115,
 Timestamp('2015-10-31 00:00:00', freq='M'): 139,
 Timestamp('2015-11-30 00:00:00', freq='M'): 150,
 Timestamp('2015-12-31 00:00:00', freq='M'): 163,
 Timestamp('2016-01-31 00:00:00', freq='M'): 181,
 Timestamp('2016-02-29 00:00:00', freq='M'): 204,
 Timestamp('2016-03-31 00:00:00', freq='M'): 228,
 Timestamp('2016-04-30 00:00:00', freq='M'): 241,
 Timestamp('2016-05-31 00:00:00', freq='M'): 243,
 Timestamp('2016-06-30 00:00:00', freq='M'): 263,
 Timestamp(

In [37]:
import dash_core_components as dcc

In [45]:
usage_df.columns

Index(['app_opens', 'matches', 'messages_received', 'messages_sent',
       'swipes_likes', 'swipes_passes', 'total_swipes'],
      dtype='object')

In [48]:
dict_metrics_checklist = {
    'label': 'Application Opened', 'value': 'app_opens', 
    'label': 'Matches', 'value': 'matches',
    'label': 'Messages Recieved', 'value': 'messages_received', 
    'label': 'Messages Sent', 'value': 'messages_sent',
    'label': 'Swipes Right', 'value': 'swipes_likes',
    'label': 'Swipes Left', 'value': 'swipes_passes',
    'label': 'Total Swipes', 'value': 'total_swipes'
}

In [50]:
dcc.Checklist(options = dict_metrics_checklist, 
              values=['app_opens'])

Checklist(options={'label': 'Total Swipes', 'value': 'total_swipes'}, values=['app_opens'])

In [None]:
dict_analysis_types = {
    'label': 'Days 0', 'value': ''
}