In [None]:
#Import libraries
%matplotlib inline
import datetime as dt
import os
import sys
import pickle

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sqlalchemy import create_engine
from sqlalchemy_utils import database_exists, create_database
import mysql.connector

In [None]:
# Create database engine
# All connection settings are read from the environment so that no credentials
# are stored in the notebook itself.
dbname = os.environ["DBNAME"]
uname = os.environ["UNAME"]
passwd = os.environ["PASSWD"]
portnum = os.environ["PORTNUM"]
# Authenticate the engine as uname/passwd, i.e. the same account the raw
# connector below uses (uname must be the user, not the password).
# NOTE(review): credentials containing URL-reserved characters (@ : /) need to
# be percent-encoded before being embedded in the URL.
engine = create_engine(
    'mysql+mysqlconnector://{user}:{password}@localhost:{port}/{database}'.format(
        user=uname, password=passwd, port=portnum, database=dbname),
    echo=False)
# repr() of a SQLAlchemy URL masks the password, so the cell output can be shared.
print(repr(engine.url))
# Connect to database
# The port is passed explicitly so that this connection and the engine above
# always talk to the same server.
conn = mysql.connector.connect(
         user=uname,
         password=passwd,
         host='localhost',
         port=int(portnum),
         database=dbname)

In [None]:
# Load dfClientNote from a local pickle; the with-block closes the file handle
# even if unpickling fails.
# NOTE: unpickling can execute arbitrary code -- only load files this project produced.
with open("client_note_expanded.p", "rb") as noteFile:
    dfClientNote = pickle.load(noteFile)

In [None]:
# Load dfClientSubscription from a local pickle; the with-block closes the file
# handle even if unpickling fails.
# NOTE: unpickling can execute arbitrary code -- only load files this project produced.
with open("client_subscription_expanded.p", "rb") as subscriptionFile:
    dfClientSubscription = pickle.load(subscriptionFile)

In [None]:
# Preview the first five rows of the note data.
dfClientNote.head(n=5)

In [None]:
# Preview the first five rows of the subscription data.
dfClientSubscription.head(n=5)

# Merge results

In [None]:
def MergeNoteSubscription(client_id, notes=None, subscriptions=None):
    """Pair every note with the subscription rows whose period contains it.

    The two frames are inner-joined on 'client_id' (subscription columns
    first), then only rows with start_date <= date <= end_date are kept.
    The merged frame must therefore contain the columns 'date', 'start_date'
    and 'end_date'. The row count before and after the date filter is printed.

    Parameters
    ----------
    client_id : scalar or None
        Restrict both frames to this client before merging; None keeps all
        clients.
    notes : DataFrame, optional
        Note data; defaults to the notebook-level dfClientNote.
    subscriptions : DataFrame, optional
        Subscription data; defaults to the notebook-level dfClientSubscription.

    Returns
    -------
    DataFrame
        The filtered merge result (original merge row labels are kept).
    """
    if notes is None:
        notes = dfClientNote
    if subscriptions is None:
        subscriptions = dfClientSubscription

    # Identity test: "!= None" would be evaluated element-wise for array-like ids.
    if client_id is not None:
        notes = notes[notes['client_id'] == client_id]
        subscriptions = subscriptions[subscriptions['client_id'] == client_id]

    dfMerge = pd.merge(subscriptions, notes, on='client_id')
    print(len(dfMerge))

    # Keep only the notes dated inside the subscription period (inclusive).
    inPeriod = (dfMerge['date'] >= dfMerge['start_date']) & (dfMerge['date'] <= dfMerge['end_date'])
    dfMerge = dfMerge[inPeriod]
    print(len(dfMerge))
    return dfMerge

In [None]:
# None merges every client; set a single id (e.g. 210) to restrict the merge.
client_id = None
dfClient = MergeNoteSubscription(client_id)
dfClient.head(n=5)

# Reindex


In [None]:
# Index the merged data by tenant, client, created_date and interaction type.
# NOTE(review): created_date presumably identifies the subscription -- confirm.
dfClientMultiIndex = dfClient.set_index(
    keys=['tenant_id', 'client_id', 'created_date', 'interaction_type_id']
)
dfClientMultiIndex.head(n=5)

In [None]:
# Index the merged data by tenant, client, yearmonth and interaction type.
# This rebinds dfClientMultiIndex to the yearmonth-based view.
dfClientMultiIndex = dfClient.set_index(
    keys=['tenant_id', 'client_id', 'yearmonth', 'interaction_type_id']
)
dfClientMultiIndex.head(n=5)

# Counts For Each Interaction Type By Tenant-Client-Subscription

In [None]:
# Count the rows for every (tenant, client, created_date, interaction type) combination.
grouped = dfClient.groupby(
    ['tenant_id', 'client_id', 'created_date', 'interaction_type_id']
)
groupedagg = grouped.agg({'interaction_type_id': ['count']})
# reset_index turns the four group keys back into columns; the column labels
# are then flattened to plain names, with the aggregate called 'count'.
groupedaggreset = groupedagg.reset_index()
groupedaggreset.columns = [
    'tenant_id',
    'client_id',
    'created_date',
    'interaction_type_id',
    'count',
]
groupedaggreset.head(n=5)

In [None]:
# Collapse the per-interaction-type counts of each (tenant, client, created_date)
# into a single tuple.
# NOTE(review): the tuple only has entries for the interaction types present in
# that group, so its positions line up with the "(3,4,5)" label only when all
# three types occur -- confirm this is acceptable.
grouped = groupedaggreset.groupby(['tenant_id', 'client_id', 'created_date'])
groupedagg = grouped.agg({'count': lambda x: tuple(x)})
groupedagg = groupedagg.reset_index()
groupedagg.columns = [
    'tenant_id',
    'client_id',
    'created_date',
    'count (3,4,5)',
]
groupedagg.head(n=5)

In [None]:
# Wide view: one row per client, one column per created_date, count tuples as values.
# NOTE(review): tenant_id is not part of the pivot index, so pivot raises if the
# same client_id/created_date pair occurs under more than one tenant.
dfPivot = groupedagg.pivot(
    index='client_id',
    columns='created_date',
    values='count (3,4,5)',
)
dfPivot.head(n=5)

# Interaction Trajectories
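One row per client; each column is a day offset within that client's interaction date range, and each cell holds the number of interactions recorded on that day.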

In [None]:
def FillInDates(df):
    """Count interactions per calendar day, including days with none.

    Parameters
    ----------
    df : DataFrame
        Must contain the columns 'date' (anything pd.to_datetime accepts) and
        'interaction_type_id'. The frame passed in is not modified.

    Returns
    -------
    DataFrame
        One row for every day from the earliest to the latest 'date' in df,
        with the columns:
        'date'              -- the day (time of day removed),
        'interaction_count' -- non-null interaction_type_id values that day,
                               0 for days without any,
        'index'             -- day offset, 0 for the earliest day.
        An input without usable dates yields an empty frame with those columns.
    """
    emptyResult = pd.DataFrame(columns=['date', 'interaction_count', 'index'])
    if len(df) == 0:
        return emptyResult

    # Work on a copy so the caller's frame is never written to.
    interactions = df[['date', 'interaction_type_id']].copy()
    # Convert before taking min/max so the range is chronological even when the
    # dates arrive as strings; normalize() drops any time of day so that each
    # value matches the daily grid built below.
    interactions['date'] = pd.to_datetime(interactions['date']).dt.normalize()

    start = interactions['date'].min()
    end = interactions['date'].max()
    if pd.isnull(start):
        # Every date was missing: there is no range to fill.
        return emptyResult

    dfCurrent = (interactions.groupby('date')['interaction_type_id']
                 .count()
                 .reset_index()
                 .rename(columns={'interaction_type_id': 'interaction_count'}))

    # Daily grid covering the whole span, earliest day included.
    dates = pd.DataFrame({'date': pd.date_range(start, end)})

    dfDateMerge = pd.merge(dates, dfCurrent, on='date', how='left')
    # Days absent from dfCurrent had no interactions.
    dfDateMerge['interaction_count'] = dfDateMerge['interaction_count'].fillna(0)
    dfDateMerge['index'] = dfDateMerge.index
    return dfDateMerge

In [None]:
# For each tenant and client find interactions per day for all days
grouped = dfClient.groupby(['tenant_id', 'client_id'])
# Collect the per-client frames and concatenate once: growing a DataFrame inside
# the loop is quadratic, and DataFrame.append no longer exists in pandas 2.x.
clientFrames = []
# NOTE: the loop rebinds the notebook-level names tenant_id and client_id.
for (tenant_id, client_id), clientRows in grouped:
    df = FillInDates(clientRows)
    df['tenant_id'] = tenant_id
    df['client_id'] = client_id
    clientFrames.append(df)
# ignore_index=True gives every row a unique label (a bare reindex() leaves the
# repeated per-client labels untouched); the day offset is kept in the 'index' column.
# pd.concat rejects an empty list, so fall back to an empty frame in that case.
newdf = pd.concat(clientFrames, ignore_index=True) if clientFrames else pd.DataFrame()
#newdf.plot('index','interaction_count','line')

In [None]:
# Preview the first five rows of the long-format daily counts.
newdf.head(n=5)

In [None]:
# Restructure table to be wide: one row per client, one column per value of
# 'index', interaction counts as the cell values.
# NOTE(review): tenant_id is not part of the pivot, so a client_id shared by two
# tenants would produce duplicate client_id/index pairs and make pivot raise.
dfPivot = newdf.pivot(
    index='client_id',
    columns='index',
    values='interaction_count',
)

In [None]:
# Preview the first five rows of the wide table.
dfPivot.head(n=5)

In [None]:
# Persist the wide table both as a pickle and as a tab-separated file.
# The with-block guarantees the pickle is flushed and its handle closed.
with open("clients_daily_interactions.p", "wb") as pivotFile:
    pickle.dump(dfPivot, pivotFile)
dfPivot.to_csv('clients_daily_interactions.tsv', sep='\t')