# Bring in Data

In [1]:
import numpy as np
import warnings
warnings.filterwarnings("ignore")

In [2]:
import types
import pandas as pd
df = pd.read_csv("Event.csv")
df.head()

Unnamed: 0,Opportunity,Time,Stage,Action
0,OP001,43456.6055,1,Emailed Client
1,OP001,43466.19231,1,Emailed Client
2,OP001,43471.51592,2,Meeting with Client
3,OP001,43479.39509,3,Presented to Client
4,OP001,43483.32009,3,Presented to Client


In [3]:
df['Event Key'] = df['Stage'].astype(str) +'-'+ df['Action']

In [4]:
events = list(df['Event Key'].unique())
ev = events.pop(8)
events.append(ev)
events = ['0-Dummy'] + events

In [5]:
events

['0-Dummy',
 '1-Emailed Client',
 '2-Meeting with Client',
 '3-Presented to Client',
 '4-Prepared Custom Demonstration',
 '5-Prepared POC',
 '6-Provided Pre-Sales Support',
 '8-Prepared Commercials',
 '9-Negotiation Meeting',
 '7-Prepared RFP Responses',
 '10-Commercials Signed',
 '11-Post Sales Support']

In [6]:
df.head()

Unnamed: 0,Opportunity,Time,Stage,Action,Event Key
0,OP001,43456.6055,1,Emailed Client,1-Emailed Client
1,OP001,43466.19231,1,Emailed Client,1-Emailed Client
2,OP001,43471.51592,2,Meeting with Client,2-Meeting with Client
3,OP001,43479.39509,3,Presented to Client,3-Presented to Client
4,OP001,43483.32009,3,Presented to Client,3-Presented to Client


# Prepare Data

In [7]:
series_length = 5
categories = len(events)
time_steps = series_length -1 

In [8]:
df.set_index('Opportunity', inplace=True)

In [9]:
history = []
for op in df.index:
    for x in range(len(df['Stage'].loc[op].values)-series_length):
        history.append(df['Stage'].loc[op].values[x:x+series_length])

In [10]:
history[:5]

[array([1, 1, 2, 3, 3], dtype=int64),
 array([1, 2, 3, 3, 3], dtype=int64),
 array([2, 3, 3, 3, 4], dtype=int64),
 array([3, 3, 3, 4, 5], dtype=int64),
 array([3, 3, 4, 5, 5], dtype=int64)]

In [11]:
from tensorflow.keras.utils import to_categorical

In [12]:
X= np.array([row[:time_steps] for row in history])

In [13]:
X = to_categorical(X)

In [14]:
y=np.array([row[time_steps] for row in history])

In [15]:
y = to_categorical(y).reshape(-1,categories)

In [16]:
X.shape

(16176, 4, 12)

In [17]:
X

array([[[0., 1., 0., ..., 0., 0., 0.],
        [0., 1., 0., ..., 0., 0., 0.],
        [0., 0., 1., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]],

       [[0., 1., 0., ..., 0., 0., 0.],
        [0., 0., 1., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]],

       [[0., 0., 1., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]],

       ...,

       [[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]],

       [[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]],

       [[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 1., 0., 0.]]], dtype=float32)

In [18]:
y.shape

(16176, 12)

# Create Train and Test Set

In [19]:
from sklearn.model_selection import train_test_split

  LARGE_SPARSE_SUPPORTED = LooseVersion(scipy_version) >= '0.14.0'


In [20]:
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.3)

In [21]:
print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)

(11323, 4, 12) (4853, 4, 12) (11323, 12) (4853, 12)


# Modelling

In [22]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense

In [23]:
model = Sequential()

In [24]:
model.add(LSTM(10, activation='relu', return_sequences=True, input_shape=(time_steps,categories)))
model.add(LSTM(10, activation='relu'))
model.add(Dense(categories, activation='softmax'))
model.compile('adam', loss='categorical_crossentropy')

In [25]:
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, 4, 10)             920       
                                                                 
 lstm_1 (LSTM)               (None, 10)                840       
                                                                 
 dense (Dense)               (None, 12)                132       
                                                                 
Total params: 1,892
Trainable params: 1,892
Non-trainable params: 0
_________________________________________________________________


In [26]:
model.fit(X_train,y_train, epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x1d7c4cbd5f8>

# Generate Predictions

In [27]:
prediction = model.predict(X_test)



In [28]:
# Next event is
events[np.argmax(prediction[1])]

'9-Negotiation Meeting'

In [29]:
np.argmax(y_test[1])

9

In [30]:
np.argmax(prediction[1])

8

In [31]:
model.save('tf_model')

INFO:tensorflow:Assets written to: tf_model\assets


# Evaluate Accuracy

In [32]:
import tensorflow as tf
from tensorflow.keras.metrics import top_k_categorical_accuracy

In [33]:
print('Correct event was in top prediction:', top_k_categorical_accuracy(y_test,prediction, k=1).numpy().sum()/top_k_categorical_accuracy(y_test,prediction, k=2).numpy().size)
print('Correct event was in top two prediction:', top_k_categorical_accuracy(y_test,prediction, k=2).numpy().sum()/top_k_categorical_accuracy(y_test,prediction, k=3).numpy().size)

Correct event was in top prediction: 0.5427570574902122
Correct event was in top two prediction: 0.8969709458067175


In [34]:
X[1]

array([[0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0.]], dtype=float32)

In [69]:
from ipywidgets import widgets
lbl_act_1 = widgets.Label('select first activity in sequence from the dropdown')
display(lbl_act_1)
w_act_1 = widgets.Dropdown(
    options=['0-Dummy',
 '1-Emailed Client',
 '2-Meeting with Client',
 '3-Presented to Client',
 '4-Prepared Custom Demonstration',
 '5-Prepared POC',
 '6-Provided Pre-Sales Support',
 '8-Prepared Commercials',
 '9-Negotiation Meeting',
 '7-Prepared RFP Responses',
 '10-Commercials Signed',
 '11-Post Sales Support'],
    value='0-Dummy',
    description='Activity_1:',
)

def on_change_act_1(change):
    if change['type'] == 'change' and change['name'] == 'value':
        global act_1_val
        act_1_val = change['new']

w_act_1.observe(on_change_act_1)

display(w_act_1)

Label(value='select first activity in sequence from the dropdown')

Dropdown(description='Activity_1:', options=('0-Dummy', '1-Emailed Client', '2-Meeting with Client', '3-Presen…

In [70]:
lbl_act_2 = widgets.Label('select second activity in sequence from the dropdown')
display(lbl_act_2)
w_act_2 = widgets.Dropdown(
    options=['0-Dummy',
 '1-Emailed Client',
 '2-Meeting with Client',
 '3-Presented to Client',
 '4-Prepared Custom Demonstration',
 '5-Prepared POC',
 '6-Provided Pre-Sales Support',
 '8-Prepared Commercials',
 '9-Negotiation Meeting',
 '7-Prepared RFP Responses',
 '10-Commercials Signed',
 '11-Post Sales Support'],
    value='0-Dummy',
    description='Activity_2:',
)

def on_change_act_2(change):
    if change['type'] == 'change' and change['name'] == 'value':
        global act_2_val
        act_2_val = change['new']

w_act_2.observe(on_change_act_2)

display(w_act_2)

Label(value='select second activity in sequence from the dropdown')

Dropdown(description='Activity_2:', options=('0-Dummy', '1-Emailed Client', '2-Meeting with Client', '3-Presen…

In [71]:
lbl_act_3 = widgets.Label('select third activity in sequence from the dropdown')
display(lbl_act_3)
w_act_3 = widgets.Dropdown(
    options=['0-Dummy',
 '1-Emailed Client',
 '2-Meeting with Client',
 '3-Presented to Client',
 '4-Prepared Custom Demonstration',
 '5-Prepared POC',
 '6-Provided Pre-Sales Support',
 '8-Prepared Commercials',
 '9-Negotiation Meeting',
 '7-Prepared RFP Responses',
 '10-Commercials Signed',
 '11-Post Sales Support'],
    value='0-Dummy',
    description='Activity_3:',
)

def on_change_act_3(change):
    if change['type'] == 'change' and change['name'] == 'value':
        global act_3_val
        act_3_val = change['new']

w_act_3.observe(on_change_act_3)

display(w_act_3)

Label(value='select third activity in sequence from the dropdown')

Dropdown(description='Activity_3:', options=('0-Dummy', '1-Emailed Client', '2-Meeting with Client', '3-Presen…

In [72]:
lbl_act_4 = widgets.Label('select fourth activity in sequence from the dropdown')
display(lbl_act_4)
w_act_4 = widgets.Dropdown(
    options=['0-Dummy',
 '1-Emailed Client',
 '2-Meeting with Client',
 '3-Presented to Client',
 '4-Prepared Custom Demonstration',
 '5-Prepared POC',
 '6-Provided Pre-Sales Support',
 '8-Prepared Commercials',
 '9-Negotiation Meeting',
 '7-Prepared RFP Responses',
 '10-Commercials Signed',
 '11-Post Sales Support'],
    value='0-Dummy',
    description='Activity_4:',
)

def on_change_act_4(change):
    if change['type'] == 'change' and change['name'] == 'value':
        global act_4_val
        act_4_val = change['new']

w_act_4.observe(on_change_act_4)

display(w_act_4)

Label(value='select fourth activity in sequence from the dropdown')

Dropdown(description='Activity_4:', options=('0-Dummy', '1-Emailed Client', '2-Meeting with Client', '3-Presen…

In [73]:
act_lst = [int(act_1_val[0]), int(act_2_val[0]), int(act_3_val[0]), int(act_4_val[0])]

In [74]:
act_lst

[1, 3, 4, 5]

In [75]:
act_arr = np.array([act_lst])

In [76]:
act_arr.shape

(1, 4)

In [77]:
to_categorical(act_arr, 12)

array([[[0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0.]]], dtype=float32)

In [78]:
to_categorical(act_arr, 12).shape

(1, 4, 12)

In [79]:
model.predict(to_categorical(act_arr, 12))



array([[1.4357687e-09, 2.4832747e-10, 1.4915275e-05, 2.3579933e-06,
        4.1683125e-03, 6.0076416e-01, 3.5540947e-01, 3.7901882e-02,
        1.6294440e-03, 1.0727534e-04, 2.1235412e-06, 1.4101728e-09]],
      dtype=float32)

In [80]:
events[np.argmax(model.predict(to_categorical(act_arr, 12)))]



'5-Prepared POC'