# Bring in Data

In [4]:
import numpy as np

In [5]:
import types
import pandas as pd
from botocore.client import Config
import ibm_boto3

def __iter__(self): return 0

# @hidden_cell
# The following code accesses a file in your IBM Cloud Object Storage. It includes your credentials.
# You might want to remove those credentials before you share the notebook.
client_e6ecf2033ae54d92b08944d13be35195 = ibm_boto3.client(service_name='s3',
    ibm_api_key_id='yourapikey',
    ibm_auth_endpoint="https://iam.ng.bluemix.net/oidc/token",
    config=Config(signature_version='oauth'),
    endpoint_url='yourendpoint')

body = client_e6ecf2033ae54d92b08944d13be35195.get_object(Bucket='yourbucket',Key='Event.csv')['Body']
# add missing __iter__ method, so pandas accepts body as file-like object
if not hasattr(body, "__iter__"): body.__iter__ = types.MethodType( __iter__, body )

df = pd.read_csv(body)
df.head()

Unnamed: 0,Opportunity,Time,Stage,Action
0,OP001,43456.6055,1,Emailed Client
1,OP001,43466.19231,1,Emailed Client
2,OP001,43471.51592,2,Meeting with Client
3,OP001,43479.39509,3,Presented to Client
4,OP001,43483.32009,3,Presented to Client


In [6]:
df['Event Key'] = df['Stage'].astype(str) +'-'+ df['Action']

In [7]:
events = list(df['Event Key'].unique())
ev = events.pop(8)
events.append(ev)
events = ['0-Dummy'] + events

In [8]:
events

['0-Dummy',
 '1-Emailed Client',
 '2-Meeting with Client',
 '3-Presented to Client',
 '4-Prepared Custom Demonstration',
 '5-Prepared POC',
 '6-Provided Pre-Sales Support',
 '8-Prepared Commercials',
 '9-Negotiation Meeting',
 '7-Prepared RFP Responses',
 '10-Commercials Signed',
 '11-Post Sales Support']

In [9]:
df.head()

Unnamed: 0,Opportunity,Time,Stage,Action,Event Key
0,OP001,43456.6055,1,Emailed Client,1-Emailed Client
1,OP001,43466.19231,1,Emailed Client,1-Emailed Client
2,OP001,43471.51592,2,Meeting with Client,2-Meeting with Client
3,OP001,43479.39509,3,Presented to Client,3-Presented to Client
4,OP001,43483.32009,3,Presented to Client,3-Presented to Client


# Prepare Data

In [10]:
series_length = 5
categories = len(events)
time_steps = series_length -1 

In [11]:
df.set_index('Opportunity', inplace=True)

In [12]:
history = []
for op in df.index:
    for x in range(len(df['Stage'].loc[op].values)-series_length):
        history.append(df['Stage'].loc[op].values[x:x+series_length])

In [13]:
np.array(history).shape

(16176, 5)

In [14]:
history[:5]

[array([1, 1, 2, 3, 3]),
 array([1, 2, 3, 3, 3]),
 array([2, 3, 3, 3, 4]),
 array([3, 3, 3, 4, 5]),
 array([3, 3, 4, 5, 5])]

In [22]:
from tensorflow.keras.utils import to_categorical

In [23]:
X= np.array([row[:time_steps] for row in history])

In [24]:
X = to_categorical(X)

In [25]:
y=np.array([row[time_steps] for row in history])

In [26]:
y = to_categorical(y).reshape(-1,categories)

In [27]:
X.shape

(16176, 4, 12)

In [28]:
y.shape

(16176, 12)

# Create Train and Test Set

In [29]:
from sklearn.model_selection import train_test_split

In [30]:
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.3)

In [31]:
print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)

(11323, 4, 12) (4853, 4, 12) (11323, 12) (4853, 12)


# Modelling

In [32]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense

In [33]:
#del model

In [34]:
model = Sequential()

In [35]:
model.add(LSTM(10, activation='relu', return_sequences=True, input_shape=(time_steps,categories)))
model.add(LSTM(10, activation='relu'))
model.add(Dense(categories, activation='softmax'))
model.compile('adam', loss='categorical_crossentropy')

In [36]:
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_2 (LSTM)                (None, 4, 10)             920       
_________________________________________________________________
lstm_3 (LSTM)                (None, 10)                840       
_________________________________________________________________
dense_1 (Dense)              (None, 12)                132       
Total params: 1,892
Trainable params: 1,892
Non-trainable params: 0
_________________________________________________________________


In [37]:
model.fit(X_train,y_train, epochs=10)

Instructions for updating:
Use tf.cast instead.
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7fed0bf61940>

# Generate Predictions

In [29]:
prediction = model.predict(X_test)

In [50]:
# Next event is
events[np.argmax(prediction[1])]

'10-Commercials Signed'

In [48]:
np.argmax(y_test[1])

10

In [49]:
np.argmax(prediction[1])

10

# Evaluate Accuracy

In [37]:
import tensorflow as tf
from tensorflow.keras.metrics import top_k_categorical_accuracy

In [53]:
with tf.Session() as sess:  
    print('Correct event was top prediction:', top_k_categorical_accuracy(y_test,prediction, k=1).eval())
    print('Correct event was in top two prediction:', top_k_categorical_accuracy(y_test,prediction, k=2).eval())
    print('Correct event was in top three prediction:', top_k_categorical_accuracy(y_test,prediction, k=3).eval())

Correct event was top prediction: 0.5641871
Correct event was in top two prediction: 0.90253454
Correct event was in top three prediction: 0.98578197
