## Train Model

In [1]:
import numpy as np
import pandas as pd

from tensorflow.keras.layers import Dense, Embedding, LSTM, SpatialDropout1D
from tensorflow.keras.models import Sequential
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras import metrics

In [2]:
# Hyperparameters and data location shared by the training cells below.
maxLen = 128  # character codes kept per message; also the Embedding input_length
epochs = 50  # upper bound on training epochs (the EarlyStopping callback may stop sooner)
dim_embedding = 50  # size of each character embedding vector
batch_size = 256  # batch size passed to model.fit
dropout_rate = 0.25  # used by SpatialDropout1D and by the LSTM's dropout / recurrent_dropout
num_LSTM_cell = 64  # number of units in the LSTM layer
trainDataFileName = 's3://smle-experiments/datasets/phishing_email/train.json'  # read as JSON lines

In [3]:
def encode_message(text, length=maxLen):
    """Encode `text` as a fixed-length vector of printable-ASCII codes.

    Characters whose code point is outside 32..126 are dropped, the remaining
    codes are truncated to `length`, and shorter messages are right-padded
    with 32 (the code for a space).

    NOTE(review): the SPL2 scoring query takes the first 128 raw characters
    without dropping non-printable ones first -- confirm that both encodings
    agree on the data being scored.

    Args:
        text: The message to encode.
        length: Number of codes in the returned vector.

    Returns:
        A 1-D integer numpy array with `length` entries.
    """
    encoded = np.full(length, 32)
    pos = 0
    for ch in text:
        if pos >= length:
            break
        code = ord(ch)
        if 32 <= code <= 126:
            encoded[pos] = code
            pos += 1
    return encoded


df = pd.read_json(trainDataFileName, lines=True)

# Missing header/body fields become empty strings so the concatenation below
# cannot fail on NaN values.
text_columns = df[['From', 'Subject', 'Content']].fillna('').astype(str)
messages = text_columns['From'] + ' ' + text_columns['Subject'] + ' ' + text_columns['Content']
xTrain = np.array([encode_message(message) for message in messages])

# Comparing the string form accepts both the literal string 'True' and a real
# boolean True; comparing a boolean directly against 'True' would silently
# label every row as 0.
yTrain = (df['isPhishing'].astype(str) == 'True').values.astype(float).reshape(-1, 1)

In [4]:
# Character-level binary classifier: embedding -> spatial dropout -> LSTM -> sigmoid.
model = Sequential()
# A vocabulary of 128 covers every code the encoding cell can emit (32..126).
model.add(Embedding(128, dim_embedding, input_length=maxLen))
model.add(SpatialDropout1D(dropout_rate))
model.add(LSTM(num_LSTM_cell, dropout=dropout_rate, recurrent_dropout=dropout_rate))
model.add(Dense(1, activation='sigmoid'))
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Stop once val_loss has not improved by at least min_delta for 7 epochs.
# restore_best_weights=True rolls the model back to the best epoch; without it
# the model that gets published keeps the weights from 7 epochs past the best one.
early_stopping = EarlyStopping(monitor='val_loss', patience=7, min_delta=0.00001,
                               restore_best_weights=True)
# The last 20% of the training arrays is held out for validation.
history = model.fit(xTrain, yTrain, epochs=epochs, batch_size=batch_size, validation_split=0.2,
                    callbacks=[early_stopping])

Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Instructions for updating:
If using Keras pass *_constraint arguments to layers.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
Train on 40000 samples, validate on 10000 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50


## Publish Model

In [5]:
import os
import smle
import warnings

# Install a blanket 'ignore' filter: Python warnings are suppressed from this point on.
warnings.filterwarnings('ignore')
from smle.context import Context
cwd = os.getcwd()  # not referenced again in the visible cells

# Settings passed to smle's Context below. The username and the storage
# access/secret keys are blank placeholders here.
# NOTE(review): supply real values at run time (e.g. from the environment)
# rather than saving them in the notebook.
config = {  
    'username': '',
    
    'model_storage_type': 's3', 
    'model_storage_address': "s3.us-west-2.amazonaws.com",
    'model_storage_bucket': 'smle-experiments',
    'model_storage_access_key': '',
    'model_storage_secret_key': '',
    'model_storage_secure': False,  # presumably disables TLS for the storage connection -- TODO confirm
}
smle_context = Context(config)

# Load the spl2_kernel IPython extension; presumably it provides the %%spl2
# cell magics used further down -- TODO confirm.
%load_ext spl2_kernel

In [6]:
# Path and name handed to smle_context.publish for the trained model.
model_path = "models/phishing_email"
model_name = "phishing_email"

# Delete /tmp/<model_name> (if present) before publishing.
!rm -rf /tmp/{model_name}
# Sample rows passed to publish via `sample`; presumably used to derive the
# model's input schema -- TODO confirm against the smle documentation.
sample_data = pd.read_csv('s3://smle-experiments/datasets/phishing_email/sample_file.csv')
smle_context.publish(model, model_name=model_name, path = model_path, sample= sample_data)

Instructions for updating:
Please use `model.save(..., save_format="tf")` or `tf.keras.models.save_model(..., save_format="tf")`.
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Instructions for updating:
This function will only be available through the v1 compatibility library as tf.compat.v1.saved_model.utils.build_tensor_info or tf.compat.v1.saved_model.build_tensor_info.
INFO:tensorflow:Signatures INCLUDED in export for Classify: None
INFO:tensorflow:Signatures INCLUDED in export for Regress: None
INFO:tensorflow:Signatures INCLUDED in export for Predict: None
INFO:tensorflow:Signatures INCLUDED in export for Train: ['train']
INFO:tensorflow:Signatures INCLUDED in export for Eval: 

In [7]:
import json
import onnxruntime as rt
from pprint import pprint


def inspect_model(dir_path, model_name):
    """Print a published model's metadata and its ONNX input/output signature.

    Args:
        dir_path: Directory holding ``metadata.json`` and ``<model_name>.onnx``.
            A trailing path separator is optional.
        model_name: Base name of the ONNX file, without the ``.onnx`` extension.

    Raises:
        OSError: If ``metadata.json`` cannot be opened.
        json.JSONDecodeError: If ``metadata.json`` is not valid JSON.
    """
    import os  # local import so the function does not depend on another cell's imports

    # Use a context manager so the file handle is closed deterministically.
    with open(os.path.join(dir_path, "metadata.json")) as metadata_file:
        metadata = json.load(metadata_file)

    print("Metadata:")
    pprint(metadata)
    print()

    # os.path.join works whether or not dir_path ends with a separator.
    model_path = os.path.join(dir_path, model_name + ".onnx")
    sess = rt.InferenceSession(model_path)

    def describe(nodes):
        """Reduce ONNX graph nodes to plain name/type/shape dicts."""
        return [{"name": node.name, "type": node.type, "shape": node.shape} for node in nodes]

    onnx_model_specs = {
        "inputs": describe(sess.get_inputs()),
        "outputs": describe(sess.get_outputs()),
    }
    print("ONNX model specs:")

    pprint(onnx_model_specs)


# Reads /tmp/metadata.json and /tmp/<model_name>.onnx; presumably both were
# written there by the publish step -- TODO confirm.
inspect_model("/tmp/", model_name)

Metadata:
{'inputFields': [{'name': 'embedding_input:0',
                  'size': 128,
                  'type': 'floatTensor'}],
 'modelName': 'phishing_email',
 'outputFields': [{'name': 'dense/Sigmoid:0',
                   'size': 1,
                   'type': 'floatTensor'}]}

ONNX model specs:
{'inputs': [{'name': 'embedding_input:0',
             'shape': ['unk__236', 128],
             'type': 'tensor(float)'}],
 'outputs': [{'name': 'dense/Sigmoid:0',
              'shape': ['unk__237', 1],
              'type': 'tensor(float)'}]}


In [8]:
%%spl2_add_params from_python
# Parameter set named phishing_email for the SPL2 query cell that follows.
# model_path and model_name are referenced by that query's apply_model step;
# input_field is not referenced by the visible query, which spells the field
# name out literally.
phishing_email = dict(
    model_path = "s3://smle-experiments/models/phishing_email",
    model_name = "phishing_email",
    input_field = "embedding_input:0",
)

In [9]:
%%spl2 -q phishing_email
| from read_json("s3://smle-experiments/datasets/phishing_email/test.json")
| eval eventLine=concat(From, " ", Subject, " ", Content, " ", "                                                                                                                                ")
| eval mapC = {" ":32,"!":33,"\"":34,"#":35,"$$":36,"%":37,"&":38,"'":39,"(":40,")":41,"*":42,"+":43,",":44,"-":45,".":46,"/":47,"0":48,"1":49,"2":50,"3":51,"4":52,"5":53,"6":54,"7":55,"8":56,"9":57,":":58,";":59,"<":60,"=":61,">":62,"?":63,"@":64,"A":65,"B":66,"C":67,"D":68,"E":69,"F":70,"G":71,"H":72,"I":73,"J":74,"K":75,"L":76,"M":77,"N":78,"O":79,"P":80,"Q":81,"R":82,"S":83,"T":84,"U":85,"V":86,"W":87,"X":88,"Y":89,"Z":90,"[":91,"\\":92,"]":93,"^":94,"_":95,"`":96,"a":97,"b":98,"c":99,"d":100,"e":101,"f":102,"g":103,"h":104,"i":105,"j":106,"k":107,"l":108,"m":109,"n":110,"o":111,"p":112,"q":113,"r":114,"s":115,"t":116,"u":117,"v":118,"w":119,"x":120,"y":121,"z":122,"{":123,"|":124,"}":125,"~":126}
| eval 'embedding_input:0' = for_each(
        iterator(mvrange(1,129), "i"),
        cast(map_get(mapC, substr(eventLine, i, 1)), "float") )
| apply_model connection_id="" path="$model_path" name="$model_name" 
| rename 'dense/Sigmoid:0' AS probability 
| where mvindex(probability, 0) > 0.5 
| select eventLine, probability 
;

HBox(children=(FloatProgress(value=0.0, max=5.0), HTML(value='')))

 Finished.                     

Unnamed: 0,eventLine,probability
0,karem ahmed <karemahmed-18@hotmail.fr> PLEA...,[0.9720325]
1,nkomo robert <nkomo002@5fm.za.com> FAMILY R...,[0.9768889]
2,Mrs Mariam Taylor <familyboxjanetfamilybo...,[0.9780622999999999]
3,Mr.Fred Chima <fredi@fastermail.com> busi...,[0.97669697]
4,Alexander Afadia <eeaesq@123.com> Please repl...,[0.9775347999999999]
...,...,...
82,Comfort Somba. <comfort_somba11@yahoo.fr> F...,[0.97327405]
83,bintu pat <pat_bintu004@hotmail.com> TRUSTI...,[0.96824765]
84,FR RICHARD DAVID <unofice@katamail.com> FRO...,[0.96306074]
85,<joseph_m333@micasilla.net> Farmers Greetings...,[0.97275084]





<spl2_kernel.spl2_runner.SPL2Job at 0x7f4c14e9a2d0>

In [10]:
# `_` is IPython's "last output" -- here the SPL2Job displayed by the previous cell.
# NOTE(review): this only works when run immediately after the %%spl2 cell;
# confirm whether the job can be captured by name instead.
df = _.df

In [11]:
# Display the full eventLine text of the first row returned by the query.
df.iloc[0]['eventLine']

" karem ahmed  <karemahmed-18@hotmail.fr>  PLEASE VERY URGENT. FROM THE DESK OF Mr,KAREM AHMED.BILL AND EXCHANGE MANAGER,BANK OF AFRICA (B.O.A) OUAGADOUGOU,BURKINA FASO WEST AFRICA.PHONE CONTACT  00226.78.89.77.34DEAR FRIEND,I know you may be surprise to recieve this e-mail ; I got your contact address from the internet while I was searching for my friend that has similar name with you.I am the manager of bill and exchange BANK OF AFRICA (B.O.A) at foreign remittance department.There is a business I would want you to champion for me, in my department I discovered an abandoned sum of ($31.500.000 U.S) thirty one million five hundred thousands US dollars)In an account that belongs to one of our foreign customer who died along with his entire family on 25TH JULY, 2000 CONCORDE PLANE CRASH [Flight AF4590] with the whole passengers aboard. The name of the deceased man was(MR.ANDREAS SCHRANNER from Munich Germany)N.B. In other for you to believe me honestly, visit the web site to enable you 