In [10]:
import os
import sys
import json
import numpy as np

# Add the parent directory to the import path before importing the local
# `fake_lightgbm` module (presumably it lives one level up — verify).
sys.path.append('..')
import fake_lightgbm as lgb

# Make NumPy raise FloatingPointError for every floating-point error class
# (divide, over, under, invalid) instead of emitting a warning and returning
# inf/nan. The score-normalization steps below rely on this to detect a
# failed division. The previous settings returned by seterr are discarded.
_ = np.seterr(all='raise')

In [11]:
# load samples

samp_dir = './samples'

# os.listdir() returns entries in arbitrary order. Sort the names so that the
# sample order (and therefore samples[0] and every result list derived from
# `samples`) is identical on every run and every machine.
filenames = sorted(
    f for f in os.listdir(samp_dir)
    if os.path.isfile(os.path.join(samp_dir, f))  # skip sub-directories
)

# Each entry of `samples` is the raw text of one file; parsing happens later.
samples = []
for filename in filenames:
    # Explicit encoding so reading does not depend on the platform locale.
    with open(os.path.join(samp_dir, filename), encoding='utf-8') as fp:
        samples.append(fp.read())

print(samples[0])

{
"ts": 1622637967.6218433,
"uid": 147757848,
"number": "00000125",
"type": "out",
"traffic": {
  "about": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
  "services": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
  "contacts": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
  "roaming": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
  "tariffs": [14, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
  "simcards": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
  "balance": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
  "internet": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
  "messaging": [0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0],
  "support": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]},
"balance": null,
"roaming": 

In [12]:
# load model

# Build the booster from the model file stored one directory up.
bst = lgb.Booster(
    model_file='../model.txt',
)

# Class labels. Their order matters: the inference cells use it both to lay
# out the feature vector and to map score positions back to names.
classes = [
    "about",
    "services",
    "contacts",
    "roaming",
    "tariffs",
    "simcards",
    "balance",
    "internet",
    "messaging",
    "support",
]

In [13]:
# simple inference code

reference_results = []

for sample in samples:
    # 1. extract features from json message
    mes = json.loads(sample)
    # Per-class traffic lists, taken in `classes` order and flattened into a
    # single 1-D feature vector.
    features = np.array([mes['traffic'][cls] for cls in classes]).flatten()

    # 2. make prediction (a batch of one row; take that single row back out)
    pred_vals = bst.predict([features, ])[0]

    # Normalize the scores so they sum to 1. With np.seterr(all='raise') in
    # effect the division raises FloatingPointError when it cannot be done
    # (e.g. the scores sum to zero). In that case fall back to the raw
    # prediction for THIS sample; previously `scores` was left unbound on the
    # first sample or silently carried over from the previous sample.
    try:
        scores = pred_vals / pred_vals.sum()
    except FloatingPointError:
        scores = pred_vals
    reason = classes[scores.argmax()]  # label of the highest-scoring class

    # 3. build the output message and serialize it
    scores_dict = {cls: round(scores[i], 2) for i, cls in enumerate(classes)}
    res = dict(ts=mes['ts'], uid=mes['uid'], number=mes['number'], classes=scores_dict, reason=reason)
    reference_results.append(json.dumps(res, indent=2))

In [14]:
# Show the first serialized result (a JSON string) as a quick sanity check.
print(reference_results[0])

{
  "ts": 1622637967.6218433,
  "uid": 147757848,
  "number": "00000125",
  "classes": {
    "about": 0.0,
    "services": 0.0,
    "contacts": 0.0,
    "roaming": 0.0,
    "tariffs": 0.84,
    "simcards": 0.0,
    "balance": 0.0,
    "internet": 0.0,
    "messaging": 0.16,
    "support": 0.0
  },
  "reason": "tariffs"
}


In [16]:
# simple inference code

reference_results = []

for sample in samples:
    ## 1. extract features from json message

    # 1.1. parse message and get features
    mes = json.loads(sample)
    # Per-class traffic lists, taken in `classes` order and flattened into a
    # single 1-D feature vector.
    features = np.array([mes['traffic'][cls] for cls in classes]).flatten()

    ## 2. make prediction result

    # 2.1 get prediction (a batch of one row; take that single row back out)
    pred_vals = bst.predict([features, ])[0]

    # 2.2 normalize scores, get class
    # With np.seterr(all='raise') in effect the division raises
    # FloatingPointError when it cannot be done (e.g. the scores sum to zero);
    # fall back to the raw scores then. Only that error is caught so that
    # KeyboardInterrupt and genuine bugs are no longer swallowed.
    try:
        scores = pred_vals / pred_vals.sum()
    except FloatingPointError:
        scores = pred_vals
    reason = classes[scores.argmax()]  # label of the highest-scoring class

    # 2.3 make and serialize message
    scores_dict = {cls: round(scores[i], 2) for i, cls in enumerate(classes)}
    res = dict(ts=mes['ts'], uid=mes['uid'], number=mes['number'], classes=scores_dict, reason=reason)
    reference_results.append(json.dumps(res, indent=2))
    
    

In [17]:
# split inference code: feature extraction and prediction run as two separate
# stages, with prediction done once over the whole micro-batch

splitted_results = []

## 1. extract features from json messages

feature_msgs = []

for sample in samples:
    # 1.1. parse message and get features
    mes = json.loads(sample)
    features = np.array([mes['traffic'][cls] for cls in classes]).flatten()

    # 1.2. create intermediate message: identifying fields plus the feature
    # vector as a plain list
    data = dict(ts=mes['ts'], uid=mes['uid'], number=mes['number'], features=features.tolist())
    feature_msgs.append(data)

## 2. make prediction results

# Stack the per-message feature vectors into one 2-D batch (one row each).
features_batch = np.array([msg['features'] for msg in feature_msgs])

# 2.1 get prediction
pred_batch = bst.predict(features_batch)  # microbatch prediction

for row, msg in enumerate(feature_msgs):
    pred_vals = pred_batch[row]

    # 2.2 normalize scores, get class
    # With np.seterr(all='raise') in effect the division raises
    # FloatingPointError when it cannot be done (e.g. the scores sum to zero);
    # keep the raw scores then. Only that error is caught so that
    # KeyboardInterrupt and genuine bugs are no longer swallowed.
    try:
        scores = pred_vals / pred_vals.sum()
    except FloatingPointError:
        scores = pred_vals

    reason = classes[scores.argmax()]  # label of the highest-scoring class

    # 2.3 make and serialize message
    # `k` indexes classes; it is deliberately distinct from the batch index `row`.
    scores_dict = {cls: round(scores[k], 2) for k, cls in enumerate(classes)}
    res = dict(ts=msg['ts'], uid=msg['uid'], number=msg['number'], classes=scores_dict, reason=reason)
    splitted_results.append(json.dumps(res, indent=2))

# Both pipelines walk `samples` in the same order, so compare the ordered
# lists: a set comparison would hide duplicated, missing or reordered results.
assert splitted_results == reference_results, \
    'split pipeline output differs from the reference pipeline'