# IMPORT

In [1]:
import getpass
import os
import tempfile

import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, roc_auc_score, roc_curve
from sklearn.pipeline import Pipeline

In [5]:
import unboxapi

# LOAD DATA, TRAIN MODEL

In [6]:
# Load the labelled texts and shuffle the rows. The fixed seed keeps the
# shuffle, and therefore the positional train/test split further down,
# identical across kernel restarts.
df = pd.read_csv("./training.csv")
df = df.sample(frac=1, random_state=42)

In [7]:
# Convert the string labels to a pandas categorical, then expose the integer
# code of each category as the training target.
df["category"] = df["category"].astype("category")
df["label_code"] = df["category"].cat.codes

In [37]:
# Class names ordered by integer code: position i holds the category whose
# `label_code` is i. Reading them from the categorical dtype avoids scanning
# every row and skips the -1 code pandas assigns to missing categories.
label_list = df.category.cat.categories.tolist()

# Code -> name lookup, kept alongside the list for convenience.
label_dict = dict(enumerate(label_list))

# Spot-check two entries of the mapping.
label_list[21], label_list[54]

('declined_transfer', 'transfer_not_received_by_recipient')

In [9]:
# Positional split of the shuffled frame: the first 7000 rows train the
# model, the remainder is held out for evaluation and upload.
df_train, df_test = df.iloc[:7000], df.iloc[7000:]

In [10]:
# Preview the training split; the rendered output elides the middle rows.
df_train

Unnamed: 0,text,category,label_code
7130,I was unable to transfer to another account.,beneficiary_not_allowed,5
3849,Why do I have to give you my drivers license,why_verify_identity,60
5012,my card was frozen due to putting in the wrong...,declined_cash_withdrawal,20
7619,Can I transfer money to my card via bank-trans...,transfer_into_account,53
7522,Hi I'm sending some money over to an investmen...,failed_transfer,28
...,...,...,...
3664,details need to be modified,edit_personal_details,24
1001,I need assistance with understanding which fia...,fiat_currency_support,29
114,I still haven't gotten my new card. When will...,card_arrival,8
6472,Looking at my statement I see that I was doubl...,transaction_charged_twice,51


In [11]:
# Bag-of-words features (unigrams and bigrams, English stop words removed)
# feeding a logistic-regression classifier with default settings.
model = Pipeline(
    steps=[
        ('count_vect', CountVectorizer(stop_words='english', ngram_range=(1, 2))),
        ('lr', LogisticRegression()),
    ]
)

# `fit` returns the fitted pipeline, which the cell displays as its output.
model.fit(df_train["text"], df_train["label_code"])

Pipeline(steps=[('count_vect',
                 CountVectorizer(ngram_range=(1, 2), stop_words='english')),
                ('lr', LogisticRegression())])

In [12]:
# Score the fitted pipeline on the held-out rows and print per-class metrics.
x_test = df_test["text"]
y_test = df_test["label_code"]
y_pred = model.predict(x_test)
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        18
           1       1.00      0.94      0.97        18
           2       0.87      0.93      0.90        14
           3       1.00      1.00      1.00        11
           4       0.77      0.81      0.79        21
           5       0.92      0.92      0.92        13
           6       0.79      0.79      0.79        14
           7       0.44      0.57      0.50         7
           8       0.60      0.90      0.72        20
           9       0.67      0.50      0.57        12
          10       0.92      1.00      0.96        12
          11       0.94      0.71      0.81        21
          12       1.00      0.87      0.93        30
          13       0.67      0.80      0.73        15
          14       0.95      0.90      0.93        21
          15       1.00      1.00      1.00         4
          16       1.00      0.68      0.81        19
          17       0.76    

In [13]:
# Smoke-test the fitted pipeline on two short strings; the result is an
# array of integer label codes.
model.predict(['good', 'bad'])

array([38, 38], dtype=int8)

# UNBOX

In [14]:
# Credentials are read from the environment, with an interactive prompt as a
# fallback, so they are never stored in the notebook or its outputs.
unbox_email = os.environ.get("UNBOX_EMAIL") or input("Unbox email: ")
unbox_password = os.environ.get("UNBOX_PASSWORD") or getpass.getpass("Unbox password: ")
client = unboxapi.UnboxClient(email=unbox_email, password=unbox_password)

## Create function

In [15]:
def predict_function(model, text_list):
    """Return the class probabilities `model` assigns to each text.

    Args:
        model: Any object exposing `predict_proba`; in this notebook, the
            fitted scikit-learn pipeline.
        text_list: The texts to score, passed to `predict_proba` unchanged.

    Returns:
        Whatever `model.predict_proba(text_list)` returns.
    """
    class_probabilities = model.predict_proba(text_list)
    return class_probabilities

In [16]:
# Three ad-hoc inputs to exercise the prediction function locally before it
# is packaged and uploaded.
texts = [
    'some new text, sweet noodles',
    'happy time',
    'sad day',
]

predict_function(model, texts)

array([[0.00852357, 0.01669756, 0.01068027, 0.0228188 , 0.0089844 ,
        0.01406004, 0.00781095, 0.00566106, 0.03103156, 0.01397135,
        0.0084006 , 0.00551908, 0.00665079, 0.00639764, 0.00522564,
        0.00405348, 0.01197911, 0.00618045, 0.03428782, 0.00832405,
        0.0098909 , 0.00663925, 0.00642163, 0.00419048, 0.04736751,
        0.01073335, 0.00992688, 0.01244332, 0.00918442, 0.01340178,
        0.00755564, 0.0150688 , 0.0042628 , 0.00992986, 0.01100459,
        0.02882817, 0.00618531, 0.00530631, 0.17106178, 0.00573017,
        0.00845502, 0.01572657, 0.02002863, 0.00509546, 0.00816318,
        0.00550121, 0.01270336, 0.02182341, 0.05706666, 0.03840284,
        0.02281725, 0.01485796, 0.00778001, 0.00471963, 0.02116457,
        0.00764416, 0.01711816, 0.01796821, 0.01659056, 0.01222613,
        0.01927908, 0.01247674],
       [0.01051825, 0.01628611, 0.01214672, 0.04459304, 0.00824011,
        0.0122483 , 0.00713119, 0.00710198, 0.00849703, 0.06595936,
        0.00333

# Package (function, model) & Upload to Unbox Server

In [28]:
print('Uploading model...')
# Send the prediction function together with the fitted pipeline; the class
# names are passed in label-code order.
response = client.add_model(
    function=predict_function,
    model=model,
    class_names=label_list,
    name='banking fixed',
    description='this is my sklearn banking model',
)
print('Complete. Response:')
response.json()

Uploading model...
Packaged bento content


  0%|          | 8.00k/4.92M [00:00<01:31, 56.2kB/s]

Connecting to Unbox server


100%|██████████| 4.92M/4.92M [00:00<00:00, 5.72MB/s]


Complete. Response:


{'_links': {'datasets': '/api/models/fe6f9a91-0d46-453a-8b44-1ce2c7faebab/datasets',
  'inferenceRuns': '/api/models/fe6f9a91-0d46-453a-8b44-1ce2c7faebab/inference-runs',
  'runReports': '/api/models/fe6f9a91-0d46-453a-8b44-1ce2c7faebab/run-reports',
  'self': '/api/models/fe6f9a91-0d46-453a-8b44-1ce2c7faebab'},
 'classNames': ['Refund_not_showing_up',
  'age_limit',
  'atm_support',
  'automatic_top_up',
  'balance_not_updated_after_cheque_or_cash_deposit',
  'beneficiary_not_allowed',
  'cancel_transfer',
  'card_acceptance',
  'card_arrival',
  'card_delivery_estimate',
  'card_linking',
  'card_not_working',
  'card_payment_fee_charged',
  'card_payment_not_recognised',
  'card_payment_wrong_exchange_rate',
  'card_swallowed',
  'change_pin',
  'compromised_card',
  'contactless_not_working',
  'declined_card_payment',
  'declined_cash_withdrawal',
  'declined_transfer',
  'direct_debit_payment_not_recognised',
  'disposable_card_limits',
  'edit_personal_details',
  'exchange_rate

In [29]:
print('\nUploading dataset (from file)...')
# Write the held-out split to a throwaway CSV and upload that file; the
# temporary directory is removed when the `with` block exits.
with tempfile.TemporaryDirectory() as tmpdir:
    csv_path = f'{tmpdir}/dataset_cloud.csv'
    df_test.to_csv(csv_path)
    response = client.add_dataset(
        file_path=csv_path,
        name="banking fixed",
        description='banking validation dataset',
        class_names=label_list,
        label_column_name='label_code',
        text_column_name='text',
    )
print('Complete. Response:')
response.json()

  0%|          | 0.00/86.1k [00:00<?, ?B/s]


Uploading dataset (from file)...


100%|██████████| 86.1k/86.1k [00:00<00:00, 271kB/s]


Complete. Response:


{'_links': {'models': '/api/datasets/4/models',
  'rows': '/api/datasets/4/rows',
  'self': '/api/datasets/4',
  'tags': '/api/datasets/4/tags'},
 'classNames': ['Refund_not_showing_up',
  'age_limit',
  'atm_support',
  'automatic_top_up',
  'balance_not_updated_after_cheque_or_cash_deposit',
  'beneficiary_not_allowed',
  'cancel_transfer',
  'card_acceptance',
  'card_arrival',
  'card_delivery_estimate',
  'card_linking',
  'card_not_working',
  'card_payment_fee_charged',
  'card_payment_not_recognised',
  'card_payment_wrong_exchange_rate',
  'card_swallowed',
  'change_pin',
  'compromised_card',
  'contactless_not_working',
  'declined_card_payment',
  'declined_cash_withdrawal',
  'declined_transfer',
  'direct_debit_payment_not_recognised',
  'disposable_card_limits',
  'edit_personal_details',
  'exchange_rate',
  'exchange_via_app',
  'extra_charge_on_statement',
  'failed_transfer',
  'fiat_currency_support',
  'get_physical_card',
  'getting_spare_card',
  'getting_virtua

In [19]:
# Display the training split again (the same frame previewed after the split).
df_train

Unnamed: 0,text,category,label_code
7130,I was unable to transfer to another account.,beneficiary_not_allowed,5
3849,Why do I have to give you my drivers license,why_verify_identity,60
5012,my card was frozen due to putting in the wrong...,declined_cash_withdrawal,20
7619,Can I transfer money to my card via bank-trans...,transfer_into_account,53
7522,Hi I'm sending some money over to an investmen...,failed_transfer,28
...,...,...,...
3664,details need to be modified,edit_personal_details,24
1001,I need assistance with understanding which fia...,fiat_currency_support,29
114,I still haven't gotten my new card. When will...,card_arrival,8
6472,Looking at my statement I see that I was doubl...,transaction_charged_twice,51


In [20]:
print('\nUploading dataset (from data frame)...')
response = client.add_dataframe(df=df_test,
                                name="df_test_name", 
                                description='this is my sentiment test dataset',
                                class_names=['negative', 'positive'],
                                label_column_name='polarity',
                                text_column_name='text')
print(f'Complete. Response:')
response.json()


Uploading dataset (from data frame)...


NameError: name 'df_test_name' is not defined