<a href="https://colab.research.google.com/github/nsomabalint/intent-detection/blob/modelling/infer_models.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install wandb transformers colab-env openai

Collecting wandb
  Downloading wandb-0.12.16-py2.py3-none-any.whl (1.8 MB)
[K     |████████████████████████████████| 1.8 MB 4.4 MB/s 
[?25hCollecting transformers
  Downloading transformers-4.19.2-py3-none-any.whl (4.2 MB)
[K     |████████████████████████████████| 4.2 MB 42.8 MB/s 
[?25hCollecting colab-env
  Downloading colab-env-0.2.0.tar.gz (4.7 kB)
Collecting openai
  Downloading openai-0.18.1.tar.gz (42 kB)
[K     |████████████████████████████████| 42 kB 1.2 MB/s 
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
    Preparing wheel metadata ... [?25l[?25hdone
Collecting shortuuid>=0.5.0
  Downloading shortuuid-1.0.9-py3-none-any.whl (9.4 kB)
Collecting pathtools
  Downloading pathtools-0.1.2.tar.gz (11 kB)
Collecting docker-pycreds>=0.4.0
  Downloading docker_pycreds-0.4.0-py2.py3-none-any.whl (9.0 kB)
Collecting setproctitle
  Downloading setproctitle-1.2.3-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_6

# Test BERT model

In [35]:
import wandb
import numpy as np
from transformers import AutoModelForSequenceClassification, AutoTokenizer
from transformers import TextClassificationPipeline

In [4]:
# Start a W&B run and use it to fetch the model artifact.
run = wandb.init()
artifact = run.use_artifact('nsoma/intent-detection/model-2fpcisbh:v0', type='model')
# Keep the value returned by download(); per the W&B docs this is the local
# path the artifact files were written to.
artifact_dir = artifact.download()

<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


[34m[1mwandb[0m: Downloading large artifact model-2fpcisbh:v0, 413.47MB. 3 files... Done. 0:0:0


In [5]:
# Show what ended up in the local `artifacts` directory.
%ls artifacts

[0m[01;34mmodel-2fpcisbh:v0[0m/


In [6]:
# Look up run 2fpcisbh through the public W&B API; its summary is read in the
# next cell.
# NOTE(review): this rebinds `run`, which until now held the object returned by
# wandb.init() -- consider a distinct name to avoid confusion on re-runs.
api = wandb.Api()
run = api.run("nsoma/intent-detection/2fpcisbh")
run

In [7]:
# Label names stored in the run summary; restore_label indexes into this with
# the integer taken from the pipeline's 'LABEL_<n>' output.
labels = run.summary['labels']

def restore_label(preds, label_names=None):
  """Attach the human-readable intent name to each pipeline prediction.

  Args:
    preds: Iterable of prediction dicts, each with a 'label' key of the form
      'LABEL_<index>' (the format the classification pipeline emits here).
    label_names: Optional sequence mapping class index -> intent name.
      Defaults to the notebook-level `labels` list.

  Returns:
    A new list of new dicts: each is a copy of the corresponding input
    prediction with an extra 'restored_label' key. The input dicts are not
    modified, so re-running a cell on the same predictions is side-effect free.

  Raises:
    ValueError: if a label does not end in an integer index.
    IndexError: if the index is outside `label_names`.
  """
  if label_names is None:
    label_names = labels

  restored = []
  for pred in preds:
    # 'LABEL_42' -> 42; take the part after the last underscore.
    class_index = int(pred['label'].rsplit('_', 1)[-1])
    restored.append({**pred, "restored_label": label_names[class_index]})

  return restored

In [8]:
# Load the classifier from the directory the W&B artifact was actually
# downloaded to (`artifact_dir`, set in the download cell) instead of a
# hand-typed copy of that path, so the two cannot drift apart.
model = AutoModelForSequenceClassification.from_pretrained(artifact_dir)
# NOTE(review): the tokenizer is taken from the 'bert-base-cased' checkpoint,
# presumably because the artifact does not ship one -- confirm it matches the
# tokenizer used for fine-tuning.
tokenizer = AutoTokenizer.from_pretrained('bert-base-cased')

pipe = TextClassificationPipeline(model=model,
                                  tokenizer=tokenizer)

Downloading:   0%|          | 0.00/29.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/570 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/208k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/426k [00:00<?, ?B/s]

In [9]:
# Sanity check on a card-related query.
restore_label(pipe("I've lost my card"))

[{'label': 'LABEL_42',
  'restored_label': 'lost_or_stolen_card',
  'score': 0.9656215310096741}]

In [10]:
# Query about ordering a taxi, to see how the classifier reacts to a request
# that is presumably outside its intent set (compare the score with the
# previous cell).
restore_label(pipe("I'd like to order a taxi. Can you help me with that?"))

[{'label': 'LABEL_44',
  'restored_label': 'order_physical_card',
  'score': 0.4878021776676178}]

# Test GPT-3 model

In [21]:
import colab_env
import openai
import os

# Refresh the environment via colab-env before reading the key
# (the cell output shows this mounting Google Drive).
colab_env.RELOAD()

openai.api_key = os.getenv("OPENAI_API_KEY")
if not openai.api_key:
    # os.getenv returns None when the variable is missing; fail here with a
    # clear message instead of with an authentication error on the first call.
    raise RuntimeError("OPENAI_API_KEY is not set in the environment")

Mounted at /content/gdrive


In [32]:
# Intent names indexed by the integer class id the fine-tuned model emits as
# its completion (77 entries, ids 0-76); used by pred_to_label.
label_order = ['Refund_not_showing_up', 'activate_my_card', 'age_limit', 'apple_pay_or_google_pay', 'atm_support', 'automatic_top_up', 'balance_not_updated_after_bank_transfer', 'balance_not_updated_after_cheque_or_cash_deposit', 'beneficiary_not_allowed', 'cancel_transfer', 'card_about_to_expire', 'card_acceptance', 'card_arrival', 'card_delivery_estimate', 'card_linking', 'card_not_working', 'card_payment_fee_charged', 'card_payment_not_recognised', 'card_payment_wrong_exchange_rate', 'card_swallowed', 'cash_withdrawal_charge', 'cash_withdrawal_not_recognised', 'change_pin', 'compromised_card', 'contactless_not_working', 'country_support', 'declined_card_payment', 'declined_cash_withdrawal', 'declined_transfer', 'direct_debit_payment_not_recognised', 'disposable_card_limits', 'edit_personal_details', 'exchange_charge', 'exchange_rate', 'exchange_via_app', 'extra_charge_on_statement', 'failed_transfer', 'fiat_currency_support', 'get_disposable_virtual_card', 'get_physical_card', 'getting_spare_card', 'getting_virtual_card', 'lost_or_stolen_card', 'lost_or_stolen_phone', 'order_physical_card', 'passcode_forgotten', 'pending_card_payment', 'pending_cash_withdrawal', 'pending_top_up', 'pending_transfer', 'pin_blocked', 'receiving_money', 'request_refund', 'reverted_card_payment?', 'supported_cards_and_currencies', 'terminate_account', 'top_up_by_bank_transfer_charge', 'top_up_by_card_charge', 'top_up_by_cash_or_cheque', 'top_up_failed', 'top_up_limits', 'top_up_reverted', 'topping_up_by_card', 'transaction_charged_twice', 'transfer_fee_charged', 'transfer_into_account', 'transfer_not_received_by_recipient', 'transfer_timing', 'unable_to_verify_identity', 'verify_my_identity', 'verify_source_of_funds', 'verify_top_up', 'virtual_card_not_working', 'visa_or_mastercard', 'why_verify_identity', 'wrong_amount_of_cash_received', 'wrong_exchange_rate_for_cash_withdrawal']


def pred_to_label(pred, label_names=None):
    """Map the raw completion text of the fine-tuned model to an intent name.

    Args:
        pred: Completion text expected to hold a class index, possibly padded
            with whitespace (e.g. " 42").
        label_names: Optional sequence mapping class index -> intent name.
            Defaults to the notebook-level `label_order` list.

    Returns:
        The intent name for the index, or "NA" when `pred` is not a string
        holding an integer or the integer is not a valid class index.
    """
    if label_names is None:
        label_names = label_order

    try:
        ind = int(pred.strip())
    except (AttributeError, TypeError, ValueError):
        # Not a string, or the model produced something that is not a number.
        return "NA"

    # Explicit range check: a negative number would otherwise be accepted by
    # Python indexing and silently return a label counted from the end.
    if 0 <= ind < len(label_names):
        return label_names[ind]
    return "NA"


def classify_finetuned(text, model="curie:ft-personal-2022-05-16-20-40-34"):
    """Classify `text` with the fine-tuned completion model.

    The prompt is `text` followed by " ->"; a single completion token is
    requested and interpreted as a class index.

    Args:
        text: The user utterance to classify.
        model: Name of the fine-tuned completion model to query. Defaults to
            the model this notebook was written for.

    Returns:
        The API response converted to a dict at the top level, with an extra
        'restored_label' key holding the intent name for the predicted index
        ("NA" if the completion could not be mapped to a label).
    """
    response = openai.Completion.create(
        model=model,
        prompt=text + " ->",
        max_tokens=1,
        # Greedy decoding: for classification we want the most likely class,
        # not a sample drawn at the API's default temperature.
        temperature=0,
        # The API documents 5 as the maximum for `logprobs`; the responses in
        # this notebook never contain more than five alternatives.
        logprobs=5,
    )

    result = dict(response)
    result['restored_label'] = pred_to_label(result["choices"][0]["text"])

    return result

In [36]:
# Same card-related query that was used for the BERT model.
res = classify_finetuned("I've lost my card")
res

{'choices': [<OpenAIObject at 0x7fb8db6f1950> JSON: {
  "finish_reason": "length",
  "index": 0,
  "logprobs": {
    "text_offset": [
      20
    ],
    "token_logprobs": [
      -0.00023130498
    ],
    "tokens": [
      " 42"
    ],
    "top_logprobs": [
      {
        " 41": -11.800586,
        " 42": -0.00023130498,
        " 43": -9.034862,
        " 44": -12.124188,
        "42": -9.388988
      }
    ]
  },
  "text": " 42"
}],
 'created': 1652818865,
 'id': 'cmpl-58xyja5kKvaUYYGk53JeijWJc91NR',
 'model': 'curie:ft-personal-2022-05-16-20-40-34',
 'object': 'text_completion',
 'restored_label': 'lost_or_stolen_card'}

In [56]:
# Turn the log-probabilities of the candidate first tokens into probabilities.
{
    token: np.exp(float(logprob))
    for token, logprob in dict(res["choices"][0]["logprobs"]["top_logprobs"][0]).items()
}

{' 41': 7.500161532404566e-06,
 ' 42': 0.9997687217689345,
 ' 43': 0.00011918162117430975,
 ' 44': 5.426653024358503e-06,
 '42': 8.364005715991686e-05}

In [57]:
# Same taxi query that was used for the BERT model.
# Note: this overwrites `res` from the previous example.
res = classify_finetuned("I'd like to order a taxi. Can you help me with that?")
res

{'choices': [<OpenAIObject at 0x7fb8db45d890> JSON: {
  "finish_reason": "length",
  "index": 0,
  "logprobs": {
    "text_offset": [
      55
    ],
    "token_logprobs": [
      -0.40016606
    ],
    "tokens": [
      " 38"
    ],
    "top_logprobs": [
      {
        " 38": -0.40016606,
        " 39": -1.992884,
        " 40": -3.744227,
        " 41": -2.3580327,
        " 44": -3.5532992
      }
    ]
  },
  "text": " 38"
}],
 'created': 1652819217,
 'id': 'cmpl-58y4PPt8N5cfKjgvJDkDUqFcPmp7j',
 'model': 'curie:ft-personal-2022-05-16-20-40-34',
 'object': 'text_completion',
 'restored_label': 'get_disposable_virtual_card'}

In [58]:
# Turn the log-probabilities of the candidate first tokens into probabilities.
{
    token: np.exp(float(logprob))
    for token, logprob in dict(res["choices"][0]["logprobs"]["top_logprobs"][0]).items()
}

{' 38': 0.6702087419306303,
 ' 39': 0.1363017637715281,
 ' 40': 0.023653906452239058,
 ' 41': 0.09460615893711945,
 ' 44': 0.02863002748115179}