In [67]:
import json
import os

import requests

# Example ticket-tagging-api App usage.

This notebook shows how to use the ticket-tagging-api app.  It deliberately avoids the Steamship SDK and uses plain HTTP requests, so that the calls are easy to port to other languages.

Each instance of the app has its own built-in context where examples are collected, labels are stored, etc.

## Prerequisites

* This notebook assumes that you have already created an instance of the app (this is shown with the SDK in the `app_instance_creation` notebook).
* You must have the api_key associated with the deployed app instance.



## First, create a new App Instance

In this particular app, think of one "App Instance" as one "classifier" that you can specialize and use.

Use the `app_instance_creation.ipynb` notebook to create an App Instance, then copy its `base_url` into the cell below:

In [189]:
# Base URL of the deployed app instance (copied from app_instance_creation.ipynb).
# Route names are appended directly to this string, so keep the trailing slash.
base_url = "https://assembled.steamship.run/ticket-tagging-api-20220626/ticket-tagging-api-20220626/"

In [190]:
# Your Steamship API key.
#
# This has either been provided to you, or you can run:
#
#    npm install -g @steamship/cli
#    ship login
#    ship user:info
#
# to view it.
#
# The key is read from the STEAMSHIP_API_KEY environment variable so that the
# secret never has to be saved inside the notebook.  If the variable is not
# set, this falls back to an empty string, which you can replace by hand.
api_key = os.environ.get("STEAMSHIP_API_KEY", "")

In [191]:
# Every request authenticates by sending the API key as a bearer token.
headers = {"Authorization": "Bearer {}".format(api_key)}

In [192]:
# Tiny wrapper for post calls of json data to the app.
def app_call(route: str, parameters: dict, timeout: float = 60.0):
    """POST ``parameters`` as a JSON body to one route of the app.

    Args:
        route: Route name appended to the module-level ``base_url``
            (for example ``'tag_ticket'``).
        parameters: JSON-serialisable payload for the route.
        timeout: Seconds to wait for the server before giving up.  Without a
            timeout ``requests`` waits forever, which leaves the notebook
            cell hanging if the service stops responding.

    Returns:
        The ``requests.Response`` object; callers read ``.text`` or ``.json()``.

    Raises:
        requests.exceptions.Timeout: If the server does not respond in time.
    """
    return requests.post(base_url + route, headers=headers, json=parameters, timeout=timeout)

## You can always check the status of your classifier with `specialize_status`
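* A response of `UNSPECIALIZED` means the app will classify data points using a large language model which doesn't need any training.
* A response of `SPECIALIZATION_IN_PROGRESS` means training has been started but has not finished; the app keeps classifying with the large language model in the meantime.
* A response of `SPECIALIZED` means the app will classify with a trained classifier based on a dataset you have provided.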

In [193]:
# Nothing has been trained at this point, so the expected status is UNSPECIALIZED.
response = app_call(route="specialize_status", parameters={})
response.json()

{'status': 'UNSPECIALIZED'}

## First - a toy example for demonstrating the basic API

In [194]:
# Start from a clean slate: drop any examples left over from an earlier run.
app_call(route="clear_examples", parameters={}).text

'Removed 0 examples'

In [195]:
# Register the label set the classifier should choose from.
response = app_call(
    route="set_labels",
    parameters={"labels": ["lion", "tiger", "elephant"]},
)
response.text

'Labels accepted'

In [196]:
# Zero-shot classification: no examples have been provided and nothing is trained.
response = app_call(
    route="tag_ticket",
    parameters={"ticket_text": "I like big cats"},
)
response.json()

{'tiger': 0.6951351165771484,
 'lion': 0.6889989376068115,
 'elephant': 0.1538177877664566}

In [197]:
# Toy training examples as (ticket text, list of labels) pairs.
examples = [
    ("This is a ticket about lions.", ["lion"]),
    ("This is a ticket about tigers.", ["tiger"]),
    ("This is a ticket about an elephant.", ["elephant"]),
]

In [198]:
# Upload the examples one request at a time and echo each reply.
for ticket_text, ticket_labels in examples:
    response = app_call(
        route="add_example",
        parameters={"ticket_text": ticket_text, "labels": ticket_labels},
    )
    print(response.text)

Example accepted
Example accepted
Example accepted


In [199]:
# Ask the app how many examples it has stored, in total and per label.
response = app_call(route="count_examples", parameters={})
response.json()

{'total_examples': 3, 'label_examples': {'elephant': 1, 'lion': 1, 'tiger': 1}}

In [216]:
# Training is only started by `start_specialize` further down, so on an
# in-order run this should still report UNSPECIALIZED.
# NOTE(review): the saved output below shows SPECIALIZATION_IN_PROGRESS and has
# a later execution count than the training cell, so it appears to come from a
# re-run after training was started.
response = app_call(route="specialize_status", parameters={})
response.json()

{'status': 'SPECIALIZATION_IN_PROGRESS'}

In [201]:
# The toy examples are no longer needed; delete all three.
app_call(route="clear_examples", parameters={}).text

'Removed 3 examples'

## Now, let's try it with real data

In [202]:
# Read data adapted from test dataset, stripped to just the text to be
# tagged and lists of 'cleaned' tags (no underscores or leading or trailing spaces)
#
# JSON files are UTF-8 by convention; pass the encoding explicitly so the load
# does not depend on the platform's default encoding.
with open('test_data.json', encoding='utf-8') as data_file:
    data = json.load(data_file)

len(data)

2133

In [203]:
# Find all unique tag labels.
# A set comprehension removes duplicates without building a throwaway list, and
# sorting gives the same label order on every run (set iteration order for
# strings is not stable between kernel sessions).
labels = sorted({label for row in data for label in row['tags']})

In [204]:
# Send the full list of dataset tags to the app as its label set.
response = app_call(route="set_labels", parameters={"labels": labels})
response.text

'Labels accepted'

In [205]:
# Zero-shot tagging of the first ticket in the dataset (no specialization yet).
response = app_call(
    route="tag_ticket",
    parameters={"ticket_text": data[0]["text"]},
)
response.json()

{'non-support question': 0.7193117141723633,
 'people': 0.7233811616897583,
 'realtime overview': 0.2153659462928772,
 'needs review': 0.988425076007843,
 'alerts': 0.5230134129524231,
 'integratons': 0.7087873220443726,
 'closed by merge': 0.008600217290222645,
 'spam': 0.003957767505198717,
 'login': 0.5004848837852478,
 'recruiting': 0.1867390275001526,
 'feature request': 0.8226466774940491,
 'api': 0.8333175778388977,
 'security': 0.23934422433376312,
 'how do i': 0.7680777907371521,
 'performance': 0.853554368019104,
 'self-served': 0.002928373636677861,
 'why isn t this working': 0.6911810636520386,
 'system credit card redaction': 0.06164867430925369,
 'time off': 0.06952428817749023,
 'ticket mapping/queues': 0.07419668883085251,
 'web widget': 0.24844679236412048,
 'assembled action needed': 0.9657627940177917,
 'not a support issue': 0.0005713388090953231,
 'reports': 0.5989463925361633,
 'bug': 0.5133242607116699,
 'alerts acknowledged': 0.9112119674682617,
 'datadog alerts

## Next, let's specialize the model

There were not enough examples of all of the ticket types in the dataset, so we'll demonstrate here with only the top 10 ticket types.

In [206]:
# The ten tags kept for the specialization demo.
top_10_tags = [
    "how do i",
    "n/a",
    "why isn t this working",
    "not a support issue",
    "datadog alerts support",
    "datadog alerts",
    "forecasting and staffing",
    "scheduling",
    "inbound sales prospect",
    "people",
]

In [207]:
# Restrict the app's label set to the ten most common tags.
response = app_call(route="set_labels", parameters={"labels": top_10_tags})
response.text

'Labels accepted'

In [208]:
# Keep every ticket, but strip each ticket's tags down to those in top_10_tags.
# Tickets whose tags all fall outside the top 10 end up with an empty tag list.
top_tag_data = [
    {
        "text": row["text"],
        "tags": [tag for tag in row["tags"] if tag in top_10_tags],
    }
    for row in data
]

In [209]:
# Add examples to the app.
#
# NOTE:
# This cell will take a while to run, as we're adding each example one at a time.
#
# The reply to each request is checked so that rejected examples are reported
# instead of being silently dropped.
rejected_count = 0
for row in top_tag_data:
    response = app_call('add_example', {'ticket_text':row['text'], 'labels':row['tags']})
    if not response.ok:  # HTTP status of 400 or above
        rejected_count += 1

print(f"{rejected_count} of {len(top_tag_data)} examples were not accepted")
    

In [210]:
# Count the stored examples, in total and per label.
# The total is lower than the dataset size: once the tags are filtered to the
# top 10, some tickets are left with no tags at all.
response = app_call(route="count_examples", parameters={})
response.json()

{'total_examples': 1908,
 'label_examples': {'how do i': 716,
  'not a support issue': 255,
  'people': 188,
  'inbound sales prospect': 191,
  'scheduling': 191,
  'why isn t this working': 386,
  'datadog alerts support': 194,
  'forecasting and staffing': 191,
  'n/a': 450,
  'datadog alerts': 194}}

In [211]:
# Kick off specialization (training).
#
# NOTE:
#  - Full training can take up to FIVE HOURS.
#  - The app stays usable in the meantime, answering in UNSPECIALIZED mode.
#  - Until training finishes, the reported status is `SPECIALIZATION_IN_PROGRESS`.
#
response = app_call(route="start_specialize", parameters={})
response.text

'Started specialization. Please poll specialize_status'

In [229]:
# Confirm that training actually started: the status should now be
# SPECIALIZATION_IN_PROGRESS.
# NOTE(review): the saved output below shows UNSPECIALIZED and carries a later
# execution count than the surrounding cells, so it appears to come from an
# out-of-order re-run -- re-run this cell to confirm.
response = app_call(route="specialize_status", parameters={})
response.json()


{'status': 'UNSPECIALIZED'}

In [228]:
# Tagging keeps working during training; requests are answered zero-shot.
response = app_call(
    route="tag_ticket",
    parameters={"ticket_text": "Another example ticket about Adherence"},
)
response.json()

{'not a support issue': 0.0013767130440101027,
 'n/a': 0.0007463483489118516,
 'datadog alerts': 0.001233422663062811,
 'why isn t this working': 0.009248224087059498,
 'scheduling': 0.0002453002380207181,
 'how do i': 0.22573964297771454,
 'forecasting and staffing': 0.0004400373436510563,
 'datadog alerts support': 0.0009663355886004865,
 'people': 0.007066971622407436,
 'inbound sales prospect': 0.010559424757957458}

In [214]:
# Training takes roughly five hours; re-run this cell to poll until the
# status becomes SPECIALIZED.
response = app_call(route="specialize_status", parameters={})
response.json()

{'status': 'SPECIALIZATION_IN_PROGRESS'}

In [215]:
# Tag the first ticket again.  Once the status is SPECIALIZED, this call is
# served by the trained model.
# NOTE(review): the saved output below was captured while the status cell above
# still reported SPECIALIZATION_IN_PROGRESS, and its scores equal the zero-shot
# scores shown earlier for the same labels -- re-run after training completes.
response = app_call(
    route="tag_ticket",
    parameters={"ticket_text": data[0]["text"]},
)
response.json()

{'scheduling': 0.06945469975471497,
 'datadog alerts': 0.5168055891990662,
 'datadog alerts support': 0.6092717051506042,
 'inbound sales prospect': 0.2414204329252243,
 'n/a': 0.04905194044113159,
 'why isn t this working': 0.6911810636520386,
 'how do i': 0.7680777907371521,
 'forecasting and staffing': 0.013277684338390827,
 'not a support issue': 0.0005713388090953231,
 'people': 0.7233811616897583}