In [1]:
# load packages
import pandas as pd
# !pip install deep_translator
from deep_translator import GoogleTranslator, exceptions
# !pip install luga
from luga import language
import numpy as np
# !pip install vaderSentiment
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

### Load and prepare data

In [18]:
# read the ticket data from the parquet file (path is relative to the working directory)
source_path = 'translated.parquet.gzip'
raw = pd.read_parquet(source_path)

In [19]:
# start the working frame from a lower-cased copy of the multilingual text column
data = pd.DataFrame({'input': raw['trans_text'].str.lower()})

### Translate multilingual tickets into English for processing

In [21]:
def translate_str(text):
    """Detect the language of ``text`` and translate it to English.

    Parameters
    ----------
    text : str
        The ticket text to translate.

    Returns
    -------
    tuple
        ``(translation, language_code)``. ``language_code`` is the detected
        code upper-cased (with 'zh' remapped to 'zh-CN' first). When the
        translator raises ``LanguageNotSupportedException`` the original
        ``text`` is returned in place of a translation.
    """
    detected = language(text).name
    # the translator is given 'zh-CN' whenever the detector reports 'zh'
    lang = 'zh-CN' if detected == 'zh' else detected

    try:
        translator = GoogleTranslator(source=lang, target='en')
        trans = translator.translate(text)
    except exceptions.LanguageNotSupportedException:
        # unsupported source language: pass the text through untouched
        trans = text

    return trans, lang.upper()

In [26]:
# translate every ticket to English, keeping only the text (first item of the returned pair)
data['input_english'] = data['input'].map(lambda ticket: translate_str(ticket)[0])

### Create initial urgency ratings based on sentiment and strength of the negative sentiment

In [27]:
# run sentiment analysis to find negatives and potential negatives
sia = SentimentIntensityAnalyzer()


def v_polarity(text):
    """Return the 'compound' entry of the analyzer's polarity scores for ``text``."""
    return sia.polarity_scores(text)['compound']


def v_negativity(text):
    """Return the 'neg' entry of the analyzer's polarity scores for ``text``."""
    return sia.polarity_scores(text)['neg']


# Score each ticket once and read both values from the same result; calling
# v_polarity and v_negativity separately would run the analyzer twice per ticket.
vader_scores = data['input_english'].map(sia.polarity_scores)
data['polarity'] = vader_scores.map(lambda scores: scores['compound'])
data['negativity'] = vader_scores.map(lambda scores: scores['neg'])

In [28]:
# label tickets as positive, negative, or neutral
min_positive = 0.3
min_neutral = 0

# Positive: polarity above min_positive AND (no negative component at all OR
# polarity above 0.7, in which case a negative component is tolerated).
is_positive = ((data['polarity'] > min_positive)
               & ((data['negativity'] == 0) | (data['polarity'] > 0.7)))
# Negative: polarity below min_neutral OR any negative component.
is_negative = (data['polarity'] < min_neutral) | (data['negativity'] > 0)
# Neutral: polarity within [min_neutral, min_positive] (both ends inclusive).
is_neutral = data['polarity'].between(min_neutral, min_positive)

# np.select takes the first matching condition, so the order above matters.
# The default must be a string: with string choices, the implicit integer default (0)
# has no common dtype and newer NumPy releases raise a TypeError. 'Unknown' is only
# produced for rows matching no rule (e.g. a missing polarity score).
data['sentiment'] = np.select([is_positive, is_negative, is_neutral],
                              ['Positive', 'Negative', 'Neutral'],
                              default='Unknown')

In [29]:
# classify urgency based on level of negativity
high_urgency_max = -0.5
mid_urgency_max = -0.35
low_urgency_min = -0.2

# (mask, level) pairs in priority order: np.select uses the first mask that matches,
# so a polarity sitting exactly on a shared boundary takes the earlier rule's level
urgency_rules = [
    (data['polarity'] < high_urgency_max, 4),
    ((data['polarity'] > low_urgency_min) & (data['sentiment'] != "Positive"), 1),
    (data['polarity'].between(mid_urgency_max, low_urgency_min), 2),
    (data['polarity'].between(high_urgency_max, mid_urgency_max), 3),
]

# rows matching no rule get level 0
data['urgency_polarity'] = np.select([mask for mask, _ in urgency_rules],
                                     [level for _, level in urgency_rules],
                                     default=0).astype(int)

### Adjust urgency rating based on words and characters used in the ticket

In [30]:
# keywords and symbols for urgency; every occurrence in a ticket adds one to its score
urgent_words = ['need', 'urgent', 'urgency', 'urgently', 'please', 'help', 'useless', 'immediate', 'immediately',
                'dire', 'asap', 'pay', 'paid', 'worst', 'worse', 'terrible', 'terribly', 'broke', 'broken',
                'disappoint', 'disappointed', 'disappointingly', 'disappointing', 'quick', 'fast',
                'dying', 'dead', 'death', 'kill', 'important', 'serious', 'pressing', 'now']
urgent_symbols = ['!', '$', '?']


def urgent_word_count(text):
    """Count urgency signals in ``text``.

    The score is the number of occurrences of each character in
    ``urgent_symbols`` plus the number of whitespace-separated words that
    appear in ``urgent_words``. Word matching ignores case and any
    non-letter characters attached to a word ("HELP!!" counts as "help").

    Parameters
    ----------
    text : str
        The (English) ticket text.

    Returns
    -------
    int
        Total number of urgent symbols and urgent words found.
    """
    symbol_hits = sum(text.count(symbol) for symbol in urgent_symbols)

    # Keep letters and spaces only, so punctuation cannot block a match, and
    # lower-case the result: the keywords are all lower-case but translated
    # text keeps its capitalisation ("Please", "HELP").
    letters_only = ''.join(ch for ch in text if ch.isalpha() or ch == " ").lower()

    # urgent_words has no repeated entries, so set membership gives the same
    # total as summing a per-keyword count, in a single pass over the tokens
    keywords = set(urgent_words)
    word_hits = sum(1 for token in letters_only.split() if token in keywords)

    return symbol_hits + word_hits

In [31]:
# score each translated ticket by its number of urgent words and symbols
data['urgency_text'] = data['input_english'].map(urgent_word_count)

In [32]:
# set levels for increasing urgency based on text counts
if len(data) > 20:
    # Rank the counts from highest to lowest before picking the cut-offs. Indexing the
    # column in its stored order would return the count of whichever ticket happens to
    # sit at that row, not the value reached by the top 10% / top 25% of tickets.
    ranked_counts = data['urgency_text'].sort_values(ascending=False)
    urgency_text_level2 = ranked_counts.iloc[round(len(ranked_counts) * .1)]
    urgency_text_level1 = ranked_counts.iloc[round(len(ranked_counts) * .25)]
else:
    # too few tickets for data-driven cut-offs: fall back to fixed thresholds
    urgency_text_level2 = 4
    urgency_text_level1 = 2

In [33]:
# increase urgency level based on category and text count score
# only tickets that already have a non-zero urgency level can be bumped by their text score
can_escalate = data['urgency_polarity'] > 0

# +1 when the text score exceeds the level-2 threshold ...
data['rating'] = np.where(can_escalate & (data['urgency_text'] > urgency_text_level2),
                          data['urgency_polarity'] + 1,
                          data['urgency_polarity'])

# ... and another +1 when it exceeds the level-1 threshold
data['rating'] = np.where(can_escalate & (data['urgency_text'] > urgency_text_level1),
                          data['rating'] + 1,
                          data['rating'])

# The 'Positive' / 'Neutral' labels are stored in the 'sentiment' column. 'polarity' holds
# the numeric compound score, so comparing it with a label never matches and the two
# adjustments below would be silently skipped.
data['rating'] = np.where(data['sentiment'] == 'Positive',
                          data['rating'] * 2,
                          data['rating'])

# neutral tickets: spread the rating out (1 -> 1, 2 -> 3, 3 -> 5)
data['rating'] = np.where(data['sentiment'] == 'Neutral',
                          (data['rating'] - 1) * 2 + 1,
                          data['rating'])

### Create final urgency classification based on above

In [34]:
# translate the numeric rating into a named urgency class
has_urgent_text = data['urgency_text'] > 0

rating_rules = [
    (data['rating'].isin([5, 6]), 'HIGH'),
    (data['rating'] == 4, 'Mid-High'),
    (data['rating'] == 3, 'Low-Mid'),
    (data['rating'] == 2, 'Low'),
    ((data['rating'] == 1) & has_urgent_text, 'Lowest'),
]

# everything else, including rating 1 with no urgent text, gets the default label
data['classification'] = np.select([mask for mask, _ in rating_rules],
                                   [label for _, label in rating_rules],
                                   default='No Negativity or Urgency')

In [35]:
# give the label columns an explicit order so they can be sorted meaningfully
classification_order = ['No Negativity or Urgency', 'Lowest', 'Low', 'Low-Mid', 'Mid-High', 'HIGH']
sentiment_order = ['Negative', 'Neutral', 'Positive']

data['classification'] = pd.Categorical(data['classification'],
                                        categories=classification_order,
                                        ordered=True)
data['sentiment'] = pd.Categorical(data['sentiment'],
                                   categories=sentiment_order,
                                   ordered=True)

# highest rating first, keep the reporting columns, and capitalise their names
report_columns = ['input', 'input_english', 'classification', 'sentiment', 'negativity', 'rating']
data = (
    data
    .sort_values(by='rating', ascending=False)[report_columns]
    .rename(columns=str.capitalize)
)

### View the final dataset to evaluate tickets based on final classification

In [36]:
# show the final frame as this cell's output
data

Unnamed: 0,Input,Input_english,Classification,Sentiment,Negativity,Rating
378,もう アマゾンは使わない! 商品が「出荷」になったので 一日中待つと 間違った国に送っただけです!,I don't use Amazon anymore! I waited all day b...,HIGH,Negative,0.199,6
361,is het mogelijk om amzl te verhinderen mijn pa...,is it possible to prevent amzl from sending my...,HIGH,Negative,0.363,6
377,どうしたんだ?どうしたの?,What's wrong? What's wrong?,HIGH,Negative,0.766,6
232,"это что на моем экране? не уверен, что это хбо...",what's on my screen? not sure if it's xbox sid...,HIGH,Negative,0.175,6
31,somebody from please help meeeeee 😩😩😩😩 i'm hav...,somebody from please help meeeeee 😩😩😩😩 i'm hav...,HIGH,Negative,0.362,6
...,...,...,...,...,...,...
233,5 приложение на xboxes не работает код ошибки ...,"5 app on xboxes not working error code 200, di...",No Negativity or Urgency,Positive,0.000,0
230,выкупил код для fifa точек сегодня вечером и д...,redeemed the code for fifa points tonight and ...,No Negativity or Urgency,Positive,0.000,0
225,"kinda нужно убедиться, что у меня есть 680$ cd...",kinda need to make sure i have 680$ cdn in my ...,No Negativity or Urgency,Positive,0.000,0
223,"когда кто-то или знает, когда xbox один x кора...",when someone or knows when xbox one x ships le...,No Negativity or Urgency,Positive,0.000,0
