In [25]:
# load packages
import pandas as pd
# !pip install deep_translator
from deep_translator import GoogleTranslator, exceptions
# !pip install luga
from luga import language
import numpy as np
# !pip install vaderSentiment
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

### Load and prepare data

In [3]:
# load data
raw = pd.read_parquet('translated.parquet.gzip')

In [4]:
# access your multilingual data and ensure it is lowercase
multi = pd.DataFrame()
multi['trans_text'] = raw.trans_text.str.lower()

### Translate multilingual tickets into English for processing

In [9]:
def translate_str(text):
    lang = language(text).name
    if lang == 'zh':
        lang = 'zh-CN'
    try:
        trans = GoogleTranslator(source=lang, target='en').translate(text)
    except exceptions.LanguageNotSupportedException:
        trans = text
        
    return trans, lang.upper()

In [10]:
# translate multilingual to english
multi['en_text'] = multi.trans_text.apply(translate_str).str[0]

### Create initial urgency ratings based on sentiment and strength of the negative sentiment

In [12]:
# run sentiment analysis to find negatives and potential negatives
sia = SentimentIntensityAnalyzer()

def v_polarity(text):
    return sia.polarity_scores(text)['compound']

def v_negativity(text):
    return sia.polarity_scores(text)['neg']

multi['polarity'] = multi['en_text'].apply(v_polarity)
multi['negativity'] = multi['en_text'].apply(v_negativity)

In [13]:
# label tickets as positive, negative, or neutral
min_positive = 0.3
min_neutral = 0

multi['polarity_class'] = np.select([((multi['polarity'] > min_positive) & ((multi['negativity'] == 0) | (multi['polarity'] > 0.7))), 
                                     ((multi['polarity'] < min_neutral) | (multi['negativity'] > 0)), 
                                     ((min_positive >= multi['polarity']) & (multi['polarity'] >= min_neutral))],
                                    ['Positive', 'Negative', 'Neutral'])

In [14]:
# classify urgency based on level of negativity
high_urgency_max = -0.5
mid_urgency_max = -0.35
low_urgency_min = -0.2

multi['urgency_polarity'] = np.select([(multi['polarity'] < high_urgency_max),
                                       ((multi['polarity'] > low_urgency_min) & (multi['polarity_class'] != "Positive")),
                                       ((mid_urgency_max <= multi['polarity']) & (multi['polarity'] <= low_urgency_min)),
                                       ((high_urgency_max <= multi['polarity']) & (multi['polarity'] <= mid_urgency_max))],
                                     [4, 1, 2, 3],
                                     -5).astype(int)

### Adjust urgency rating based on words and characters used in the ticket

In [15]:
# keywords for urgency; score based on number of words in text
urgent_words = ['need', 'urgent', 'urgency', 'urgently', 'please', 'help', 'useless', 'immediate', 'immediately', 'dire', 'asap', 'pay', 'paid', 'worst', 'worse', 'terrible', 'terribly', 'broke', 'broken', 'disappoint', 'disappointed', 'disappointingly', 'disappointing', 'quick', 'fast']
urgent_symbols = ['!', '$', '?']

def urgent_word_count(text):
    count = 0
    for symbol in urgent_symbols:
        count += text.count(symbol)/2
    
    text = ''.join([a for a in text if a.isalpha() or a == " "])
    
    to_check = text.split()
    for word in urgent_words:
        count += to_check.count(word)
  
    return count

In [16]:
# get counts of urgent text
multi['urgency_text'] = multi['en_text'].apply(urgent_word_count)

In [17]:
# get key urgency stats
urgency_text_90th_p = multi.urgency_text.iloc[round(len(multi.urgency_text)*.1)]
urgency_text_75th_p = multi.urgency_text.iloc[round(len(multi.urgency_text)*.25)]

In [18]:
# increase urgency level based on num of urgency text score
multi['urgency_rating'] = np.where(((multi['polarity'] != 'Positive') & (multi['urgency_text'] > urgency_text_75th_p) & (multi['urgency_polarity'] > 0)),
                                  multi['urgency_polarity'] + 1,
                                  multi['urgency_polarity'])

multi['urgency_rating'] = np.where(((multi['polarity'] != 'Positive') & (multi['urgency_text'] > urgency_text_90th_p) & (multi['urgency_polarity'] > 0)),
                                  multi['urgency_rating'] + 1,
                                  multi['urgency_rating'])

### Create final urgency classification based on above

In [19]:
# create classification based on urgency ratings
multi['urgency_class'] = np.select([(multi['urgency_rating'] == 6), (multi['urgency_rating'] == 5), (multi['urgency_rating'] == 4), (multi['urgency_rating'] == 3), (multi['urgency_rating'] == 2), ((multi['urgency_rating'] == 1) & (multi['urgency_text'] > 0)), ((multi['urgency_rating'] == 1) & (multi['urgency_text'] == 0))],
                                  ['HIGH', 'HIGH', 'Mid-High', 'Low-Mid', 'Low', 'Lowest', 'None'],
                                  'None')

In [23]:
# make urgency classes sortable
multi.urgency_class = pd.Categorical(multi.urgency_class,
                                    categories=['None', 'Lowest', 'Low', 'Low-Mid', 'Mid-High', 'HIGH'],
                                    ordered=True)

multi.sort_values(by='urgency_rating', ascending=False, inplace=True)

### View the final dataset to evaluate tickets based on final classification

In [24]:
multi

Unnamed: 0,trans_text,en_text,polarity,negativity,polarity_class,urgency_polarity,urgency_text,urgency_rating,urgency_class
361,is het mogelijk om amzl te verhinderen mijn pa...,is it possible to prevent amzl from sending my...,-0.8176,0.363,Negative,4,1.5,6,HIGH
232,"это что на моем экране? не уверен, что это хбо...",what's on my screen? not sure if it's xbox sid...,-0.6921,0.175,Negative,4,2.5,6,HIGH
31,somebody from please help meeeeee 😩😩😩😩 i'm hav...,somebody from please help meeeeee 😩😩😩😩 i'm hav...,-0.5423,0.362,Negative,4,3.0,6,HIGH
378,もう アマゾンは使わない! 商品が「出荷」になったので 一日中待つと 間違った国に送っただけです!,I don't use Amazon anymore! I waited all day b...,-0.6372,0.199,Negative,4,1.0,5,HIGH
136,"porque é que havia uma """" half gig update """" p...","why was there a """"half gig update"""" for world ...",-0.5994,0.281,Negative,4,0.5,5,HIGH
...,...,...,...,...,...,...,...,...,...
233,5 приложение на xboxes не работает код ошибки ...,"5 app on xboxes not working error code 200, di...",0.5511,0.000,Positive,-5,1.5,-5,
230,выкупил код для fifa точек сегодня вечером и д...,redeemed the code for fifa points tonight and ...,0.3182,0.000,Positive,-5,0.5,-5,
225,"kinda нужно убедиться, что у меня есть 680$ cd...",kinda need to make sure i have 680$ cdn in my ...,0.6249,0.000,Positive,-5,1.5,-5,
223,"когда кто-то или знает, когда xbox один x кора...",when someone or knows when xbox one x ships le...,0.4215,0.000,Positive,-5,0.0,-5,
