In [10]:
# load packages
import pandas as pd
from easynmt import EasyNMT
import numpy as np
!pip install vaderSentiment
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

### Load and prepare data

In [2]:
# load data
raw = pd.read_parquet('translated.parquet.gzip')

In [27]:
# access your multilingual data and ensure it is lowercase
multi = pd.DataFrame()
multi['trans_text'] = raw.trans_text.str.lower()

### Translate multilingual tickets into English for processing

In [54]:
# build translation model
trans = EasyNMT('mbart50_m2en')

1.47kB [00:00, 94.3kB/s]                                                                                               


Downloading (‚Ä¶)lve/main/config.json:   0%|          | 0.00/1.51k [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to see activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


Downloading (‚Ä¶)"pytorch_model.bin";:   0%|          | 0.00/2.44G [00:00<?, ?B/s]

Downloading (‚Ä¶)neration_config.json:   0%|          | 0.00/268 [00:00<?, ?B/s]

Downloading (‚Ä¶)okenizer_config.json:   0%|          | 0.00/461 [00:00<?, ?B/s]

Downloading (‚Ä¶)ncepiece.bpe.model";:   0%|          | 0.00/5.07M [00:00<?, ?B/s]

Downloading (‚Ä¶)cial_tokens_map.json:   0%|          | 0.00/649 [00:00<?, ?B/s]

In [55]:
# translate multilingual to english
strings = multi.trans_text.tolist()

multi['en_text'] = trans.translate(strings, target_lang='en', max_new_tokens=200)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250


### Create initial urgency ratings based on sentiment and strength of the negative sentiment

In [120]:
# run sentiment analysis to find negatives and potential negatives
sia = SentimentIntensityAnalyzer()

def v_polarity(text):
    return sia.polarity_scores(text)['compound']

def v_negativity(text):
    return sia.polarity_scores(text)['neg']

multi['polarity'] = multi['en_text'].apply(v_polarity)
multi['negativity'] = multi['en_text'].apply(v_negativity)

In [313]:
# label tickets as positive, negative, or neutral
min_positive = 0.3
min_neutral = 0

multi['polarity_class'] = np.select([((multi['polarity'] > min_positive) & ((multi['negativity'] == 0) | (multi['polarity'] > 0.7))), 
                                     ((multi['polarity'] < min_neutral) | (multi['negativity'] > 0)), 
                                     ((min_positive >= multi['polarity']) & (multi['polarity'] >= min_neutral))],
                                    ['Positive', 'Negative', 'Neutral'])

In [314]:
# find positive tickets and use for later
positives = multi[multi['polarity_class'] == 'Positive']

In [315]:
# measure 'urgency' or severity of the message to prioritize certain messages
non_positive = multi[multi['polarity_class'] != 'Positive'].sort_values(by='polarity')

##### level of negativity, for sure
##### keywords?

In [348]:
# classify urgency based on level of negativity
high_urgency_max = -0.5
mid_urgency_max = -0.35
low_urgency_min = -0.2

multi['urgency_polarity'] = np.select([(multi['polarity'] < high_urgency_max),
                                       ((multi['polarity'] > low_urgency_min) & (multi['polarity_class'] != "Positive")),
                                       ((mid_urgency_max <= multi['polarity']) & (multi['polarity'] <= low_urgency_min)),
                                       ((high_urgency_max <= multi['polarity']) & (multi['polarity'] <= mid_urgency_max))],
                                     [4, 1, 2, 3],
                                     -5).astype(int)

### Adjust urgency rating based on words and characters used in the ticket.

In [349]:
# keywords for urgency; score based on number of words in text
urgent_words = ['need', 'urgent', 'urgency', 'urgently', 'please', 'help', 'useless', 'immediate', 'immediately', 'dire', 'asap', 'pay', 'paid', 'worst', 'worse', 'terrible', 'terribly', 'broke', 'broken', 'disappoint', 'disappointed', 'disappointingly', 'disappointing', 'quick', 'fast']
urgent_symbols = ['!', '$', '?']

def urgent_word_count(text):
    count = 0
    for symbol in urgent_symbols:
        count += text.count(symbol)/2
    
    text = ''.join([a for a in text if a.isalpha() or a == " "])
    
    to_check = text.split()
    for word in urgent_words:
        count += to_check.count(word)
  
    return count

In [350]:
# get counts of urgent text
multi['urgency_text'] = multi['en_text'].apply(urgent_word_count)

In [352]:
# get key urgency stats
urgency_text_90th_p = multi.urgency_text.iloc[round(len(multi.urgency_text)*.1)]
urgency_text_75th_p = multi.urgency_text.iloc[round(len(multi.urgency_text)*.25)]

In [353]:
# increase urgency level based on num of urgency text score
multi['urgency_rating'] = np.where(((multi['polarity'] != 'Positive') & (multi['urgency_text'] > urgency_text_75th_p) & (multi['urgency_polarity'] > 0)),
                                  multi['urgency_polarity'] + 1,
                                  multi['urgency_polarity'])

multi['urgency_rating'] = np.where(((multi['polarity'] != 'Positive') & (multi['urgency_text'] > urgency_text_90th_p) & (multi['urgency_polarity'] > 0)),
                                  multi['urgency_rating'] + 1,
                                  multi['urgency_rating'])

In [354]:
multi.sort_values(by='urgency_rating', ascending=False)

Unnamed: 0,trans_text,lang_detected,en_text,tb_polarity,v_polarity,p_polarity,polarity,polarity_class,urgency_polarity,negativity,positivity,urgency_text,urgency_rating,urgency_class
171,ok. to by≈Ço bardzo frustrujƒÖce. Mam wiele zada...,pl,ok. it was very frustrating. I have a lot of t...,-0.01,-0.6653,-0.01,-0.6653,Negative,4,0.223,0.0,2.0,6,HIGH
232,"–≠—Ç–æ —á—Ç–æ –Ω–∞ –º–æ–µ–º —ç–∫—Ä–∞–Ω–µ? –ù–µ —É–≤–µ—Ä–µ–Ω, —á—Ç–æ —ç—Ç–æ —Ö–±–æ...",ru,Is this what's on my screen? Not sure that it ...,-0.216667,-0.6921,-0.216667,-0.6921,Negative,4,0.156,0.0,2.5,6,HIGH
31,somebody from please help meeeeee üò©üò©üò©üò© i'm hav...,en,somebody from please help meeeeee üò©üò©üò©üò© i'm hav...,-1.0,-0.5423,-0.8,-0.5423,Negative,4,0.362,0.232,3.0,6,HIGH
445,Âó®! ÊÄé‰πàÂõû‰∫ã? YouTube‰∏¢‰∫ÜÂÆÉÂêó? ÊÄé‰πàÂÅö? Ë∞¢Ë∞¢‰Ω†ÁöÑÊîØÊåÅ!,zh-cn,Hey! Hey! What's the matter? Did YouTube lose ...,0.0,-0.4471,0.0,-0.4471,Negative,3,0.171,0.258,3.5,5,HIGH
361,Is het mogelijk om amzl te verhinderen mijn pa...,nl,Is it possible to prevent amzl from sending my...,-0.2,-0.8176,-0.2,-0.8176,Negative,4,0.363,0.046,1.5,5,HIGH
53,est la pire isp que j‚Äôai jamais eue,fr,is the worst isp I‚Äôve ever had,-1.0,-0.6249,-1.0,-0.6249,Negative,4,0.406,0.0,1.0,5,HIGH
377,„Å©„ÅÜ„Åó„Åü„Çì„Å†?„Å©„ÅÜ„Åó„Åü„ÅÆ?,ja,what's wrong? what's wrong?,-0.5,-0.7622,-0.5,-0.7622,Negative,4,0.766,0.0,1.0,5,HIGH
393,„Éì„Éá„Ç™„Ç≤„Éº„É†„ÅÆ„Éó„É™„Ç™„Éº„ÉÄ„Éº„Çí20%„Ç™„Éï„Å´„Åô„Çã„ÅÆ„Çí„ÇÑ„ÇÅ„Åü„Çâ Ê∑∑‰π±„Åó„Åæ„Åó„Åü„Åã? „Å™„Çì„Åß?,ja,Did you get confused when you stopped getting ...,-0.4,-0.5514,-0.4,-0.5514,Negative,4,0.233,0.0,1.0,5,HIGH
193,"Proszƒô mi powiedzieƒá, dlaczego m√≥j up≈ÇynƒÖ≈Ç, al...",pl,Please tell me why mine expired but I am still...,0.0,-0.3632,0.0,-0.3632,Negative,3,0.229,0.119,2.0,5,HIGH
378,„ÇÇ„ÅÜ „Ç¢„Éû„Çæ„É≥„ÅØ‰Ωø„Çè„Å™„ÅÑ! ÂïÜÂìÅ„Åå„ÄåÂá∫Ëç∑„Äç„Å´„Å™„Å£„Åü„ÅÆ„Åß ‰∏ÄÊó•‰∏≠ÂæÖ„Å§„Å® ÈñìÈÅï„Å£„ÅüÂõΩ„Å´ÈÄÅ„Å£„Åü„Å†„Åë„Åß„Åô!,ja,i don't use the amazon anymore! The goods were...,-0.625,-0.6372,-0.625,-0.6372,Negative,4,0.191,0.0,1.0,5,HIGH


In [365]:
multi['urgency_class'] = np.select([(multi['urgency_rating'] == 6), (multi['urgency_rating'] == 5), (multi['urgency_rating'] == 4), (multi['urgency_rating'] == 3), (multi['urgency_rating'] == 2), ((multi['urgency_rating'] == 1) & (multi['urgency_text'] > 0)), ((multi['urgency_rating'] == 1) & (multi['urgency_text'] == 0))],
                                  ['HIGH', 'HIGH', 'Mid-High', 'Low-Mid', 'Low', 'Lowest', 'None'],
                                  'None')

In [370]:
multi.urgency_class = pd.Categorical(multi.urgency_class,
                                    categories=['None', 'Lowest', 'Low', 'Low-Mid', 'Mid-High', 'HIGH'],
                                    ordered=True)

In [372]:
multi.sort_values(by='urgency_class', ascending=False)

Unnamed: 0,trans_text,lang_detected,en_text,tb_polarity,v_polarity,p_polarity,polarity,polarity_class,urgency_polarity,negativity,positivity,urgency_text,urgency_rating,urgency_class
445,Âó®! ÊÄé‰πàÂõû‰∫ã? YouTube‰∏¢‰∫ÜÂÆÉÂêó? ÊÄé‰πàÂÅö? Ë∞¢Ë∞¢‰Ω†ÁöÑÊîØÊåÅ!,zh-cn,Hey! Hey! What's the matter? Did YouTube lose ...,0.0,-0.4471,0.0,-0.4471,Negative,3,0.171,0.258,3.5,5,HIGH
6,is the worst customer service,en,is the worst customer service,-1.0,-0.6249,-1.0,-0.6249,Negative,4,0.506,0.0,1.0,5,HIGH
393,„Éì„Éá„Ç™„Ç≤„Éº„É†„ÅÆ„Éó„É™„Ç™„Éº„ÉÄ„Éº„Çí20%„Ç™„Éï„Å´„Åô„Çã„ÅÆ„Çí„ÇÑ„ÇÅ„Åü„Çâ Ê∑∑‰π±„Åó„Åæ„Åó„Åü„Åã? „Å™„Çì„Åß?,ja,Did you get confused when you stopped getting ...,-0.4,-0.5514,-0.4,-0.5514,Negative,4,0.233,0.0,1.0,5,HIGH
361,Is het mogelijk om amzl te verhinderen mijn pa...,nl,Is it possible to prevent amzl from sending my...,-0.2,-0.8176,-0.2,-0.8176,Negative,4,0.363,0.046,1.5,5,HIGH
31,somebody from please help meeeeee üò©üò©üò©üò© i'm hav...,en,somebody from please help meeeeee üò©üò©üò©üò© i'm hav...,-1.0,-0.5423,-0.8,-0.5423,Negative,4,0.362,0.232,3.0,6,HIGH
53,est la pire isp que j‚Äôai jamais eue,fr,is the worst isp I‚Äôve ever had,-1.0,-0.6249,-1.0,-0.6249,Negative,4,0.406,0.0,1.0,5,HIGH
378,„ÇÇ„ÅÜ „Ç¢„Éû„Çæ„É≥„ÅØ‰Ωø„Çè„Å™„ÅÑ! ÂïÜÂìÅ„Åå„ÄåÂá∫Ëç∑„Äç„Å´„Å™„Å£„Åü„ÅÆ„Åß ‰∏ÄÊó•‰∏≠ÂæÖ„Å§„Å® ÈñìÈÅï„Å£„ÅüÂõΩ„Å´ÈÄÅ„Å£„Åü„Å†„Åë„Åß„Åô!,ja,i don't use the amazon anymore! The goods were...,-0.625,-0.6372,-0.625,-0.6372,Negative,4,0.191,0.0,1.0,5,HIGH
377,„Å©„ÅÜ„Åó„Åü„Çì„Å†?„Å©„ÅÜ„Åó„Åü„ÅÆ?,ja,what's wrong? what's wrong?,-0.5,-0.7622,-0.5,-0.7622,Negative,4,0.766,0.0,1.0,5,HIGH
232,"–≠—Ç–æ —á—Ç–æ –Ω–∞ –º–æ–µ–º —ç–∫—Ä–∞–Ω–µ? –ù–µ —É–≤–µ—Ä–µ–Ω, —á—Ç–æ —ç—Ç–æ —Ö–±–æ...",ru,Is this what's on my screen? Not sure that it ...,-0.216667,-0.6921,-0.216667,-0.6921,Negative,4,0.156,0.0,2.5,6,HIGH
370,"Ik betaalde voor vandaag, kwam niet. Waarom ni...",nl,"I paid for today, didn't come. Why not? I paid...",-0.3,-0.4871,-0.3,-0.4871,Negative,3,0.143,0.0,3.0,5,HIGH
