# 6. MELD+Tweets, Word-embeddings

## 6.1 Data preparation and filtering

(a) Loading the MELD data and dropping the 'Neutral' label

In [1]:
import pandas as pd
filepath = './data/MELD/train_sent_emo.csv'
meld_dftrain = pd.read_csv(filepath)
meld_dftrain['Utterance'] = meld_dftrain['Utterance'].str.replace("\x92|\x97|\x91|\x93|\x94|\x85", "'")

filepath = './data/MELD/test_sent_emo.csv'
meld_dftest = pd.read_csv(filepath)
meld_dftest['Utterance'] = meld_dftest['Utterance'].str.replace("\x92|\x97|\x91|\x93|\x94|\x85", "'")

meld_dftrain = meld_dftrain.set_index("Emotion", drop=False)
meld_dftrain = meld_dftrain.drop("neutral", axis=0)

meld_dftest = meld_dftest.set_index("Emotion", drop=False)
meld_dftest = meld_dftest.drop("neutral", axis=0)

  meld_dftrain['Utterance'] = meld_dftrain['Utterance'].str.replace("\x92|\x97|\x91|\x93|\x94|\x85", "'")
  meld_dftest['Utterance'] = meld_dftest['Utterance'].str.replace("\x92|\x97|\x91|\x93|\x94|\x85", "'")


(b) Loading the Tweets data

In [2]:
filepath = 'data/wassa/training/all.train.tsv'
tweets_dftrain = pd.read_csv(filepath, sep='\t')

filepath = 'data/wassa/testing/all.test.tsv'
tweets_dftest = pd.read_csv(filepath, sep='\t')

(c) Combining the two data sets
  -  Rename the following axes the same names:
      -  MELD  "Sr No.": "ID", "Utterance": "Sent"
      -  Tweets  "Tweet": "Sent", "Label": "Emotion"
  - Concatenate the two dataframes in a way that their IDs, sentences and labels are aligned. Two additional keys: "MELD" and "Tweets" are added to identify from where a particular entry is.

In [3]:
# Change index of MELD back to number sequence
meld_dftrain = meld_dftrain.set_index(pd.Series(list(range(len(meld_dftrain)))))
meld_dftest = meld_dftest.set_index(pd.Series(list(range(len(meld_dftest)))))

# Training data
meld_dftrain = meld_dftrain.rename(columns={"Sr No.": "ID", "Utterance": "Sent"})
tweets_dftrain = tweets_dftrain.rename(columns={"Tweet": "Sent", "Label": "Emotion"})
combined_dftrain = pd.concat([meld_dftrain, tweets_dftrain], keys=['MELD', 'Tweets'])

# Test data
meld_dftest = meld_dftest.rename(columns={"Sr No.": "ID", "Utterance": "Sent"})
tweets_dftest = tweets_dftest.rename(columns={"Tweet": "Sent", "Label": "Emotion"})
combined_dftest = pd.concat([meld_dftest, tweets_dftest], keys=['MELD', 'Tweets'])

In [4]:
# Check the changes
meld_dftrain.head()

Unnamed: 0,ID,Sent,Speaker,Emotion,Sentiment,Dialogue_ID,Utterance_ID,Season,Episode,StartTime,EndTime
0,5,My duties? All right.,Chandler,surprise,positive,0,4,8,21,"00:16:34,452","00:16:40,917"
1,11,No don't I beg of you!,Chandler,fear,negative,0,10,8,21,"00:17:02,856","00:17:04,858"
2,13,Really?!,Chandler,surprise,positive,0,12,8,21,"00:17:13,491","00:17:16,536"
3,15,But then who? The waitress I went out with las...,Joey,surprise,negative,1,0,9,23,"00:36:40,364","00:36:42,824"
4,16,You know? Forget it!,Rachel,sadness,negative,1,1,9,23,"00:36:44,368","00:36:46,578"


In [5]:
tweets_dftrain.head()

Unnamed: 0,ID,Sent,Emotion,Score
0,10000,How the fu*k! Who the heck! moved my fridge!.....,anger,0.938
1,10001,So my Indian Uber driver just called someone t...,anger,0.896
2,10002,@DPD_UK I asked for my parcel to be delivered ...,anger,0.896
3,10003,so ef whichever butt wipe pulled the fire alar...,anger,0.896
4,10004,Don't join @BTCare they put the phone down on ...,anger,0.896


In [6]:
combined_dftrain.head()

Unnamed: 0,Unnamed: 1,ID,Sent,Speaker,Emotion,Sentiment,Dialogue_ID,Utterance_ID,Season,Episode,StartTime,EndTime,Score
MELD,0,5,My duties? All right.,Chandler,surprise,positive,0.0,4.0,8.0,21.0,"00:16:34,452","00:16:40,917",
MELD,1,11,No don't I beg of you!,Chandler,fear,negative,0.0,10.0,8.0,21.0,"00:17:02,856","00:17:04,858",
MELD,2,13,Really?!,Chandler,surprise,positive,0.0,12.0,8.0,21.0,"00:17:13,491","00:17:16,536",
MELD,3,15,But then who? The waitress I went out with las...,Joey,surprise,negative,1.0,0.0,9.0,23.0,"00:36:40,364","00:36:42,824",
MELD,4,16,You know? Forget it!,Rachel,sadness,negative,1.0,1.0,9.0,23.0,"00:36:44,368","00:36:46,578",


In [7]:
combined_dftrain.tail()

# The data sets are concatenated

Unnamed: 0,Unnamed: 1,ID,Sent,Speaker,Emotion,Sentiment,Dialogue_ID,Utterance_ID,Season,Episode,StartTime,EndTime,Score
Tweets,3608,40781,@VivienLloyd Thank you so much! Just home - st...,,sadness,,,,,,,,0.104
Tweets,3609,40782,Just put the winter duvet on ☃️❄️🌬☔️,,sadness,,,,,,,,0.104
Tweets,3610,40783,@SilkInSide @TommyJoeRatliff that's so pretty!...,,sadness,,,,,,,,0.088
Tweets,3611,40784,@BluesfestByron second artist announcement loo...,,sadness,,,,,,,,0.083
Tweets,3612,40785,I can literally eat creamy pesto pasta topped ...,,sadness,,,,,,,,0.083


(d) Tokenizing and filtering

In [23]:
import spacy
nlp = spacy.load("en_core_web_sm")

# Using spaCy to tokenize the sentences
training_data_6 = [nlp(sent) for sent in list(combined_dftrain['Sent'])]
training_labels_6 = list(combined_dftrain['Emotion'])

test_data_6 = [nlp(sent) for sent in list(combined_dftest['Sent'])]
test_labels_6 = list(combined_dftest['Emotion'])

### Filter A

In [10]:
from utils import low_high_mid_df
min_df = 2
max_df = len(training_data_6)//10

low_df, high_df, clean6A = low_high_mid_df(min_df, max_df, training_data_6)

print("Rare words with low df = ", len(low_df), "words. Examples: ", list(low_df)[:20])
print("Stop words with high df:", high_df)
vocab_6A = set()
for sent in clean6A:
    for t in sent:
        vocab_6A.add(t)
print("Size of the rest vocab:", len(vocab_6A))
print("Samples:", clean6A[10:20])

Min_df 2
Max_df 889
Rare words with low df =  5972 words. Examples:  ['@susansarandon', '@ren102e906', '@darwinwaterson', '\\nhi', 'rapidly', 'territorial', 'hentai', 'furrie', '@tombrodude', '36', 'emotionalhedge', 'mansfieldhour', 'cadres', 'best', 'us\\ni', 'irrefutable', '@annalisewrobel', '@uber_rsa', '@pandaflo22', 'keeping']
Stop words with high df: {'!', 'have', '.', 'it', '?', 'of', 'be', 'do', 'my', 'and', '#', 'a', 'the', 'i', 'you', "n't", 'to', ',', 'that'}
Size of the rest vocab: 4915
Samples: [['just', 'coffee', 'where', 'we', 'gon', 'na', 'hang', 'out', 'now'], ['got'], [], ['um', '-', 'mm', 'yeah', 'right'], ['oh', 'god', 'oh', 'god', 'poor', 'monica'], ['what', 'what', 'what'], ['what'], ['he', 'think', 'monica', 'empty', 'she', 'empty', 'vase'], ['oh', 'totally', 'oh', 'god', 'oh', 'she', 'seem', 'so', 'happy', 'too'], ['hey']]


### Filter B

In [12]:
from utils import remove_DT_PRP

min_df = 2

low_df, DTandPRP_tok, clean6B = remove_DT_PRP(min_df, training_data_6)

print("Rare words with low df = ", len(low_df), "words. Examples:", list(low_df)[:20])
vocab_6B = set()
for sent in clean6B:
    for t in sent:
        vocab_6B.add(t)
print("Size of the rest vocab:", len(vocab_6B))
print("Samples:", clean6B[10:20])

Determiner and pronouns {"y'", '@reyesaverie', '@melissajoyrd', "it's", '@weebtard', '🍁', '@british_airways', 'it', 'mine', '\\n#you', 'any', 'each', 'this', 'ba', 'eagles.\\nthey', 'yours', 'strength.\\nthey', '👅', '\\nit', ':)', 'they', 'its', '_', "'em", '\\n\\nother', '’s', 'don’t', "you're'you", 'an', 'those', 'd', '@mhchat', 'herself', "that'you", '\\nimagine', '@ntfc', 'himself', '@adsbyflaherty', "film'that", 'tbh', 'my', '🐮', '😧', "up'i", '\uf62b', 'xx', 'memphis', 'also-', '@m_t_f_72', 'i', '❤', 'yourself', 'tho', "i'i'm", 'that?s', '@kevincanwaitcbs', 'near,\\nthe', 'her', '@rowillfindyou', 'ours', 'themselves', '@relaqss', 'his', 'bridgetjonesbaby', 'another', 'either', 'some', "'s", 'em', '@fra93_bruno', 'your', '@themathofyou', 'their', '@its.finfin', '🐈', '@snub23', '😡', '#', 'no', "i'y'know", 'hbu', "was'the", 'the', 'both', 'our', '\\n\\nsam', '@blackeyed_susie', '@missmeliss465', 'scarred,\\nthis', 'ek', "i'm", 'one', 'jut', '@sargon_of_akkad', 'ty', 'all', 'the-', '@

## 5.2 Word-embedding model and training the classifiers

(a) Encoding the labels

In [13]:
from sklearn import preprocessing

label_encoder = preprocessing.LabelEncoder()
label_encoder.fit(training_labels_6+test_labels_6)
print(list(label_encoder.classes_))

['anger', 'disgust', 'fear', 'joy', 'sadness', 'surprise']


In [14]:
training_classes = label_encoder.transform(training_labels_6)
print(training_classes[:5])
print(list(combined_dftrain['Emotion'])[:5])
print(list(combined_dftrain['Sent'])[:5])

[5 2 5 5 4]
['surprise', 'fear', 'surprise', 'surprise', 'sadness']
['My duties?  All right.', "No don't I beg of you!", 'Really?!', 'But then who? The waitress I went out with last month?', 'You know? Forget it!']


(b) Loading the embedding model

In [15]:
from gensim.test.utils import datapath, get_tmpfile
from gensim.models import KeyedVectors
from gensim.scripts.glove2word2vec import glove2word2vec

from os import path

wordembeddings="glove.twitter.27B.200d.txt"
glove_file = datapath(path.abspath('../glove/glove.twitter.27B.200d.txt'))

# Create a word2vec model from the Glove text data
tmp_file = get_tmpfile("test_word2vec.txt")
_ = glove2word2vec(glove_file, tmp_file)

word_embedding_model = KeyedVectors.load_word2vec_format(tmp_file)

# Dimensions set to 200.
num_features = 200

# Converting Index2Word
index2word_set = set(word_embedding_model.index_to_key)

  _ = glove2word2vec(glove_file, tmp_file)


### Filter A

In [16]:
from utils import featureVecMethod, getAvgFeatureVecs

trainFeatureVecs_6A, embedding_words_6A, no_embedding_words_6A = \
getAvgFeatureVecs(clean6A,
                  word_embedding_model, 
                  index2word_set, 
                  num_features
                  )

Shape of our matrix is: (8892, 200)
Review 0 of 8892
Review 1000 of 8892
Review 2000 of 8892
Review 3000 of 8892
Review 4000 of 8892
Review 5000 of 8892
Review 6000 of 8892
Review 7000 of 8892
Review 8000 of 8892


In [17]:
print(embedding_words_6A[:50])
print()
print(no_embedding_words_6A[:50])

['all', 'right', 'no', 'really', 'but', 'then', 'who', 'waitress', 'go', 'out', 'with', 'last', 'month', 'know', 'forget', 'no', '-', 'no', '-', 'no', '-', 'no', 'no', 'who', 'who', 'talk', 'about', 'no', '-', '-', '-', 'actually', 'know', 'ever', 'chris', 'say', 'they', 'close', 'down', 'bar', 'no', 'way', 'just', 'coffee', 'where', 'we', 'gon', 'na', 'hang', 'out']

[' ', "y'know", '...', '...', ' ', ' ', ' ', "y'know", ' ', '15', '...', '...', '...', ' ', ' ', '...', ' ', ' ', ' ', "i'm", ' ', ' ', "y'know", ' ', ' ', ' ', ' ', '...', '  ', ' ', "nothin'", "nothin'", "it's", "y'know", '  ', '...', ' ', ' ', ' ', ' ', ' ', '  ', "y'know", '...', ' ', ' ', ' ', ' ', ' ', '...']


In [18]:
from sklearn import svm
from sklearn.calibration import CalibratedClassifierCV

linear_model = svm.LinearSVC(max_iter=2000)
svm_linear_clf_6A = CalibratedClassifierCV(linear_model , method='sigmoid', cv=10)

svm_linear_clf_6A.fit(trainFeatureVecs_6A, training_classes)

CalibratedClassifierCV(base_estimator=LinearSVC(max_iter=2000), cv=10)

### Filter B

In [19]:
trainFeatureVecs_6B, embedding_words_6B, no_embedding_words_6B = \
getAvgFeatureVecs(clean6B,
                  word_embedding_model, 
                  index2word_set, 
                  num_features
                  )

Shape of our matrix is: (8892, 200)
Review 0 of 8892
Review 1000 of 8892
Review 2000 of 8892
Review 3000 of 8892
Review 4000 of 8892
Review 5000 of 8892
Review 6000 of 8892
Review 7000 of 8892
Review 8000 of 8892


In [20]:
print(embedding_words_6B[:50])
print()
print(no_embedding_words_6B[:50])

['?', 'all', 'right', '.', 'no', 'do', "n't", 'of', '!', 'really', '?', '!', 'but', 'then', 'who', '?', 'waitress', 'go', 'out', 'with', 'last', 'month', '?', 'know', '?', 'forget', '!', 'no', '-', '-', 'no', '-', 'no', ',', 'no', '!', 'who', ',', 'who', 'be', 'talk', 'about', '?', 'no', ',', 'i', '-', '-', 'i', '-']

[' ', "y'know", '...', '...', ' ', ' ', ' ', "y'know", ' ', '15', '...', '...', '...', ' ', ' ', '...', ' ', ' ', ' ', "i'm", ' ', ' ', "y'know", ' ', ' ', ' ', ' ', '...', '  ', ' ', "nothin'", "nothin'", "y'know", '  ', '...', ' ', ' ', ' ', ' ', ' ', '  ', "y'know", '...', ' ', ' ', ' ', ' ', ' ', '...', 'goodacre']


In [21]:
linear_model = svm.LinearSVC(max_iter=2000)
svm_linear_clf_6B = CalibratedClassifierCV(linear_model , method='sigmoid', cv=10)

svm_linear_clf_6B.fit(trainFeatureVecs_6B, training_classes)

CalibratedClassifierCV(base_estimator=LinearSVC(max_iter=2000), cv=10)

## 5.3 Predicting the test data and results

Encode the labels

In [24]:
test_classes_6 = label_encoder.transform(test_labels_6)
print(test_classes_6[:5])
print(list(combined_dftest['Emotion'])[:5])
print(list(combined_dftest['Sent'])[:5])

[5 0 3 3 3]
['surprise', 'anger', 'joy', 'joy', 'joy']
["Why do all you're coffee mugs have numbers on the bottom?", "Oh. That's so Monica can keep track. That way if one on them is missing, she can be like, 'Where's number 27?!'", 'Push!', "Push 'em out, push 'em out, harder, harder.", "Push 'em out, push 'em out, way out!"]


### Filter A

(a) Prediction

In [25]:
max_df_test = len(test_data_6)//10

low_df_test_6A, high_df_test_6A, test_mid_df_6A = \
low_high_mid_df(2, max_df_test, test_data_6)

Min_df 2
Max_df 449


In [41]:
print(high_df_test_6A)

{'!', 'have', '.', 'it', '?', 'of', 'be', 'do', 'on', 'my', 'and', '#', 'a', 'the', 'i', 'you', "n't", 'in', 'to', ',', 'that'}


In [26]:
testDataVecs_6A, test_6A_known_words, test_6A_unknown_words =\
getAvgFeatureVecs(test_mid_df_6A,
                  word_embedding_model, 
                  index2word_set, 
                  num_features) 

Shape of our matrix is: (4496, 200)
Review 0 of 4496
Review 1000 of 4496
Review 2000 of 4496
Review 3000 of 4496
Review 4000 of 4496


In [27]:
y_pred_svm_6A = svm_linear_clf_6A.predict(testDataVecs_6A)

(b) Results

In [28]:
# 4. Evaluating and analyzing the result
from sklearn.metrics import classification_report

report_6A = classification_report(test_classes_6,y_pred_svm_6A,digits = 6)
print(label_encoder.classes_)
print('Embeddings SVM LINEAR: MELD+Tweets, Filter A')
print('Word embedding model used', wordembeddings)
print('Word mininum document frequency', min_df, ": max:", max_df_test)
print(report_6A)

['anger' 'disgust' 'fear' 'joy' 'sadness' 'surprise']
Embeddings SVM LINEAR: MELD+Tweets, Filter A
Word embedding model used glove.twitter.27B.200d.txt
Word mininum document frequency 2 : max: 449
              precision    recall  f1-score   support

           0   0.538326  0.552941  0.545536      1105
           1   1.000000  0.014706  0.028986        68
           2   0.674830  0.474641  0.557303      1045
           3   0.501416  0.793011  0.614370      1116
           4   0.587922  0.375709  0.458449       881
           5   0.474747  0.501779  0.487889       281

    accuracy                       0.548265      4496
   macro avg   0.629540  0.452131  0.448755      4496
weighted avg   0.573619  0.548265  0.536877      4496



In [29]:
import sklearn
print('Confusion matrix SVM, embeddings, MELD+Tweets, Filter A')
print(label_encoder.classes_)
print(sklearn.metrics.confusion_matrix(test_classes_6,y_pred_svm_6A))

Confusion matrix SVM, embeddings, MELD+Tweets, Filter A
['anger' 'disgust' 'fear' 'joy' 'sadness' 'surprise']
[[611   0  82 289  67  56]
 [ 24   1   2  25   6  10]
 [215   0 496 218  92  24]
 [ 89   0  53 885  53  36]
 [165   0  95 260 331  30]
 [ 31   0   7  88  14 141]]


In [31]:
pred_probabilities_6A = svm_linear_clf_6A.predict_proba(testDataVecs_6A)

pred_labels_6A = []
for predicted_label in y_pred_svm_6A:
    pred_labels_6A.append(label_encoder.classes_[predicted_label])

gold_labels_6A = []
for gold_label in test_classes_6:
    gold_labels_6A.append(label_encoder.classes_[gold_label])

result_frame6A = pd.DataFrame(pred_probabilities_6A*100, columns=label_encoder.classes_)

result_frame6A['Chat']= list(combined_dftest['Sent'])
result_frame6A['Prediction']=pred_labels_6A
result_frame6A['Gold']=gold_labels_6A

result_frame6A.to_csv("result_frame6A.csv")
result_frame6A.head()

Unnamed: 0,anger,disgust,fear,joy,sadness,surprise,Chat,Prediction,Gold
0,25.199536,5.170501,3.184909,32.818911,19.760709,13.865433,Why do all you're coffee mugs have numbers on ...,joy,surprise
1,17.631798,3.001689,9.181429,39.802757,18.725642,11.656686,Oh. That's so Monica can keep track. That way ...,joy,anger
2,25.406057,2.345162,47.325382,13.044321,3.426252,8.452827,Push!,fear,joy
3,44.771639,2.06042,5.770728,26.156729,18.202186,3.038299,"Push 'em out, push 'em out, harder, harder.",anger,joy
4,46.072344,3.120442,9.699568,29.864994,8.918786,2.323866,"Push 'em out, push 'em out, way out!",anger,joy


### Filter B

In [32]:
low_df_test_6B, DTandPRP_test_6B, clean_test_6B = \
remove_DT_PRP(2, test_data_6)

Determiner and pronouns {'@jdegrom19', "y'", '@jbanks88', "i'i", 'it', 'mine', '\\nwhat', 'any', 'each', 'this', 'yours', '@adele', "they're", '@colinoccupantz', "\\n\\n'you", '@eliroth', 'ios10', 'nj@latimes', '\\nso', 'stupid?that', 'they', 'its', '_', "'em", '@ritujai18874', '#behaviour', '@barbour', '@talktalkcare', '’s', '@space_gayz', '@your', 'an', 'those', 'd', '@johnjharwood', '@the', 'said!!!!\\nthey', 'thee', 'himself', '@xmaseveevil1', '\\nindia', 'tbh', 'my', '@ryuredwings2', '😄', 'i', 'yourself', '@kristasaidthis', 'tho', '@messyourself', '@aefadul22', '»', 'blm', '@barackobama', 'that,\\ngives', 'her', 'ours', 'themselves', 'his', '🍂', 'either', 'another', 'some', 'happy\\nshe', "'s", 'em', 'your', "naya'\\n\\n'i", 'their', 'half', '@digger_forum', '😑', 'lt', 'it.\\n#funny', '#', 'no', '\\nmatt', 'the', 'our', 'both', 'tvgirl', '💦', "i'm", 'one', '@realdonaldtrump', 'all', '@sarahb45', 'isthereahelplineforthis', 'myself', '😿', '@jankhambrams', 'itself', '@bbnicole', 'boy

In [33]:
testDataVecs_6B, test_6B_known_words, test_6B_unknown_words =\
getAvgFeatureVecs(clean_test_6B,
                  word_embedding_model, 
                  index2word_set, 
                  num_features) 

Shape of our matrix is: (4496, 200)
Review 0 of 4496
Review 1000 of 4496
Review 2000 of 4496
Review 3000 of 4496
Review 4000 of 4496


In [35]:
y_pred_svm_6B = svm_linear_clf_6B.predict(testDataVecs_6B)

In [36]:
report_6B = classification_report(test_classes_6,y_pred_svm_6B,digits = 6)
print(label_encoder.classes_)
print('Embeddings SVM LINEAR: MELD+Tweets, Filter B')
print('Word embedding model used', wordembeddings)
print('Word mininum document frequency', min_df, "; DT PRP removed")
print(report_6B)

['anger' 'disgust' 'fear' 'joy' 'sadness' 'surprise']
Embeddings SVM LINEAR: MELD+Tweets, Filter B
Word embedding model used glove.twitter.27B.200d.txt
Word mininum document frequency 2 ; DT PRP removed
              precision    recall  f1-score   support

           0   0.584721  0.540271  0.561618      1105
           1   0.666667  0.029412  0.056338        68
           2   0.647123  0.570335  0.606307      1045
           3   0.540963  0.775090  0.637201      1116
           4   0.593060  0.426788  0.496370       881
           5   0.537736  0.608541  0.570952       281

    accuracy                       0.579849      4496
   macro avg   0.595045  0.491739  0.488131      4496
weighted avg   0.588300  0.579849  0.570922      4496



In [37]:
print('Confusion matrix SVM, embeddings, MELD+Tweet, Filter B')
print(label_encoder.classes_)
print(sklearn.metrics.confusion_matrix(test_classes_6,y_pred_svm_6B))

Confusion matrix SVM, embeddings, MELD+Tweet, Filter B
['anger' 'disgust' 'fear' 'joy' 'sadness' 'surprise']
[[597   1 108 258  81  60]
 [ 19   2   2  28  10   7]
 [168   0 596 158 100  23]
 [ 85   0  72 865  62  32]
 [131   0 137 212 376  25]
 [ 21   0   6  78   5 171]]


In [39]:
pred_probabilities_6B = svm_linear_clf_6B.predict_proba(testDataVecs_6B)

pred_labels_6B = []
for predicted_label in y_pred_svm_6B:
    pred_labels_6B.append(label_encoder.classes_[predicted_label])

gold_labels_6B = []
for gold_label in test_classes_6:
    gold_labels_6B.append(label_encoder.classes_[gold_label])

result_frame6B = pd.DataFrame(pred_probabilities_6B*100, columns=label_encoder.classes_)

result_frame6B['Chat']= list(combined_dftest['Sent'])
result_frame6B['Prediction']=pred_labels_6B
result_frame6B['Gold']=gold_labels_6B

result_frame6B.to_csv("result_frame6B.csv")
result_frame6B.head()

Unnamed: 0,anger,disgust,fear,joy,sadness,surprise,Chat,Prediction,Gold
0,25.369358,2.737392,2.35242,11.380595,28.313076,29.84716,Why do all you're coffee mugs have numbers on ...,surprise,surprise
1,15.117586,3.517425,7.371107,42.598438,19.432031,11.963413,Oh. That's so Monica can keep track. That way ...,joy,anger
2,39.418739,3.435926,11.054657,35.521602,1.982503,8.586573,Push!,anger,joy
3,23.502766,3.378611,3.375353,33.402619,32.220863,4.119789,"Push 'em out, push 'em out, harder, harder.",joy,joy
4,41.115883,5.157752,4.687754,33.257717,9.464384,6.31651,"Push 'em out, push 'em out, way out!",anger,joy
