### Automatically generate counterfactual sentences by substituting causal terms with antonyms

In [1]:
import io, time
from itertools import combinations, cycle, product
from IPython.display import display

import nltk
import numpy as np
import pandas as pd
import pickle, random, re
from collections import Counter
from PyDictionary import PyDictionary
dictionary=PyDictionary()

from tqdm.notebook import tqdm
# Show full cell text. None means "no truncation"; the old sentinel -1 was
# deprecated in pandas 1.0 and is rejected by later releases.
pd.set_option('display.max_colwidth', None)

import sklearn

from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.metrics import roc_auc_score
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import KFold, cross_val_score, train_test_split
from sklearn.metrics import classification_report, accuracy_score

import matplotlib.pyplot as plt
%matplotlib inline

import warnings
warnings.filterwarnings("ignore")

random.seed(42)

In [2]:
# Root location of the datasets (placeholder value). File names are appended
# with plain string concatenation (e.g. data_path+'kindle_ct/...'), so the
# value needs to end with a path separator.
data_path = 'my_data_path'

In [3]:
class Counterfactual:
    """
    Holder for one dataset: its name plus the train and test frames.

    Creating an instance also displays the first row of the training frame.
    Attributes set here: moniker, train, test.
    """

    def __init__(self, df_train, df_test, moniker):
        # Preview the first training row before storing anything.
        preview = df_train.head(1)
        display(preview)
        self.moniker, self.train, self.test = moniker, df_train, df_test

In [4]:
def pre_process_imdb(data_file):
    """
    Load a paired IMDB file and place each original next to its counterfactual.

    The tab-separated input must provide the columns Text, Sentiment and
    batch_id. Rows are read as consecutive pairs: rows at even positions
    (0, 2, ...) are taken as the original texts and rows at odd positions as
    their counterfactual rewrites.

    Parameters
    ----------
    data_file : path or file-like object accepted by pandas.read_csv

    Returns
    -------
    pandas.DataFrame or None
        One row per pair with the columns batch_id, text, ct_text_amt, label
        and ct_label, where 'Positive' is mapped to 1 and 'Negative' to -1
        (any other sentiment value is left as it is).
        None is returned, after printing an error, when the rows cannot be
        paired: an odd number of rows, or a pair whose batch ids differ.
    """
    df = pd.read_csv(data_file, sep='\t')

    combined_text = df.Text.values
    combined_batch_id = df.batch_id.values
    combined_sentiment = df.Sentiment.values

    # Even positions hold originals, odd positions hold counterfactuals.
    org_batch_id = combined_batch_id[0::2]
    ct_batch_id = combined_batch_id[1::2]

    # Check the lengths first: with an odd row count the two halves differ in
    # size and the element-wise comparison below would be ill-defined.
    if len(org_batch_id) != len(ct_batch_id) or np.any(org_batch_id != ct_batch_id):
        print('Error: batch id not match!')
        return

    df_data = pd.DataFrame({
        'batch_id': org_batch_id,
        'text': combined_text[0::2],
        'ct_text_amt': combined_text[1::2],
        'label': combined_sentiment[0::2],
        'ct_label': combined_sentiment[1::2],
    })

    map_lb = {'Positive':1, 'Negative':-1}
    df_data = df_data.replace({'label':map_lb, 'ct_label':map_lb})

    return df_data

In [5]:
def select_sents(df):
    """
    Keep the rows of `df` whose text contains at least one causal term.

    Causal terms are the rows with causal == 1 in
    kindle_ct/kindle_vocab_antonym_causal.csv (read relative to data_path).
    Matching is done on a binary bag-of-words matrix fitted on df.text, so a
    term that does not survive the vectorizer's min_df/max_df filtering cannot
    select any sentence.

    Parameters
    ----------
    df : pandas.DataFrame with a `text` column

    Returns
    -------
    pandas.DataFrame
        The matching rows of `df`, in their original relative order.
    """
    df_antonym_vocab = pd.read_csv(data_path+'kindle_ct/kindle_vocab_antonym_causal.csv')
    keywords = list(df_antonym_vocab[df_antonym_vocab.causal == 1].term.values)

    vec = CountVectorizer(min_df=5, binary=True, max_df=.8)
    X = vec.fit_transform(df.text.values)

    sent_idx = set()
    for wd in keywords:
        # Terms absent from the fitted vocabulary simply match nothing; an
        # explicit lookup avoids hiding unrelated errors behind a bare except.
        col = vec.vocabulary_.get(wd)
        if col is None:
            continue
        sent_idx.update(np.nonzero(X[:, col])[0])

    # Sort the positions so the selection order does not depend on set iteration.
    return df.iloc[sorted(sent_idx)]

In [18]:
# Preview the kindle test sentences that contain at least one causal term.
# NOTE: pickle.load can execute arbitrary code; only load trusted files.
with open(data_path+"kindle_ct/causal_sents/kindle_data.pkl",'rb') as kindle_file:
    df_kindle = pickle.load(kindle_file)
df_test = df_kindle[df_kindle['flag']=='test']

df_test_select = select_sents(df_test)
display(df_test_select.head())

Unnamed: 0,text,rating,label,flag
0,This was a very fun story,5,1,test
1,Not fast moving but a very well managed pace,5,1,test
2,The story line is an interesting take on zombie mythology and is a great journey,5,1,test
4,series is always a good read,5,1,test
10,Did not like it very much,1,-1,test


In [9]:
def _load_pickle(path):
    """Unpickle `path` and close the file handle afterwards (trusted files only)."""
    with open(path, 'rb') as handle:
        return pickle.load(handle)


def get_data(moniker):
    """
    Load the train/test data and the annotation tables for one dataset.

    Parameters
    ----------
    moniker : str
        'kindle'     - pickled kindle frame, split by its `flag` column
                       ('selected_train' / 'test');
        'imdb'       - paired paragraph-level TSV files, read by pre_process_imdb;
        'imdb_sents' - pickled sentence-level IMDB frames.

    Returns
    -------
    tuple
        (df_train, df_test, df_antonym_vocab, df_identified_causal)

    Raises
    ------
    ValueError
        If `moniker` is not one of the three supported names.
    """
    if(moniker == 'kindle'):
        df_kindle = _load_pickle(data_path+"kindle_ct/causal_sents/kindle_data.pkl")
        df_train = df_kindle[df_kindle['flag']=='selected_train']
        df_test = df_kindle[df_kindle['flag']=='test']
        df_antonym_vocab = pd.read_csv(data_path+'kindle_ct/kindle_vocab_antonym_causal.csv')
        df_identified_causal = pd.read_csv(data_path+'kindle_ct/ITE/kindle_identified_causal.csv')
    elif(moniker == 'imdb'):
        df_train = pre_process_imdb(data_file = data_path + "imdb_ct/train_paired.tsv")
        df_test = pre_process_imdb(data_file = data_path + "imdb_ct/test_paired.tsv")
        df_antonym_vocab = pd.read_csv(data_path+'imdb_ct/imdb_vocab_antonym_causal.csv')
        df_identified_causal = pd.read_csv(data_path+'imdb_ct/imdb_identified_causal.csv')
    elif(moniker == 'imdb_sents'):
        df_train = _load_pickle(data_path+"imdb_ct/train_paired_sents.pkl")
        df_test = _load_pickle(data_path+"imdb_ct/test_paired_sents.pkl")
        df_antonym_vocab = pd.read_csv(data_path+'imdb_ct/imdb_vocab_antonym_causal.csv')
        df_identified_causal = pd.read_csv(data_path+'imdb_ct/imdb_identified_causal.csv')
    else:
        # Fail with a clear message instead of an UnboundLocalError at `return`.
        raise ValueError("Unknown moniker %r; expected 'kindle', 'imdb' or 'imdb_sents'" % (moniker,))

    return df_train, df_test, df_antonym_vocab, df_identified_causal

In [10]:
def get_antonyms(vocab, causal_words):
    """
    Look up, for every causal word, antonyms that carry an opposite coefficient.

    - An antonym is kept only if it is in `vocab` and its coefficient has the
      opposite sign of the word's own coefficient.
    - If the direct antonyms yield nothing, the antonyms of the word's
      single-token synonyms are tried instead.
    - Helps provide more options for manually editing counterfactual examples.
    - Slow: about 90 min for the imdb vocab.

    Parameters
    ----------
    vocab : mapping of term -> coefficient
    causal_words : iterable of terms to look up

    Returns
    -------
    dict
        term -> {antonym: antonym coefficient}. Terms missing from `vocab`, or
        whose lookup raises, are left out; terms without a usable antonym map
        to an empty dict.
    """
    term_antonyms = {}
    for term in causal_words:
        try:
            term_coef = vocab[term]

            ant_terms = {} # antonym and its coef
            # A lookup may return None when nothing is found; treat that as
            # "no candidates" so the synonym fallback below still runs.
            for ant in dictionary.antonym(term) or []:
                if (ant in vocab) and (term_coef * vocab[ant] < 0): # opposite coef
                    ant_terms[ant] = vocab[ant]

            if(len(ant_terms) == 0):
                for syn in dictionary.synonym(term) or []:
                    if(len(re.findall(r'\w+', syn)) == 1): # single-token synonyms only
                        for ant in dictionary.antonym(syn) or []:
                            if (ant in vocab) and (ant != term) and (term_coef * vocab[ant] < 0):
                                ant_terms[ant] = vocab[ant]
        except Exception:
            # Best effort: skip terms whose lookup fails, but let
            # KeyboardInterrupt through so the long loop can be stopped.
            continue

        term_antonyms[term] = ant_terms

    return term_antonyms

In [11]:
def fit_classifier(train_text, train_label, test_text, test_label, report=True, train='comb'):
    """
    Fit a basic binary bag-of-words logistic-regression classifier.

    Parameters
    ----------
    train_text, train_label : training documents and their labels
    test_text, test_label : evaluation documents and their labels
    report : bool
        True  - print the classification report and return (clf, vec);
        False - return only the test accuracy, rounded to three decimals.
    train : {'comb', 'train'}
        'comb'  - build the vocabulary from train and test text together;
        'train' - build the vocabulary from the training text only.
        The classifier itself is always fitted on the training documents.

    Raises
    ------
    ValueError
        If `train` is neither 'comb' nor 'train'.
    """
    if train not in ('comb', 'train'):
        # Previously this fell through and failed later with UnboundLocalError.
        raise ValueError("train must be 'comb' or 'train', got %r" % (train,))

    vec = CountVectorizer(min_df=5, binary=True, max_df=.8)
    if(train == 'comb'):
        vec.fit(list(train_text) + list(test_text))
        X_train = vec.transform(train_text)
    else:
        X_train = vec.fit_transform(list(train_text))
    X_test = vec.transform(test_text)

    # 'balanced' is the supported spelling of the former 'auto' option, which
    # scikit-learn no longer accepts as a class_weight value.
    clf = LogisticRegression(class_weight='balanced', solver='lbfgs', max_iter=1000)
    clf.fit(X_train, train_label)

    predicted = clf.predict(X_test)
    if(report):
        print(classification_report(test_label, predicted))
        return clf, vec
    else:
        result = classification_report(test_label, predicted, output_dict=True)
        return float('%.3f' % result['accuracy'])

In [12]:
def get_top_terms(clf, vec, topn=0, min_coef=0.5, show_data=False):
    """
    Build the term/coefficient table of a fitted classifier and select top terms.

    Parameters
    ----------
    clf : fitted linear classifier; coef_[0] holds one coefficient per feature
    vec : fitted vectorizer providing the feature names
    topn : int
        If > 0, keep only the `topn` terms with the largest absolute coefficient.
    min_coef : float
        If > 0, keep only terms with coef >= min_coef or coef < -min_coef.
    show_data : bool
        Print the selected positive and negative terms.

    Selection rules
    ---------------
    - topn == 0 and min_coef == 0: the full vocabulary table is returned
      (nothing is printed, even with show_data=True).
    - both > 0: the coefficient filter is applied first, then the top-n cut.

    Returns
    -------
    pandas.DataFrame with the columns `term` and `coef` (rounded to 3 decimals).
    The result is an independent copy, so callers may add columns to it.
    """
    # get_feature_names() was removed in scikit-learn 1.2; prefer the
    # replacement and fall back for older vectorizers.
    if hasattr(vec, 'get_feature_names_out'):
        terms = list(vec.get_feature_names_out())
    else:
        terms = list(vec.get_feature_names())
    df_vocab = pd.DataFrame({'term':terms,'coef':[float("%.3f" % c) for c in clf.coef_[0]]})

    if(topn == 0 and min_coef == 0):
        return df_vocab

    df_top_terms = df_vocab
    if(min_coef > 0):
        # NOTE(review): the bounds are asymmetric (>= on the positive side,
        # strict < on the negative side); kept as-is to preserve selections.
        keep = (df_top_terms['coef']>= min_coef) | (df_top_terms['coef'] < 0-min_coef)
        df_top_terms = df_top_terms[keep]
    if(topn > 0):
        df_top_terms = (df_top_terms
                        .assign(coef_abs=df_top_terms['coef'].abs())
                        .sort_values(by=['coef_abs'], ascending=False)
                        .head(topn)
                        .drop(columns=['coef_abs']))
    df_top_terms = df_top_terms.copy()

    if(show_data):
        df_pos_terms = df_top_terms[df_top_terms['coef']>0]
        df_neg_terms = df_top_terms[df_top_terms['coef']<0]
        print("Features correlated with pos class: \n", [item['term']+'/'+str(item['coef']) for i, item in df_pos_terms.sort_values(by=['coef'], ascending=False).iterrows()])
        print("\nFeatures correlated with neg class: \n", [item['term']+'/'+str(item['coef']) for i, item in df_neg_terms.sort_values(by=['coef'], ascending=True).iterrows()])

    return df_top_terms

In [13]:
def identify_causal_words(df, df_causal_terms, flag='causal', show_data=True):
    """
    Tag every sentence with the terms of `df_causal_terms` it contains.

    The text is lower-cased and split into `\\w+` tokens; every token found in
    df_causal_terms.term is recorded (repeated tokens are recorded repeatedly).

    Parameters
    ----------
    df : pandas.DataFrame with a `text` column; modified in place
    df_causal_terms : pandas.DataFrame with a `term` column
    flag : str
        Name of the term set (e.g. 'causal', 'bad', 'top'); used as column infix.
    show_data : bool
        Print how many sentences contain at least one of the terms.

    Adds the columns `<flag>_wds` (list of matched tokens) and `n_<flag>_wds`
    (their count) to `df`. Returns None.
    """
    # A set makes the per-token membership test constant time.
    causal_terms = set(df_causal_terms.term.values)

    df[flag+'_wds'] = df['text'].apply(lambda x: [wd for wd in re.findall(r'\w+', x.lower()) if wd in causal_terms])
    df['n_'+flag+'_wds'] = df[flag+'_wds'].apply(len)

    if(show_data):
        print("%d out of %d sentences include %d %s words" % (df[df['n_'+flag+'_wds']>0].shape[0], df.shape[0], df_causal_terms.shape[0], flag))

In [15]:
def generate_ct_sentences(df, df_causal_terms, flag='causal'):
    """
    Generate counterfactual sentences for those that contain causal words:
        - every causal word is replaced by one of its antonyms (picked at
          random, with a fixed seed, when there are several);
        - a causal word without antonyms is kept unchanged;
        - a sentence in which nothing changed gets ' ' as counterfactual text
          and an empty word list.

    Parameters
    ----------
    df : pandas.DataFrame with the columns `text` and `n_<flag>_wds`
        (as produced by identify_causal_words); modified in place
    df_causal_terms : pandas.DataFrame with the columns `term` and `antonyms`
        (antonyms is a dict keyed by antonym)
    flag : str
        Name of the term set; used as column infix.

    Adds the columns `ct_text_<flag>` (lower-cased, space-joined tokens) and
    `ct_<flag>_wds` (the words written at causal positions) to `df`.
    Returns None.
    """
    random.seed(42)

    # term -> antonym dict, built once; the first occurrence of a term wins.
    antonyms_by_term = {}
    for term, antonyms in zip(df_causal_terms['term'].values, df_causal_terms['antonyms'].values):
        antonyms_by_term.setdefault(term, antonyms)

    all_ct_text = []
    all_ct_wds = []
    for text, n_wds in zip(df['text'].values, df['n_'+flag+'_wds'].values):
        ct_text, sentence_ct_wds = ' ', []

        if n_wds > 0:
            words = re.findall(r'\w+', text.lower())
            new_wds = []
            ct_wds = []
            for wd in words:
                if wd not in antonyms_by_term:
                    new_wds.append(wd)
                    continue

                sub_w = list(antonyms_by_term[wd].keys())
                if(len(sub_w) == 1):
                    ct_wd = str(sub_w[0])
                elif(len(sub_w) > 1):
                    # random.sample keeps the seeded draw sequence reproducible
                    ct_wd = str(random.sample(sub_w,1)[0])
                else: # no antonyms: keep the current word
                    ct_wd = wd

                new_wds.append(ct_wd)
                # NOTE(review): an unchanged causal word is also recorded here.
                ct_wds.append(ct_wd)

            if(new_wds != words): # at least one word was substituted
                ct_text = ' '.join(new_wds)
                sentence_ct_wds = ct_wds

        all_ct_text.append(ct_text)
        all_ct_wds.append(sentence_ct_wds)

    # Assign whole columns at once: faster than per-row .loc writes and safe
    # when the index contains duplicate labels.
    df['ct_text_'+flag] = all_ct_text
    df['ct_'+flag+'_wds'] = all_ct_wds

In [16]:
def run_experiment(moniker,coef_thresh):
    """
    Build the Counterfactual dataset object for one corpus.

    Steps:
    1. Load train/test data and the annotation tables with get_data and wrap
       them in a Counterfactual object.
    2. Fit a classifier on the training text to obtain the full vocabulary and
       keep the annotated causal terms that occur in it (ds.all_causal_terms).
    3. Fit the classifier again and select the top terms with
       min_coef=coef_thresh (ds.top_terms).
    4. Attach the pre-annotated causal label and antonyms to the top terms and
       to the identified causal terms.
    5. For each of the term sets 'causal', 'bad', 'top', 'identified_causal'
       and 'all_causal', tag the matching words and generate counterfactual
       text for both ds.train and ds.test (columns are added in place).
    6. Add the counterfactual label / manually edited text columns that the
       loaded data does not already provide.

    Parameters:
    - moniker: 'kindle', 'imdb' or 'imdb_sents' (the names get_data handles)
    - coef_thresh: coefficient threshold passed to get_top_terms as min_coef

    Returns the populated Counterfactual object.
    """
    random.seed(42)
    
    print("Experiments for %s" % moniker)

    # 1. Get train and test data from file and construct Counterfactual object
    if(moniker == 'imdb_sents'):
        # Sentence-level files hold originals and counterfactuals in one frame,
        # told apart by the `flag` column.
        df_train_comb, df_test_comb, df_antonym_vocab, df_identified_causal = get_data(moniker)
        df_train = df_train_comb[df_train_comb['flag']=='original'][['batch_id','text','label']]
        df_test = df_test_comb[df_test_comb['flag']=='original'][['batch_id','text','label']]
        ds = Counterfactual(df_train, df_test, moniker)

        ds.train_ct = df_train_comb[df_train_comb['flag']=='counterfactual'][['batch_id','text','label']]
        ds.test_ct = df_test_comb[df_test_comb['flag']=='counterfactual'][['batch_id','text','label']]
    else:
        df_train, df_test, df_antonym_vocab, df_identified_causal = get_data(moniker)
        ds = Counterfactual(df_train, df_test, moniker)
        
        if(moniker == 'kindle'):
            ds.select_test = select_sents(df_test)
    
    ds.identified_causal_terms = df_identified_causal[df_identified_causal.identified_causal == 1]
        
    print('Train: %s' % str(Counter(df_train.label).items()))
    print('Test: %s' % str(Counter(df_test.label).items()))

    # 2. Fit on the training text to get the full vocabulary (all coefficients)
    clf, vec = fit_classifier(train_text = df_train.text.values, train_label = df_train.label.values,
                              test_text = df_test.text.values, test_label=df_test.label.values, 
                              report=True, train='train')
    
    vocab = get_top_terms(clf, vec, topn=0, min_coef=0, show_data=False)
    
    # Annotated causal terms that are present in the fitted vocabulary.
    ds.antonym_vocab = df_antonym_vocab
    ds.all_causal_terms = ds.antonym_vocab[(ds.antonym_vocab.causal == 1) & (ds.antonym_vocab.term.isin(vocab.term.values))]
    # NOTE(review): eval() executes whatever the CSV cell contains; only use
    # trusted annotation files (ast.literal_eval may be a safer choice - confirm
    # the stored format first). Assigning into this filtered frame may also
    # trigger pandas' SettingWithCopyWarning.
    ds.all_causal_terms['antonyms'] = ds.all_causal_terms['antonyms'].apply(lambda x: eval(x))
    
    # 3. Get top words
    # NOTE(review): same call and arguments as in step 2, so the model is
    # fitted and the report printed a second time.
    clf, vec = fit_classifier(train_text = df_train.text.values, train_label = df_train.label.values,
                                   test_text = df_test.text.values, test_label=df_test.label.values, 
                              report=True, train='train')

    ds.top_terms = get_top_terms(clf, vec, topn=0, min_coef=coef_thresh, show_data=True)

    # Number of top terms not covered in the full vocab
    missing_terms = [term for term in ds.top_terms.term if term not in ds.antonym_vocab.term.values]
    print('\n%d top terms: %d pos, %d neg, %d missing from full vocab\n' % (ds.top_terms.shape[0], 
                                                ds.top_terms[ds.top_terms.coef>0].shape[0],
                                                ds.top_terms[ds.top_terms.coef<0].shape[0],
                                                                            len(missing_terms)))
    print('Missing terms:', missing_terms)

    # 4a. Assign causal label to top words (load from pre-annotated file);
    #     terms missing from the annotation file get label 0
    ds.top_terms['causal'] = [ds.antonym_vocab[ds.antonym_vocab['term'] == item.term].causal.values[0] if item.term in ds.antonym_vocab.term.values else 0 for i, item in ds.top_terms.iterrows()]
    
    # 4b. Get antonyms for causal words (empty dict when the term is not annotated)
    ds.top_terms['antonyms'] = [eval(ds.antonym_vocab[ds.antonym_vocab['term'] == item.term].antonyms.values[0]) if item.term in ds.antonym_vocab.term.values else {} for i, item in ds.top_terms.iterrows()]
    ds.top_terms['n_antonyms'] = ds.top_terms['antonyms'].apply(lambda x: len(x))
    df_causal_terms = ds.top_terms[ds.top_terms['causal'] == 1]
    df_bad_terms = ds.top_terms[ds.top_terms['causal'] == 0]
    ds.identified_causal_terms['antonyms'] = [eval(ds.antonym_vocab[ds.antonym_vocab['term'] == item.term].antonyms.values[0]) if item.term in ds.antonym_vocab.term.values else {} for i, item in ds.identified_causal_terms.iterrows()]

    print('\nGet antonyms for %d out of %d causal terms' % (df_causal_terms[df_causal_terms['n_antonyms'] > 0].shape[0], ds.top_terms[ds.top_terms['causal'] == 1].shape[0]))
    print('Closest opposite match identified causal terms: %d out of %d\n' % (ds.identified_causal_terms[ds.identified_causal_terms.causal==1].shape[0],ds.identified_causal_terms.shape[0]))

    # 5. Automatically generate counterfactual samples for both training and testing data,
    #    once per term set; each pass adds its own <flag>-named columns
    for flag, df_ct_terms in zip(['causal','bad','top','identified_causal','all_causal'],[df_causal_terms, df_bad_terms, ds.top_terms, ds.identified_causal_terms, ds.all_causal_terms]):
        identify_causal_words(ds.train, df_ct_terms, flag, show_data=True)
        generate_ct_sentences(ds.train, df_ct_terms, flag)

        identify_causal_words(ds.test, df_ct_terms, flag, show_data=True)
        generate_ct_sentences(ds.test, df_ct_terms,flag)
    
    
    # 6. Counterfactual labels / manually edited counterfactual text
    if(moniker == 'kindle'):
        # Manually edited counterfactuals, matched to test rows by index value == id.
        df_annotate_ct = pd.read_csv(data_path+'kindle_ct/kindle_ct_edit_500.csv')
        ds.test['ct_text_amt'] = [df_annotate_ct[df_annotate_ct['id']==idx]['ct_text_amt'].values[0] for idx in ds.test.index.values]
        ds.select_test['ct_text_amt'] = ds.test.loc[list(ds.select_test.index.values)]['ct_text_amt'].values
        ds.select_test['ct_label'] = ds.select_test['label'].apply(lambda x: 0-x)
    if(moniker == 'kindle' or moniker == 'imdb_sents'):
        # Labels are +1/-1, so the counterfactual label is the negated label.
        ds.train['ct_label'] = ds.train['label'].apply(lambda x: 0-x)
        ds.test['ct_label'] = ds.test['label'].apply(lambda x: 0-x)

    display(ds.test.head(2))


#     if(moniker == 'kindle'):
#         pickle.dump(ds, open(data_path+"kindle_ct/causal_sents/ds_kindle.pkl", "wb"))
#     elif(moniker == 'imdb'):
#         pickle.dump(ds, open(data_path+"imdb_ct/sentiment/combined/paired/paragraph/ds_imdb.pkl", "wb"))
#     elif(moniker == 'imdb_sents'):
#         pickle.dump(ds, open(data_path+"imdb_ct/sentiment/combined/paired/split_sents/ds_imdb.pkl", "wb"))

    return ds

In [15]:
# Paragraph-level IMDB data. The moniker must be one of the names get_data
# handles ('kindle', 'imdb', 'imdb_sents'); 'imdb_L' is not among them.
ds_imdb = run_experiment(moniker='imdb',coef_thresh=0.4)

Experiments for imdb


Unnamed: 0,batch_id,text,ct_text_amt,label,ct_label
0,4,"Long, boring, blasphemous. Never have I been so glad to see ending credits roll.","Long, fascinating, soulful. Never have I been so sad to see ending credits roll.",-1,1


Train: dict_items([(-1, 851), (1, 856)])
Test: dict_items([(-1, 243), (1, 245)])
              precision    recall  f1-score   support

          -1       0.81      0.81      0.81       243
           1       0.81      0.82      0.81       245

    accuracy                           0.81       488
   macro avg       0.81      0.81      0.81       488
weighted avg       0.81      0.81      0.81       488

              precision    recall  f1-score   support

          -1       0.81      0.81      0.81       243
           1       0.81      0.82      0.81       245

    accuracy                           0.81       488
   macro avg       0.81      0.81      0.81       488
weighted avg       0.81      0.81      0.81       488

Features correlated with pos class: 
 ['romantic/1.348', 'great/1.288', 'perfect/1.092', 'wonderful/1.029', 'gives/0.977', 'classic/0.92', 'enjoyed/0.839', 'especially/0.8', 'surprised/0.798', 'fun/0.773', 'love/0.767', 'romance/0.764', 'works/0.716', 'both/0.689',

Unnamed: 0,batch_id,text,ct_text_amt,label,ct_label,causal_wds,n_causal_wds,ct_text_causal,ct_causal_wds,bad_wds,...,ct_text_top,ct_top_wds,identified_causal_wds,n_identified_causal_wds,ct_text_identified_causal,ct_identified_causal_wds,all_causal_wds,n_all_causal_wds,ct_text_all_causal,ct_all_causal_wds
0,13,"If you haven't seen this, it's terrible. It is pure trash. I saw this about 17 years ago, and I'm still screwed up from it.","If you haven't seen this, it's incredible. It is pure gold. I saw this about 17 years ago, and I'm still hype about it.",-1,1,[terrible],1,if you haven t seen this it s nice it is pure trash i saw this about 17 years ago and i m still screwed up from it,[nice],[years],...,if you haven t seen this it s nice it is pure trash i saw this about 17 young ago and i m still screwed up from it,"[nice, young]",[terrible],1,if you haven t seen this it s nice it is pure trash i saw this about 17 years ago and i m still screwed up from it,[nice],"[terrible, trash]",2,if you haven t seen this it s nice it is pure heat i saw this about 17 years ago and i m still screwed up from it,"[nice, heat]"
1,46,"being a NI supporter, it's hard to objectively review a movie glorifying ulster nationalists. characters who are hard to root for, typical heavy-handed anti-violence messages, and a predictable 'poetic justice' ending makes this an awkward watch...","being a NI supporter, it's easy to objectively review a movie glorifying ulster nationalists. characters who are painless to root for, typical heavy-handed anti-violence messages, and a unpredictable 'poetic justice' ending makes this an unforgettable watch...",-1,1,[predictable],1,being a ni supporter it s hard to objectively review a movie glorifying ulster nationalists characters who are hard to root for typical heavy handed anti violence messages and a unpredictable poetic justice ending makes this an awkward watch,[unpredictable],[ending],...,being a ni supporter it s hard to objectively review a movie glorifying ulster nationalists characters who are hard to root for typical heavy handed anti violence messages and a unpredictable poetic justice block makes this an awkward watch,"[unpredictable, block]",[predictable],1,being a ni supporter it s hard to objectively review a movie glorifying ulster nationalists characters who are hard to root for typical heavy handed anti violence messages and a unpredictable poetic justice ending makes this an awkward watch,[unpredictable],[predictable],1,being a ni supporter it s hard to objectively review a movie glorifying ulster nationalists characters who are hard to root for typical heavy handed anti violence messages and a unpredictable poetic justice ending makes this an awkward watch,[unpredictable]


In [17]:
# Sentence-level IMDB data. The moniker must be one of the names get_data
# handles ('kindle', 'imdb', 'imdb_sents'); 'imdb_S' is not among them.
ds_imdb_sents = run_experiment(moniker='imdb_sents',coef_thresh=1.0)

Experiments for imdb_sents


Unnamed: 0,batch_id,text,label
0,4,"Long, boring, blasphemous.",-1


Train: dict_items([(-1, 4059), (1, 4114)])
Test: dict_items([(-1, 1101), (1, 1144)])
              precision    recall  f1-score   support

          -1       0.71      0.70      0.70      1101
           1       0.71      0.72      0.72      1144

    accuracy                           0.71      2245
   macro avg       0.71      0.71      0.71      2245
weighted avg       0.71      0.71      0.71      2245

              precision    recall  f1-score   support

          -1       0.71      0.70      0.70      1101
           1       0.71      0.72      0.72      1144

    accuracy                           0.71      2245
   macro avg       0.71      0.71      0.71      2245
weighted avg       0.71      0.71      0.71      2245

Features correlated with pos class: 
 ['romantic/2.919', 'perfect/2.151', 'wonderful/1.927', 'sweet/1.732', 'magnificent/1.695', 'beautifully/1.694', 'journey/1.653', 'romance/1.646', 'subtle/1.613', 'appropriate/1.598', 'feature/1.535', 'excellent/1.512', 'enj

Unnamed: 0,batch_id,text,label,causal_wds,n_causal_wds,ct_text_causal,ct_causal_wds,bad_wds,n_bad_wds,ct_text_bad,...,ct_top_wds,identified_causal_wds,n_identified_causal_wds,ct_text_identified_causal,ct_identified_causal_wds,all_causal_wds,n_all_causal_wds,ct_text_all_causal,ct_all_causal_wds,ct_label
0,13,"If you haven't seen this, it's terrible.",-1,[terrible],1,if you haven t seen this it s nice,[nice],[],0,,...,[nice],[terrible],1,if you haven t seen this it s nice,[nice],[terrible],1,if you haven t seen this it s nice,[nice],1
1,13,It is pure trash.,-1,[],0,,[],[],0,,...,[],[],0,,[],[trash],1,it is pure heat,[heat],1


In [17]:
# Kindle reviews; coef_thresh is forwarded to get_top_terms as min_coef.
ds_kindle = run_experiment(moniker='kindle',coef_thresh=1.0)

Experiments for kindle


Unnamed: 0,text,rating,label,flag
3,"The story was good, but I was getting very irritated at all the grammatical and spelling errors",2,-1,selected_train


Train: dict_items([(-1, 5000), (1, 5000)])
Test: dict_items([(1, 250), (-1, 250)])
              precision    recall  f1-score   support

          -1       0.87      0.92      0.89       250
           1       0.91      0.86      0.88       250

    accuracy                           0.89       500
   macro avg       0.89      0.89      0.89       500
weighted avg       0.89      0.89      0.89       500

              precision    recall  f1-score   support

          -1       0.87      0.92      0.89       250
           1       0.91      0.86      0.88       250

    accuracy                           0.89       500
   macro avg       0.89      0.89      0.89       500
weighted avg       0.89      0.89      0.89       500

Features correlated with pos class: 
 ['disappoint/2.68', 'amazing/2.337', 'loved/2.329', 'wonderful/2.235', 'excellent/2.138', 'wait/1.944', 'loves/1.863', 'enjoyed/1.838', 'great/1.834', 'awesome/1.778', 'drag/1.757', 'perfect/1.735', 'beautiful/1.688', 'love/1

Unnamed: 0,text,rating,label,flag,causal_wds,n_causal_wds,ct_text_causal,ct_causal_wds,bad_wds,n_bad_wds,...,identified_causal_wds,n_identified_causal_wds,ct_text_identified_causal,ct_identified_causal_wds,all_causal_wds,n_all_causal_wds,ct_text_all_causal,ct_all_causal_wds,ct_text_amt,ct_label
0,This was a very fun story,5,1,test,[fun],1,this was a very frivolity story,[frivolity],[],0,...,[fun],1,this was a very frivolity story,[frivolity],[fun],1,this was a very frivolity story,[frivolity],This was a very sad story,-1
1,Not fast moving but a very well managed pace,5,1,test,[],0,,[],[not],1,...,[not],1,commercial fast moving but a very well managed pace,[commercial],[well],1,not fast moving but a very ill managed pace,[ill],Not fast moving but a very poorly managed pace,-1
