### Importing Libraries

In [1]:
import numpy as np
import pandas as pd
import json
import spacy 
import nltk
from nltk.stem import WordNetLemmatizer
from nltk.stem.snowball import SnowballStemmer
from nltk.corpus import stopwords
from itertools import chain
from textblob.classifiers import word_tokenize, strip_punc
from nltk.classify import NaiveBayesClassifier
import collections
import random

### Importing train data

In [2]:
def data_import(selected_class):
    """Load the labelled training reviews for one review class.

    Reads ``<selected_class>_tt.json`` from the current working directory and
    wraps the parsed JSON in a DataFrame.

    :param selected_class: one of 'Bug', 'Feature', 'Rating' or
        'UserExperience'.
    :return: ``pandas.DataFrame`` built from the JSON content of the file.
    :raises ValueError: if ``selected_class`` is not one of the supported
        names (previously this fell through and returned ``None``, which only
        surfaced later as an unrelated error).
    """
    supported_classes = ('Bug', 'Feature', 'Rating', 'UserExperience')
    if selected_class not in supported_classes:
        raise ValueError(
            "Unknown class {0!r}; expected one of {1}".format(
                selected_class, ", ".join(supported_classes)))

    # Every class follows the same '<class>_tt.json' naming scheme.
    with open(f'{selected_class}_tt.json') as infile:
        return pd.DataFrame(json.load(infile))

### Importing test data

In [3]:
# Raw test reviews. Later cells read the 'processed_text', 'sentiScore_pos' and
# 'sentiScore_neg' columns and rename 'score'/'processed_text' to 'rating'/'comment'.
testdata = pd.read_excel("Dataset3.xlsx")
#testdata

### Pre-processing the test data

**Sentiment Score**

In [4]:
# One score per review: whichever of the positive / negative scores has the
# larger magnitude; the positive score wins a tie.
sentiScore = [
    negative if abs(negative) > abs(positive) else positive
    for positive, negative in zip(testdata['sentiScore_pos'], testdata['sentiScore_neg'])
]

**Lemmatize**

In [5]:
# Shared NLTK helpers used by the pre-processing loop in the next cell.
wordnet_lemmatizer = WordNetLemmatizer()

# Hand-picked stop words (pronouns, articles, a few prepositions and the
# 's'/'t' fragments). Tokens are lower-cased before being compared against
# this list. Note that 'up' is listed twice.
CUSTOM_STOPWORDS = ['i', 'me','up','my', 'myself', 'we', 'our', 'ours','ourselves', 'you', 'your', 'yours','yourself', 
                    'yourselves','he', 'him', 'his', 'himself', 'she', 'her', 'hers' ,'herself','it', 'its', 'itself', 
                    'they', 'them', 'their', 'theirs','themselves' ,'am', 'is', 'are','a', 'an', 'the', 'and','in','out', 'on','up','down', 's', 't']

s_stemmer = SnowballStemmer(language='english')

# NLTK's English stop-word list, stored as a set for membership tests.
stop_words = set(stopwords.words('english'))

In [6]:
def _join_tokens(tokens):
    """Join tokens in the layout the rest of the notebook expects.

    The result starts with a single space and every token is preceded by one
    more space (so non-empty results start with two spaces, and an empty token
    sequence yields " ").
    """
    return " " + "".join(" " + token for token in tokens)


# One entry per review, in the row order of testdata['processed_text'].
lemmatized_comment = []
stopwords_removal = []
stopwords_removal_lemmatization = []
stemmed = []
stopwords_removal_nltk = []
length = []

for row in testdata['processed_text']:
    # Coerce once so that the word count and the tokenizer see the same text;
    # a non-string cell (e.g. NaN) would otherwise make word_tokenize fail.
    text = str(row)

    # Length in whitespace-separated words
    length.append(len(text.split()))

    # Tokenize
    sentence_words = nltk.word_tokenize(text)
    lowered_words = [word.lower() for word in sentence_words]

    # Lemmatize (original casing)
    lemmatized_comment.append(
        _join_tokens(wordnet_lemmatizer.lemmatize(word) for word in sentence_words))

    # Stop words removal - custom list, then lemmatize what is left
    kept_custom = [word for word in lowered_words if word not in CUSTOM_STOPWORDS]
    stopwords_removal.append(_join_tokens(kept_custom))
    stopwords_removal_lemmatization.append(
        _join_tokens(wordnet_lemmatizer.lemmatize(word) for word in kept_custom))

    # Stemming (original casing)
    stemmed.append(_join_tokens(s_stemmer.stem(word) for word in sentence_words))

    # Stop words removal - NLTK list
    stopwords_removal_nltk.append(
        _join_tokens(word for word in lowered_words if word not in stop_words))

In [7]:
# Attach every pre-processed list to testdata as a new column (same order as
# before), then rename the raw columns to the names used by the train data.
processed_columns = {
    'lemmatized_comment': lemmatized_comment,
    'stopwords_removal': stopwords_removal,
    'stopwords_removal_lemmatization': stopwords_removal_lemmatization,
    'stemmed': stemmed,
    'stopwords_removal_nltk': stopwords_removal_nltk,
    'sentiScore': sentiScore,
    'length_words': length,
}
for column_name, column_values in processed_columns.items():
    testdata[column_name] = column_values

testdata.rename(columns={'score':'rating','processed_text':'comment'}, inplace=True)

In [8]:
# Export the pre-processed test data; ReviewClassifier reads this file back with
# pd.read_excel. The DataFrame index is written too (to_excel default).
testdata.to_excel("Dataset3_processed.xlsx")

The test data now has the same format as the train data, so it can be passed to the author's classifier.

### Extracting the feature combinations

**Function to generate combination of features**         
- bow-comment
- bigram-comment
- bow-bigram-comment
- bow-lemmatized_comment
- bow-remove_stopwords
- bow-stopwords_removal_lemmatization
- bow-bigram-stopwords_removal_lemmatization
- rating-comment
- rating-comment-length
- rating-comment-sentiment1-length
- rating-comment-sentiment2-length
- bow-rating-lemmatized_comment
- bow-rating-comment-sentiment1
- bigram-rating-comment-sentiment1
- bigram-rating-stopwords_removal_lemmatization-sentiment2
- bow-bigram-comment-sentiment1
- bow-bigram-rating-lemmatized_comment
- bow-bigram-remove_stopwords-rating-sentiment1
- bow-rating-stopwords_removal_lemmatization-sentiment1
- bow-rating-stopwords_removal_lemmatization-sentiment2

In [9]:
# To create the configuration id (names of the above features)
def get_key_for_classifier_config(cfg):
    """Build the configuration id for a namedtuple of feature flags.

    :param cfg: namedtuple instance; its ``_fields`` supply the names.
    :return: names of all fields whose value is truthy and not a string,
        in field order, joined with '-'.
    """
    enabled_fields = []
    for field_name, field_value in zip(cfg._fields, cfg):
        if isinstance(field_value, str):
            # String-valued fields never contribute to the id.
            continue
        if field_value:
            enabled_fields.append(field_name)
    return "-".join(enabled_fields)


# Defining the namedtuples
classifier_technique_configurator = collections.namedtuple(
    "classifier_technique_configurator",
    (
        "bow",                              # emit contains(word) features
        "bigram",                           # emit collocation(w1,w2) features
        "remove_stopwords",                 # text column: 'stopwords_removal_nltk'
        "rating",                           # emit rating(...) feature
        "comment",                          # text column: 'comment'
        "lemmatized_comment",               # text column: 'lemmatized_comment'
        "stopwords_removal_lemmatization",  # text column: 'stopwords_removal_lemmatization'
        "sentiment1",                       # emit senti_Score(...) feature
        "sentiment2",                       # emit senti_Score_pos/neg(...) features
        "length",                           # emit length(...) feature
    ),
)


# Setting the configuration parameters
def get_classifier_technique_config(bow=False, bigram=False,comment = False, lemmatized_comment = False, remove_stopwords = False, stopwords_removal_lemmatization = False, rating = False, sentiment1 = False, sentiment2 = False, length = False):
    """Create a ``classifier_technique_configurator`` from individual flags.

    Every flag defaults to ``False``; each argument is stored in the field of
    the same name.

    :return: the populated ``classifier_technique_configurator`` namedtuple.
    """
    flag_values = {
        "bow": bow,
        "bigram": bigram,
        "remove_stopwords": remove_stopwords,
        "rating": rating,
        "comment": comment,
        "lemmatized_comment": lemmatized_comment,
        "stopwords_removal_lemmatization": stopwords_removal_lemmatization,
        "sentiment1": sentiment1,
        "sentiment2": sentiment2,
        "length": length,
    }
    return classifier_technique_configurator(**flag_values)


# Creating a dictionary with configuration id and its respective combination of features
def get_combined_cfgs_journal_version():
    """Build every feature combination that is evaluated.

    Each entry of the table below lists the flags that are switched on for one
    configuration; all other flags keep their ``False`` default. The
    configurations are created in table order.

    :return: dict mapping the configuration id (see
        ``get_key_for_classifier_config``) to its configuration namedtuple,
        in insertion order.
    """
    enabled_flags_per_config = (
        # Document (text) matching
        ("bow", "comment"),
        ("bigram", "comment"),
        ("bow", "bigram", "comment"),
        ("bow", "lemmatized_comment"),
        ("bow", "remove_stopwords"),
        ("bow", "stopwords_removal_lemmatization"),
        ("bow", "bigram", "stopwords_removal_lemmatization"),
        # Metadata
        ("comment", "rating"),
        ("comment", "rating", "length"),
        ("comment", "rating", "sentiment1", "length"),
        ("comment", "rating", "sentiment2", "length"),
        # Combined (document text & metadata)
        ("bow", "lemmatized_comment", "rating"),
        ("bow", "comment", "rating", "sentiment1"),
        ("bigram", "comment", "rating", "sentiment1"),
        ("bigram", "stopwords_removal_lemmatization", "rating", "sentiment2"),
        ("bow", "bigram", "comment", "sentiment1"),
        ("bow", "bigram", "lemmatized_comment", "rating"),
        ("bow", "bigram", "remove_stopwords", "rating", "sentiment1"),
        ("bow", "stopwords_removal_lemmatization", "rating", "sentiment1"),
        ("bow", "stopwords_removal_lemmatization", "rating", "sentiment2"),
    )

    toreturn = {}
    for enabled_flags in enabled_flags_per_config:
        cfg = get_classifier_technique_config(**dict.fromkeys(enabled_flags, True))
        toreturn[get_key_for_classifier_config(cfg)] = cfg
    return toreturn


**Creating a set of all words from the selected columns** 

In [10]:
def _get_words_from_dataset(dataset):
    """Return a set of all words in a dataset.

    :param dataset: A list of tuples of the form ``(words, label)`` where
        ``words`` is either a string of a list of tokens.
    """

    # Words may be either a string or a list of tokens. Return an iterator of tokens accordingly
    
    def tokenize(words):
        if isinstance(words, str):
            return word_tokenize(words, include_punc=False)
        else:
            return words
        
    if len(dataset[0]) == 2:
        all_words = chain.from_iterable(tokenize(words.lower()) for words,_ in dataset)
    else:
        all_words = chain.from_iterable(tokenize(words.lower()) for words in dataset)

    return set(all_words)

**Performing the below two operations for the combination of features to be passed for training**

In [11]:
def extract_bigram_words(input_data):
    """Pair every item of ``input_data`` with its successor.

    :param input_data: sliceable sequence of tokens.
    :return: iterator of ``(item, next_item)`` tuples; empty when fewer than
        two items are given.
    """
    return zip(input_data, input_data[1:])

def find_ngrams(input_list, n):
    """Return an iterator over all runs of ``n`` consecutive items.

    :param input_list: sliceable sequence.
    :param n: run length; ``0`` yields an empty iterator.
    :return: iterator of ``n``-tuples, in sequence order.
    """
    # The k-th shifted copy supplies the k-th member of every tuple.
    shifted_copies = (input_list[offset:] for offset in range(n))
    return zip(*shifted_copies)

def extractor(document, word_features, feature_data, cfg=None):
    """Build the NLTK feature dictionary for one review.

    :param document: review text exactly as stored in the text column selected
        by ``cfg``; it is tokenized here and, when metadata features are
        enabled, also used to locate the review's row in ``feature_data``.
    :param word_features: iterable of vocabulary words; one ``contains(word)``
        feature is emitted per word when ``cfg.bow`` is set.
    :param feature_data: DataFrame holding the text column plus the metadata
        columns 'rating', 'sentiScore', 'sentiScore_pos', 'sentiScore_neg' and
        'length_words' (only the columns of enabled features are read).
    :param cfg: namedtuple of feature flags. When omitted, the module-level
        ``cfg`` is used, which is how existing three-argument callers supply it.
    :return: dict mapping feature name to bool.
    :raises NameError: ``cfg`` omitted and no module-level ``cfg`` exists.
    :raises ValueError: a metadata feature is enabled but no text-column flag
        is set, so the review's row cannot be located.
    :raises IndexError: a metadata feature is enabled and ``document`` does not
        occur in the selected text column.
    """
    if cfg is None:
        try:
            cfg = globals()["cfg"]
        except KeyError:
            raise NameError("name 'cfg' is not defined") from None

    token_list = [word.lower() for word in nltk.word_tokenize(document)]
    feats = {}

    if cfg.bow:
        # Set membership instead of scanning the token list for every vocabulary word.
        tokens_present = set(token_list)
        for word in word_features:
            feats[u'contains({0})'.format(word)] = word in tokens_present

    if cfg.bigram:
        for (w1, w2) in extract_bigram_words(token_list):
            feats[u'collocation({0},{1})'.format(w1, w2)] = True

    if cfg.length or cfg.rating or cfg.sentiment1 or cfg.sentiment2:
        # Flag -> text column, checked in this priority order.
        text_column_by_flag = (
            ('comment', 'comment'),
            ('remove_stopwords', 'stopwords_removal_nltk'),
            ('stopwords_removal_lemmatization', 'stopwords_removal_lemmatization'),
            ('lemmatized_comment', 'lemmatized_comment'),
        )
        text_column = next(
            (column for flag, column in text_column_by_flag if getattr(cfg, flag)), None)
        if text_column is None:
            raise ValueError("metadata features need one text-column flag to be set in cfg")

        # Locate the first matching row once and reuse it for every metadata
        # column; indexing [0] raises IndexError when nothing matches.
        matching_rows = np.flatnonzero((feature_data[text_column] == document).to_numpy())
        row_position = matching_rows[0]

        def metadata(column):
            return feature_data[column].iloc[row_position]

        if cfg.length:
            feats["length({0})".format(metadata('length_words'))] = True

        if cfg.rating:
            feats["rating({0})".format(metadata('rating'))] = True

        if cfg.sentiment1:
            feats["senti_Score({0})".format(metadata('sentiScore'))] = True

        if cfg.sentiment2:
            feats["senti_Score_pos({0})".format(metadata('sentiScore_pos'))] = True
            feats["senti_Score_neg({0})".format(metadata('sentiScore_neg'))] = True

    return feats

In [12]:
# Mapping of configuration id -> feature configuration; the classification and
# evaluation cells iterate over it.
debug_cfg = get_combined_cfgs_journal_version()


**Classifying the selected class using NaiveBayes used by the author**

In [13]:
def ReviewClassifier(cfg_id, cfg, selected_class):
    """Train a Naive Bayes classifier for one class and label the test data.

    The training set comes from ``data_import(selected_class)``, the test set
    from "Dataset3_processed.xlsx". The test data, extended with a
    'Predicted_labels' column, is written to
    ``<selected_class>_<cfg_id>_predicted.csv``.

    :param cfg_id: configuration id; only used in the output file name.
    :param cfg: namedtuple of feature flags; selects the text column and the
        features built by ``extractor``.
    :param selected_class: class name passed to ``data_import``.
    :return: ``defaultdict(set)`` mapping each predicted label to the set of
        test-row positions that received it.
    :raises ValueError: if ``cfg`` enables none of the text-column flags.
    """
    # Import train and test data
    train_data = data_import(selected_class)
    test_data = pd.read_excel("Dataset3_processed.xlsx")

    # Flag -> text column, checked in this priority order.
    text_column_by_flag = (
        ('comment', 'comment'),
        ('remove_stopwords', 'stopwords_removal_nltk'),
        ('lemmatized_comment', 'lemmatized_comment'),
        ('stopwords_removal_lemmatization', 'stopwords_removal_lemmatization'),
    )
    text_column = next(
        (column for flag, column in text_column_by_flag if getattr(cfg, flag)), None)
    if text_column is None:
        raise ValueError(
            "cfg must enable one of: comment, remove_stopwords, "
            "lemmatized_comment, stopwords_removal_lemmatization")

    train_data_features = list(zip(train_data[text_column], train_data['label']))
    test_data_feature = list(test_data[text_column])

    # extractor() is called with three arguments below and then takes its
    # configuration from the module-level name `cfg`, not from this function's
    # argument. Publish the argument there so a direct call cannot silently
    # build features with a stale configuration.
    globals()['cfg'] = cfg

    random.shuffle(train_data_features)

    # Set of words from the selected column of train data
    word_features = _get_words_from_dataset(train_data_features)

    # Feature dictionaries (with labels) used to train the model
    train_features_labels = [(extractor(d, word_features, train_data), c) for d, c in train_data_features]

    # Algorithm
    classifier = nltk.NaiveBayesClassifier.train(train_features_labels)

    # Feature dictionaries used to test the model
    test_features = [extractor(d, word_features, test_data) for d in test_data_feature]

    testsets = collections.defaultdict(set)
    predicted_labels = []
    for position, feats in enumerate(test_features):
        observed = classifier.classify(feats)
        predicted_labels.append(observed)
        testsets[observed].add(position)

    test_data['Predicted_labels'] = predicted_labels

    filename = f'{selected_class}_{cfg_id}_predicted.csv'
    test_data.to_csv(filename)

    return testsets


In [16]:
# Choose the class file to classify using the Naive Bayes algorithm, then run
# every feature configuration and keep its predictions by configuration id.
selected_class = input("Select the class Bug, Feature, UserExperience, Rating: " )
testset_dict ={}
# NOTE: the loop variable must stay a module-level name called `cfg`:
# extractor() reads `cfg` without receiving it as a parameter.
for cfg_id,cfg in debug_cfg.items():
    print(cfg_id)
    testset =  ReviewClassifier(cfg_id,cfg,selected_class)
    testset_dict[cfg_id] = testset



Select the class Bug, Feature, UserExperience, Rating: UserExperience
bow-comment
bigram-comment
bow-bigram-comment
bow-lemmatized_comment
bow-remove_stopwords
bow-stopwords_removal_lemmatization
bow-bigram-stopwords_removal_lemmatization
rating-comment
rating-comment-length
rating-comment-sentiment1-length
rating-comment-sentiment2-length
bow-rating-lemmatized_comment
bow-rating-comment-sentiment1
bigram-rating-comment-sentiment1
bigram-rating-stopwords_removal_lemmatization-sentiment2
bow-bigram-comment-sentiment1
bow-bigram-rating-lemmatized_comment
bow-bigram-remove_stopwords-rating-sentiment1
bow-rating-stopwords_removal_lemmatization-sentiment1
bow-rating-stopwords_removal_lemmatization-sentiment2


### Evaluation Metrics

In [17]:
metrics_data = pd.DataFrame(columns=['Classification Techniques','Precision','Recall','F1'])
cfg_list = []
precision_list = []
recall_list = []
f1score_list = []

# The reference labels depend only on the selected class, so validate the
# class, read the workbook and index its rows once rather than once per
# configuration.
if selected_class not in ("Bug", "Feature", "UserExperience", "Rating"):
    raise ValueError(
        "Unknown class {0!r}; expected Bug, Feature, UserExperience or Rating".format(selected_class))

ref_data = pd.read_excel("Dataset3.xlsx")

# label value -> positions of the rows carrying it
refset = collections.defaultdict(set)
for i, label in enumerate(ref_data[f'label_{selected_class}']):
    refset[label].add(i)

# NOTE(review): `label` is left over from the loop above, i.e. it is the label
# of the LAST row of the workbook, so the scores below are computed for
# whichever class that row happens to carry. Confirm that this is the intended
# (positive) label.
for cfg_id,cfg in debug_cfg.items():
    filename = f'{selected_class}_{cfg_id}_predicted.csv'

    p = nltk.precision(refset[label],testset_dict[cfg_id][label])
    r = nltk.recall(refset[label],testset_dict[cfg_id][label])
    f = nltk.f_measure(refset[label],testset_dict[cfg_id][label])

    cfg_list.append(cfg_id)
    precision_list.append(p)
    recall_list.append(r)
    f1score_list.append(f)

metrics_data['Classification Techniques'] = cfg_list
metrics_data['Precision'] = precision_list
metrics_data['Recall'] = recall_list
metrics_data['F1'] = f1score_list
print("Metrics for: ", selected_class, "Reports")
metrics_data

Metrics for:  UserExperience Reports


Unnamed: 0,Classification Techniques,Precision,Recall,F1
0,bow-comment,0.641509,0.201183,0.306306
1,bigram-comment,0.487261,0.905325,0.63354
2,bow-bigram-comment,0.648352,0.349112,0.453846
3,bow-lemmatized_comment,0.642857,0.213018,0.32
4,bow-remove_stopwords,0.68,0.100592,0.175258
5,bow-stopwords_removal_lemmatization,0.666667,0.153846,0.25
6,bow-bigram-stopwords_removal_lemmatization,0.680556,0.289941,0.406639
7,rating-comment,0.455556,0.727811,0.560364
8,rating-comment-length,0.57971,0.473373,0.521173
9,rating-comment-sentiment1-length,0.541935,0.497041,0.518519
