In [1]:
import pandas as pd
import sys
sys.path.append('../../')
from config import Config
from experiments_utils import *
import pickle as pkl
from IPython.display import display, HTML

In [2]:
# Named feature groups. Every group is a list of feature keys so that groups
# can be concatenated into larger feature sets.
lexical_feats = ['ngram_feats', 'tfidf_feats']
pos_feats = ['pos_feats']
len_feats = ['length_feats']
argumentation_feats = ['argumentation_feats']
named_entity_feats = ['named_entity_feats']
text_quality_feats = ['text_quality_feats']

# Content-value, aggressiveness and toxicity groups, individually and combined.
content_value_feats = ['perspective_content_value_feats']
aggressiveness_feats = ['perspective_aggressiveness_feats']
# NOTE(review): the 'perspecitive' spelling is reproduced as written; it
# presumably has to match a key used by the feature pipeline -- verify before
# "fixing" it.
toxicity_feats = ['perspecitive_toxicity_feats']
perspective_feats = content_value_feats + aggressiveness_feats + toxicity_feats

crowd_annotated_feats = [
    'constructiveness_chars_feats',
    'non_constructiveness_chars_feats',
    'toxicity_chars_feats',
]

# Union of every group above except the crowd-annotated ones, in a fixed order.
all_feats = [
    feat_name
    for group in (
        lexical_feats, pos_feats, len_feats,
        argumentation_feats, named_entity_feats, text_quality_feats,
        content_value_feats, aggressiveness_feats, toxicity_feats,
    )
    for feat_name in group
]
            

#no_len_feats = lexical_feats + discourse_feats + text_quality_feats + named_entity_feats + perspective_feats 

In [3]:
# Comment-length bins of width 10 covering 10..80; get_balanced_df treats each
# pair as a half-open interval (lower <= comment_len < upper).
lower_upper_ranges = [[lower, lower + 10] for lower in range(10, 80, 10)]

In [4]:
def get_balanced_df(df, ranges=None, random_state=42):
    """Build a class-balanced sample of `df` within each comment-length bin.

    For every (lower, upper) pair, the rows with
    ``lower <= comment_len < upper`` are split by the `constructive` label
    (1.0 vs 0.0) and the same number of rows -- the size of the smaller
    class -- is sampled without replacement from each class.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns `comment_len` and `constructive`.
    ranges : iterable of (lower, upper) pairs, optional
        Length bins to balance. Defaults to the notebook-level
        `lower_upper_ranges`.
    random_state : int, numpy RandomState or None, optional
        Forwarded to every ``DataFrame.sample`` call. The default is a fixed
        seed so that re-running the notebook yields the same split; pass
        ``None`` to get a fresh random sample on each call.

    Returns
    -------
    pandas.DataFrame
        Concatenation of the per-bin samples (constructive rows first, then
        non-constructive rows, bin by bin), keeping the original index
        labels. Empty, with the columns of `df`, when no bin contains both
        classes.
    """
    if ranges is None:
        # Fall back to the bins defined at notebook level.
        ranges = lower_upper_ranges

    balanced_dfs = []
    for (lower, upper) in ranges:
        print('LOWER: ', lower)
        print('UPPER: ', upper)
        subset_df = df[(df['comment_len'] >= lower) & (df['comment_len'] < upper)]
        con_df = subset_df[subset_df['constructive'] == 1.0]
        non_con_df = subset_df[subset_df['constructive'] == 0.0]
        # Size of the smaller class; 0 when the bin is empty or one-sided.
        lower_count = min(con_df.shape[0], non_con_df.shape[0])
        if lower_count > 0:
            con_subset_df = con_df.sample(n=lower_count, random_state=random_state)
            non_con_subset_df = non_con_df.sample(n=lower_count, random_state=random_state)
            balanced_dfs.extend([con_subset_df, non_con_subset_df])
        # Both classes always contribute exactly `lower_count` rows.
        print('Number of constructive samples: ', lower_count)
        print('Number of non-constructive samples: ', lower_count)

    if not balanced_dfs:
        # pd.concat rejects an empty list; return an empty frame with df's columns.
        return df.iloc[0:0]
    result_df = pd.concat(balanced_dfs)
    return result_df

In [5]:
def evaluate_on_balanced_data(train_df, balanced_df, feat_sets):
    """Train one model per feature set and report its scores on `balanced_df`.

    For each entry of `feat_sets` this calls `train_and_save_model` on
    `train_df`, then `get_predictions_with_model` on `balanced_df`, and hands
    the predictions and targets to `get_eval_results`. Progress and a
    separator line are printed; nothing is returned.

    Parameters
    ----------
    train_df : DataFrame used for training (only its `.shape` is read here).
    balanced_df : data passed to `get_predictions_with_model` as the test set.
    feat_sets : iterable of feature sets, each forwarded unchanged to
        `train_and_save_model`.
    """
    for features in feat_sets:
        print('FEATURE SET: ', features)
        # The same scratch file is reused on every iteration, so each new
        # model is written to the path the previous one used.
        scratch_model_path = Config.MODEL_PATH + 'tmp.pkl'
        print('TRAINING ON ', train_df.shape)
        train_and_save_model(train_df, scratch_model_path, features)
        predictions, gold_labels = get_predictions_with_model(scratch_model_path, balanced_df)
        print('Performance on the balanced test set: ')
        get_eval_results(predictions, gold_labels)

        print('\n\n**************************\n\n')

In [6]:
# Feature sets evaluated one after another: each individual group first,
# then their union (all_feats). crowd_annotated_feats is not part of this
# list (it was commented out).
feat_sets = [
    lexical_feats, pos_feats, len_feats,
    argumentation_feats, named_entity_feats, text_quality_feats,
    content_value_feats, aggressiveness_feats, toxicity_feats,
    all_feats,
]

## Train: Subset of CTC + SOCC*  Test: Balanced dataset of CTC + SOCC* 

In [7]:
# Load the feature file and add a length column: the number of
# whitespace-separated tokens in the preprocessed comment text.
all_SOCC_df = pd.read_csv(Config.ALL_SOCC_FEATURES_FILE_PATH)
all_SOCC_df['comment_len'] = all_SOCC_df['pp_comment_text'].apply(
    lambda text: len(text.split())
)
# Rows whose source is exactly 'SOCC'.
SOCC_df = all_SOCC_df[all_SOCC_df['source'].eq('SOCC')]

In [8]:
# Draw the length-balanced sample (equal class counts per length bin) from all_SOCC_df.
balanced_df = get_balanced_df(all_SOCC_df)

LOWER:  10
UPPER:  20
Number of constructive samples:  17
Number of non-constructive samples:  17
LOWER:  20
UPPER:  30
Number of constructive samples:  65
Number of non-constructive samples:  65
LOWER:  30
UPPER:  40
Number of constructive samples:  111
Number of non-constructive samples:  111
LOWER:  40
UPPER:  50
Number of constructive samples:  307
Number of non-constructive samples:  307
LOWER:  50
UPPER:  60
Number of constructive samples:  62
Number of non-constructive samples:  62
LOWER:  60
UPPER:  70
Number of constructive samples:  18
Number of non-constructive samples:  18
LOWER:  70
UPPER:  80
Number of constructive samples:  7
Number of non-constructive samples:  7


In [9]:
# (rows, columns) of the balanced sample.
balanced_df.shape

(1174, 37)

In [10]:
# Training data = every row of all_SOCC_df whose comment_counter does not occur
# in the balanced sample.
# NOTE(review): assumes comment_counter identifies a row uniquely -- confirm;
# otherwise unsampled rows that share a counter value are excluded as well.
train_df = all_SOCC_df[~all_SOCC_df['comment_counter'].isin(balanced_df['comment_counter'].tolist())]

In [11]:
# (rows, columns) of the training split.
train_df.shape

(10623, 37)

In [13]:
# Persist both splits under Config.TRAIN_PATH without the index column:
# the balanced sample first, then the remaining training rows.
balanced_df.to_csv(Config.TRAIN_PATH + 'CTC_len_balanced_test.csv', index = False)
train_df.to_csv(Config.TRAIN_PATH + 'CTC-CTC_len_balanced_test.csv', index = False)

In [15]:
# Train on train_df and evaluate on balanced_df, once per entry of feat_sets.
evaluate_on_balanced_data(train_df, balanced_df, feat_sets)

FEATURE SET:  ['ngram_feats', 'tfidf_feats']
TRAINING ON  (10623, 37)
Classifier:  SGDClassifier(alpha=0.001, average=False, class_weight=None, epsilon=0.1,
       eta0=0.0, fit_intercept=True, l1_ratio=0.15,
       learning_rate='optimal', loss='hinge', max_iter=50, n_iter=None,
       n_jobs=1, penalty='l2', power_t=0.5, random_state=42, shuffle=True,
       tol=0.0001, verbose=0, warm_start=False)
Feature set:  ['ngram_feats', 'tfidf_feats']
COMMENTS COL:  pp_comment_text
Size of the training data:  10623 	Constructive ( 5870 ) 	Non constructive ( 4753 )
Model trained and pickled in file:  /home/vkolhatk/data/Constructiveness_public/models/tmp.pkl
Size of the data:  1174 	Constructive ( 587 ) 	Non constructive ( 587 )
Performance on the balanced test set: 
Accuracy:  0.5383304940374787
Precision-recall for each class:  (array([0.77172061, 0.30494037]), array([0.5261324 , 0.57188498]), array([0.62569061, 0.39777778]), array([861, 313]))
                  precision    recall  f1-score

Results: 
macro_average => (0.5229982964224873, 0.5281945457456461, 0.4999589282400294, None)
weighted_average => (0.6151481996349062, 0.5229982964224872, 0.5460376646049451, None)
micro_average => (0.5229982964224872, 0.5229982964224872, 0.5229982964224872, None)
<class 'dict'>


**************************


FEATURE SET:  ['perspecitive_toxicity_feats']
TRAINING ON  (10623, 37)
Classifier:  SGDClassifier(alpha=0.001, average=False, class_weight=None, epsilon=0.1,
       eta0=0.0, fit_intercept=True, l1_ratio=0.15,
       learning_rate='optimal', loss='hinge', max_iter=50, n_iter=None,
       n_jobs=1, penalty='l2', power_t=0.5, random_state=42, shuffle=True,
       tol=0.0001, verbose=0, warm_start=False)
Feature set:  ['perspecitive_toxicity_feats']
COMMENTS COL:  pp_comment_text
Size of the training data:  10623 	Constructive ( 5870 ) 	Non constructive ( 4753 )
Model trained and pickled in file:  /home/vkolhatk/data/Constructiveness_public/models/tmp.pkl
Size of the data:  1174 	Con

## Train: Subset of CTC  Test: Balanced dataset of CTC

In [16]:
# Load the CTC feature file and add a length column: the number of
# whitespace-separated tokens in the preprocessed comment text.
CTC_df = pd.read_csv(Config.CTC_FEATURES_FILE_PATH)
CTC_df['comment_len'] = CTC_df['pp_comment_text'].apply(
    lambda text: len(text.split())
)

In [34]:
# Cross-validate on CTC_df using only the constructiveness and
# non-constructiveness characteristic feature groups, scored with 'f1'.
# NOTE(review): this cell carries execution count 34, i.e. it was run out of
# order relative to the cells around it.
results = run_cross_validation_experiments(CTC_df, ['constructiveness_chars_feats', 'non_constructiveness_chars_feats'], scoring ='f1')    

Size of the data:  10762 	Constructive ( 5906 ) 	Non constructive ( 4856 )
IN experiments_utils...
comments_col:  pp_comment_text
Cross validation folds:  10
Classifier:  SGDClassifier(alpha=0.001, average=False, class_weight=None, epsilon=0.1,
       eta0=0.0, fit_intercept=True, l1_ratio=0.15,
       learning_rate='optimal', loss='hinge', max_iter=50, n_iter=None,
       n_jobs=1, penalty='l2', power_t=0.5, random_state=42, shuffle=True,
       tol=0.0001, verbose=0, warm_start=False)
Feature set:  ['constructiveness_chars_feats', 'non_constructiveness_chars_feats']
COMMENTS COL:  pp_comment_text


In [35]:
# Show what run_cross_validation_experiments returned.
results

{'f1 scores': array([0.88471392, 0.957795  , 0.9645507 , 0.94401378, 0.91080617,
        0.90721649, 0.90686275, 0.85759494, 0.89789303, 0.91673537]),
 'mean f1': 0.914818215526098,
 'variance': 0.0009780787264181167}

In [17]:
# (rows, columns) of the CTC feature table.
CTC_df.shape

(10762, 56)

In [18]:
# Rebinds balanced_df (until now the sample drawn from all_SOCC_df) to a
# length-balanced sample of CTC_df.
balanced_df = get_balanced_df(CTC_df)

LOWER:  10
UPPER:  20
Number of constructive samples:  11
Number of non-constructive samples:  11
LOWER:  20
UPPER:  30
Number of constructive samples:  33
Number of non-constructive samples:  33
LOWER:  30
UPPER:  40
Number of constructive samples:  78
Number of non-constructive samples:  78
LOWER:  40
UPPER:  50
Number of constructive samples:  282
Number of non-constructive samples:  282
LOWER:  50
UPPER:  60
Number of constructive samples:  46
Number of non-constructive samples:  46
LOWER:  60
UPPER:  70
Number of constructive samples:  11
Number of non-constructive samples:  11
LOWER:  70
UPPER:  80
Number of constructive samples:  5
Number of non-constructive samples:  5


In [19]:
# (rows, columns) of the CTC balanced sample.
balanced_df.shape

(932, 56)

In [33]:
# Persist the CTC balanced sample. index=False keeps the DataFrame index out of
# the file, matching the other exports in this notebook; with the default an
# extra unnamed index column is written and reappears when the CSV is reloaded.
balanced_df.to_csv(Config.TRAIN_PATH + 'length_balanced_CTC.csv', index=False)

In [22]:
# Rebinds train_df: every row of CTC_df whose comment_counter does not occur in
# the balanced sample.
# NOTE(review): assumes comment_counter identifies a row uniquely -- confirm.
train_df = CTC_df[~CTC_df['comment_counter'].isin(balanced_df['comment_counter'].tolist())]

In [23]:
# (rows, columns) of the CTC training split.
train_df.shape

(9830, 56)

In [24]:
# Same evaluation as before, now with the CTC-only train_df / balanced_df.
evaluate_on_balanced_data(train_df, balanced_df, feat_sets)

FEATURE SET:  ['ngram_feats', 'tfidf_feats']
TRAINING ON  (9830, 56)
Classifier:  SGDClassifier(alpha=0.001, average=False, class_weight=None, epsilon=0.1,
       eta0=0.0, fit_intercept=True, l1_ratio=0.15,
       learning_rate='optimal', loss='hinge', max_iter=50, n_iter=None,
       n_jobs=1, penalty='l2', power_t=0.5, random_state=42, shuffle=True,
       tol=0.0001, verbose=0, warm_start=False)
Feature set:  ['ngram_feats', 'tfidf_feats']
COMMENTS COL:  pp_comment_text
Size of the training data:  9830 	Constructive ( 5440 ) 	Non constructive ( 4390 )
Model trained and pickled in file:  /home/vkolhatk/data/Constructiveness_public/models/tmp.pkl
Size of the data:  932 	Constructive ( 466 ) 	Non constructive ( 466 )
Performance on the balanced test set: 
Accuracy:  0.5536480686695279
Precision-recall for each class:  (array([0.7360515 , 0.37124464]), array([0.53930818, 0.58445946]), array([0.62250454, 0.45406824]), array([636, 296]))
                  precision    recall  f1-score   

Model trained and pickled in file:  /home/vkolhatk/data/Constructiveness_public/models/tmp.pkl
Size of the data:  932 	Constructive ( 466 ) 	Non constructive ( 466 )
Performance on the balanced test set: 
Accuracy:  0.528969957081545
Precision-recall for each class:  (array([0.30901288, 0.74892704]), array([0.55172414, 0.52011923]), array([0.39614856, 0.61389622]), array([261, 671]))
                  precision    recall  f1-score   support

non-constructive       0.55      0.31      0.40       466
    constructive       0.52      0.75      0.61       466

     avg / total       0.54      0.53      0.51       932

Results: 
macro_average => (0.5289699570815452, 0.5359216814841461, 0.5050223869131223, None)
weighted_average => (0.6257321925251893, 0.528969957081545, 0.5529175272499678, None)
micro_average => (0.528969957081545, 0.528969957081545, 0.528969957081545, None)
<class 'dict'>


**************************


FEATURE SET:  ['perspecitive_toxicity_feats']
TRAINING ON  (9830, 56)
C

In [31]:
# Crowd-annotation feature sets: the full crowd_annotated_feats group, the
# constructiveness + non-constructiveness pair, and toxicity on its own.
feat_sets_crowd_feats = [
    crowd_annotated_feats,
    ['constructiveness_chars_feats', 'non_constructiveness_chars_feats'],
    ['toxicity_chars_feats'],
]

In [32]:
# Repeat the train/evaluate run with the crowd-annotation feature sets.
evaluate_on_balanced_data(train_df, balanced_df, feat_sets_crowd_feats)

FEATURE SET:  ['constructiveness_chars_feats', 'non_constructiveness_chars_feats', 'toxicity_chars_feats']
TRAINING ON  (9830, 56)
Classifier:  SGDClassifier(alpha=0.001, average=False, class_weight=None, epsilon=0.1,
       eta0=0.0, fit_intercept=True, l1_ratio=0.15,
       learning_rate='optimal', loss='hinge', max_iter=50, n_iter=None,
       n_jobs=1, penalty='l2', power_t=0.5, random_state=42, shuffle=True,
       tol=0.0001, verbose=0, warm_start=False)
Feature set:  ['constructiveness_chars_feats', 'non_constructiveness_chars_feats', 'toxicity_chars_feats']
COMMENTS COL:  pp_comment_text
Size of the training data:  9830 	Constructive ( 5440 ) 	Non constructive ( 4390 )
Model trained and pickled in file:  /home/vkolhatk/data/Constructiveness_public/models/tmp.pkl
Size of the data:  932 	Constructive ( 466 ) 	Non constructive ( 466 )
Performance on the balanced test set: 
Accuracy:  0.8175965665236051
Precision-recall for each class:  (array([0.80686695, 0.82832618]), array([0.82

In [42]:
# Load the combined feature file and keep only the rows whose source is
# 'NYTPicks' or 'YNACC'.
df = pd.read_csv(Config.ALL_DATASETS_ALL_FEATURES_FILE_PATH)
NYT_YNC_df = df[df['source'].isin(['NYTPicks', 'YNACC'])]

In [43]:
# Cross-dataset run: for each feature set, train on NYT_YNC_df, pickle the
# model to nyt_ync_svm.pkl under Config.MODEL_PATH, and evaluate on whatever
# balanced_df currently holds.
# NOTE(review): this repeats the body of evaluate_on_balanced_data with a
# different model file and messages. The printed label says 'SOCC' although
# the test data is balanced_df, which is rebound more than once in this
# notebook -- confirm which sample is intended.
for feat_set in feat_sets:
    print('FEATURE SET: ', feat_set)
    model_path = Config.MODEL_PATH + 'nyt_ync_svm.pkl'
    print('TRAINING ON NYT YNC subset...')
    train_and_save_model(NYT_YNC_df, model_path, feat_set)    
    predicted, targets = get_predictions_with_model(model_path, balanced_df)
    print('Performance on SOCC: ')
    get_eval_results(predicted, targets)
    
    #predicted, targets = get_predictions_with_model(model_path, SOCC_df)
    #print('Performance on SOCC: ')
    #get_eval_results(predicted, targets)
    print('\n\n**************************\n\n')    

FEATURE SET:  ['ngram_feats', 'tfidf_feats']
TRAINING ON NYT YNC subset...
Classifier:  SGDClassifier(alpha=0.001, average=False, class_weight=None, epsilon=0.1,
       eta0=0.0, fit_intercept=True, l1_ratio=0.15,
       learning_rate='optimal', loss='hinge', max_iter=50, n_iter=None,
       n_jobs=1, penalty='l2', power_t=0.5, random_state=42, shuffle=True,
       tol=0.0001, verbose=0, warm_start=False)
Feature set:  ['ngram_feats', 'tfidf_feats']
COMMENTS COL:  pp_comment_text
Size of the training data:  30325 	Constructive ( 15147 ) 	Non constructive ( 15178 )
Model trained and pickled in file:  /home/vkolhatk/data/Constructiveness_public/models/nyt_ync_svm.pkl
Size of the data:  1174 	Constructive ( 587 ) 	Non constructive ( 587 )
Performance on SOCC: 
Accuracy:  0.5945485519591142
Precision-recall for each class:  (array([0.81771721, 0.3713799 ]), array([0.56537102, 0.67076923]), array([0.66852368, 0.47807018]), array([849, 325]))
                  precision    recall  f1-score  

Classifier:  SGDClassifier(alpha=0.001, average=False, class_weight=None, epsilon=0.1,
       eta0=0.0, fit_intercept=True, l1_ratio=0.15,
       learning_rate='optimal', loss='hinge', max_iter=50, n_iter=None,
       n_jobs=1, penalty='l2', power_t=0.5, random_state=42, shuffle=True,
       tol=0.0001, verbose=0, warm_start=False)
Feature set:  ['ngram_feats', 'tfidf_feats', 'pos_feats', 'length_feats', 'argumentation_feats', 'named_entity_feats', 'text_quality_feats', 'perspective_content_value_feats', 'perspective_aggressiveness_feats', 'perspecitive_toxicity_feats']
COMMENTS COL:  pp_comment_text
Size of the training data:  30325 	Constructive ( 15147 ) 	Non constructive ( 15178 )
Model trained and pickled in file:  /home/vkolhatk/data/Constructiveness_public/models/nyt_ync_svm.pkl
Size of the data:  1174 	Constructive ( 587 ) 	Non constructive ( 587 )
Performance on SOCC: 
Accuracy:  0.6192504258943782
Precision-recall for each class:  (array([0.50085179, 0.73764906]), array([0.65

In [15]:
# Scratch list; nothing else in the visible notebook reads it apart from the
# min() check in the next cell.
L = [1, 45, 75,3]

In [16]:
# Scratch check of min() on the list above.
min(L)

1

## SOCC_df contains instances of annotated SOCC with the new annotation scheme

In [7]:
# Rebinds SOCC_df to the rows of training_feats_df whose source is exactly 'SOCC'.
# NOTE(review): training_feats_df is not defined anywhere in the visible part
# of this notebook, so this cell fails on a fresh kernel unless it is created
# elsewhere -- confirm.
SOCC_df = training_feats_df[training_feats_df['source'] == 'SOCC']

## all_SOCC_df contains all instances of annotated SOCC 

In [8]:
# Map each value of the 'constructive' column in SOCC_df to its row count.
d = SOCC_df['constructive'].value_counts().to_dict()

In [9]:
# Rebinds all_SOCC_df to the rows of training_feats_df whose source string ends
# with 'SOCC'.
# NOTE(review): training_feats_df is not defined in the visible part of this
# notebook, and this frame has no comment_len column added here -- confirm
# before reusing it with get_balanced_df.
all_SOCC_df = training_feats_df[training_feats_df['source'].str.endswith('SOCC')]

## Feature sets 

In [11]:
def pretty_print_results(avg_results_dict):
    """Render nested evaluation results as HTML tables in the notebook.

    `avg_results_dict` maps a feature-set key to a dict that maps a test-subset
    name (a string) to a result mapping. The feature-set key is printed, then
    each subset name is shown as an <h2> heading followed by its results
    rendered as a table built with ``DataFrame.from_dict(..., orient='index')``.
    """
    for feature_key, subset_results in avg_results_dict.items():
        print('FEATURE SET: ', feature_key)
        for (subset_name, scores) in subset_results.items():
            # Heading first, then the table for this subset.
            heading_html = '<h2>' + subset_name + '</h2>'
            display(HTML(heading_html))
            scores_table = pd.DataFrame.from_dict(scores, orient='index')
            display(HTML(scores_table.to_html()))