In [1]:
import pandas as pd
import numpy as np
import json

from numpy import random as rd
from collections import Counter

from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score
from sklearn.feature_selection import mutual_info_classif
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel
from sklearn.pipeline import Pipeline
from sklearn.neural_network import MLPClassifier
from sklearn.linear_model import SGDClassifier
from sklearn.metrics import f1_score

   
def get_accuracy_estimate(truths, predicted):

    print(classification_report(truths, predicted))
    print(' TEXT Classifier Accuracy: ', accuracy_score(truths, predicted))

    print('f1_score micro: ', f1_score(truths, predicted, average='micro'))
    print('f1_score macro: ', f1_score(truths, predicted, average='macro'))    
    print('f1_score wieghted: ', f1_score(truths, predicted, average='weighted'))        

    return f1_score(truths, predicted, average='micro'), f1_score(truths, predicted, average='macro'), f1_score(truths, predicted, average='weighted')


    

## Load dataset

In [2]:
df = pd.read_csv('./data/Contentious_pairs_all.csv', encoding="utf-8", 
                 dtype={"response_id":str, "target_id":str,
                       "response_user":str, "target_user":str} ).fillna('')


df['present_in_train'] = None
df['np_response_text_ada_embedding'] = df['response_text_ada_embedding'].apply(eval).apply(np.array)
df['np_target_text_ada_embedding'] = df['target_text_ada_embedding'].apply(eval).apply(np.array)

df['np_target_user_embedding'] = df['target_user_embedding_8'].apply(eval).apply(np.array)
df['np_response_user_embedding'] = df['response_user_embedding_8'].apply(eval).apply(np.array)


df

Unnamed: 0,event,response_id,target_id,interaction_type,label,label_expanded,Confidence_Level,response_text,target_text,truncated,...,response_user,target_user_embedding,response_user_embedding,target_user_embedding_8,response_user_embedding_8,present_in_train,np_response_text_ada_embedding,np_target_text_ada_embedding,np_target_user_embedding,np_response_user_embedding
0,Santa_Fe_Shooting,997626508050157568,997598447376175104,Quote,Support,Implicit_Support,1.0,"Seriously, wtf is wrong with our political sys...",More children have been killed in schools this...,False,...,18655355,"[0.16930730640888214, -0.4844645857810974, 0.2...","[0.1620851755142212, -0.4702264964580536, 0.21...","[0.16930730640888214, -0.4844645857810974, 0.2...","[0.1620851755142212, -0.4702264964580536, 0.21...",,"[-0.025092221796512604, -0.013276644051074982,...","[-0.008838048204779625, -0.003347944701090455,...","[0.16930730640888214, -0.4844645857810974, 0.2...","[0.1620851755142212, -0.4702264964580536, 0.21..."
1,Santa_Fe_Shooting,997575042027458561,997573240380968961,Quote,Denial,Explicit_Denial,1.0,Ma calls BS! https://t.co/bodEWN5Q4C,Former GOP Rep. Jason Chaffetz: 'Politically c...,False,...,3176702526,"[0.16930730640888214, -0.4844645857810974, 0.2...","[0.2439490556716919, -0.44223514199256897, 0.1...","[0.16930730640888214, -0.4844645857810974, 0.2...","[0.2439490556716919, -0.44223514199256897, 0.1...",,"[-0.033440690487623215, 0.007639116141945124, ...","[-0.027197668328881264, 0.0034390492364764214,...","[0.16930730640888214, -0.4844645857810974, 0.2...","[0.2439490556716919, -0.44223514199256897, 0.1..."
2,Santa_Fe_Shooting,997540582846271494,997535659870117888,Quote,Denial,Explicit_Denial,1.0,"On average, there’s one fake stat about school...","On average, that’s one school shooting every w...",False,...,1646856415,"[0.16930730640888214, -0.4844645857810974, 0.2...","[0.1620851755142212, -0.4702264964580536, 0.21...","[0.16930730640888214, -0.4844645857810974, 0.2...","[0.1620851755142212, -0.4702264964580536, 0.21...",,"[-0.023548724129796028, 0.022269316017627716, ...","[-0.0044773295521736145, -0.014855715446174145...","[0.16930730640888214, -0.4844645857810974, 0.2...","[0.1620851755142212, -0.4702264964580536, 0.21..."
3,General_Terms,1018569947817992192,1017909301040635904,Quote,Denial,Implicit_Denial,1.0,ONE MIGHT BE MADE UP AND NOT REAL. NOT SURE. S...,I’m so confused... - When we were attacked on ...,False,...,766475610059317248,"[0.16930730640888214, -0.4844645857810974, 0.2...","[0.1620851755142212, -0.4702264964580536, 0.21...","[0.16930730640888214, -0.4844645857810974, 0.2...","[0.1620851755142212, -0.4702264964580536, 0.21...",,"[0.004989785607904196, -0.004917326848953962, ...","[-0.013004736974835396, -0.01780531369149685, ...","[0.16930730640888214, -0.4844645857810974, 0.2...","[0.1620851755142212, -0.4702264964580536, 0.21..."
4,General_Terms,1019395289575239680,1017919759474622464,Quote,Denial,Explicit_Denial,1.0,"False, adult friendships are M-F, 9-5 https://...",Adult friendships https://t.co/Cn3r9l4pZJ,False,...,59621769,"[0.16930730640888214, -0.4844645857810974, 0.2...","[0.1620851755142212, -0.4702264964580536, 0.21...","[0.16930730640888214, -0.4844645857810974, 0.2...","[0.1620851755142212, -0.4702264964580536, 0.21...",,"[0.023379407823085785, 0.010180916637182236, -...","[0.01642121560871601, 0.0012021968141198158, 0...","[0.16930730640888214, -0.4844645857810974, 0.2...","[0.1620851755142212, -0.4702264964580536, 0.21..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5045,Santa_Fe_Shooting,997648170774654976,997645387669299200,Reply,Comment,Comment,2.0,@cenkuygur @realDonaldTrump @NRA NRA is funded...,"Hey #MAGA guys, I thought @realDonaldTrump was...",False,...,2948973487,"[0.16930730640888214, -0.4844645857810974, 0.2...","[0.1620851755142212, -0.4702264964580536, 0.21...","[0.16930730640888214, -0.4844645857810974, 0.2...","[0.1620851755142212, -0.4702264964580536, 0.21...",,"[-0.03429972380399704, 0.0034139070194214582, ...","[-0.039812665432691574, 0.005698402877897024, ...","[0.16930730640888214, -0.4844645857810974, 0.2...","[0.1620851755142212, -0.4702264964580536, 0.21..."
5046,Santa_Fe_Shooting,998085810636259328,997484633695469570,Reply,Comment,Comment,2.0,@ScottMcGrew There is no right answer to this ...,Guns and prayers: A man shows up to the Santa ...,False,...,103706087,"[0.16930730640888214, -0.4844645857810974, 0.2...","[0.1620851755142212, -0.4702264964580536, 0.21...","[0.16930730640888214, -0.4844645857810974, 0.2...","[0.1620851755142212, -0.4702264964580536, 0.21...",,"[-0.023522792384028435, 0.003550187451764941, ...","[-0.017941860482096672, 0.003447071649134159, ...","[0.16930730640888214, -0.4844645857810974, 0.2...","[0.1620851755142212, -0.4702264964580536, 0.21..."
5047,Santa_Fe_Shooting,998318329474400256,998316225435504640,Reply,Comment,Comment,2.0,"@cameron_kasky @NRA Sadly, there’s also so muc...",It makes me so furious how the @NRA behaves as...,False,...,412350904,"[0.16930730640888214, -0.4844645857810974, 0.2...","[0.1620851755142212, -0.4702264964580536, 0.21...","[0.16930730640888214, -0.4844645857810974, 0.2...","[0.1620851755142212, -0.4702264964580536, 0.21...",,"[0.004335987847298384, -0.008479560725390911, ...","[-0.030558191239833832, -0.010756535455584526,...","[0.16930730640888214, -0.4844645857810974, 0.2...","[0.1620851755142212, -0.4702264964580536, 0.21..."
5048,Santa_Fe_Shooting,999387940890578945,999321499470368768,Quote,Comment,Comment,2.0,What is freaking wrong w/the prior administrat...,BREAKING: Emails reveal cynical exchange betwe...,False,...,975247231925223424,"[0.16930730640888214, -0.4844645857810974, 0.2...","[0.1620851755142212, -0.4702264964580536, 0.21...","[0.16930730640888214, -0.4844645857810974, 0.2...","[0.1620851755142212, -0.4702264964580536, 0.21...",,"[-0.005231035407632589, -0.024064065888524055,...","[-0.030978472903370857, -0.01256611943244934, ...","[0.16930730640888214, -0.4844645857810974, 0.2...","[0.1620851755142212, -0.4702264964580536, 0.21..."


## Utility functions

In [3]:
import numpy as np
from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split
import collections
from sklearn.model_selection import train_test_split
import torch
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import f1_score
from torch import nn
import torch as T
import torch.nn.functional as F
from torch.optim import Adam
from random import sample


device = T.device('cpu')

# Set seed for reproducibility
torch.manual_seed(1234)

# X, y = get_data(...)
# y_pred = model.predict(X)
# f1_score(y, y_pred)

LEARNING_RATE = 1e-2
NUM_EPOCHS = 50
BATCH_SIZE = 32



def accuracy_fn(y_pred, y_true):
    n_correct = torch.eq(y_pred, y_true).sum().item()
    accuracy = (n_correct / len(y_pred)) * 100
    return accuracy



class LinearModel(nn.Module):
    def __init__(self, input_dim, hidden_dim,hidden_dim2, num_classes):
        super(LinearModel, self).__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.fc12 = nn.Linear(hidden_dim, hidden_dim2)        
        self.fc13 = nn.Linear( hidden_dim2, hidden_dim2)                
        self.fc2 = nn.Linear(hidden_dim2, num_classes)

    def forward(self, x_in):
        z = F.relu(self.fc1(x_in)) # linear activation
        z1 = F.relu(self.fc12(z))        
        z12 = F.relu(self.fc13(z1))          
        z2 = self.fc2(z12)        
        return [z1, z2]
    

class ContrastiveLoss(T.nn.Module):
  def __init__(self, m=2.0):
    super(ContrastiveLoss, self).__init__()  # pre 3.3 syntax
    self.m = m  # margin or radius

  def forward(self, y1, y2, d=0):
    # d = 0 means y1 and y2 are supposed to be same
    # d = 1 means y1 and y2 are supposed to be different
    
    euc_dist = T.nn.functional.pairwise_distance(y1, y2)

    if d == 0:
      return T.mean(T.pow(euc_dist, 2))  # distance squared
    else:  # d == 1
      delta = self.m - euc_dist  # sort of reverse distance
      delta = T.clamp(delta, min=0.0, max=None)
      return T.mean(T.pow(delta, 2))  # mean over all rows
    
# -----------------------------------------------------------


# loss_func = ContrastiveLoss()


def contrastive_loss_fn(X_train, y_train, N = 200):
    
    total_loss = 0
    for i in sample(range(0, len(y_train)), N):
        y_train_val = y_train[i]
        x_train = X_train[i]

        for j  in sample(range(0, len(y_train)), N):    
            y_train1_val = y_train[j]        
            x_train1 = X_train[j]    

            if i != j:
#                 print(y_train_val, y_train1_val)
                if y_train_val == y_train1_val:
                    loss = loss_func(x_train, x_train1, 0)
                    total_loss += loss
#                     print('loss1: ', loss)
                else:
                    loss = loss_func(x_train, x_train1, 1)
                    total_loss += loss                    
#                     print('loss2:', loss)

    return total_loss/len(y_train)
    

def majority_classifier(X_train, y_train, X_test, y_test):
    
    majority_clss = Counter(y_test).most_common(1)[0][0] #np.random.choice([2,3], len(truths))
    y_pred = [majority_clss]*len(y_test)
        
    return y_pred, y_test
    
            
            
def random_classifier(X_train, y_train, X_test, y_test):
    
    y_pred = np.random.choice(list(set(y_test)), len(y_test))
        
    return y_pred, y_test
    
    
    
def NN_train(X_train, y_train, X_test, y_test, contrastive_loss = False):
    INPUT_DIM = np.asarray(X_train).shape[1] # X is 2-dimensional
    HIDDEN_DIM = 100
    HIDDEN_DIM2 = 20
    NUM_CLASSES = len(set(y_train)) # 3 classes


    # Output vectorizer
    label_encoder = LabelEncoder()

    # Fit on train data
    label_encoder = label_encoder.fit(y_train)
    classes = list(label_encoder.classes_)
#         print (f"classes: {classes}")


    # Convert labels to tokens
#         print (f"y_train[0]: {y_train[0]}")
    y_train = label_encoder.transform(y_train)
    # y_val = label_encoder.transform(y_val)
    y_test = label_encoder.transform(y_test)
#         print (f"y_train[0]: {y_train[0]}")


    # Class weights
    counts = np.bincount(y_train)
    class_weights = {i: 1.0/count for i, count in enumerate(counts)}
#         print (f"counts: {counts}\nweights: {class_weights}")


    # Define Loss
    class_weights_tensor = torch.Tensor(list(class_weights.values()))
    loss_fn = nn.CrossEntropyLoss(weight=class_weights_tensor)

    # Initialize model
    model = LinearModel(input_dim=INPUT_DIM, hidden_dim=HIDDEN_DIM, hidden_dim2 = HIDDEN_DIM2, num_classes=NUM_CLASSES)
#     print (model.named_parameters)

    # Optimizer
    optimizer = Adam(model.parameters(), lr=LEARNING_RATE)

#     # Convert data to tensors
    X_train = torch.Tensor(X_train)
    y_train = torch.LongTensor(y_train)

    X_val = torch.Tensor(X_test)
    y_val = torch.LongTensor(y_test)
    
    fscores = []

    # Training
    for epoch in range(NUM_EPOCHS):
        # Forward pass
        z1, z2 = model(X_train)

        y_pred = z2


        loss_supervised = loss_fn(y_pred, y_train)
    
    
        if contrastive_loss:
            loss_contrastive = contrastive_loss_fn(z1, y_train, N = 50)
            
            loss = loss_supervised + 0.2*loss_contrastive
        else:
            loss = loss_supervised  #+ 0.2*loss_contrastive
            
            
        # Zero all gradients
        optimizer.zero_grad()

        # Backward pass
        loss.backward()

        # Update weights
        optimizer.step()

        best_f = 0
        best_f_y_pred = []
        best_f_y_test = []
        if epoch%1==0:
            predictions = y_pred.max(dim=1)[1] # class
            accuracy = accuracy_fn(y_pred=predictions, y_true=y_train)
            fscore = f1_score(predictions, y_train , average='macro')
#             print (f"Training -- Epoch: {epoch} | loss: {loss:.2f}, accuracy: {accuracy:.1f}, f1score: {fscore:.1f}")

            z1, z2 = model(X_val)
            y_pred = F.softmax(z2, dim=1)
            _, y_pred = y_pred.max(dim=1)
            
#             accuracy = accuracy_fn(y_pred= y_pred, y_true=y_test)
#             fscore = f1_score(y_pred,y_test )

            accuracy = accuracy_fn(y_pred=y_pred, y_true=y_val)
            fscore = f1_score(y_pred, y_val , average='macro')
            
        if fscore > best_f:
            best_f  = fscore
            best_f_y_pred = y_pred
            best_f_y_test = y_test
            
#             print ( f" Validation -- Epoch: {epoch} | loss: {loss:.2f}, accuracy: {accuracy:.2f}, f1score: {fscore:.2f}")        
            
            fscores.append(fscore)    
            
    
    return best_f_y_pred, best_f_y_test


def get_text(tweet):
    tweetText = re.sub(r"(?:\@|https?\://)\S+", "", tweet) # r'(?:@[\w_]+)'
    tweetText = tweetText.strip().replace('rt', '').replace('\n', ' ').replace('\r', ' ').replace('\t', ' ')
    
    return tweetText
 


def get_statce_stats(emb, label):
    y_counts = {}
    y_Xs = {}
    y_indices = {}
    for i, (emb, label) in enumerate(zip(emb, label)):
        if label in y_counts:
            y_counts[label] = y_counts[label]  + 1
        else:
            y_counts[label] = 1    

        if label in y_Xs:
            y_Xs[label].append(emb)
        else:
            y_Xs[label] = [emb]  
            
        if label in y_indices:
            y_indices[label].append(i)
        else:
            y_indices[label] = [i]  

    print('lables : ', y_counts)
    
    return y_counts, y_Xs, y_indices

def balance_stance_classes(emb_vec, labels):
    y_counts, y_Xs, y_indices = get_statce_stats(emb_vec, labels)  
    
        
    majority_label = 0
    minority_label = 1
    majority_count = 0
    for key, val in y_counts.items():
        if val > majority_count:
            
            minority_label = majority_label
            majority_label = key
            majority_count= val
        else:
            minority_label = key
        
    new_emb_vec = []    
    new_labels = []
    for label in y_counts.keys():
        if label != majority_label:
            minority_label = label
            
            if y_counts[majority_label] > y_counts[minority_label]:
                additional_samples_count = y_counts[majority_label] - y_counts[minority_label]

                indices = rd.choice(list(y_indices[minority_label]), size=additional_samples_count, replace=True)
                for index in indices:
                    new_emb_vec.append(emb_vec[index])
                    new_labels.append(labels[index])
            

    y_counts, y_Xs, y_indices  = get_statce_stats(new_emb_vec + emb_vec, new_labels + labels)        
    

    return  new_emb_vec + emb_vec, new_labels + labels


## Define Sentiment Classifiers

In [4]:
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer

from textblob import TextBlob


def preprocess_text(text):

    # Tokenize the text

    tokens = word_tokenize(text.lower())

    # Remove stop words

    filtered_tokens = [token for token in tokens if token not in stopwords.words('english')]

    # Lemmatize the tokens

    lemmatizer = WordNetLemmatizer()

    lemmatized_tokens = [lemmatizer.lemmatize(token) for token in filtered_tokens]

    # Join the tokens back into a string
    processed_text = ' '.join(lemmatized_tokens)

    return processed_text

# create get_sentiment function
analyzer = SentimentIntensityAnalyzer()

def get_sentiment(text):

    scores = analyzer.polarity_scores(text)

    sentiment = 1 if scores['pos'] > scores['neg'] else -1
#     sentiment = -1 if scores['neg'] > 0 else 0
    return sentiment


def textblob_sentiment_analysis(text):
    
    score = TextBlob(text).sentiment.polarity
    if score < 0:
        return -1
    elif score >= 0:
        return 1
    
    return score


## Define Classifiers

In [5]:
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.neural_network import MLPClassifier

classifier = ('svm12', Pipeline([('vect', CountVectorizer()),
                                             ('tfidf', TfidfTransformer()),
                                             ('clf-svm', SGDClassifier(loss='hinge', penalty='l2',
                                                                       alpha=1e-3, max_iter=15, random_state=42)),
                                            ])
                                        , {'vect__ngram_range': [(1, 1), (1, 2)], 
                                          'tfidf__use_idf': (True, False),
                                          'clf-svm__alpha': (1e-2, 1e-3),
                                         })

clf_name, sgd_text_clf, __  = classifier

classifier = ('svm12', Pipeline([('vect', CountVectorizer()),
                                             ('tfidf', TfidfTransformer()),
                                             ('clf-svm', MLPClassifier(solver='lbfgs', alpha=1e-5,hidden_layer_sizes=(5, 2), random_state=1)),
                                            ])
                                        , {'vect__ngram_range': [(1, 1), (1, 2)], 
                                          'tfidf__use_idf': (True, False),
                                          'clf-svm__alpha': (1e-2, 1e-3),
                                         })

clf_name, mlp_text_clf, __  = classifier

mlp_clf = MLPClassifier(solver='lbfgs', alpha=1e-5,hidden_layer_sizes=(5, 2), random_state=1)
sgd_clf = SGDClassifier(loss='hinge', penalty='l2',alpha=1e-3, max_iter=15, random_state=42)



## Filter data

In [6]:




df['target_text_cleaned'] = df['target_text'].apply(preprocess_text)
df['response_text_cleaned'] = df['response_text'].apply(preprocess_text)


df['target_text_sentiment_vader'] = df['target_text_cleaned'].apply(get_sentiment)
df['response_text_sentiment_vader'] = df['response_text_cleaned'].apply(get_sentiment)


df['target_text_sentiment_textblob'] = df['target_text_cleaned'].apply(textblob_sentiment_analysis)
df['response_text_sentiment_textbolb'] = df['response_text_cleaned'].apply(textblob_sentiment_analysis)

df_filtered = df


In [7]:
## Visualize data

print(df_filtered.columns)

df_filtered

Index(['event', 'response_id', 'target_id', 'interaction_type', 'label',
       'label_expanded', 'Confidence_Level', 'response_text', 'target_text',
       'truncated', 'response_text_ada_embedding', 'target_text_ada_embedding',
       'target_user', 'response_user', 'target_user_embedding',
       'response_user_embedding', 'target_user_embedding_8',
       'response_user_embedding_8', 'present_in_train',
       'np_response_text_ada_embedding', 'np_target_text_ada_embedding',
       'np_target_user_embedding', 'np_response_user_embedding',
       'target_text_cleaned', 'response_text_cleaned',
       'target_text_sentiment_vader', 'response_text_sentiment_vader',
       'target_text_sentiment_textblob', 'response_text_sentiment_textbolb'],
      dtype='object')


Unnamed: 0,event,response_id,target_id,interaction_type,label,label_expanded,Confidence_Level,response_text,target_text,truncated,...,np_response_text_ada_embedding,np_target_text_ada_embedding,np_target_user_embedding,np_response_user_embedding,target_text_cleaned,response_text_cleaned,target_text_sentiment_vader,response_text_sentiment_vader,target_text_sentiment_textblob,response_text_sentiment_textbolb
0,Santa_Fe_Shooting,997626508050157568,997598447376175104,Quote,Support,Implicit_Support,1.0,"Seriously, wtf is wrong with our political sys...",More children have been killed in schools this...,False,...,"[-0.025092221796512604, -0.013276644051074982,...","[-0.008838048204779625, -0.003347944701090455,...","[0.16930730640888214, -0.4844645857810974, 0.2...","[0.1620851755142212, -0.4702264964580536, 0.21...",child killed school year serving military . le...,"seriously , wtf wrong political system ! polit...",-1,-1,-1,-1
1,Santa_Fe_Shooting,997575042027458561,997573240380968961,Quote,Denial,Explicit_Denial,1.0,Ma calls BS! https://t.co/bodEWN5Q4C,Former GOP Rep. Jason Chaffetz: 'Politically c...,False,...,"[-0.033440690487623215, 0.007639116141945124, ...","[-0.027197668328881264, 0.0034390492364764214,...","[0.16930730640888214, -0.4844645857810974, 0.2...","[0.2439490556716919, -0.44223514199256897, 0.1...",former gop rep. jason chaffetz : 'politically ...,call b ! http : //t.co/bodewn5q4c,-1,-1,1,1
2,Santa_Fe_Shooting,997540582846271494,997535659870117888,Quote,Denial,Explicit_Denial,1.0,"On average, there’s one fake stat about school...","On average, that’s one school shooting every w...",False,...,"[-0.023548724129796028, 0.022269316017627716, ...","[-0.0044773295521736145, -0.014855715446174145...","[0.16930730640888214, -0.4844645857810974, 0.2...","[0.1620851755142212, -0.4702264964580536, 0.21...","average , ’ one school shooting every week yea...","average , ’ one fake stat school shooting idio...",-1,-1,1,-1
3,General_Terms,1018569947817992192,1017909301040635904,Quote,Denial,Implicit_Denial,1.0,ONE MIGHT BE MADE UP AND NOT REAL. NOT SURE. S...,I’m so confused... - When we were attacked on ...,False,...,"[0.004989785607904196, -0.004917326848953962, ...","[-0.013004736974835396, -0.01780531369149685, ...","[0.16930730640888214, -0.4844645857810974, 0.2...","[0.1620851755142212, -0.4702264964580536, 0.21...","’ confused ... - attacked 9/11 , american came...",one might made real . sure . still researching...,-1,1,-1,1
4,General_Terms,1019395289575239680,1017919759474622464,Quote,Denial,Explicit_Denial,1.0,"False, adult friendships are M-F, 9-5 https://...",Adult friendships https://t.co/Cn3r9l4pZJ,False,...,"[0.023379407823085785, 0.010180916637182236, -...","[0.01642121560871601, 0.0012021968141198158, 0...","[0.16930730640888214, -0.4844645857810974, 0.2...","[0.1620851755142212, -0.4702264964580536, 0.21...",adult friendship http : //t.co/cn3r9l4pzj,"false , adult friendship m-f , 9-5 http : //t....",1,1,1,-1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5045,Santa_Fe_Shooting,997648170774654976,997645387669299200,Reply,Comment,Comment,2.0,@cenkuygur @realDonaldTrump @NRA NRA is funded...,"Hey #MAGA guys, I thought @realDonaldTrump was...",False,...,"[-0.03429972380399704, 0.0034139070194214582, ...","[-0.039812665432691574, 0.005698402877897024, ...","[0.16930730640888214, -0.4844645857810974, 0.2...","[0.1620851755142212, -0.4702264964580536, 0.21...","hey # maga guy , thought @ realdonaldtrump win...",@ cenkuygur @ realdonaldtrump @ nra nra funded...,-1,-1,-1,1
5046,Santa_Fe_Shooting,998085810636259328,997484633695469570,Reply,Comment,Comment,2.0,@ScottMcGrew There is no right answer to this ...,Guns and prayers: A man shows up to the Santa ...,False,...,"[-0.023522792384028435, 0.003550187451764941, ...","[-0.017941860482096672, 0.003447071649134159, ...","[0.16930730640888214, -0.4844645857810974, 0.2...","[0.1620851755142212, -0.4702264964580536, 0.21...",gun prayer : man show santa fe high school ( t...,@ scottmcgrew right answer man american flag l...,-1,1,1,1
5047,Santa_Fe_Shooting,998318329474400256,998316225435504640,Reply,Comment,Comment,2.0,"@cameron_kasky @NRA Sadly, there’s also so muc...",It makes me so furious how the @NRA behaves as...,False,...,"[0.004335987847298384, -0.008479560725390911, ...","[-0.030558191239833832, -0.010756535455584526,...","[0.16930730640888214, -0.4844645857810974, 0.2...","[0.1620851755142212, -0.4702264964580536, 0.21...",make furious @ nra behaves organization many n...,"@ cameron_kasky @ nra sadly , ’ also much $ $ ...",1,-1,1,1
5048,Santa_Fe_Shooting,999387940890578945,999321499470368768,Quote,Comment,Comment,2.0,What is freaking wrong w/the prior administrat...,BREAKING: Emails reveal cynical exchange betwe...,False,...,"[-0.005231035407632589, -0.024064065888524055,...","[-0.030978472903370857, -0.01256611943244934, ...","[0.16930730640888214, -0.4844645857810974, 0.2...","[0.1620851755142212, -0.4702264964580536, 0.21...",breaking : email reveal cynical exchange obama...,freaking wrong w/the prior administration ... ...,-1,-1,-1,-1


## Event wise evaluation of algorithms

In [14]:

train_embedding_label = []
test_embedding_label = []
train_count = 0
test_count = 0


stance_embedding_size = 8
event_fscores = {}

for event in { 'Iran_Deal', 'Santa_Fe_Shooting', 'Student_Marches'}:        
    
    stance_df = pd.read_csv('./data/' + event + '_userstances.csv', encoding="utf-8", 
                 dtype={"response_user":str, "target_user":str} ).fillna('')

        
    df_event = df_filtered[df_filtered['event'] == event] #  & 
    
    df_event = df_event.merge(stance_df, how='left', on=['target_user', 'response_user'])
    

    
    f_scores_random = []    
    f_scores_majority = [] 
    f_scores_vader = []
    f_scores_textblob = []
    f_scores_mlp = []
    f_scores_sgd = []


    f_scores_mlp_embeddings = []
    f_scores_sgd_embeddings = []
    f_scores_nn_embeddings = []
    
    f_scores_node_embedding = []
    f_scores_stance_embedding = []
    f_scores_node_stance_embedding = []

    
    print(event) 
    
    for iteration in range(0, 10):
    
        df_event_train, df_event_test = train_test_split(df_event, test_size=0.3)

#         print(event, len(df_event_train), len(df_event_test))

        X_train = []
        X_test = []    

        X_train_node_embedding = []
        X_test_node_embedding = []    

        X_train_stance_embedding = []
        X_test_stance_embedding = []    


        X_train_node_stance_embedding = []
        X_test_node_stance_embedding = []    


        y_train = []    
        y_test = []

        vader_sentiment_test = []
        blob_sentiment_test = []        
        y_test_sentiment = []
        y_train_sentiment = []
        

        train_replies = []
        test_replies = []        
#         print(len(df_event_train))
        
        
        for idx, train_row in df_event_train.iterrows():
                        
            target_node = train_row['target_user']
            response_node = train_row['response_user']
            response_embedding = train_row['np_response_text_ada_embedding']
            target_embedding = train_row['np_target_text_ada_embedding']
            target_user_embeddings = train_row['np_target_user_embedding']
            response_user_embeddings = train_row['np_response_user_embedding']
            target_user_stance = train_row['target_user_stance']
            response_user_stance = train_row['response_user_stance']
            label = train_row['label']   
            train_replies.append(train_row['response_text_cleaned'])

            embedding = np.concatenate([response_embedding , target_embedding ], axis = 0)                
            X_train.append(embedding)

            embedding = np.concatenate([response_embedding , target_embedding, target_user_embeddings, response_user_embeddings ], axis = 0) 
            X_train_node_embedding.append(embedding)            

            embedding = np.concatenate([response_embedding , target_embedding, np.asarray(stance_embedding_size*[target_user_stance]), np.asarray(stance_embedding_size*[response_user_stance]) ], axis = 0) 
            X_train_stance_embedding.append(embedding)            

            embedding = np.concatenate([response_embedding , target_embedding, target_user_embeddings, response_user_embeddings, np.asarray(stance_embedding_size*[target_user_stance]), np.asarray(stance_embedding_size*[response_user_stance]) ], axis = 0) 
            X_train_node_stance_embedding.append(embedding)            

            y_train.append(label)
            
            if label == 'Support':
                y_train_sentiment.append(1)

            elif label == 'Denial':
                y_train_sentiment.append(-1)
            else:
                y_train_sentiment.append(0)
            

            train_count += 1
            

        for idx, test_row in df_event_test.iterrows():

            target_node = test_row['target_user']
            response_node = test_row['response_user']
            response_embedding = test_row['np_response_text_ada_embedding']
            target_embedding = test_row['np_target_text_ada_embedding']
            target_user_embeddings = test_row['np_target_user_embedding']
            response_user_embeddings = test_row['np_response_user_embedding']
            target_user_stance= test_row['target_user_stance']
            response_user_stance = test_row['response_user_stance']
            label = test_row['label']
            test_replies.append(test_row['response_text_cleaned'])            


            embedding = np.concatenate([response_embedding , target_embedding ], axis = 0)                
            X_test.append(embedding)

            embedding = np.concatenate([response_embedding , target_embedding, target_user_embeddings, response_user_embeddings ], axis = 0) 
            X_test_node_embedding.append(embedding)            

            embedding = np.concatenate([response_embedding , target_embedding, np.asarray(stance_embedding_size*[target_user_stance]), np.asarray(stance_embedding_size*[response_user_stance]) ], axis = 0) 
            X_test_stance_embedding.append(embedding)            

            embedding = np.concatenate([response_embedding , target_embedding, target_user_embeddings, response_user_embeddings, np.asarray(stance_embedding_size*[target_user_stance]), np.asarray(stance_embedding_size*[response_user_stance]) ], axis = 0) 
            X_test_node_stance_embedding.append(embedding)  

            y_test.append(label)
            
            if label == 'Support':
                y_test_sentiment.append(1)

            elif label == 'Denial':
                y_test_sentiment.append(-1)
            else:
                y_test_sentiment.append(0)

            
            vader_sentiment_test.append( test_row['response_text_sentiment_vader'])
            blob_sentiment_test.append( test_row['response_text_sentiment_textbolb'])
            

            test_count += 1    


        print(len(X_train), len(y_train), len(X_test), len(y_test))
        
        ## 1. Random classifier
        
        preds, y_test_out = random_classifier(X_train, y_train, X_test, y_test)
        fscore = f1_score(preds, y_test_out , average='macro')
        print('Random Classifier - fscore: ', fscore)    
        f_scores_random.append(fscore)
        
        ## 2. Majority classifier      
        
        preds, y_test_out = majority_classifier(X_train, y_train, X_test, y_test)
        fscore = f1_score(preds, y_test_out , average='macro')
        print('Majority Classifier - fscore: ', fscore)    
        f_scores_majority.append(fscore)
        
        ## 3. Vader classifier                
        
        fscore = f1_score(vader_sentiment_test, y_test_sentiment , average='macro')
        print('Vader Sentiment Classifier - fscore: ', fscore)    
        f_scores_vader.append(fscore)

        
        
        ## 4. TextBlob classifier
        
        fscore = f1_score(blob_sentiment_test, y_test_sentiment , average='macro')
        print('TextBlob Sentiment Classifier - fscore: ', fscore)    
        f_scores_textblob.append(fscore)        
        
        
        
        ## 5. Baseline Supervised Classifiers
        
        sgd_text_clf.fit(train_replies,  y_train)

        preds = sgd_text_clf.predict(y_test)
        fscore = f1_score(preds, y_test , average='macro')
        print('SVM Text  Model - fscore: ', fscore)
        f_scores_sgd.append(fscore)
        
        
        mlp_text_clf.fit(train_replies,  y_train)

        preds = sgd_text_clf.predict(y_test)
        fscore = f1_score(preds, y_test , average='macro')
        print('MLP Text  Model - fscore: ', fscore)
        f_scores_mlp.append(fscore)
        
                
        
        ## 6. Open AI GPT - Text Embedding Models
    
        mlp_clf.fit(X_train, y_train)
        preds = mlp_clf.predict(X_test)
        fscore = f1_score(preds, y_test , average='macro')
        print('Text Embedding + MLP Model - fscore: ', fscore)
        f_scores_sgd_embeddings.append(fscore)
        
        
        sgd_clf.fit(X_train, y_train)
        preds = sgd_clf.predict(X_test)        
        fscore = f1_score(preds, y_test , average='macro')
        print('Text Embedding + MLP Model - fscore: ', fscore)
        f_scores_mlp_embeddings.append(fscore)        
    
    
        preds, y_test_out = NN_train(X_train, y_train, X_test, y_test)
        fscore = f1_score(preds, y_test_out , average='macro')
        print('Text Embeddings + User Graph Embeddings - fscore: ', fscore)
        f_scores_nn_embeddings.append(fscore)
        
    
        ## 7. Open AI GPT Text Embeddings + User Embeddings        

        preds, y_test_out = NN_train(X_train_node_embedding, y_train, X_test_node_embedding, y_test)
        fscore = f1_score(preds, y_test_out , average='macro')
        print('Text Embeddings + User Graph Embeddings - fscore: ', fscore)
        f_scores_node_embedding.append(fscore)

        preds, y_test_out = NN_train(X_train_stance_embedding, y_train, X_test_stance_embedding, y_test)    
        fscore = f1_score(preds, y_test_out , average='macro')
        print('Text Embeddings + User Opinion Embeddings - fscore: ', fscore)
        f_scores_stance_embedding.append(fscore)


        preds, y_test_out = NN_train(X_train_node_stance_embedding, y_train, X_test_node_stance_embedding, y_test)
        fscore = f1_score(preds, y_test_out , average='macro')
        print('fText Embeddings + User Graph + Opinion Embeddings -  score: ', fscore)
        f_scores_node_stance_embedding.append(fscore)

    print(event, ' Random based: ', np.mean(f_scores_random))          
    print(event, ' Majority based: ', np.mean(f_scores_majority))      
    print(event, ' Vader Sentiment based: ', np.mean(f_scores_vader))      
    print(event, ' Textblob Sentiment based: ', np.mean(f_scores_textblob))      
    print(event, ' SVM Text based: ', np.mean(f_scores_sgd))          
    print(event, ' MLP Text based: ', np.mean(f_scores_mlp))     
    print(event, ' Text Emdedding MLP based: ', np.mean(f_scores_mlp_embeddings))     
    print(event, ' Text Embeddings SGD based: ', np.mean(f_scores_sgd_embeddings))         
    print(event, ' Text Embeddings NN based: ', np.mean(f_scores_nn_embeddings))             

    print(event, ' Text + Node embedding based: ', np.mean(f_scores_node_embedding))      
    print(event, ' Text + Stance emdedding based: ', np.mean(f_scores_stance_embedding))      
    print(event, ' Text + Node + Stance based: ', np.mean(f_scores_node_stance_embedding))  
    
    event_fscores[event] = {'f_scores_random': f_scores_random, 
                            'f_scores_majority': f_scores_majority, 
                            'f_scores_vader': f_scores_vader,
                            'f_scores_textblob': f_scores_textblob,
                            'f_scores_sgd': f_scores_sgd, 
                            'f_scores_mlp':f_scores_mlp, 
                            'f_scores_mlp_embeddings': f_scores_mlp_embeddings,
                            'f_scores_sgd_embeddings': f_scores_sgd_embeddings, 
                            'f_scores_nn_embeddings': f_scores_nn_embeddings,
                            'f_scores_node_embedding': f_scores_node_embedding, 
                            'f_scores_stance_embedding': f_scores_stance_embedding, 
                            'f_scores_node_stance_embedding': f_scores_node_stance_embedding}
    
    
    
           

Iran_Deal
858 858 368 368
Random Classifier - fscore:  0.28537758537758534
Majority Classifier - fscore:  0.18467583497053044
Vader Sentiment Classifier - fscore:  0.30300389602373196
TextBlob Sentiment Classifier - fscore:  0.3227705761416175
SVM Text  Model - fscore:  0.5587529976019184




MLP Text  Model - fscore:  0.5587529976019184
Text Embedding + MLP Model - fscore:  0.43000247820391707
Text Embedding + MLP Model - fscore:  0.4594310725620232




Text Embeddings + User Graph Embeddings - fscore:  0.5229893983159647
Text Embeddings + User Graph Embeddings - fscore:  0.5133807533779192
Text Embeddings + User Opinion Embeddings - fscore:  0.5439201915154995
fText Embeddings + User Graph + Opinion Embeddings -  score:  0.5178831671229291
858 858 368 368
Random Classifier - fscore:  0.3614493194375233
Majority Classifier - fscore:  0.18562091503267975
Vader Sentiment Classifier - fscore:  0.30331821780752893
TextBlob Sentiment Classifier - fscore:  0.33346372247974077
SVM Text  Model - fscore:  0.0
MLP Text  Model - fscore:  0.0


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Text Embedding + MLP Model - fscore:  0.5153146840233266
Text Embedding + MLP Model - fscore:  0.5094017094017094
Text Embeddings + User Graph Embeddings - fscore:  0.5290143054963984
Text Embeddings + User Graph Embeddings - fscore:  0.5178024638609984
Text Embeddings + User Opinion Embeddings - fscore:  0.5675211020966232
fText Embeddings + User Graph + Opinion Embeddings -  score:  0.5688847171753487
858 858 368 368
Random Classifier - fscore:  0.30565009277871313
Majority Classifier - fscore:  0.18562091503267975
Vader Sentiment Classifier - fscore:  0.2699950216912026
TextBlob Sentiment Classifier - fscore:  0.30259721624336344
SVM Text  Model - fscore:  0.1818181818181818
MLP Text  Model - fscore:  0.1818181818181818




Text Embedding + MLP Model - fscore:  0.4241447987349627
Text Embedding + MLP Model - fscore:  0.5043139249088906




Text Embeddings + User Graph Embeddings - fscore:  0.5218482868050187
Text Embeddings + User Graph Embeddings - fscore:  0.5046113306982872
Text Embeddings + User Opinion Embeddings - fscore:  0.5186931763567241
fText Embeddings + User Graph + Opinion Embeddings -  score:  0.5357793641606104
858 858 368 368
Random Classifier - fscore:  0.3402008307088411
Majority Classifier - fscore:  0.19396274887604367
Vader Sentiment Classifier - fscore:  0.29942698385136307
TextBlob Sentiment Classifier - fscore:  0.326049295220096
SVM Text  Model - fscore:  0.0




MLP Text  Model - fscore:  0.0


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Text Embedding + MLP Model - fscore:  0.4369291613025407
Text Embedding + MLP Model - fscore:  0.5597272264075409
Text Embeddings + User Graph Embeddings - fscore:  0.5332998308926677
Text Embeddings + User Graph Embeddings - fscore:  0.5543794593817694
Text Embeddings + User Opinion Embeddings - fscore:  0.5606430895072438
fText Embeddings + User Graph + Opinion Embeddings -  score:  0.5727187050582244
858 858 368 368
Random Classifier - fscore:  0.30364623069084035
Majority Classifier - fscore:  0.1893644617380026
Vader Sentiment Classifier - fscore:  0.2764294529000411
TextBlob Sentiment Classifier - fscore:  0.29730707906603676
SVM Text  Model - fscore:  0.5661881977671451


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


MLP Text  Model - fscore:  0.5661881977671451
Text Embedding + MLP Model - fscore:  0.3909667159016701
Text Embedding + MLP Model - fscore:  0.4905178484312674




Text Embeddings + User Graph Embeddings - fscore:  0.5165788614048217
Text Embeddings + User Graph Embeddings - fscore:  0.5188236238545748
Text Embeddings + User Opinion Embeddings - fscore:  0.5387119492163905
fText Embeddings + User Graph + Opinion Embeddings -  score:  0.5267378227128342
858 858 368 368
Random Classifier - fscore:  0.3862039341734773
Majority Classifier - fscore:  0.20202020202020202
Vader Sentiment Classifier - fscore:  0.28515274034141963
TextBlob Sentiment Classifier - fscore:  0.33524726460942017
SVM Text  Model - fscore:  0.3333333333333333




MLP Text  Model - fscore:  0.3333333333333333
Text Embedding + MLP Model - fscore:  0.4407751374043509
Text Embedding + MLP Model - fscore:  0.5638360543698971




Text Embeddings + User Graph Embeddings - fscore:  0.5343909054999242
Text Embeddings + User Graph Embeddings - fscore:  0.5572944417206664
Text Embeddings + User Opinion Embeddings - fscore:  0.5376171352074967
fText Embeddings + User Graph + Opinion Embeddings -  score:  0.5745435308803605
858 858 368 368
Random Classifier - fscore:  0.32576122848543293
Majority Classifier - fscore:  0.19213410702772404
Vader Sentiment Classifier - fscore:  0.2746871702095582
TextBlob Sentiment Classifier - fscore:  0.31596512073342553
SVM Text  Model - fscore:  0.3333333333333333
MLP Text  Model - fscore:  0.3333333333333333


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Text Embedding + MLP Model - fscore:  0.4298810250429614
Text Embedding + MLP Model - fscore:  0.4747521010571973
Text Embeddings + User Graph Embeddings - fscore:  0.5328263206134773
Text Embeddings + User Graph Embeddings - fscore:  0.5333143500517131
Text Embeddings + User Opinion Embeddings - fscore:  0.5401433125676348
fText Embeddings + User Graph + Opinion Embeddings -  score:  0.5344790517290786
858 858 368 368
Random Classifier - fscore:  0.30537594949776947
Majority Classifier - fscore:  0.18467583497053044
Vader Sentiment Classifier - fscore:  0.2893643306379155
TextBlob Sentiment Classifier - fscore:  0.2941056910569106
SVM Text  Model - fscore:  0.18277449046679817




MLP Text  Model - fscore:  0.18277449046679817
Text Embedding + MLP Model - fscore:  0.48847820383247403
Text Embedding + MLP Model - fscore:  0.5549687758611962




Text Embeddings + User Graph Embeddings - fscore:  0.5391435724769057
Text Embeddings + User Graph Embeddings - fscore:  0.5459226001127006
Text Embeddings + User Opinion Embeddings - fscore:  0.5253152676993386
fText Embeddings + User Graph + Opinion Embeddings -  score:  0.5522664426971567
858 858 368 368
Random Classifier - fscore:  0.3149260446078391
Majority Classifier - fscore:  0.19213410702772404
Vader Sentiment Classifier - fscore:  0.2705618239598822
TextBlob Sentiment Classifier - fscore:  0.31755508173418623
SVM Text  Model - fscore:  0.0




MLP Text  Model - fscore:  0.0
Text Embedding + MLP Model - fscore:  0.4144738906387652
Text Embedding + MLP Model - fscore:  0.5459735494763897




Text Embeddings + User Graph Embeddings - fscore:  0.5387386360749099
Text Embeddings + User Graph Embeddings - fscore:  0.501173564800113
Text Embeddings + User Opinion Embeddings - fscore:  0.5625524715374538
fText Embeddings + User Graph + Opinion Embeddings -  score:  0.5527862808992462
858 858 368 368
Random Classifier - fscore:  0.32222396596695496
Majority Classifier - fscore:  0.19396274887604367
Vader Sentiment Classifier - fscore:  0.29313952889026335
TextBlob Sentiment Classifier - fscore:  0.31578525566929594
SVM Text  Model - fscore:  0.19396274887604367


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


MLP Text  Model - fscore:  0.19396274887604367
Text Embedding + MLP Model - fscore:  0.5342172062434574
Text Embedding + MLP Model - fscore:  0.44094064706079167




Text Embeddings + User Graph Embeddings - fscore:  0.5227209033194621
Text Embeddings + User Graph Embeddings - fscore:  0.5273034907635669
Text Embeddings + User Opinion Embeddings - fscore:  0.54929464348069
fText Embeddings + User Graph + Opinion Embeddings -  score:  0.5882238191893988
Iran_Deal  Random based:  0.3250815181724977
Iran_Deal  Majority based:  0.19041718755721604
Iran_Deal  Vader Sentiment based:  0.2865079166312906
Iran_Deal  Textblob Sentiment based:  0.3160846302954093
Iran_Deal  SVM Text based:  0.23501632831967542
Iran_Deal  MLP Text based:  0.23501632831967542
Iran_Deal  Text Emdedding MLP based:  0.5103862909536904
Iran_Deal  Text Embeddings SGD based:  0.4505183301328426
Iran_Deal  Text Embeddings NN based:  0.5291551020899551
Iran_Deal  Text + Node embedding based:  0.5274006078622309
Iran_Deal  Text + Stance emdedding based:  0.5444412339185094
Iran_Deal  Text + Node + Stance based:  0.5524302901625189
Student_Marches
506 506 217 217
Random Classifier - fsco



MLP Text  Model - fscore:  0.20447284345047922
Text Embedding + MLP Model - fscore:  0.5105147336484498
Text Embedding + MLP Model - fscore:  0.5514458863052499




Text Embeddings + User Graph Embeddings - fscore:  0.5432098765432098
Text Embeddings + User Graph Embeddings - fscore:  0.5634904442877756
Text Embeddings + User Opinion Embeddings - fscore:  0.5795359109393549
fText Embeddings + User Graph + Opinion Embeddings -  score:  0.5426534250063662
506 506 217 217
Random Classifier - fscore:  0.3437460592595649
Majority Classifier - fscore:  0.2015005359056806
Vader Sentiment Classifier - fscore:  0.3145833333333333
TextBlob Sentiment Classifier - fscore:  0.2874863927495506
SVM Text  Model - fscore:  0.2015005359056806
MLP Text  Model - fscore:  0.2015005359056806




Text Embedding + MLP Model - fscore:  0.4888259958071279
Text Embedding + MLP Model - fscore:  0.5189785175700669




Text Embeddings + User Graph Embeddings - fscore:  0.5337421610962101
Text Embeddings + User Graph Embeddings - fscore:  0.526158601371583
Text Embeddings + User Opinion Embeddings - fscore:  0.5763870840766356
fText Embeddings + User Graph + Opinion Embeddings -  score:  0.5609410430839001
506 506 217 217
Random Classifier - fscore:  0.3403237176644149
Majority Classifier - fscore:  0.20447284345047922
Vader Sentiment Classifier - fscore:  0.31239211854746785
TextBlob Sentiment Classifier - fscore:  0.24256410256410257
SVM Text  Model - fscore:  0.19658119658119655
MLP Text  Model - fscore:  0.19658119658119655




Text Embedding + MLP Model - fscore:  0.4401780004958958
Text Embedding + MLP Model - fscore:  0.5072498090145149




Text Embeddings + User Graph Embeddings - fscore:  0.5456484391368113
Text Embeddings + User Graph Embeddings - fscore:  0.5307292493358959
Text Embeddings + User Opinion Embeddings - fscore:  0.5793055061368548
fText Embeddings + User Graph + Opinion Embeddings -  score:  0.5767606031841114
506 506 217 217
Random Classifier - fscore:  0.3012806637806638
Majority Classifier - fscore:  0.2015005359056806
Vader Sentiment Classifier - fscore:  0.30795485165114816
TextBlob Sentiment Classifier - fscore:  0.28231077377187436
SVM Text  Model - fscore:  0.2015005359056806
MLP Text  Model - fscore:  0.2015005359056806




Text Embedding + MLP Model - fscore:  0.5144586292188622
Text Embedding + MLP Model - fscore:  0.47001040181308423




Text Embeddings + User Graph Embeddings - fscore:  0.535951723684643
Text Embeddings + User Graph Embeddings - fscore:  0.545452006702306
Text Embeddings + User Opinion Embeddings - fscore:  0.57953396785364
fText Embeddings + User Graph + Opinion Embeddings -  score:  0.5450112570568736
506 506 217 217
Random Classifier - fscore:  0.3353194644430595
Majority Classifier - fscore:  0.19543973941368076
Vader Sentiment Classifier - fscore:  0.30673530673530675
TextBlob Sentiment Classifier - fscore:  0.3205058717253839
SVM Text  Model - fscore:  0.19543973941368076




MLP Text  Model - fscore:  0.19543973941368076
Text Embedding + MLP Model - fscore:  0.5929812867367006
Text Embedding + MLP Model - fscore:  0.5841304439600679




Text Embeddings + User Graph Embeddings - fscore:  0.657369875412817
Text Embeddings + User Graph Embeddings - fscore:  0.5098341817987466
Text Embeddings + User Opinion Embeddings - fscore:  0.5745870066670598
fText Embeddings + User Graph + Opinion Embeddings -  score:  0.6154984049622918
506 506 217 217
Random Classifier - fscore:  0.3540962665883401
Majority Classifier - fscore:  0.1923497267759563
Vader Sentiment Classifier - fscore:  0.25555555555555554
TextBlob Sentiment Classifier - fscore:  0.27872340425531916
SVM Text  Model - fscore:  0.21311475409836067


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


MLP Text  Model - fscore:  0.21311475409836067


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Text Embedding + MLP Model - fscore:  0.4842472342472342
Text Embedding + MLP Model - fscore:  0.5089592044868301
Text Embeddings + User Graph Embeddings - fscore:  0.5795734431749757
Text Embeddings + User Graph Embeddings - fscore:  0.609739095507809
Text Embeddings + User Opinion Embeddings - fscore:  0.609955740692528
fText Embeddings + User Graph + Opinion Embeddings -  score:  0.6213608957795004
506 506 217 217
Random Classifier - fscore:  0.2928646306551566
Majority Classifier - fscore:  0.18921892189218922
Vader Sentiment Classifier - fscore:  0.2891533219402072
TextBlob Sentiment Classifier - fscore:  0.3003669081991998
SVM Text  Model - fscore:  0.18921892189218922
MLP Text  Model - fscore:  0.18921892189218922




Text Embedding + MLP Model - fscore:  0.5311421262250096
Text Embedding + MLP Model - fscore:  0.47818014495966255




Text Embeddings + User Graph Embeddings - fscore:  0.49346615929844967
Text Embeddings + User Graph Embeddings - fscore:  0.4934714564776335
Text Embeddings + User Opinion Embeddings - fscore:  0.5437716476991589
fText Embeddings + User Graph + Opinion Embeddings -  score:  0.5204177818175697
506 506 217 217
Random Classifier - fscore:  0.2714828094583763
Majority Classifier - fscore:  0.2015005359056806
Vader Sentiment Classifier - fscore:  0.26411483253588514
TextBlob Sentiment Classifier - fscore:  0.2732920350858616
SVM Text  Model - fscore:  0.2015005359056806
MLP Text  Model - fscore:  0.2015005359056806




Text Embedding + MLP Model - fscore:  0.5232874383817779
Text Embedding + MLP Model - fscore:  0.4847116457589915




Text Embeddings + User Graph Embeddings - fscore:  0.5315990990990991
Text Embeddings + User Graph Embeddings - fscore:  0.5990300955641135
Text Embeddings + User Opinion Embeddings - fscore:  0.5913671295373508
fText Embeddings + User Graph + Opinion Embeddings -  score:  0.6030191638950182
506 506 217 217
Random Classifier - fscore:  0.36288172572515065
Majority Classifier - fscore:  0.18921892189218922
Vader Sentiment Classifier - fscore:  0.2995960525642992
TextBlob Sentiment Classifier - fscore:  0.29129769906468933
SVM Text  Model - fscore:  0.18921892189218922




MLP Text  Model - fscore:  0.18921892189218922
Text Embedding + MLP Model - fscore:  0.5392576555159336
Text Embedding + MLP Model - fscore:  0.5188206327604629




Text Embeddings + User Graph Embeddings - fscore:  0.5205543108455729
Text Embeddings + User Graph Embeddings - fscore:  0.538693164709425
Text Embeddings + User Opinion Embeddings - fscore:  0.5958365386856891
fText Embeddings + User Graph + Opinion Embeddings -  score:  0.5825620222625935
506 506 217 217
Random Classifier - fscore:  0.2876500193573364
Majority Classifier - fscore:  0.21878224974200208
Vader Sentiment Classifier - fscore:  0.2879210145397196
TextBlob Sentiment Classifier - fscore:  0.2928966458378223
SVM Text  Model - fscore:  0.0




MLP Text  Model - fscore:  0.0
Text Embedding + MLP Model - fscore:  0.46478236684744817
Text Embedding + MLP Model - fscore:  0.5727843040749072




Text Embeddings + User Graph Embeddings - fscore:  0.5474776654362595
Text Embeddings + User Graph Embeddings - fscore:  0.5296290896130511
Text Embeddings + User Opinion Embeddings - fscore:  0.5901140652590529
fText Embeddings + User Graph + Opinion Embeddings -  score:  0.5710372769869337
Student_Marches  Random based:  0.31875756401564637
Student_Marches  Majority based:  0.19984568543340178
Student_Marches  Vader Sentiment based:  0.29433346056467424
Student_Marches  Textblob Sentiment based:  0.28663123618764247
Student_Marches  SVM Text based:  0.17925479850451373
Student_Marches  MLP Text based:  0.17925479850451373
Student_Marches  Text Emdedding MLP based:  0.5195270990703837
Student_Marches  Text Embeddings SGD based:  0.5089675467124438
Student_Marches  Text Embeddings NN based:  0.5488592753728049
Student_Marches  Text + Node embedding based:  0.5446227385368339
Student_Marches  Text + Stance emdedding based:  0.5820394597547325
Student_Marches  Text + Node + Stance based:

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Text Embedding + MLP Model - fscore:  0.4356234461656148
Text Embedding + MLP Model - fscore:  0.582889028563447
Text Embeddings + User Graph Embeddings - fscore:  0.5534241664767979
Text Embeddings + User Graph Embeddings - fscore:  0.5567286694777946
Text Embeddings + User Opinion Embeddings - fscore:  0.5795975667766853
fText Embeddings + User Graph + Opinion Embeddings -  score:  0.5754734806305464
858 858 369 369
Random Classifier - fscore:  0.37126299041290106
Majority Classifier - fscore:  0.2210144927536232
Vader Sentiment Classifier - fscore:  0.31054420085448936
TextBlob Sentiment Classifier - fscore:  0.28576526077450576
SVM Text  Model - fscore:  0.2210144927536232




MLP Text  Model - fscore:  0.2210144927536232
Text Embedding + MLP Model - fscore:  0.4649392318911036
Text Embedding + MLP Model - fscore:  0.4330795158697125




Text Embeddings + User Graph Embeddings - fscore:  0.5713580626228487
Text Embeddings + User Graph Embeddings - fscore:  0.5761833159856312
Text Embeddings + User Opinion Embeddings - fscore:  0.5682406875199578
fText Embeddings + User Graph + Opinion Embeddings -  score:  0.5725683859832866
858 858 369 369
Random Classifier - fscore:  0.3309027898721301
Majority Classifier - fscore:  0.2185792349726776
Vader Sentiment Classifier - fscore:  0.28698982017028496
TextBlob Sentiment Classifier - fscore:  0.3017696678431078
SVM Text  Model - fscore:  0.2185792349726776




MLP Text  Model - fscore:  0.2185792349726776


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Text Embedding + MLP Model - fscore:  0.4380954913543665
Text Embedding + MLP Model - fscore:  0.5233670349912495
Text Embeddings + User Graph Embeddings - fscore:  0.5942634284483024
Text Embeddings + User Graph Embeddings - fscore:  0.5982401941643506
Text Embeddings + User Opinion Embeddings - fscore:  0.6006242688366951
fText Embeddings + User Graph + Opinion Embeddings -  score:  0.593125833484876
858 858 369 369
Random Classifier - fscore:  0.27817005919355114
Majority Classifier - fscore:  0.22020568663036902
Vader Sentiment Classifier - fscore:  0.2935758243776982
TextBlob Sentiment Classifier - fscore:  0.2738278106941701
SVM Text  Model - fscore:  0.22020568663036902




MLP Text  Model - fscore:  0.22020568663036902


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Text Embedding + MLP Model - fscore:  0.4802256431762451
Text Embedding + MLP Model - fscore:  0.4435944832719701
Text Embeddings + User Graph Embeddings - fscore:  0.5345393841195875
Text Embeddings + User Graph Embeddings - fscore:  0.5781844123566584
Text Embeddings + User Opinion Embeddings - fscore:  0.5781230459723242
fText Embeddings + User Graph + Opinion Embeddings -  score:  0.5818769318234974
858 858 369 369
Random Classifier - fscore:  0.33751732001520596
Majority Classifier - fscore:  0.2119531731361676
Vader Sentiment Classifier - fscore:  0.30190582916469305
TextBlob Sentiment Classifier - fscore:  0.3200737660139511
SVM Text  Model - fscore:  0.2119531731361676




MLP Text  Model - fscore:  0.2119531731361676


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Text Embedding + MLP Model - fscore:  0.42792798278731786
Text Embedding + MLP Model - fscore:  0.6094424094424095
Text Embeddings + User Graph Embeddings - fscore:  0.5586668067986343
Text Embeddings + User Graph Embeddings - fscore:  0.5573736339678789
Text Embeddings + User Opinion Embeddings - fscore:  0.5917901823338533
fText Embeddings + User Graph + Opinion Embeddings -  score:  0.567437550908361
858 858 369 369
Random Classifier - fscore:  0.336754978631975
Majority Classifier - fscore:  0.2144607843137255
Vader Sentiment Classifier - fscore:  0.2850179211469534
TextBlob Sentiment Classifier - fscore:  0.3144226243956702
SVM Text  Model - fscore:  0.2144607843137255




MLP Text  Model - fscore:  0.2144607843137255
Text Embedding + MLP Model - fscore:  0.44129363709292635
Text Embedding + MLP Model - fscore:  0.5763212831957069




Text Embeddings + User Graph Embeddings - fscore:  0.5691430008387145
Text Embeddings + User Graph Embeddings - fscore:  0.5546544581831722
Text Embeddings + User Opinion Embeddings - fscore:  0.5793937320263601
fText Embeddings + User Graph + Opinion Embeddings -  score:  0.5815339151241763
858 858 369 369
Random Classifier - fscore:  0.31912779832338556
Majority Classifier - fscore:  0.21529051987767586
Vader Sentiment Classifier - fscore:  0.28284063253324726
TextBlob Sentiment Classifier - fscore:  0.3016608187134503
SVM Text  Model - fscore:  0.21529051987767586
MLP Text  Model - fscore:  0.21529051987767586


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Text Embedding + MLP Model - fscore:  0.47578294694998036
Text Embedding + MLP Model - fscore:  0.5238239539764926
Text Embeddings + User Graph Embeddings - fscore:  0.5601598573287965
Text Embeddings + User Graph Embeddings - fscore:  0.5738140816050578
Text Embeddings + User Opinion Embeddings - fscore:  0.595144421233329
fText Embeddings + User Graph + Opinion Embeddings -  score:  0.6121542961136774
858 858 369 369
Random Classifier - fscore:  0.26975918606353394
Majority Classifier - fscore:  0.21529051987767586
Vader Sentiment Classifier - fscore:  0.2715284359229828
TextBlob Sentiment Classifier - fscore:  0.29978027555715836
SVM Text  Model - fscore:  0.21529051987767586




MLP Text  Model - fscore:  0.21529051987767586


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Text Embedding + MLP Model - fscore:  0.48849908261672964
Text Embedding + MLP Model - fscore:  0.4771844324391871
Text Embeddings + User Graph Embeddings - fscore:  0.5443271108148008
Text Embeddings + User Graph Embeddings - fscore:  0.508914492450267
Text Embeddings + User Opinion Embeddings - fscore:  0.539077576261855
fText Embeddings + User Graph + Opinion Embeddings -  score:  0.5558461100592741
858 858 369 369
Random Classifier - fscore:  0.34059469700006145
Majority Classifier - fscore:  0.20941759603469642
Vader Sentiment Classifier - fscore:  0.26785570820277976
TextBlob Sentiment Classifier - fscore:  0.3044563279857398
SVM Text  Model - fscore:  0.20941759603469642


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


MLP Text  Model - fscore:  0.20941759603469642


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Text Embedding + MLP Model - fscore:  0.46110569743482416
Text Embedding + MLP Model - fscore:  0.5715670293095136
Text Embeddings + User Graph Embeddings - fscore:  0.5534504896207024
Text Embeddings + User Graph Embeddings - fscore:  0.5789905830262525
Text Embeddings + User Opinion Embeddings - fscore:  0.5743107210338197
fText Embeddings + User Graph + Opinion Embeddings -  score:  0.5578512508892255
858 858 369 369
Random Classifier - fscore:  0.37723331422314027
Majority Classifier - fscore:  0.2169408897014016
Vader Sentiment Classifier - fscore:  0.2966427966427967
TextBlob Sentiment Classifier - fscore:  0.27647133434804666
SVM Text  Model - fscore:  0.2169408897014016
MLP Text  Model - fscore:  0.2169408897014016


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Text Embedding + MLP Model - fscore:  0.5295872397418726
Text Embedding + MLP Model - fscore:  0.5666489334751988
Text Embeddings + User Graph Embeddings - fscore:  0.574928132150654
Text Embeddings + User Graph Embeddings - fscore:  0.5593958967649503
Text Embeddings + User Opinion Embeddings - fscore:  0.5826290466883489
fText Embeddings + User Graph + Opinion Embeddings -  score:  0.5815600878476767
Santa_Fe_Shooting  Random based:  0.32767418273098187
Santa_Fe_Shooting  Majority based:  0.2162546836691952
Santa_Fe_Shooting  Vader Sentiment based:  0.28635894136484064
Santa_Fe_Shooting  Textblob Sentiment based:  0.29615012703783716
Santa_Fe_Shooting  SVM Text based:  0.2162546836691952
Santa_Fe_Shooting  MLP Text based:  0.2162546836691952
Santa_Fe_Shooting  Text Emdedding MLP based:  0.5307918104534888
Santa_Fe_Shooting  Text Embeddings SGD based:  0.4643080399210981
Santa_Fe_Shooting  Text Embeddings NN based:  0.5614260439219838
Santa_Fe_Shooting  Text + Node embedding based:  0

In [15]:
import pickle 


with open('./data/event_fscores_3_class.pkl', 'wb') as f:
    pickle.dump(event_fscores, f)
    

    
    




In [20]:
## Put results in a format for the paper

for result_type in ['f_scores_random', 
                    'f_scores_majority', 
                    'f_scores_vader',
                    'f_scores_textblob',
                    'f_scores_sgd', 
                    'f_scores_mlp', 
                    'f_scores_mlp_embeddings',
                    'f_scores_sgd_embeddings', 
                    'f_scores_nn_embeddings',
                    'f_scores_node_embedding', 
                    'f_scores_stance_embedding', 
                    'f_scores_node_stance_embedding']:


    print("{:.2f}&{:.2f}&{:.2f}&{:.2f}".format(np.mean(event_fscores['Student_Marches'][result_type]),
                                               np.mean(event_fscores['Santa_Fe_Shooting'][result_type]), 
                                               np.mean(event_fscores['Iran_Deal'][result_type]),
                                        np.mean([np.mean(event_fscores['Student_Marches'][result_type]),
                                               np.mean(event_fscores['Santa_Fe_Shooting'][result_type]), 
                                               np.mean(event_fscores['Iran_Deal'][result_type])])
                                              ))
    
    


0.32&0.33&0.33&0.32
0.20&0.22&0.19&0.20
0.29&0.29&0.29&0.29
0.29&0.30&0.32&0.30
0.18&0.22&0.24&0.21
0.18&0.22&0.24&0.21
0.52&0.53&0.51&0.52
0.51&0.46&0.45&0.47
0.55&0.56&0.53&0.55
0.54&0.56&0.53&0.55
0.58&0.58&0.54&0.57
0.57&0.58&0.55&0.57
