In [1]:
import pandas as pd
import numpy as np
import json

from numpy import random as rd
from collections import Counter

from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score
from sklearn.feature_selection import mutual_info_classif
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel
from sklearn.pipeline import Pipeline
from sklearn.neural_network import MLPClassifier
from sklearn.linear_model import SGDClassifier
from sklearn.metrics import f1_score

   
def get_accuracy_estimate(truths, predicted):

    print(classification_report(truths, predicted))
    print(' TEXT Classifier Accuracy: ', accuracy_score(truths, predicted))

    print('f1_score micro: ', f1_score(truths, predicted, average='micro'))
    print('f1_score macro: ', f1_score(truths, predicted, average='macro'))    
    print('f1_score wieghted: ', f1_score(truths, predicted, average='weighted'))        

    return f1_score(truths, predicted, average='micro'), f1_score(truths, predicted, average='macro'), f1_score(truths, predicted, average='weighted')


    

## Load dataset

In [2]:
df = pd.read_csv('./data/Contentious_pairs_all.csv', encoding="utf-8", 
                 dtype={"response_id":str, "target_id":str,
                       "response_user":str, "target_user":str} ).fillna('')


df['present_in_train'] = None
df['np_response_text_ada_embedding'] = df['response_text_ada_embedding'].apply(eval).apply(np.array)
df['np_target_text_ada_embedding'] = df['target_text_ada_embedding'].apply(eval).apply(np.array)

df['np_target_user_embedding'] = df['target_user_embedding_8'].apply(eval).apply(np.array)
df['np_response_user_embedding'] = df['response_user_embedding_8'].apply(eval).apply(np.array)


df

Unnamed: 0,event,response_id,target_id,interaction_type,label,label_expanded,Confidence_Level,response_text,target_text,truncated,...,response_user,target_user_embedding,response_user_embedding,target_user_embedding_8,response_user_embedding_8,present_in_train,np_response_text_ada_embedding,np_target_text_ada_embedding,np_target_user_embedding,np_response_user_embedding
0,Santa_Fe_Shooting,997626508050157568,997598447376175104,Quote,Support,Implicit_Support,1.0,"Seriously, wtf is wrong with our political sys...",More children have been killed in schools this...,False,...,18655355,"[0.16930730640888214, -0.4844645857810974, 0.2...","[0.1620851755142212, -0.4702264964580536, 0.21...","[0.16930730640888214, -0.4844645857810974, 0.2...","[0.1620851755142212, -0.4702264964580536, 0.21...",,"[-0.025092221796512604, -0.013276644051074982,...","[-0.008838048204779625, -0.003347944701090455,...","[0.16930730640888214, -0.4844645857810974, 0.2...","[0.1620851755142212, -0.4702264964580536, 0.21..."
1,Santa_Fe_Shooting,997575042027458561,997573240380968961,Quote,Denial,Explicit_Denial,1.0,Ma calls BS! https://t.co/bodEWN5Q4C,Former GOP Rep. Jason Chaffetz: 'Politically c...,False,...,3176702526,"[0.16930730640888214, -0.4844645857810974, 0.2...","[0.2439490556716919, -0.44223514199256897, 0.1...","[0.16930730640888214, -0.4844645857810974, 0.2...","[0.2439490556716919, -0.44223514199256897, 0.1...",,"[-0.033440690487623215, 0.007639116141945124, ...","[-0.027197668328881264, 0.0034390492364764214,...","[0.16930730640888214, -0.4844645857810974, 0.2...","[0.2439490556716919, -0.44223514199256897, 0.1..."
2,Santa_Fe_Shooting,997540582846271494,997535659870117888,Quote,Denial,Explicit_Denial,1.0,"On average, there’s one fake stat about school...","On average, that’s one school shooting every w...",False,...,1646856415,"[0.16930730640888214, -0.4844645857810974, 0.2...","[0.1620851755142212, -0.4702264964580536, 0.21...","[0.16930730640888214, -0.4844645857810974, 0.2...","[0.1620851755142212, -0.4702264964580536, 0.21...",,"[-0.023548724129796028, 0.022269316017627716, ...","[-0.0044773295521736145, -0.014855715446174145...","[0.16930730640888214, -0.4844645857810974, 0.2...","[0.1620851755142212, -0.4702264964580536, 0.21..."
3,General_Terms,1018569947817992192,1017909301040635904,Quote,Denial,Implicit_Denial,1.0,ONE MIGHT BE MADE UP AND NOT REAL. NOT SURE. S...,I’m so confused... - When we were attacked on ...,False,...,766475610059317248,"[0.16930730640888214, -0.4844645857810974, 0.2...","[0.1620851755142212, -0.4702264964580536, 0.21...","[0.16930730640888214, -0.4844645857810974, 0.2...","[0.1620851755142212, -0.4702264964580536, 0.21...",,"[0.004989785607904196, -0.004917326848953962, ...","[-0.013004736974835396, -0.01780531369149685, ...","[0.16930730640888214, -0.4844645857810974, 0.2...","[0.1620851755142212, -0.4702264964580536, 0.21..."
4,General_Terms,1019395289575239680,1017919759474622464,Quote,Denial,Explicit_Denial,1.0,"False, adult friendships are M-F, 9-5 https://...",Adult friendships https://t.co/Cn3r9l4pZJ,False,...,59621769,"[0.16930730640888214, -0.4844645857810974, 0.2...","[0.1620851755142212, -0.4702264964580536, 0.21...","[0.16930730640888214, -0.4844645857810974, 0.2...","[0.1620851755142212, -0.4702264964580536, 0.21...",,"[0.023379407823085785, 0.010180916637182236, -...","[0.01642121560871601, 0.0012021968141198158, 0...","[0.16930730640888214, -0.4844645857810974, 0.2...","[0.1620851755142212, -0.4702264964580536, 0.21..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5045,Santa_Fe_Shooting,997648170774654976,997645387669299200,Reply,Comment,Comment,2.0,@cenkuygur @realDonaldTrump @NRA NRA is funded...,"Hey #MAGA guys, I thought @realDonaldTrump was...",False,...,2948973487,"[0.16930730640888214, -0.4844645857810974, 0.2...","[0.1620851755142212, -0.4702264964580536, 0.21...","[0.16930730640888214, -0.4844645857810974, 0.2...","[0.1620851755142212, -0.4702264964580536, 0.21...",,"[-0.03429972380399704, 0.0034139070194214582, ...","[-0.039812665432691574, 0.005698402877897024, ...","[0.16930730640888214, -0.4844645857810974, 0.2...","[0.1620851755142212, -0.4702264964580536, 0.21..."
5046,Santa_Fe_Shooting,998085810636259328,997484633695469570,Reply,Comment,Comment,2.0,@ScottMcGrew There is no right answer to this ...,Guns and prayers: A man shows up to the Santa ...,False,...,103706087,"[0.16930730640888214, -0.4844645857810974, 0.2...","[0.1620851755142212, -0.4702264964580536, 0.21...","[0.16930730640888214, -0.4844645857810974, 0.2...","[0.1620851755142212, -0.4702264964580536, 0.21...",,"[-0.023522792384028435, 0.003550187451764941, ...","[-0.017941860482096672, 0.003447071649134159, ...","[0.16930730640888214, -0.4844645857810974, 0.2...","[0.1620851755142212, -0.4702264964580536, 0.21..."
5047,Santa_Fe_Shooting,998318329474400256,998316225435504640,Reply,Comment,Comment,2.0,"@cameron_kasky @NRA Sadly, there’s also so muc...",It makes me so furious how the @NRA behaves as...,False,...,412350904,"[0.16930730640888214, -0.4844645857810974, 0.2...","[0.1620851755142212, -0.4702264964580536, 0.21...","[0.16930730640888214, -0.4844645857810974, 0.2...","[0.1620851755142212, -0.4702264964580536, 0.21...",,"[0.004335987847298384, -0.008479560725390911, ...","[-0.030558191239833832, -0.010756535455584526,...","[0.16930730640888214, -0.4844645857810974, 0.2...","[0.1620851755142212, -0.4702264964580536, 0.21..."
5048,Santa_Fe_Shooting,999387940890578945,999321499470368768,Quote,Comment,Comment,2.0,What is freaking wrong w/the prior administrat...,BREAKING: Emails reveal cynical exchange betwe...,False,...,975247231925223424,"[0.16930730640888214, -0.4844645857810974, 0.2...","[0.1620851755142212, -0.4702264964580536, 0.21...","[0.16930730640888214, -0.4844645857810974, 0.2...","[0.1620851755142212, -0.4702264964580536, 0.21...",,"[-0.005231035407632589, -0.024064065888524055,...","[-0.030978472903370857, -0.01256611943244934, ...","[0.16930730640888214, -0.4844645857810974, 0.2...","[0.1620851755142212, -0.4702264964580536, 0.21..."


## Utility functions

In [3]:
import numpy as np
from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split
import collections
from sklearn.model_selection import train_test_split
import torch
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import f1_score
from torch import nn
import torch as T
import torch.nn.functional as F
from torch.optim import Adam
from random import sample


device = T.device('cpu')

# Set seed for reproducibility
torch.manual_seed(1234)

# X, y = get_data(...)
# y_pred = model.predict(X)
# f1_score(y, y_pred)

LEARNING_RATE = 1e-2
NUM_EPOCHS = 50
BATCH_SIZE = 32



def accuracy_fn(y_pred, y_true):
    n_correct = torch.eq(y_pred, y_true).sum().item()
    accuracy = (n_correct / len(y_pred)) * 100
    return accuracy



class LinearModel(nn.Module):
    def __init__(self, input_dim, hidden_dim,hidden_dim2, num_classes):
        super(LinearModel, self).__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.fc12 = nn.Linear(hidden_dim, hidden_dim2)        
        self.fc13 = nn.Linear( hidden_dim2, hidden_dim2)                
        self.fc2 = nn.Linear(hidden_dim2, num_classes)

    def forward(self, x_in):
        z = F.relu(self.fc1(x_in)) # linear activation
        z1 = F.relu(self.fc12(z))        
        z12 = F.relu(self.fc13(z1))          
        z2 = self.fc2(z12)        
        return [z1, z2]
    

class ContrastiveLoss(T.nn.Module):
  def __init__(self, m=2.0):
    super(ContrastiveLoss, self).__init__()  # pre 3.3 syntax
    self.m = m  # margin or radius

  def forward(self, y1, y2, d=0):
    # d = 0 means y1 and y2 are supposed to be same
    # d = 1 means y1 and y2 are supposed to be different
    
    euc_dist = T.nn.functional.pairwise_distance(y1, y2)

    if d == 0:
      return T.mean(T.pow(euc_dist, 2))  # distance squared
    else:  # d == 1
      delta = self.m - euc_dist  # sort of reverse distance
      delta = T.clamp(delta, min=0.0, max=None)
      return T.mean(T.pow(delta, 2))  # mean over all rows
    
# -----------------------------------------------------------


# loss_func = ContrastiveLoss()


def contrastive_loss_fn(X_train, y_train, N = 200):
    
    total_loss = 0
    for i in sample(range(0, len(y_train)), N):
        y_train_val = y_train[i]
        x_train = X_train[i]

        for j  in sample(range(0, len(y_train)), N):    
            y_train1_val = y_train[j]        
            x_train1 = X_train[j]    

            if i != j:
#                 print(y_train_val, y_train1_val)
                if y_train_val == y_train1_val:
                    loss = loss_func(x_train, x_train1, 0)
                    total_loss += loss
#                     print('loss1: ', loss)
                else:
                    loss = loss_func(x_train, x_train1, 1)
                    total_loss += loss                    
#                     print('loss2:', loss)

    return total_loss/len(y_train)
    

def majority_classifier(X_train, y_train, X_test, y_test):
    
    majority_clss = Counter(y_test).most_common(1)[0][0] #np.random.choice([2,3], len(truths))
    y_pred = [majority_clss]*len(y_test)
        
    return y_pred, y_test
    
            
            
def random_classifier(X_train, y_train, X_test, y_test):
    
    y_pred = np.random.choice(list(set(y_test)), len(y_test))
        
    return y_pred, y_test
    
    
    
def NN_train(X_train, y_train, X_test, y_test, contrastive_loss = False):
    INPUT_DIM = np.asarray(X_train).shape[1] # X is 2-dimensional
    HIDDEN_DIM = 100
    HIDDEN_DIM2 = 20
    NUM_CLASSES = len(set(y_train)) # 3 classes


    # Output vectorizer
    label_encoder = LabelEncoder()

    # Fit on train data
    label_encoder = label_encoder.fit(y_train)
    classes = list(label_encoder.classes_)
#         print (f"classes: {classes}")


    # Convert labels to tokens
#         print (f"y_train[0]: {y_train[0]}")
    y_train = label_encoder.transform(y_train)
    # y_val = label_encoder.transform(y_val)
    y_test = label_encoder.transform(y_test)
#         print (f"y_train[0]: {y_train[0]}")


    # Class weights
    counts = np.bincount(y_train)
    class_weights = {i: 1.0/count for i, count in enumerate(counts)}
#         print (f"counts: {counts}\nweights: {class_weights}")


    # Define Loss
    class_weights_tensor = torch.Tensor(list(class_weights.values()))
    loss_fn = nn.CrossEntropyLoss(weight=class_weights_tensor)

    # Initialize model
    model = LinearModel(input_dim=INPUT_DIM, hidden_dim=HIDDEN_DIM, hidden_dim2 = HIDDEN_DIM2, num_classes=NUM_CLASSES)
#     print (model.named_parameters)

    # Optimizer
    optimizer = Adam(model.parameters(), lr=LEARNING_RATE)

#     # Convert data to tensors
    X_train = torch.Tensor(X_train)
    y_train = torch.LongTensor(y_train)

    X_val = torch.Tensor(X_test)
    y_val = torch.LongTensor(y_test)
    
    fscores = []

    # Training
    for epoch in range(NUM_EPOCHS):
        # Forward pass
        z1, z2 = model(X_train)

        y_pred = z2


        loss_supervised = loss_fn(y_pred, y_train)
    
    
        if contrastive_loss:
            loss_contrastive = contrastive_loss_fn(z1, y_train, N = 50)
            
            loss = loss_supervised + 0.2*loss_contrastive
        else:
            loss = loss_supervised  #+ 0.2*loss_contrastive
            
            
        # Zero all gradients
        optimizer.zero_grad()

        # Backward pass
        loss.backward()

        # Update weights
        optimizer.step()

        best_f = 0
        best_f_y_pred = []
        best_f_y_test = []
        if epoch%1==0:
            predictions = y_pred.max(dim=1)[1] # class
            accuracy = accuracy_fn(y_pred=predictions, y_true=y_train)
            fscore = f1_score(predictions, y_train , average='macro')
#             print (f"Training -- Epoch: {epoch} | loss: {loss:.2f}, accuracy: {accuracy:.1f}, f1score: {fscore:.1f}")

            z1, z2 = model(X_val)
            y_pred = F.softmax(z2, dim=1)
            _, y_pred = y_pred.max(dim=1)
            
#             accuracy = accuracy_fn(y_pred= y_pred, y_true=y_test)
#             fscore = f1_score(y_pred,y_test )

            accuracy = accuracy_fn(y_pred=y_pred, y_true=y_val)
            fscore = f1_score(y_pred, y_val , average='macro')
            
        if fscore > best_f:
            best_f  = fscore
            best_f_y_pred = y_pred
            best_f_y_test = y_test
            
#             print ( f" Validation -- Epoch: {epoch} | loss: {loss:.2f}, accuracy: {accuracy:.2f}, f1score: {fscore:.2f}")        
            
            fscores.append(fscore)    
            
    
    return best_f_y_pred, best_f_y_test


def get_text(tweet):
    tweetText = re.sub(r"(?:\@|https?\://)\S+", "", tweet) # r'(?:@[\w_]+)'
    tweetText = tweetText.strip().replace('rt', '').replace('\n', ' ').replace('\r', ' ').replace('\t', ' ')
    
    return tweetText
 


def get_statce_stats(emb, label):
    y_counts = {}
    y_Xs = {}
    y_indices = {}
    for i, (emb, label) in enumerate(zip(emb, label)):
        if label in y_counts:
            y_counts[label] = y_counts[label]  + 1
        else:
            y_counts[label] = 1    

        if label in y_Xs:
            y_Xs[label].append(emb)
        else:
            y_Xs[label] = [emb]  
            
        if label in y_indices:
            y_indices[label].append(i)
        else:
            y_indices[label] = [i]  

    print('lables : ', y_counts)
    
    return y_counts, y_Xs, y_indices

def balance_stance_classes(emb_vec, labels):
    y_counts, y_Xs, y_indices = get_statce_stats(emb_vec, labels)  
    
        
    majority_label = 0
    minority_label = 1
    majority_count = 0
    for key, val in y_counts.items():
        if val > majority_count:
            
            minority_label = majority_label
            majority_label = key
            majority_count= val
        else:
            minority_label = key
        
    new_emb_vec = []    
    new_labels = []
    for label in y_counts.keys():
        if label != majority_label:
            minority_label = label
            
            if y_counts[majority_label] > y_counts[minority_label]:
                additional_samples_count = y_counts[majority_label] - y_counts[minority_label]

                indices = rd.choice(list(y_indices[minority_label]), size=additional_samples_count, replace=True)
                for index in indices:
                    new_emb_vec.append(emb_vec[index])
                    new_labels.append(labels[index])
            

    y_counts, y_Xs, y_indices  = get_statce_stats(new_emb_vec + emb_vec, new_labels + labels)        
    

    return  new_emb_vec + emb_vec, new_labels + labels


## Define Sentiment Classifiers

In [4]:
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer

from textblob import TextBlob


def preprocess_text(text):

    # Tokenize the text

    tokens = word_tokenize(text.lower())

    # Remove stop words

    filtered_tokens = [token for token in tokens if token not in stopwords.words('english')]

    # Lemmatize the tokens

    lemmatizer = WordNetLemmatizer()

    lemmatized_tokens = [lemmatizer.lemmatize(token) for token in filtered_tokens]

    # Join the tokens back into a string
    processed_text = ' '.join(lemmatized_tokens)

    return processed_text

# create get_sentiment function
analyzer = SentimentIntensityAnalyzer()

def get_sentiment(text):

    scores = analyzer.polarity_scores(text)

    sentiment = 1 if scores['pos'] > scores['neg'] else -1
#     sentiment = -1 if scores['neg'] > 0 else 0
    return sentiment


def textblob_sentiment_analysis(text):
    
    score = TextBlob(text).sentiment.polarity
    if score < 0:
        return -1
    elif score >= 0:
        return 1
    
    return score


## Define Classifiers

In [5]:
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.neural_network import MLPClassifier

classifier = ('svm12', Pipeline([('vect', CountVectorizer()),
                                             ('tfidf', TfidfTransformer()),
                                             ('clf-svm', SGDClassifier(loss='hinge', penalty='l2',
                                                                       alpha=1e-3, max_iter=15, random_state=42)),
                                            ])
                                        , {'vect__ngram_range': [(1, 1), (1, 2)], 
                                          'tfidf__use_idf': (True, False),
                                          'clf-svm__alpha': (1e-2, 1e-3),
                                         })

clf_name, sgd_text_clf, __  = classifier

classifier = ('svm12', Pipeline([('vect', CountVectorizer()),
                                             ('tfidf', TfidfTransformer()),
                                             ('clf-svm', MLPClassifier(solver='lbfgs', alpha=1e-5,hidden_layer_sizes=(5, 2), random_state=1)),
                                            ])
                                        , {'vect__ngram_range': [(1, 1), (1, 2)], 
                                          'tfidf__use_idf': (True, False),
                                          'clf-svm__alpha': (1e-2, 1e-3),
                                         })

clf_name, mlp_text_clf, __  = classifier

mlp_clf = MLPClassifier(solver='lbfgs', alpha=1e-5,hidden_layer_sizes=(5, 2), random_state=1)
sgd_clf = SGDClassifier(loss='hinge', penalty='l2',alpha=1e-3, max_iter=15, random_state=42)



## Filter data

In [9]:

df['target_text_cleaned'] = df['target_text'].apply(preprocess_text)
df['response_text_cleaned'] = df['response_text'].apply(preprocess_text)


df['target_text_sentiment_vader'] = df['target_text_cleaned'].apply(get_sentiment)
df['response_text_sentiment_vader'] = df['response_text_cleaned'].apply(get_sentiment)


df['target_text_sentiment_textblob'] = df['target_text_cleaned'].apply(textblob_sentiment_analysis)
df['response_text_sentiment_textbolb'] = df['response_text_cleaned'].apply(textblob_sentiment_analysis)
df_filtered = df[df['label'] != 'Comment']


In [10]:
## Visualize data

print(df_filtered.columns)

df_filtered

Index(['event', 'response_id', 'target_id', 'interaction_type', 'label',
       'label_expanded', 'Confidence_Level', 'response_text', 'target_text',
       'truncated', 'response_text_ada_embedding', 'target_text_ada_embedding',
       'target_user', 'response_user', 'target_user_embedding',
       'response_user_embedding', 'target_user_embedding_8',
       'response_user_embedding_8', 'present_in_train',
       'np_response_text_ada_embedding', 'np_target_text_ada_embedding',
       'np_target_user_embedding', 'np_response_user_embedding',
       'target_text_cleaned', 'response_text_cleaned',
       'target_text_sentiment_vader', 'response_text_sentiment_vader',
       'target_text_sentiment_textblob', 'response_text_sentiment_textbolb'],
      dtype='object')


Unnamed: 0,event,response_id,target_id,interaction_type,label,label_expanded,Confidence_Level,response_text,target_text,truncated,...,np_response_text_ada_embedding,np_target_text_ada_embedding,np_target_user_embedding,np_response_user_embedding,target_text_cleaned,response_text_cleaned,target_text_sentiment_vader,response_text_sentiment_vader,target_text_sentiment_textblob,response_text_sentiment_textbolb
0,Santa_Fe_Shooting,997626508050157568,997598447376175104,Quote,Support,Implicit_Support,1.0,"Seriously, wtf is wrong with our political sys...",More children have been killed in schools this...,False,...,"[-0.025092221796512604, -0.013276644051074982,...","[-0.008838048204779625, -0.003347944701090455,...","[0.16930730640888214, -0.4844645857810974, 0.2...","[0.1620851755142212, -0.4702264964580536, 0.21...",child killed school year serving military . le...,"seriously , wtf wrong political system ! polit...",-1,-1,-1,-1
1,Santa_Fe_Shooting,997575042027458561,997573240380968961,Quote,Denial,Explicit_Denial,1.0,Ma calls BS! https://t.co/bodEWN5Q4C,Former GOP Rep. Jason Chaffetz: 'Politically c...,False,...,"[-0.033440690487623215, 0.007639116141945124, ...","[-0.027197668328881264, 0.0034390492364764214,...","[0.16930730640888214, -0.4844645857810974, 0.2...","[0.2439490556716919, -0.44223514199256897, 0.1...",former gop rep. jason chaffetz : 'politically ...,call b ! http : //t.co/bodewn5q4c,-1,-1,1,1
2,Santa_Fe_Shooting,997540582846271494,997535659870117888,Quote,Denial,Explicit_Denial,1.0,"On average, there’s one fake stat about school...","On average, that’s one school shooting every w...",False,...,"[-0.023548724129796028, 0.022269316017627716, ...","[-0.0044773295521736145, -0.014855715446174145...","[0.16930730640888214, -0.4844645857810974, 0.2...","[0.1620851755142212, -0.4702264964580536, 0.21...","average , ’ one school shooting every week yea...","average , ’ one fake stat school shooting idio...",-1,-1,1,-1
3,General_Terms,1018569947817992192,1017909301040635904,Quote,Denial,Implicit_Denial,1.0,ONE MIGHT BE MADE UP AND NOT REAL. NOT SURE. S...,I’m so confused... - When we were attacked on ...,False,...,"[0.004989785607904196, -0.004917326848953962, ...","[-0.013004736974835396, -0.01780531369149685, ...","[0.16930730640888214, -0.4844645857810974, 0.2...","[0.1620851755142212, -0.4702264964580536, 0.21...","’ confused ... - attacked 9/11 , american came...",one might made real . sure . still researching...,-1,1,-1,1
4,General_Terms,1019395289575239680,1017919759474622464,Quote,Denial,Explicit_Denial,1.0,"False, adult friendships are M-F, 9-5 https://...",Adult friendships https://t.co/Cn3r9l4pZJ,False,...,"[0.023379407823085785, 0.010180916637182236, -...","[0.01642121560871601, 0.0012021968141198158, 0...","[0.16930730640888214, -0.4844645857810974, 0.2...","[0.1620851755142212, -0.4702264964580536, 0.21...",adult friendship http : //t.co/cn3r9l4pzj,"false , adult friendship m-f , 9-5 http : //t....",1,1,1,-1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3697,Santa_Fe_Shooting,998727686062297088,998641852202012672,Quote,Support,Implicit_Support,2.0,"Obviously, this is fake news because France ha...",#LeftistTerrorism #LeftistSedition #WhineAndCh...,False,...,"[-0.012956563383340836, 0.0036507989279925823,...","[-0.029683180153369904, -0.013163618743419647,...","[0.16930730640888214, -0.4844645857810974, 0.2...","[0.08675669133663177, -0.5026119947433472, 0.2...",# leftistterrorism # leftistsedition # whinean...,"obviously , fake news france wall-to-wall gun ...",-1,-1,1,-1
3698,Santa_Fe_Shooting,999033162209443841,999032790304854021,Reply,Support,Implicit_Support,2.0,@Goss30Goss There is no such thing as a Consti...,Our founding fathers did not create a country ...,False,...,"[-0.023475689813494682, 0.022063223645091057, ...","[0.0012622424401342869, 0.0014467427972704172,...","[0.16930730640888214, -0.4844645857810974, 0.2...","[0.1620851755142212, -0.4702264964580536, 0.21...",founding father create country wannabe authori...,@ goss30goss thing constitutional conservative...,-1,-1,-1,-1
3699,Santa_Fe_Shooting,999044683509714944,999009592867778560,Reply,Denial,Explicit_Denial,2.0,@teamtrace the trace is a @MikeBloomberg shell...,Police confronted the Santa Fe gunman four min...,False,...,"[-0.0470198318362236, -0.019923418760299683, -...","[-0.01337257120758295, 0.013017261400818825, -...","[0.16930730640888214, -0.4844645857810974, 0.2...","[0.1620851755142212, -0.4702264964580536, 0.21...",police confronted santa fe gunman four minute ...,@ teamtrace trace @ mikebloomberg shell compan...,-1,-1,1,1
3700,Santa_Fe_Shooting,999366932024393728,998952797637890048,Quote,Denial,Implicit_Denial,2.0,This must be a tough position for the parents ...,"“My son, to me, is not a criminal, he’s a vict...",False,...,"[-0.005462398752570152, -0.00494138989597559, ...","[-0.018083559349179268, 0.016592662781476974, ...","[0.16930730640888214, -0.4844645857810974, 0.2...","[0.1620851755142212, -0.4702264964580536, 0.21...","“ son , , criminal , ’ victim. ” father santa ...","must tough position parent however , way justi...",-1,-1,-1,-1


## Event wise evaluation of algorithms

In [11]:

train_embedding_label = []
test_embedding_label = []
train_count = 0
test_count = 0


stance_embedding_size = 8
event_fscores = {}

for event in { 'Iran_Deal', 'Santa_Fe_Shooting', 'Student_Marches'}:        
    
    stance_df = pd.read_csv('./data/' + event + '_userstances.csv', encoding="utf-8", 
                 dtype={"response_user":str, "target_user":str} ).fillna('')

        
    df_event = df_filtered[df_filtered['event'] == event] #  & 
    
    df_event = df_event.merge(stance_df, how='left', on=['target_user', 'response_user'])
    

    
    f_scores_random = []    
    f_scores_majority = [] 
    f_scores_vader = []
    f_scores_textblob = []
    f_scores_mlp = []
    f_scores_sgd = []


    f_scores_mlp_embeddings = []
    f_scores_sgd_embeddings = []
    f_scores_nn_embeddings = []
    
    f_scores_node_embedding = []
    f_scores_stance_embedding = []
    f_scores_node_stance_embedding = []

    
    print(event) 
    
    for iteration in range(0, 10):
    
        df_event_train, df_event_test = train_test_split(df_event, test_size=0.3)

#         print(event, len(df_event_train), len(df_event_test))

        X_train = []
        X_test = []    

        X_train_node_embedding = []
        X_test_node_embedding = []    

        X_train_stance_embedding = []
        X_test_stance_embedding = []    


        X_train_node_stance_embedding = []
        X_test_node_stance_embedding = []    


        y_train = []    
        y_test = []

        vader_sentiment_test = []
        blob_sentiment_test = []        
        y_test_sentiment = []
        y_train_sentiment = []
        

        train_replies = []
        test_replies = []        
#         print(len(df_event_train))
        
        
        for idx, train_row in df_event_train.iterrows():
                        
            target_node = train_row['target_user']
            response_node = train_row['response_user']
            response_embedding = train_row['np_response_text_ada_embedding']
            target_embedding = train_row['np_target_text_ada_embedding']
            target_user_embeddings = train_row['np_target_user_embedding']
            response_user_embeddings = train_row['np_response_user_embedding']
            target_user_stance = train_row['target_user_stance']
            response_user_stance = train_row['response_user_stance']
            label = train_row['label']   
            train_replies.append(train_row['response_text_cleaned'])

            embedding = np.concatenate([response_embedding , target_embedding ], axis = 0)                
            X_train.append(embedding)

            embedding = np.concatenate([response_embedding , target_embedding, target_user_embeddings, response_user_embeddings ], axis = 0) 
            X_train_node_embedding.append(embedding)            

            embedding = np.concatenate([response_embedding , target_embedding, np.asarray(stance_embedding_size*[target_user_stance]), np.asarray(stance_embedding_size*[response_user_stance]) ], axis = 0) 
            X_train_stance_embedding.append(embedding)            

            embedding = np.concatenate([response_embedding , target_embedding, target_user_embeddings, response_user_embeddings, np.asarray(stance_embedding_size*[target_user_stance]), np.asarray(stance_embedding_size*[response_user_stance]) ], axis = 0) 
            X_train_node_stance_embedding.append(embedding)            

            y_train.append(label)
            
            if label == 'Support':
                y_train_sentiment.append(1)

            elif label == 'Denial':
                y_train_sentiment.append(-1)
            else:
                y_train_sentiment.append(0)
            

            train_count += 1
            

        for idx, test_row in df_event_test.iterrows():

            target_node = test_row['target_user']
            response_node = test_row['response_user']
            response_embedding = test_row['np_response_text_ada_embedding']
            target_embedding = test_row['np_target_text_ada_embedding']
            target_user_embeddings = test_row['np_target_user_embedding']
            response_user_embeddings = test_row['np_response_user_embedding']
            target_user_stance= test_row['target_user_stance']
            response_user_stance = test_row['response_user_stance']
            label = test_row['label']
            test_replies.append(test_row['response_text_cleaned'])            


            embedding = np.concatenate([response_embedding , target_embedding ], axis = 0)                
            X_test.append(embedding)

            embedding = np.concatenate([response_embedding , target_embedding, target_user_embeddings, response_user_embeddings ], axis = 0) 
            X_test_node_embedding.append(embedding)            

            embedding = np.concatenate([response_embedding , target_embedding, np.asarray(stance_embedding_size*[target_user_stance]), np.asarray(stance_embedding_size*[response_user_stance]) ], axis = 0) 
            X_test_stance_embedding.append(embedding)            

            embedding = np.concatenate([response_embedding , target_embedding, target_user_embeddings, response_user_embeddings, np.asarray(stance_embedding_size*[target_user_stance]), np.asarray(stance_embedding_size*[response_user_stance]) ], axis = 0) 
            X_test_node_stance_embedding.append(embedding)  

            y_test.append(label)
            
            if label == 'Support':
                y_test_sentiment.append(1)

            elif label == 'Denial':
                y_test_sentiment.append(-1)
            else:
                y_test_sentiment.append(0)

            
            vader_sentiment_test.append( test_row['response_text_sentiment_vader'])
            blob_sentiment_test.append( test_row['response_text_sentiment_textbolb'])
            

            test_count += 1    


        print(len(X_train), len(y_train), len(X_test), len(y_test))
        
        ## 1. Random classifier
        
        preds, y_test_out = random_classifier(X_train, y_train, X_test, y_test)
        fscore = f1_score(preds, y_test_out , average='macro')
        print('Random Classifier - fscore: ', fscore)    
        f_scores_random.append(fscore)
        
        ## 2. Majority classifier      
        
        preds, y_test_out = majority_classifier(X_train, y_train, X_test, y_test)
        fscore = f1_score(preds, y_test_out , average='macro')
        print('Majority Classifier - fscore: ', fscore)    
        f_scores_majority.append(fscore)
        
        ## 3. Vader classifier                
        
        fscore = f1_score(vader_sentiment_test, y_test_sentiment , average='macro')
        print('Vader Sentiment Classifier - fscore: ', fscore)    
        f_scores_vader.append(fscore)

        
        
        ## 4. TextBlob classifier
        
        fscore = f1_score(blob_sentiment_test, y_test_sentiment , average='macro')
        print('TextBlob Sentiment Classifier - fscore: ', fscore)    
        f_scores_textblob.append(fscore)        
        
        
        
        ## 5. Baseline Supervised Classifiers
        
        sgd_text_clf.fit(train_replies,  y_train)

        preds = sgd_text_clf.predict(y_test)
        fscore = f1_score(preds, y_test , average='macro')
        print('SVM Text  Model - fscore: ', fscore)
        f_scores_sgd.append(fscore)
        
        
        mlp_text_clf.fit(train_replies,  y_train)

        preds = sgd_text_clf.predict(y_test)
        fscore = f1_score(preds, y_test , average='macro')
        print('MLP Text  Model - fscore: ', fscore)
        f_scores_mlp.append(fscore)
        
                
        
        ## 6. Open AI GPT - Text Embedding Models

    
        mlp_clf.fit(X_train, y_train)
        preds = mlp_clf.predict(X_test)
        fscore = f1_score(preds, y_test , average='macro')
        print('Text Embedding + MLP Model - fscore: ', fscore)
        f_scores_sgd_embeddings.append(fscore)
        
        
        sgd_clf.fit(X_train, y_train)
        preds = sgd_clf.predict(X_test)        
        fscore = f1_score(preds, y_test , average='macro')
        print('Text Embedding + MLP Model - fscore: ', fscore)
        f_scores_mlp_embeddings.append(fscore)        
    
    
        preds, y_test_out = NN_train(X_train, y_train, X_test, y_test)
        fscore = f1_score(preds, y_test_out , average='macro')
        print('Text Embeddings + User Graph Embeddings - fscore: ', fscore)
        f_scores_nn_embeddings.append(fscore)
        
        ## 7. Open AI GPT Text Embeddings + User Embeddings        

        preds, y_test_out = NN_train(X_train_node_embedding, y_train, X_test_node_embedding, y_test)
        fscore = f1_score(preds, y_test_out , average='macro')
        print('Text Embeddings + User Graph Embeddings - fscore: ', fscore)
        f_scores_node_embedding.append(fscore)

        preds, y_test_out = NN_train(X_train_stance_embedding, y_train, X_test_stance_embedding, y_test)    
        fscore = f1_score(preds, y_test_out , average='macro')
        print('Text Embeddings + User Opinion Embeddings - fscore: ', fscore)
        f_scores_stance_embedding.append(fscore)


        preds, y_test_out = NN_train(X_train_node_stance_embedding, y_train, X_test_node_stance_embedding, y_test)
        fscore = f1_score(preds, y_test_out , average='macro')
        print('fText Embeddings + User Graph + Opinion Embeddings -  score: ', fscore)
        f_scores_node_stance_embedding.append(fscore)

    print(event, ' Random based: ', np.mean(f_scores_random))          
    print(event, ' Majority based: ', np.mean(f_scores_majority))      
    print(event, ' Vader Sentiment based: ', np.mean(f_scores_vader))      
    print(event, ' Textblob Sentiment based: ', np.mean(f_scores_textblob))      
    print(event, ' SVM Text based: ', np.mean(f_scores_sgd))          
    print(event, ' MLP Text based: ', np.mean(f_scores_mlp))     
    print(event, ' Text Emdedding MLP based: ', np.mean(f_scores_mlp_embeddings))     
    print(event, ' Text Embeddings SGD based: ', np.mean(f_scores_sgd_embeddings))         
    print(event, ' Text Embeddings NN based: ', np.mean(f_scores_nn_embeddings))             

    print(event, ' Text + Node embedding based: ', np.mean(f_scores_node_embedding))      
    print(event, ' Text + Stance emdedding based: ', np.mean(f_scores_stance_embedding))      
    print(event, ' Text + Node + Stance based: ', np.mean(f_scores_node_stance_embedding))  
    
    event_fscores[event] = {'f_scores_random': f_scores_random, 
                            'f_scores_majority': f_scores_majority, 
                            'f_scores_vader': f_scores_vader,
                            'f_scores_textblob': f_scores_textblob,
                            'f_scores_sgd': f_scores_sgd, 
                            'f_scores_mlp':f_scores_mlp, 
                            'f_scores_mlp_embeddings': f_scores_mlp_embeddings,
                            'f_scores_sgd_embeddings': f_scores_sgd_embeddings, 
                            'f_scores_nn_embeddings': f_scores_nn_embeddings,
                            'f_scores_node_embedding': f_scores_node_embedding, 
                            'f_scores_stance_embedding': f_scores_stance_embedding, 
                            'f_scores_node_stance_embedding': f_scores_node_stance_embedding}
    
    
    
           

Iran_Deal
646 646 277 277
Random Classifier - fscore:  0.4583072590738423
Majority Classifier - fscore:  0.3436018957345972
Vader Sentiment Classifier - fscore:  0.4658134642356241
TextBlob Sentiment Classifier - fscore:  0.5483597469510207
SVM Text  Model - fscore:  0.0
MLP Text  Model - fscore:  0.0




Text Embedding + MLP Model - fscore:  0.7472100542344597
Text Embedding + MLP Model - fscore:  0.764113246603519


  X_train = torch.Tensor(X_train)


Text Embeddings + User Graph Embeddings - fscore:  0.714563360073045
Text Embeddings + User Graph Embeddings - fscore:  0.7251984126984127
Text Embeddings + User Opinion Embeddings - fscore:  0.7292418772563177
fText Embeddings + User Graph + Opinion Embeddings -  score:  0.7328206465067779
646 646 277 277
Random Classifier - fscore:  0.46795499980400607
Majority Classifier - fscore:  0.3357314148681055
Vader Sentiment Classifier - fscore:  0.4863472291032628
TextBlob Sentiment Classifier - fscore:  0.5128871391076115
SVM Text  Model - fscore:  0.0
MLP Text  Model - fscore:  0.0




Text Embedding + MLP Model - fscore:  0.7472627737226277
Text Embedding + MLP Model - fscore:  0.7031082529474812




Text Embeddings + User Graph Embeddings - fscore:  0.7430258842590778
Text Embeddings + User Graph Embeddings - fscore:  0.7397975159169189
Text Embeddings + User Opinion Embeddings - fscore:  0.7544838373305527
fText Embeddings + User Graph + Opinion Embeddings -  score:  0.7576679506430763
646 646 277 277
Random Classifier - fscore:  0.5085064189541801
Majority Classifier - fscore:  0.3466981132075472
Vader Sentiment Classifier - fscore:  0.4172510518934081
TextBlob Sentiment Classifier - fscore:  0.537851929092805
SVM Text  Model - fscore:  0.0




MLP Text  Model - fscore:  0.0


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Text Embedding + MLP Model - fscore:  0.7324300334168755
Text Embedding + MLP Model - fscore:  0.7455380577427821
Text Embeddings + User Graph Embeddings - fscore:  0.7066023302342004
Text Embeddings + User Graph Embeddings - fscore:  0.6964304352364055
Text Embeddings + User Opinion Embeddings - fscore:  0.7500686516809856
fText Embeddings + User Graph + Opinion Embeddings -  score:  0.7460978420280745
646 646 277 277
Random Classifier - fscore:  0.5414923427826654
Majority Classifier - fscore:  0.33890214797136037
Vader Sentiment Classifier - fscore:  0.45798802946593
TextBlob Sentiment Classifier - fscore:  0.49777603860953495
SVM Text  Model - fscore:  0.0
MLP Text  Model - fscore:  0.0




Text Embedding + MLP Model - fscore:  0.7036582820164909
Text Embedding + MLP Model - fscore:  0.7170804525455687




Text Embeddings + User Graph Embeddings - fscore:  0.6724219489120151
Text Embeddings + User Graph Embeddings - fscore:  0.6748869166150153
Text Embeddings + User Opinion Embeddings - fscore:  0.7250261233019855
fText Embeddings + User Graph + Opinion Embeddings -  score:  0.7362420922193962
646 646 277 277
Random Classifier - fscore:  0.5516395154553049
Majority Classifier - fscore:  0.351288056206089
Vader Sentiment Classifier - fscore:  0.47630863759896025
TextBlob Sentiment Classifier - fscore:  0.5592853416797079
SVM Text  Model - fscore:  0.0


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


MLP Text  Model - fscore:  0.0
Text Embedding + MLP Model - fscore:  0.7253418223567476
Text Embedding + MLP Model - fscore:  0.7539707419017763




Text Embeddings + User Graph Embeddings - fscore:  0.7219636828177753
Text Embeddings + User Graph Embeddings - fscore:  0.7219636828177753
Text Embeddings + User Opinion Embeddings - fscore:  0.732262277951933
fText Embeddings + User Graph + Opinion Embeddings -  score:  0.7544838373305527
646 646 277 277
Random Classifier - fscore:  0.5434065934065935
Majority Classifier - fscore:  0.3543123543123543
Vader Sentiment Classifier - fscore:  0.462518329803149
TextBlob Sentiment Classifier - fscore:  0.5449780976220275
SVM Text  Model - fscore:  0.3543123543123543




MLP Text  Model - fscore:  0.3543123543123543
Text Embedding + MLP Model - fscore:  0.7753270538984824
Text Embedding + MLP Model - fscore:  0.7748794043624161




Text Embeddings + User Graph Embeddings - fscore:  0.7544326241134751
Text Embeddings + User Graph Embeddings - fscore:  0.768079539508111
Text Embeddings + User Opinion Embeddings - fscore:  0.8013508403635273
fText Embeddings + User Graph + Opinion Embeddings -  score:  0.8013508403635273
646 646 277 277
Random Classifier - fscore:  0.5071690214547357
Majority Classifier - fscore:  0.3436018957345972
Vader Sentiment Classifier - fscore:  0.49768441022207344
TextBlob Sentiment Classifier - fscore:  0.5289115646258503
SVM Text  Model - fscore:  0.32273838630806845




MLP Text  Model - fscore:  0.32273838630806845
Text Embedding + MLP Model - fscore:  0.6668932343406881
Text Embedding + MLP Model - fscore:  0.6509423624808239




Text Embeddings + User Graph Embeddings - fscore:  0.6893322900365154
Text Embeddings + User Graph Embeddings - fscore:  0.6930767927209209
Text Embeddings + User Opinion Embeddings - fscore:  0.6893322900365154
fText Embeddings + User Graph + Opinion Embeddings -  score:  0.707565982404692
646 646 277 277
Random Classifier - fscore:  0.5269771746640075
Majority Classifier - fscore:  0.3341346153846154
Vader Sentiment Classifier - fscore:  0.536885302529949
TextBlob Sentiment Classifier - fscore:  0.5399814313548572
SVM Text  Model - fscore:  0.3325301204819277
MLP Text  Model - fscore:  0.3325301204819277


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Text Embedding + MLP Model - fscore:  0.7470253626970045
Text Embedding + MLP Model - fscore:  0.7544326241134751
Text Embeddings + User Graph Embeddings - fscore:  0.7147419602935616
Text Embeddings + User Graph Embeddings - fscore:  0.7147419602935617
Text Embeddings + User Opinion Embeddings - fscore:  0.7761470281543275
fText Embeddings + User Graph + Opinion Embeddings -  score:  0.7650367354395856
646 646 277 277
Random Classifier - fscore:  0.5376108502869066
Majority Classifier - fscore:  0.3482352941176471
Vader Sentiment Classifier - fscore:  0.47851605758582505
TextBlob Sentiment Classifier - fscore:  0.5889722007012199
SVM Text  Model - fscore:  0.31773399014778325




MLP Text  Model - fscore:  0.31773399014778325


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Text Embedding + MLP Model - fscore:  0.7615805946791863
Text Embedding + MLP Model - fscore:  0.7535845107273679
Text Embeddings + User Graph Embeddings - fscore:  0.7035035505430243
Text Embeddings + User Graph Embeddings - fscore:  0.7394984326018809
Text Embeddings + User Opinion Embeddings - fscore:  0.7430258842590778
fText Embeddings + User Graph + Opinion Embeddings -  score:  0.7470253626970045
646 646 277 277
Random Classifier - fscore:  0.5374178060745225
Majority Classifier - fscore:  0.3558139534883721
Vader Sentiment Classifier - fscore:  0.4863022992838296
TextBlob Sentiment Classifier - fscore:  0.5558640647608619
SVM Text  Model - fscore:  0.0




MLP Text  Model - fscore:  0.0
Text Embedding + MLP Model - fscore:  0.7322622779519332
Text Embedding + MLP Model - fscore:  0.7002203590940501




Text Embeddings + User Graph Embeddings - fscore:  0.7216589019822783
Text Embeddings + User Graph Embeddings - fscore:  0.7019160104986877
Text Embeddings + User Opinion Embeddings - fscore:  0.6850608143839239
fText Embeddings + User Graph + Opinion Embeddings -  score:  0.7109437949937256
Iran_Deal  Random based:  0.5180481981956764
Iran_Deal  Majority based:  0.34523197410252854
Iran_Deal  Vader Sentiment based:  0.47656148117220115
Iran_Deal  Textblob Sentiment based:  0.5414867554505498
Iran_Deal  SVM Text based:  0.13273148512501337
Iran_Deal  MLP Text based:  0.13273148512501337
Iran_Deal  Text Emdedding MLP based:  0.7317870012519261
Iran_Deal  Text Embeddings SGD based:  0.7338991489314496
Iran_Deal  Text Embeddings NN based:  0.7142246533264969
Iran_Deal  Text + Node embedding based:  0.7175589698907691
Iran_Deal  Text + Stance emdedding based:  0.7385999624719146
Iran_Deal  Text + Node + Stance based:  0.7459235084626412
Student_Marches
397 397 171 171
Random Classifier - f



Text Embedding + MLP Model - fscore:  0.7426108374384235
Text Embedding + MLP Model - fscore:  0.7768543956043956




Text Embeddings + User Graph Embeddings - fscore:  0.7777701778385773
Text Embeddings + User Graph Embeddings - fscore:  0.7355397463656046
Text Embeddings + User Opinion Embeddings - fscore:  0.8067796610169492
fText Embeddings + User Graph + Opinion Embeddings -  score:  0.7833590138674884
397 397 171 171
Random Classifier - fscore:  0.5026518391787853
Majority Classifier - fscore:  0.35714285714285715
Vader Sentiment Classifier - fscore:  0.4710121192269898
TextBlob Sentiment Classifier - fscore:  0.5079473828446149
SVM Text  Model - fscore:  0.30769230769230765




MLP Text  Model - fscore:  0.30769230769230765
Text Embedding + MLP Model - fscore:  0.7597079891695515




Text Embedding + MLP Model - fscore:  0.712821743153854
Text Embeddings + User Graph Embeddings - fscore:  0.7939404372525392
Text Embeddings + User Graph Embeddings - fscore:  0.7878411910669976
Text Embeddings + User Opinion Embeddings - fscore:  0.7799840038947038
fText Embeddings + User Graph + Opinion Embeddings -  score:  0.8123456790123457
397 397 171 171
Random Classifier - fscore:  0.4962318443409154
Majority Classifier - fscore:  0.3473282442748092
Vader Sentiment Classifier - fscore:  0.48684210526315785
TextBlob Sentiment Classifier - fscore:  0.43150426705967027
SVM Text  Model - fscore:  0.3473282442748092
MLP Text  Model - fscore:  0.3473282442748092




Text Embedding + MLP Model - fscore:  0.7498606854276957
Text Embedding + MLP Model - fscore:  0.6857598557608959




Text Embeddings + User Graph Embeddings - fscore:  0.7498606854276957
Text Embeddings + User Graph Embeddings - fscore:  0.7519342359767892
Text Embeddings + User Opinion Embeddings - fscore:  0.7785051636618239
fText Embeddings + User Graph + Opinion Embeddings -  score:  0.8048552754435108
397 397 171 171
Random Classifier - fscore:  0.4887101763068358
Majority Classifier - fscore:  0.3643122676579926
Vader Sentiment Classifier - fscore:  0.48155293932354215
TextBlob Sentiment Classifier - fscore:  0.4671052631578947
SVM Text  Model - fscore:  0.3643122676579926
MLP Text  Model - fscore:  0.3643122676579926




Text Embedding + MLP Model - fscore:  0.747112676056338
Text Embedding + MLP Model - fscore:  0.7537037037037035




Text Embeddings + User Graph Embeddings - fscore:  0.7750000000000001
Text Embeddings + User Graph Embeddings - fscore:  0.7912087912087913
Text Embeddings + User Opinion Embeddings - fscore:  0.8144232452301767
fText Embeddings + User Graph + Opinion Embeddings -  score:  0.8087783058428851
397 397 171 171
Random Classifier - fscore:  0.5782406138668128
Majority Classifier - fscore:  0.3666666666666667
Vader Sentiment Classifier - fscore:  0.4941637352094659
TextBlob Sentiment Classifier - fscore:  0.45494739006751894
SVM Text  Model - fscore:  0.3666666666666667




MLP Text  Model - fscore:  0.3666666666666667
Text Embedding + MLP Model - fscore:  0.7046836142580823
Text Embedding + MLP Model - fscore:  0.6865857454092748




Text Embeddings + User Graph Embeddings - fscore:  0.7506249999999999
Text Embeddings + User Graph Embeddings - fscore:  0.701713582104867
Text Embeddings + User Opinion Embeddings - fscore:  0.7873722022658193
fText Embeddings + User Graph + Opinion Embeddings -  score:  0.798125
397 397 171 171
Random Classifier - fscore:  0.48451630583721567
Majority Classifier - fscore:  0.3448275862068966
Vader Sentiment Classifier - fscore:  0.48575498575498577
TextBlob Sentiment Classifier - fscore:  0.48522167487684725
SVM Text  Model - fscore:  0.3448275862068966
MLP Text  Model - fscore:  0.3448275862068966




Text Embedding + MLP Model - fscore:  0.7680912473484716
Text Embedding + MLP Model - fscore:  0.7597079891695515




Text Embeddings + User Graph Embeddings - fscore:  0.7463170605485596
Text Embeddings + User Graph Embeddings - fscore:  0.7774047684297067
Text Embeddings + User Opinion Embeddings - fscore:  0.7825549025672749
fText Embeddings + User Graph + Opinion Embeddings -  score:  0.7986842105263159
397 397 171 171
Random Classifier - fscore:  0.4969211822660099
Majority Classifier - fscore:  0.33976833976833976
Vader Sentiment Classifier - fscore:  0.5365853658536587
TextBlob Sentiment Classifier - fscore:  0.47953216374269003
SVM Text  Model - fscore:  0.33976833976833976
MLP Text  Model - fscore:  0.33976833976833976




Text Embedding + MLP Model - fscore:  0.7601026518391787
Text Embedding + MLP Model - fscore:  0.7597079891695513




Text Embeddings + User Graph Embeddings - fscore:  0.32677165354330706
Text Embeddings + User Graph Embeddings - fscore:  0.747986427665627
Text Embeddings + User Opinion Embeddings - fscore:  0.7833590138674884
fText Embeddings + User Graph + Opinion Embeddings -  score:  0.7599383667180277
397 397 171 171
Random Classifier - fscore:  0.5437192118226601
Majority Classifier - fscore:  0.3423076923076923
Vader Sentiment Classifier - fscore:  0.4824455205811138
TextBlob Sentiment Classifier - fscore:  0.4552631578947368
SVM Text  Model - fscore:  0.3423076923076923




MLP Text  Model - fscore:  0.3423076923076923
Text Embedding + MLP Model - fscore:  0.7888888888888888
Text Embedding + MLP Model - fscore:  0.7176664832140891




Text Embeddings + User Graph Embeddings - fscore:  0.7656892299260072
Text Embeddings + User Graph Embeddings - fscore:  0.7825549025672749
Text Embeddings + User Opinion Embeddings - fscore:  0.7541757940854326
fText Embeddings + User Graph + Opinion Embeddings -  score:  0.7774047684297067
397 397 171 171
Random Classifier - fscore:  0.45494739006751894
Majority Classifier - fscore:  0.3522727272727273
Vader Sentiment Classifier - fscore:  0.5308188739034664
TextBlob Sentiment Classifier - fscore:  0.4735221674876847
SVM Text  Model - fscore:  0.3522727272727273
MLP Text  Model - fscore:  0.3522727272727273




Text Embedding + MLP Model - fscore:  0.7855948732237392
Text Embedding + MLP Model - fscore:  0.7597079891695514




Text Embeddings + User Graph Embeddings - fscore:  0.7912087912087912
Text Embeddings + User Graph Embeddings - fscore:  0.7882498624105669
Text Embeddings + User Opinion Embeddings - fscore:  0.8053130929791272
fText Embeddings + User Graph + Opinion Embeddings -  score:  0.798125
397 397 171 171
Random Classifier - fscore:  0.49115846359065574
Majority Classifier - fscore:  0.3595505617977528
Vader Sentiment Classifier - fscore:  0.5308188739034665
TextBlob Sentiment Classifier - fscore:  0.4470968629609246
SVM Text  Model - fscore:  0.3595505617977528
MLP Text  Model - fscore:  0.3595505617977528




Text Embedding + MLP Model - fscore:  0.7365177195685669
Text Embedding + MLP Model - fscore:  0.7368061018572356




Text Embeddings + User Graph Embeddings - fscore:  0.7124669709344222
Text Embeddings + User Graph Embeddings - fscore:  0.7120321682647694
Text Embeddings + User Opinion Embeddings - fscore:  0.7298763736263736
fText Embeddings + User Graph + Opinion Embeddings -  score:  0.7362648661617026
Student_Marches  Random based:  0.5051118598926101
Student_Marches  Majority based:  0.3508807293290287
Student_Marches  Vader Sentiment based:  0.49436234536445445
Student_Marches  Textblob Sentiment based:  0.46641102343333485
Student_Marches  SVM Text based:  0.4124726393645185
Student_Marches  MLP Text based:  0.4124726393645185
Student_Marches  Text Emdedding MLP based:  0.7349321996212103
Student_Marches  Text Embeddings SGD based:  0.7543171183218936
Student_Marches  Text Embeddings NN based:  0.71896500066799
Student_Marches  Text + Node embedding based:  0.7576465676060995
Student_Marches  Text + Stance emdedding based:  0.782234345319517
Student_Marches  Text + Node + Stance based:  0.787



Text Embedding + MLP Model - fscore:  0.7625701172870984
Text Embedding + MLP Model - fscore:  0.6909515717926933




Text Embeddings + User Graph Embeddings - fscore:  0.7328913819479856
Text Embeddings + User Graph Embeddings - fscore:  0.7579645125516685
Text Embeddings + User Opinion Embeddings - fscore:  0.7863188405797101
fText Embeddings + User Graph + Opinion Embeddings -  score:  0.7988479262672811
678 678 291 291
Random Classifier - fscore:  0.49174562159058277
Majority Classifier - fscore:  0.37820512820512814
Vader Sentiment Classifier - fscore:  0.46513595518730305
TextBlob Sentiment Classifier - fscore:  0.5382910434203355
SVM Text  Model - fscore:  0.37820512820512814




MLP Text  Model - fscore:  0.37820512820512814
Text Embedding + MLP Model - fscore:  0.7822725458898643
Text Embedding + MLP Model - fscore:  0.6109924555553848




Text Embeddings + User Graph Embeddings - fscore:  0.7620428189116861
Text Embeddings + User Graph Embeddings - fscore:  0.7645191409897292
Text Embeddings + User Opinion Embeddings - fscore:  0.8136453201970444
fText Embeddings + User Graph + Opinion Embeddings -  score:  0.8156432748538012
678 678 291 291
Random Classifier - fscore:  0.463067674543182
Majority Classifier - fscore:  0.38993710691823896
Vader Sentiment Classifier - fscore:  0.4504041701291833
TextBlob Sentiment Classifier - fscore:  0.4868227686881551
SVM Text  Model - fscore:  0.38993710691823896




MLP Text  Model - fscore:  0.38993710691823896
Text Embedding + MLP Model - fscore:  0.7881095029445204
Text Embedding + MLP Model - fscore:  0.736563750714694




Text Embeddings + User Graph Embeddings - fscore:  0.7757532067216864
Text Embeddings + User Graph Embeddings - fscore:  0.7726333299727794
Text Embeddings + User Opinion Embeddings - fscore:  0.8037711363029123
fText Embeddings + User Graph + Opinion Embeddings -  score:  0.8004399301287443
678 678 291 291
Random Classifier - fscore:  0.5033122068249364
Majority Classifier - fscore:  0.3847780126849894
Vader Sentiment Classifier - fscore:  0.4788457956645701
TextBlob Sentiment Classifier - fscore:  0.49106585947532566
SVM Text  Model - fscore:  0.3847780126849894
MLP Text  Model - fscore:  0.3847780126849894


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Text Embedding + MLP Model - fscore:  0.7680738379267791
Text Embedding + MLP Model - fscore:  0.673591370753369
Text Embeddings + User Graph Embeddings - fscore:  0.7811235605353252
Text Embeddings + User Graph Embeddings - fscore:  0.7512820512820513
Text Embeddings + User Opinion Embeddings - fscore:  0.7675624450010153
fText Embeddings + User Graph + Opinion Embeddings -  score:  0.7754416841535234
678 678 291 291
Random Classifier - fscore:  0.48866203664965274
Majority Classifier - fscore:  0.37820512820512814
Vader Sentiment Classifier - fscore:  0.4635743724241289
TextBlob Sentiment Classifier - fscore:  0.45811357929766816
SVM Text  Model - fscore:  0.37820512820512814




MLP Text  Model - fscore:  0.37820512820512814
Text Embedding + MLP Model - fscore:  0.7782704822813746
Text Embedding + MLP Model - fscore:  0.7788212560386474




Text Embeddings + User Graph Embeddings - fscore:  0.7652788980579512
Text Embeddings + User Graph Embeddings - fscore:  0.7693875240575117
Text Embeddings + User Opinion Embeddings - fscore:  0.8313043478260871
fText Embeddings + User Graph + Opinion Embeddings -  score:  0.8125185845970859
678 678 291 291
Random Classifier - fscore:  0.5087578569290632
Majority Classifier - fscore:  0.3834745762711864
Vader Sentiment Classifier - fscore:  0.49194141836808314
TextBlob Sentiment Classifier - fscore:  0.47599250710012697
SVM Text  Model - fscore:  0.3834745762711864




MLP Text  Model - fscore:  0.3834745762711864
Text Embedding + MLP Model - fscore:  0.7409947643979057
Text Embedding + MLP Model - fscore:  0.7465848681388669




Text Embeddings + User Graph Embeddings - fscore:  0.7328913819479856
Text Embeddings + User Graph Embeddings - fscore:  0.7200379696499353
Text Embeddings + User Opinion Embeddings - fscore:  0.8126254655640426
fText Embeddings + User Graph + Opinion Embeddings -  score:  0.8028660760812923
678 678 291 291
Random Classifier - fscore:  0.4369087434304826
Majority Classifier - fscore:  0.39375000000000004
Vader Sentiment Classifier - fscore:  0.5031653697967817
TextBlob Sentiment Classifier - fscore:  0.5092871329177961
SVM Text  Model - fscore:  0.39375000000000004




MLP Text  Model - fscore:  0.39375000000000004
Text Embedding + MLP Model - fscore:  0.7900074786464961
Text Embedding + MLP Model - fscore:  0.6583683118637526




Text Embeddings + User Graph Embeddings - fscore:  0.774599358974359
Text Embeddings + User Graph Embeddings - fscore:  0.7692787605443414
Text Embeddings + User Opinion Embeddings - fscore:  0.812962772521596
fText Embeddings + User Graph + Opinion Embeddings -  score:  0.795284002084419
678 678 291 291
Random Classifier - fscore:  0.43960515957124685
Majority Classifier - fscore:  0.3860759493670886
Vader Sentiment Classifier - fscore:  0.4538303938564695
TextBlob Sentiment Classifier - fscore:  0.5408915795266923
SVM Text  Model - fscore:  0.3860759493670886
MLP Text  Model - fscore:  0.3860759493670886


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


Text Embedding + MLP Model - fscore:  0.8059926789690177
Text Embedding + MLP Model - fscore:  0.7998821102269378
Text Embeddings + User Graph Embeddings - fscore:  0.7882487882487883
Text Embeddings + User Graph Embeddings - fscore:  0.7755553270259152
Text Embeddings + User Opinion Embeddings - fscore:  0.7930719219829339
fText Embeddings + User Graph + Opinion Embeddings -  score:  0.7953792064289302
678 678 291 291
Random Classifier - fscore:  0.5613695090439277
Majority Classifier - fscore:  0.37553648068669526
Vader Sentiment Classifier - fscore:  0.5023672569498424
TextBlob Sentiment Classifier - fscore:  0.48910533910533915
SVM Text  Model - fscore:  0.37553648068669526




MLP Text  Model - fscore:  0.37553648068669526
Text Embedding + MLP Model - fscore:  0.7941676490662034
Text Embedding + MLP Model - fscore:  0.7685373398144056




Text Embeddings + User Graph Embeddings - fscore:  0.7829869742467933
Text Embeddings + User Graph Embeddings - fscore:  0.784029586022114
Text Embeddings + User Opinion Embeddings - fscore:  0.8222835645886262
fText Embeddings + User Graph + Opinion Embeddings -  score:  0.7971381397908641
678 678 291 291
Random Classifier - fscore:  0.48394363496932513
Majority Classifier - fscore:  0.3860759493670886
Vader Sentiment Classifier - fscore:  0.4722967403521918
TextBlob Sentiment Classifier - fscore:  0.4825556441228769
SVM Text  Model - fscore:  0.3860759493670886
MLP Text  Model - fscore:  0.3860759493670886




Text Embedding + MLP Model - fscore:  0.7806747486618719
Text Embedding + MLP Model - fscore:  0.670855850194797




Text Embeddings + User Graph Embeddings - fscore:  0.7655746509129968
Text Embeddings + User Graph Embeddings - fscore:  0.7815565479598459
Text Embeddings + User Opinion Embeddings - fscore:  0.8078524989841529
fText Embeddings + User Graph + Opinion Embeddings -  score:  0.7913978494623656
Santa_Fe_Shooting  Random based:  0.4901470567650524
Santa_Fe_Shooting  Majority based:  0.383951290797673
Santa_Fe_Shooting  Vader Sentiment based:  0.4785422670201225
Santa_Fe_Shooting  Textblob Sentiment based:  0.4954681097777193
Santa_Fe_Shooting  SVM Text based:  0.383951290797673
Santa_Fe_Shooting  MLP Text based:  0.383951290797673
Santa_Fe_Shooting  Text Emdedding MLP based:  0.7135148885093547
Santa_Fe_Shooting  Text Embeddings SGD based:  0.7791133806071132
Santa_Fe_Shooting  Text Embeddings NN based:  0.7661391020505558
Santa_Fe_Shooting  Text + Node embedding based:  0.7646244750055892
Santa_Fe_Shooting  Text + Stance emdedding based:  0.8051398313548122
Santa_Fe_Shooting  Text + Node 

In [12]:
import pickle 


with open('./data/event_fscores_2_class.pkl', 'wb') as f:
    pickle.dump(event_fscores, f)
    


In [15]:
## Put results in a format for the paper

for result_type in ['f_scores_random', 
                    'f_scores_majority', 
                    'f_scores_vader',
                    'f_scores_textblob',
                    'f_scores_sgd', 
                    'f_scores_mlp', 
                    'f_scores_mlp_embeddings',
                    'f_scores_sgd_embeddings', 
                    'f_scores_nn_embeddings',
                    'f_scores_node_embedding', 
                    'f_scores_stance_embedding', 
                    'f_scores_node_stance_embedding']:


    print("{}&{:.2f}&{:.2f}&{:.2f}&{:.2f}".format(result_type,
                                                  np.mean(event_fscores['Student_Marches'][result_type]),
                                                np.mean(event_fscores['Santa_Fe_Shooting'][result_type]), 
                                                np.mean(event_fscores['Iran_Deal'][result_type]),
                                                np.mean([np.mean(event_fscores['Student_Marches'][result_type]),
                                                         np.mean(event_fscores['Santa_Fe_Shooting'][result_type]), 
                                                         np.mean(event_fscores['Iran_Deal'][result_type])])
                                              ))
    
    


f_scores_random&0.51&0.49&0.52&0.50
f_scores_majority&0.35&0.38&0.35&0.36
f_scores_vader&0.49&0.48&0.48&0.48
f_scores_textblob&0.47&0.50&0.54&0.50
f_scores_sgd&0.41&0.38&0.13&0.31
f_scores_mlp&0.41&0.38&0.13&0.31
f_scores_mlp_embeddings&0.73&0.71&0.73&0.73
f_scores_sgd_embeddings&0.75&0.78&0.73&0.76
f_scores_nn_embeddings&0.72&0.77&0.71&0.73
f_scores_node_embedding&0.76&0.76&0.72&0.75
f_scores_stance_embedding&0.78&0.81&0.74&0.78
f_scores_node_stance_embedding&0.79&0.80&0.75&0.78
