<a href="https://colab.research.google.com/github/Confirmation-Bias-Analyser/Main-Program/blob/main/Main%20Program%20(Notebook_Version).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
%%capture
!pip install sentence_transformers
!pip install vaderSentiment
!pip install anytree
!pip install dash
!pip install jupyter-dash
!pip install pyvis

# Import essential libraries and functions

In [None]:
%%capture

from transformers import BertTokenizer, TFBertForSequenceClassification, InputExample, InputFeatures
import pandas as pd
import numpy as np
from google.colab import files
import random
import re

# The shutil module offers a number of high-level operations on files and collections of files.
import os
import shutil

import sys
mainDirectory = "/content/drive/MyDrive/FYP/Analyser/"
sys.path.append(mainDirectory + 'Functions/')
from confirmation_bias_model_functions import *
from data_collection_functions import *
from data_preprocessing_functions import *
from visualisation_functions import *
from verification_functions import *

# from textblob import TextBlob
import matplotlib.pyplot as plt
from wordcloud import WordCloud
from wordcloud import STOPWORDS

from jupyter_dash import JupyterDash
from dash import Dash, html, Input, Output, dash_table, dcc
from dash.dependencies import Input, Output
from IPython.core.display import display, HTML

# Authenticate API keys

In [None]:
# Read the Twitter API bearer token from the project folder and build the
# request headers used by the data-collection helpers.
with open(mainDirectory + 'twitter_bearer_token.txt', 'r', encoding="utf8") as f:
    # Strip surrounding whitespace: a trailing newline saved by an editor would
    # otherwise be passed on as part of the token.
    token = f.read().strip()

header = create_Twitter_headers(token)


# Functions


In [None]:
############# Data Processing #############

from anytree import Node, RenderTree, search
import re
import urllib
from urllib.parse import urlparse
import os

def getTweetComments(conversation_data):
    """Collect id, timestamp, parent id and text of every tweet in a conversation.

    Each tweet is also echoed to stdout as it is processed.

    Args:
        conversation_data: iterable of tweet dicts exposing the keys ``id``,
            ``user`` (with ``created_at``), ``in_reply_to_status_id`` and ``text``.

    Returns:
        dict with the keys ``id``, ``timestamp``, ``reply_to`` and ``comment``,
        each mapping to a list with one entry per tweet, in input order.
    """
    ids, timestamps, parents, texts = [], [], [], []

    for tweet in conversation_data:
        print('User ID:', tweet['id'], 'Time:', tweet['user']['created_at'])
        print('In reply to:', tweet['in_reply_to_status_id'])
        print(tweet['text'], '\n')

        ids.append(tweet['id'])
        # NOTE(review): this is the `created_at` nested under `user`; confirm it
        # is the timestamp that is actually wanted here.
        timestamps.append(tweet['user']['created_at'])
        parents.append(tweet['in_reply_to_status_id'])
        texts.append(tweet['text'])

    return {'id': ids, 'timestamp': timestamps, 'reply_to': parents, 'comment': texts}

def getLinks(string, timeout=10):
    """Extract the http(s) links in a text and resolve them to their final URLs.

    Every link found is opened so that redirects are followed. If the request
    fails for any reason, the link is kept exactly as it appeared in the text.
    Anything from a markdown ``](`` separator onwards is cut off.

    Args:
        string: text to scan for links.
        timeout: seconds to wait for each request before falling back to the
            original link.

    Returns:
        str: the links in order of appearance, each followed by ``';'``
        (an empty string when the text contains no link).
    """
    # `import urllib` alone does not guarantee that the `request` submodule is
    # loaded, so import it explicitly where it is used.
    import urllib.request

    urls = re.findall(r"http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+", string)
    resolved = []

    for url in urls:
        try:
            # The context manager closes the connection once the final URL is known.
            with urllib.request.build_opener().open(url, timeout=timeout) as response:
                final_url = response.geturl()

        except Exception:
            # Best effort: unreachable or malformed links are kept unresolved.
            # (Unlike a bare `except`, this still lets Ctrl-C interrupt the run.)
            final_url = url

        # Markdown links look like [text](url); keep only the part before "](".
        resolved.append(final_url.split('](')[0])

    return ''.join(link + ';' for link in resolved)

def getURLfromList(url):
    """Return the hostname of every link in a ``';'``-separated string.

    Args:
        url: string of links separated by ``';'``.

    Returns:
        list of hostnames (``None`` where a piece has no hostname), or an empty
        string when the input contains no ``';'`` at all.
    """
    if ';' not in url:
        return ''

    # The piece after the last ';' is dropped (it is empty when the string
    # ends with ';').
    pieces = url.split(';')[:-1]
    return [urlparse(piece).hostname for piece in pieces]

def printDetailsPHEME(threads, data, base_path='/content/all-rnr-annotated-threads/'):
    """Print how many rumour and non-rumour entries the given threads contain.

    Args:
        threads: names of the thread folders located under ``base_path``.
        data: folder names to inspect inside every thread; ``data[0]`` is
            counted as non-rumours and ``data[1]`` as rumours.
        base_path: directory that holds the thread folders.

    The thread names are printed as they are visited, followed by the totals
    accumulated over all threads. Entries whose name starts with ``'.'`` are
    not counted.
    """
    rumours = 0
    non_rumours = 0

    for thread in threads:
        path = os.path.join(base_path, thread)
        print(thread)

        # Visit each category folder exactly once per thread. (Previously this
        # was nested in a loop over the thread folder's own entries, which
        # multiplied both totals by the number of those entries.)
        for folder in data:
            visible = sum(
                1
                for entry in os.listdir(os.path.join(path, folder))
                if not entry.startswith('.')
            )

            if folder == data[0]:
                non_rumours += visible

            elif folder == data[1]:
                rumours += visible

    print('Rumours:', rumours)
    print('Non-rumours:', non_rumours)
    print()
	
def traceConversation(dataframe, tree, node, printGraphOption = True):
    """Select the rows that belong to the conversation below ``node``.

    Args:
        dataframe: comments table with ``id`` and ``reply_to`` columns.
        tree: anytree root that is searched for ``node``.
        node: name of the tree node at which the conversation starts.
        printGraphOption: when true, the subtree is printed with ``printGraph``.

    Returns:
        tuple: (rows whose ``reply_to`` is a descendant of ``node`` or whose
        ``id`` is ``node`` or one of its descendants, the subtree node found
        for ``node``).
    """
    descendants = getAllChildNodes(tree, node, [])

    print('\n\n')
    subtree = search.find_by_attr(tree, node)

    if printGraphOption:
        printGraph(subtree)

    replies_to_descendant = dataframe['reply_to'].isin(descendants)
    part_of_thread = dataframe['id'].isin(descendants + [node])

    return dataframe[replies_to_descendant | part_of_thread], subtree

def getAllChildNodes(tree, node, children_nodes_list):
    """Append the names of all descendants of ``node`` to ``children_nodes_list``.

    Args:
        tree: anytree root that is searched for ``node``.
        node: name of the node whose descendants are wanted.
        children_nodes_list: list that receives the names (modified in place).

    Returns:
        The same list, extended with the descendant names in pre-order
        (each child followed by its own descendants).

    Raises:
        AttributeError: if no node named ``node`` exists in ``tree``.
    """
    # Look the start node up once and walk its subtree directly. The earlier
    # version re-searched the whole tree for every descendant, and its
    # `children != None` test was always true (children is a tuple), which
    # left the `else: return` branch unreachable.
    start = search.find_by_attr(tree, node)
    children_nodes_list.extend(descendant.name for descendant in start.descendants)

    return children_nodes_list

def cleanComments(comments_array):
    """Normalise raw comments before they are passed to the sentiment models.

    For every comment: newlines, commas and apostrophes become spaces; full
    stops and backslashes are dropped; the HTML entity ``&gt;`` is removed;
    finally user mentions, URLs and every remaining character that is not a
    letter, digit, space or tab are replaced by a single space.

    Args:
        comments_array: iterable of comment strings.

    Returns:
        list of cleaned strings, one per input comment, in the same order.
    """
    # One pass for all single-character substitutions (None removes the char).
    char_table = str.maketrans({'\n': ' ', ',': ' ', "'": ' ', '.': None, '\\': None})

    # Raw string avoids the invalid-escape warnings of the previous pattern.
    # The mention part includes "_" so that handles such as @straits_times are
    # removed as a whole instead of leaving "times" behind.
    noise = re.compile(r"(@[A-Za-z0-9_]+)|([^0-9A-Za-z \t])|(\w+:\/\/\S+)")

    sentences = []

    for comment in comments_array:
        text = comment.translate(char_table)
        text = text.replace('&gt;', '')  # HTML-escaped ">" (quote marker)
        sentences.append(noise.sub(" ", text))

    return sentences
    
def createTweetsTree(dictionary, tree_root):
    """Attach a nested dict of comment ids to ``tree_root`` as anytree nodes.

    Args:
        dictionary: mapping of node name -> nested mapping of its replies
            (the structure returned by ``make_map``).
        tree_root: node under which the top-level keys are attached.
    """
    for name, replies in dictionary.items():
        branch = Node(name, parent=tree_root)

        # Only descend when this comment actually has replies.
        if len(replies) != 0:
            createTweetsTree(replies, branch)

def make_map(list_child_parent):
    """Turn ``(child, parent)`` pairs into a nested dict describing the forest.

    Args:
        list_child_parent: iterable of ``(child, parent)`` tuples.

    Returns:
        dict mapping every item that never appears as a child to the nested
        dict of its children (leaves map to an empty dict).
    """
    subtrees = {}
    children = set()

    for child, parent in list_child_parent:
        parent_subtree = subtrees.setdefault(parent, {})
        child_subtree = subtrees.setdefault(child, {})

        # Share the child's dict so that replies added later show up here too.
        parent_subtree[child] = child_subtree
        children.add(child)

    return {name: subtree for name, subtree in subtrees.items() if name not in children}

def printGraph(root):
    """Print the tree below ``root``, one node name per line, with branch art."""
    for prefix, _, current in RenderTree(root):
        print("%s%s" % (prefix, current.name))

In [None]:
############# Results Verification #############

# from Functions.data_collection_functions import *
# from Functions.confirmation_bias_model_functions import *
import re

# def obtainTweetsAndLikes(userID, header):
#     selectedUserTweets = getTweetsByUserID(userID, header, 100)
#     userLikedTweets = getTweetsLikedByUser(userID, header, 100)
#     allTweets = combineTweets([selectedUserTweets, userLikedTweets])

#     return allTweets

def combineTweets(listOfTweets):
    """Flatten several tweet responses into one list of tweet texts.

    Args:
        listOfTweets: iterable of dicts, each with a ``'data'`` list whose
            items carry a ``'text'`` entry.

    Returns:
        list of the ``'text'`` values, in response order and then item order.
    """
    return [tweet['text'] for response in listOfTweets for tweet in response['data']]

def checkForRepliesToNews(textList, newsAccounts=('@MothershipSG', '@straits_times', '@ChannelNewsAsia', '@YahooSG')):
    """Keep only the texts that mention one of the news accounts.

    Args:
        textList: iterable of tweet texts.
        newsAccounts: handles (including the leading ``@``) to look for.

    Returns:
        list of the texts that mention at least one handle, in input order.
    """
    handles = set(newsAccounts)

    # "_" must be part of the handle pattern: without it "@straits_times" was
    # read as "@straits" and never matched, while "@MothershipSG_x" was read
    # as "@MothershipSG" and matched by mistake.
    mention = re.compile(r"@[A-Za-z0-9_]+")

    return [text for text in textList if not handles.isdisjoint(mention.findall(text))]

def calculateUserBias(tweetsData, embedder, defaultClusterSize = 3):
    """Score every tweet of a user and return the results as a DataFrame.

    Each tweet is cleaned with ``cleanComments``, scored with
    ``getSentimentalResults`` and with the subjectivity classifier (through the
    notebook-level ``model`` and ``tokenizer``), labelled with
    ``defineSubjectivity`` / ``definePolarity`` and assigned a topic cluster.

    Args:
        tweetsData: iterable of raw tweet texts.
        embedder: sentence embedder handed to ``getClusters``.
        defaultClusterSize: number of topic clusters to form.

    Returns:
        pandas.DataFrame with the columns ``tweet``, ``textblob_polarity``,
        ``textblob_subjectivity``, ``vader_compound_score``,
        ``model_subjectivity``, ``overall_subjectivity``, ``overall_polarity``
        and ``topic_cluster``.
    """
    # One tuple per tweet:
    # (tweet, textblob polarity, textblob subjectivity, vader compound, model score)
    scored = []
    cleaned_tweets = []

    for tweet in tweetsData:
        cleaned = cleanComments([tweet])
        cleaned_tweets.append(cleaned[0])
        sentiment = getSentimentalResults(cleaned[0])

        scored.append((
            tweet,
            sentiment['textblob_polarity'],
            sentiment['textblob_subjectivity'],
            sentiment['vader_compound_scores'],
            float(predictFromModel(model, tokenizer, cleaned)),
        ))

    subjectivity_labels = [
        defineSubjectivity(model_score, blob_subjectivity)
        for _, _, blob_subjectivity, _, model_score in scored
    ]
    polarity_labels = [
        definePolarity(blob_polarity, vader_compound)
        for _, blob_polarity, _, vader_compound, _ in scored
    ]

    clusters = getClusters(cleaned_tweets, embedder, defaultClusterSize)

    records = [
        row + (subjectivity, polarity, cluster)
        for row, subjectivity, polarity, cluster in zip(scored, subjectivity_labels, polarity_labels, clusters)
    ]

    return pd.DataFrame(
        records,
        columns =['tweet', 'textblob_polarity', 'textblob_subjectivity', 'vader_compound_score', 'model_subjectivity', 'overall_subjectivity', 'overall_polarity', 'topic_cluster'],
    )

In [None]:
############# Data Visualisation #############

from anytree import Node, RenderTree, search
import networkx as nx
import requests
from pyvis.network import Network

def createNetworkGraph(conversation_tree, head_thread):
    """Build an undirected networkx graph from a conversation tree.

    Args:
        conversation_tree: anytree node whose subtree is converted.
        head_thread: name of the head comment; a parentless node other than
            the head itself is linked to it.

    Returns:
        networkx.Graph with one edge per parent/child pair of the tree.
    """
    G = nx.Graph()
    G.add_node(conversation_tree.name)

    for _, _, node in RenderTree(conversation_tree):
        # Test for the parentless root explicitly rather than relying on a
        # bare `except` to catch the AttributeError raised by `None.name`.
        if node.parent is not None:
            G.add_edge(node.parent.name, node.name)

        elif node.name != head_thread:
            G.add_edge(head_thread, node.name)

    return G
    
def createInterativeNetworkGraph(conversation_tree, head_thread, map_dict, scoreDict):
    """Build an interactive pyvis network from a conversation tree.

    Args:
        conversation_tree: anytree node whose subtree is drawn.
        head_thread: name of the head comment; a parentless node other than
            the head itself is linked to it.
        map_dict: node name -> colour. Nodes without an entry are drawn with
            the default colour.
        scoreDict: node name -> score shown as the node tooltip. When empty,
            no tooltips are added.

    Returns:
        pyvis.network.Network (500px x 500px, notebook mode).
    """
    G = Network("500px", "500px", notebook=True)
    show_scores = len(scoreDict) > 0

    for _, _, node in RenderTree(conversation_tree):
        try:
            extra = {'color': map_dict[node.name]}

            if show_scores:
                extra['title'] = 'Score: ' + str(scoreDict[node.name])

        except (LookupError, TypeError):
            # No colour/score available for this node (or no usable mapping
            # was supplied): draw it plain, as the old bare `except` did.
            extra = {}

        G.add_node(node.name, label=node.name, **extra)

    for _, _, node in RenderTree(conversation_tree):
        # Explicit root test instead of a bare `except` around `None.name`.
        if node.parent is not None:
            G.add_edge(node.parent.name, node.name)

        elif node.name != head_thread:
            G.add_edge(head_thread, node.name)

    return G
    
def getColourNodes(conversationDF):
    """Map every comment id to display colours for the three analysis results.

    Args:
        conversationDF: DataFrame with the columns ``id``,
            ``overall_polarity``, ``overall_subjectivity`` and
            ``potential_bias``.

    Returns:
        tuple of three dicts keyed by comment id:
        ``(polarity_map, subjectivity_map, potential_bias_map)``.
        ``potential_bias_map`` only holds ids whose flag is 1 or 0.
    """
    polarity_colours = {
        'POS': '#00FF80',  # Green colour
        'NEG': '#FF9999',  # Light pink colour
    }
    neutral_or_unknown_polarity = '#FFFF00'  # Yellow colour

    subjectivity_colours = {
        'SUBJECTIVE': '#A9A9A9',  # Light grey colour
        'OBJECTIVE': '#ADD8E6',   # Light blue colour
    }
    unknown_subjectivity = '#FF7F50'  # Red orange colour

    polarity_map = {}
    subjectivity_map = {}
    potential_bias_map = {}

    # Walk the four columns in lockstep. Besides avoiding one `.loc` lookup
    # per cell, this keeps working when the index labels are not unique.
    rows = zip(
        conversationDF['id'],
        conversationDF['overall_polarity'],
        conversationDF['overall_subjectivity'],
        conversationDF['potential_bias'],
    )

    for comment_id, polarity, subjectivity, bias_flag in rows:
        polarity_map[comment_id] = polarity_colours.get(polarity, neutral_or_unknown_polarity)
        subjectivity_map[comment_id] = subjectivity_colours.get(subjectivity, unknown_subjectivity)

        if bias_flag == 1:
            potential_bias_map[comment_id] = 'red'

        elif bias_flag == 0:
            potential_bias_map[comment_id] = 'black'

    return polarity_map, subjectivity_map, potential_bias_map

In [None]:
############# Confirmation Bias Model #############

from textblob import TextBlob
from sklearn.cluster import KMeans
import tensorflow as tf

from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
sid_obj = SentimentIntensityAnalyzer()

from sentence_transformers import SentenceTransformer, util
embedder = SentenceTransformer('all-MiniLM-L6-v2')
	
def calculateBias(dataset, threshold = 0.35):
    """Compute the confirmation-bias score of a set of analysed comments.

    Comments whose VADER compound score lies above ``threshold`` count as
    positive, those below ``-threshold`` as negative. Topic cluster 1 is
    treated as supportive and cluster 0 as unsupportive; every other comment
    is ignored. With

        P(D|H)  = positive supportive / all positive
        P(D|H') = negative supportive / all negative

    the score is the ratio of the two, inverted when necessary so that it
    never exceeds 1.

    Args:
        dataset: DataFrame with ``vader_compound_score`` and ``topic_cluster``
            columns.
        threshold: absolute compound score a comment must exceed to count.

    Returns:
        The ratio described above, or 1 when it cannot be computed because a
        denominator is zero (e.g. no positive or no negative comments).
    """
    scores = dataset['vader_compound_score']
    clusters = dataset['topic_cluster']

    positive = scores > threshold
    negative = scores < -threshold
    supportive = clusters == 1
    unsupportive = clusters == 0

    # int() keeps the divisions below on plain Python ints so that a zero
    # denominator raises ZeroDivisionError instead of producing inf/nan.
    count_positive_polarity_supportive = int((positive & supportive).sum())
    count_negative_polarity_supportive = int((negative & supportive).sum())
    count_positive_polarity_unsupportive = int((positive & unsupportive).sum())
    count_negative_polarity_unsupportive = int((negative & unsupportive).sum())

    # The unused P(D) / P(D') values were dropped: their division ran outside
    # the guard below and crashed whenever no comment was strongly polar.
    try:
        prob_D_H = count_positive_polarity_supportive / (count_positive_polarity_supportive + count_positive_polarity_unsupportive)
        prob_D_Hprime = count_negative_polarity_supportive / (count_negative_polarity_supportive + count_negative_polarity_unsupportive)
        ratio = prob_D_H / prob_D_Hprime

    except ZeroDivisionError:
        return 1

    if ratio > 1:
        return 1 / ratio

    return ratio
        
def getClusters(allSentences, embedder, num_clusters = 2, random_state = None):
    """Assign every sentence to a topic cluster with k-means on its embedding.

    Args:
        allSentences: sequence of sentences to cluster.
        embedder: object whose ``encode`` method turns the sentences into
            embedding vectors.
        num_clusters: number of clusters to form.
        random_state: optional seed passed to ``KMeans`` so that the labels
            are reproducible between runs; ``None`` keeps them random.

    Returns:
        The ``labels_`` array of the fitted model: one cluster index per
        sentence, in input order.
    """
    corpus_embeddings = embedder.encode(allSentences)

    # Perform kmean clustering
    clustering_model = KMeans(n_clusters=num_clusters, random_state=random_state)
    clustering_model.fit(corpus_embeddings)

    # (The per-cluster sentence lists built here before were never used.)
    return clustering_model.labels_

def getSentimentalResults(sentence, vaderObject = sid_obj):
    """Score one sentence with TextBlob and VADER.

    Args:
        sentence: text to analyse.
        vaderObject: analyser providing ``polarity_scores``.

    Returns:
        dict with ``textblob_polarity``, ``textblob_subjectivity``,
        ``vader_results`` (the VADER scores without the compound entry) and
        ``vader_compound_scores`` (the compound entry on its own).
    """
    blob = TextBlob(sentence)
    vader_scores = vaderObject.polarity_scores(sentence)

    # Take the compound score out so that `vader_results` holds the rest only.
    compound = vader_scores.pop('compound')
    blob_sentiment = blob.sentiment

    return {
        'textblob_polarity': blob_sentiment.polarity,
        'textblob_subjectivity': blob_sentiment.subjectivity,
        'vader_results': vader_scores,
        'vader_compound_scores': compound,
    }
    
def predictFromModel(model, tokeniser, data):
    """Run the classifier on a batch of texts and return the class-1 probability.

    Args:
        model: callable model; the first element of its output is softmaxed.
        tokeniser: tokenizer called on ``data`` to build TensorFlow tensors
            (padded, truncated to 128 tokens).
        data: text or list of texts.

    Returns:
        Tensor with the softmax value at index 1 for every input text
        (presumably the "subjective" class - confirm against the training labels).
    """
    encoded = tokeniser(data, max_length=128, padding=True, truncation=True, return_tensors='tf')
    logits = model(encoded)[0]
    class_probabilities = tf.nn.softmax(logits, axis=-1)

    return class_probabilities[:, 1]
    
def polarityDetermination(score, threshold = 0.35):
    """Label a single polarity score.

    Returns ``'POS'`` above ``threshold``, ``'NEG'`` below ``-threshold`` and
    ``'NEU'`` for everything in between (both bounds included).
    """
    if score > threshold:
        return 'POS'

    if score < -1 * threshold:
        return 'NEG'

    return 'NEU'
    
def understandLinks(list_of_links):
    """Print every entry of ``list_of_links`` that is a string; skip the rest."""
    text_entries = (entry for entry in list_of_links if isinstance(entry, str))

    for entry in text_entries:
        print(entry)
            
def definePolarity(score1, score2, threshold = 0.35):
    """Combine two polarity scores into one label.

    Returns:
        ``'POS'`` when both scores exceed ``threshold``, or one exceeds it
        while the other is above 0; ``'NEG'`` for the mirrored case below
        ``-threshold`` / 0; ``'NEU'`` when both lie within
        ``[-threshold, threshold]``; ``'UNKNOWN'`` otherwise (the scores
        disagree).
    """
    upper = threshold
    lower = -1 * threshold

    strongly_positive = (score1 > upper, score2 > upper)
    strongly_negative = (score1 < lower, score2 < lower)

    if all(strongly_positive) or (score1 > 0 and strongly_positive[1]) or (strongly_positive[0] and score2 > 0):
        return 'POS'

    if all(strongly_negative) or (score1 < 0 and strongly_negative[1]) or (strongly_negative[0] and score2 < 0):
        return 'NEG'

    # Both scores are in the neutral range
    if lower <= score1 <= upper and lower <= score2 <= upper:
        return 'NEU'

    return 'UNKNOWN'
        
def defineSubjectivity(score1, score2, threshold = 0.5): # score 1 is model score while score 2 is textblob score
    """Combine the model and TextBlob subjectivity scores into one label.

    Returns:
        ``'SUBJECTIVE'`` when ``score1`` is above ``threshold`` and ``score2``
        is at or above it; ``'OBJECTIVE'`` when ``score1`` is below
        ``threshold`` and ``score2`` is at or below it; ``'UNKNOWN'`` in every
        other case (including ``score1`` exactly on the threshold).
    """
    if score1 > threshold and score2 >= threshold:
        return 'SUBJECTIVE'

    if score1 < threshold and score2 <= threshold:
        return 'OBJECTIVE'

    return 'UNKNOWN'

def getPolarityProportion(df):
    """Return the share of comments labelled positive and negative.

    Args:
        df: DataFrame with an ``overall_polarity`` column.

    Returns:
        dict with the keys ``'positive'`` and ``'negative'``, each the fraction
        of rows labelled ``'POS'`` / ``'NEG'``. Both are 0.0 for an empty
        frame (previously this raised ZeroDivisionError).
    """
    total = len(df)

    if total == 0:
        return {'positive': 0.0, 'negative': 0.0}

    positivePolarity = len(df[df['overall_polarity'] == 'POS'])
    negativePolarity = len(df[df['overall_polarity'] == 'NEG'])

    return {'positive': positivePolarity/total, 'negative': negativePolarity/total}
    
def getSubjectivityProportion(df):
    """Return the share of comments labelled subjective and objective.

    Args:
        df: DataFrame with an ``overall_subjectivity`` column.

    Returns:
        dict with the keys ``'subjecitve'`` and ``'objecitve'`` (the spelling
        is kept as is because callers look the values up by these keys), each
        the fraction of rows labelled ``'SUBJECTIVE'`` / ``'OBJECTIVE'``.
        Both are 0.0 for an empty frame (previously this raised
        ZeroDivisionError).
    """
    total = len(df)

    if total == 0:
        return {'subjecitve': 0.0, 'objecitve': 0.0}

    subjective_count = len(df[df['overall_subjectivity'] == 'SUBJECTIVE'])
    objective_count = len(df[df['overall_subjectivity'] == 'OBJECTIVE'])

    return {'subjecitve': subjective_count/total, 'objecitve': objective_count/total}
    
def flagPotentialBias(df):
    """Flag every comment whose overall polarity is not neutral.

    Args:
        df: DataFrame with an ``overall_polarity`` column.

    Returns:
        list with one entry per row: 1 when the polarity is ``'NEG'`` or
        ``'POS'``, otherwise 0.
    """
    polar_labels = ('NEG', 'POS')

    return [int(row['overall_polarity'] in polar_labels) for _, row in df.iterrows()]

## Read the data and present as a dataframe

In [None]:
# Conversations available for analysis; the first two are read from the
# Twitter comments table, the third from the PHEME table.
tweets_for_analysis = ['1522931750451617793', '1507922082683793408', '553553331671408641']

for position, tweet_id in enumerate(tweets_for_analysis):
    print(position, tweet_id)

tweetOption = 0 #int(input("Please indicate the tweet to analyse.")) - 1
conversationID = tweets_for_analysis[tweetOption]

print(f'\nChosen {conversationID}\n')

# NOTE(review): `uri` is not assigned by any active cell visible in this
# notebook (only the commented-out cell below reads database_uri.txt); it has
# to be defined before this cell runs.
# The ids interpolated into the queries are the constants listed above; switch
# to bound parameters if they ever come from user input.
if tweetOption == 0 or tweetOption == 1:
    query = '''
        select * from comments_for_analysis where conversation_id = '%s'
    '''% conversationID
    
    df = getData(query, uri)

    parent = df['head_id'][0]

    print('About the tweet:')
    print(getSingleTweetInfo(conversationID, header)['data'][0]['text'])

    print('Number of comments:', len(df))

elif tweetOption == 2:
    # PHEME rows are selected by head id, which is the chosen id itself.
    # (`parent` used to be read here before anything had assigned it.)
    parent = conversationID

    query = '''
        select * from pheme_dataset_for_analysis where head_id = '%s'
    '''% parent

    df = getData(query, uri)

    print('Number of comments:', len(df))

# Create directory to save all results (re-running the cell must not fail
# because the directory already exists)
os.makedirs(conversationID, exist_ok=True)

df['url'] = df['comment'].apply(lambda x: getLinks(x))
df['link_title'] = df['url'].apply(lambda x: getURLfromList(x))
df.head()

In [None]:
# with open(mainDirectory + 'database_uri.txt', 'r', encoding="utf8") as f:
#     uri = f.read()

# socialMedia = ['Reddit', 'Twitter', 'PHEME Dataset']

# for i in range(len(socialMedia)):
#     print(i, socialMedia[i])

# socialMediaOption = 1

# print(f'\nChosen {socialMedia[socialMediaOption]}\n')

# if socialMedia[socialMediaOption] == 'Reddit':
#     parent = 'rmqevj'
#     query = '''
#         select * from reddit_posts_for_analysis where head_id = '%s'
#     '''% parent
#     df = getData(query, uri)
    
# elif socialMedia[socialMediaOption] == 'Twitter':
#     # conversationID = '1522931750451617793'
#     conversationID = '1507922082683793408'

#     query = '''
#         select * from comments_for_analysis where conversation_id = '%s'
#     '''% conversationID
    
#     df = getData(query, uri)

#     parent = df['head_id'][0]

#     print('About the tweet:')
#     print(getSingleTweetInfo(conversationID, header)['data'][0]['text'])

#     print('Number of comments:', len(df))

# elif socialMedia[socialMediaOption] == 'PHEME Dataset':
#     parent = '553553331671408641'
#     query = '''
#         select * from pheme_dataset_for_analysis where head_id = '%s'
#     '''% parent
#     df = getData(query, uri)

# df['url'] = df['comment'].apply(lambda x: getLinks(x))
# df['link_title'] = df['url'].apply(lambda x: getURLfromList(x))
# df.head()

# Read the CSV files

In [None]:
# Load the Twitter data CSV for conversation 1540359703586377729 and print it.
df1 = pd.read_csv('/content/twitter_data_1540359703586377729.csv')
print(df1)

In [None]:
# Load the Twitter data CSV for conversation 1643638127804796931 and print it.
df2 = pd.read_csv('/content/twitter_data_1643638127804796931.csv')
print(df2)
# The head id of the first row becomes the root of the conversation tree.
# NOTE(review): later cells assign `parent` again, so the last cell executed wins.
parent = df2['head_id'][0]

In [None]:
# Renumber the rows of df2 from 0 (needed after rows are dropped, see the
# disabled line below) and print the result.
#df2.drop(3, inplace=True)
df2.reset_index(drop=True, inplace=True)
print(df2)

In [None]:
# Load the Twitter data CSV for conversation 1644171721212149764 and print it.
# df3 is the frame used by the tree-construction and sentiment cells below.
df3 = pd.read_csv('/content/twitter_data_1644171721212149764.csv')
print(df3)
# The head id of the first row becomes the root of the conversation tree.
parent = df3['head_id'][0]

In [None]:
# Load the PHEME data CSV and print it.
# NOTE(review): this rebinds `df1` and replaces the `parent` taken from df3
# with a hard-coded id, while the cells below keep working on df3 - confirm
# that this cell is meant to run in the same session as the df3 analysis.
df1 = pd.read_csv('/content/twitter_data_PHEME2.csv')
print(df1)
parent = 552802011733716992

## Construction of tree of comments

In [None]:
from transformers.models.deformable_detr.modeling_deformable_detr import DeformableDetrFrozenBatchNorm2d
# Root of the conversation tree, named after the head tweet id.
root = Node(parent)

# Number of direct replies each comment received.
item_count = df3['reply_to'].value_counts().to_dict()

# (child, parent) pairs for every comment that does not reply to itself.
# The comparison used to index `DeformableDetrFrozenBatchNorm2d['id']`, an
# editor auto-completion of `df3['id']`: it raised on every row, the bare
# `except` hid the error, and the list always stayed empty. The import of
# that class at the top of this cell is no longer needed by this code.
input_list = [
    (comment_id, replied_to)
    for comment_id, replied_to in zip(df3['id'], df3['reply_to'])
    if comment_id != replied_to
]

In [None]:
# Nest the (child, parent) pairs into a dict of dicts, then hang that
# structure under `root` as anytree nodes.
output_dict = make_map(input_list)
# A bare expression that is not the last line of a cell displays nothing.
output_dict
createTweetsTree(output_dict, root)

# Sentiment Analysis

In [None]:
# Clean the raw comment texts of df3; the list order follows the row order.
pred_sentences = cleanComments(df3['comment'])

In [None]:
# Mount Google Drive so that the saved tokenizer and model can be loaded.
# NOTE(review): earlier cells already read from /content/drive/...; in a fresh
# runtime the mount presumably has to happen before them.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Project folder on Drive (same path as `mainDirectory` defined in the import cell).
main_directory = '/content/drive/MyDrive/FYP/Analyser/'

# Load the saved subjectivity tokenizer and classifier; both are used as
# notebook globals by predictFromModel callers.
tokenizer = BertTokenizer.from_pretrained(main_directory + "subjectivity_tokenizer/")
model = TFBertForSequenceClassification.from_pretrained(main_directory + "saved_subjectivity_model/")

In [None]:
# Subjectivity probability of every cleaned comment, from the fine-tuned model.
model_subjectivity_result = predictFromModel(model, tokenizer, pred_sentences)

# TextBlob / VADER scores of every cleaned comment, split into one list per
# measure (same order as pred_sentences).
sentiment_results = [getSentimentalResults(sentence) for sentence in pred_sentences]

textblob_polarity = [scores['textblob_polarity'] for scores in sentiment_results]
textblob_subjectivity = [scores['textblob_subjectivity'] for scores in sentiment_results]
vader_results = [scores['vader_results'] for scores in sentiment_results]
vaderCompoundScores = [scores['vader_compound_scores'] for scores in sentiment_results]

In [None]:
def calculateBias(dataset, threshold = 0.35):
    """Compute the confirmation-bias score of a set of analysed comments.

    Comments whose VADER compound score lies above ``threshold`` count as
    positive, those below ``-threshold`` as negative. Topic cluster 1 is
    treated as supportive and cluster 0 as unsupportive; every other comment
    is ignored. With

        P(D|H)  = positive supportive / all positive
        P(D|H') = negative supportive / all negative

    the score is the ratio of the two, inverted when necessary so that it
    never exceeds 1.

    Args:
        dataset: DataFrame with ``vader_compound_score`` and ``topic_cluster``
            columns.
        threshold: absolute compound score a comment must exceed to count.

    Returns:
        The ratio described above, or 1 when it cannot be computed because a
        denominator is zero (e.g. no positive or no negative comments).
    """
    scores = dataset['vader_compound_score']
    clusters = dataset['topic_cluster']

    positive = scores > threshold
    negative = scores < -threshold
    supportive = clusters == 1
    unsupportive = clusters == 0

    # int() keeps the divisions below on plain Python ints so that a zero
    # denominator raises ZeroDivisionError instead of producing inf/nan.
    count_positive_polarity_supportive = int((positive & supportive).sum())
    count_negative_polarity_supportive = int((negative & supportive).sum())
    count_positive_polarity_unsupportive = int((positive & unsupportive).sum())
    count_negative_polarity_unsupportive = int((negative & unsupportive).sum())

    # The unused P(D) / P(D') values were dropped: their division ran outside
    # the guard below and crashed whenever no comment was strongly polar.
    try:
        prob_D_H = count_positive_polarity_supportive / (count_positive_polarity_supportive + count_positive_polarity_unsupportive)
        prob_D_Hprime = count_negative_polarity_supportive / (count_negative_polarity_supportive + count_negative_polarity_unsupportive)
        ratio = prob_D_H / prob_D_Hprime

    except ZeroDivisionError:
        return 1

    if ratio > 1:
        return 1 / ratio

    return ratio

## Saving of results from sentiment analysis into the same dataframe

In [None]:
# Write every analysis result into df3, one column per measure. The lists
# used here come from the sentiment cell above and follow df3's row order.
#df2['number_of_links'] = df2['link_title'].apply(lambda x: len(x))

# Polarity
df3['textblob_polarity'] = textblob_polarity
df3['vader_compound_score'] = vaderCompoundScores
df3['vader_polarity'] = df3['vader_compound_score'].apply(lambda x: polarityDetermination(x))

overall_polarity = []
# comment id -> [textblob polarity, vader compound score]
overall_polarity_scores = {}

for i in range(len(textblob_polarity)):
  overall_polarity.append(definePolarity(textblob_polarity[i], vaderCompoundScores[i]))
  # df3['id'][i] is a label lookup; it relies on df3 having the default 0..n-1 index.
  overall_polarity_scores[df3['id'][i]] = [textblob_polarity[i], vaderCompoundScores[i]]

df3['overall_polarity'] = overall_polarity

# Subjectivity
df3['model_subjectivity'] = model_subjectivity_result
df3['textblob_subjectivity'] = textblob_subjectivity

overall_subjectivity = []
# comment id -> [model subjectivity score, textblob subjectivity score]
overall_subjectivity_scores = {}

for i in range(len(model_subjectivity_result)):
  overall_subjectivity.append(defineSubjectivity(model_subjectivity_result[i], textblob_subjectivity[i]))
  overall_subjectivity_scores[df3['id'][i]] = [float(model_subjectivity_result[i]), textblob_subjectivity[i]]

df3['overall_subjectivity'] = overall_subjectivity

df3['vader_sentiment'] = vader_results
df3['topic_cluster'] = getClusters(pred_sentences, embedder)

# Must run after 'overall_polarity' has been written: the flag is derived from it.
df3['potential_bias'] = flagPotentialBias(df3)

#df2.to_csv('sentiment_result_kh.csv', index=False)
df3

In [None]:
# Confirmation-bias score over all analysed comments in df3.
print(calculateBias(df3))

## Results from sentiment analysis

In [None]:
# Show each comment next to its topic cluster. The clusters are written to
# df3 only; df2 has no 'topic_cluster' column, so selecting from it failed.
df3[['comment', 'topic_cluster']]

In [None]:
# Display the per-comment scores and flags stored in df3.
df3[['id','comment','textblob_polarity','vader_compound_score','vader_polarity','model_subjectivity','textblob_subjectivity','topic_cluster','potential_bias']]

# Confirmation Bias Analysis

In [None]:
def traceConversation(dataframe, tree, node, printGraphOption = True):
    """Select the rows that belong to the conversation below ``node``.

    Args:
        dataframe: comments table with ``id`` and ``reply_to`` columns.
        tree: anytree root that is searched for ``node``.
        node: name of the tree node at which the conversation starts.
        printGraphOption: when true, the subtree is printed with ``printGraph``.

    Returns:
        tuple: (rows whose ``reply_to`` is a descendant of ``node`` or whose
        ``id`` is ``node`` or one of its descendants, the subtree node found
        for ``node``).
    """
    descendants = getAllChildNodes(tree, node, [])

    print('\n\n')
    subtree = search.find_by_attr(tree, node)

    if printGraphOption:
        printGraph(subtree)

    replies_to_descendant = dataframe['reply_to'].isin(descendants)
    part_of_thread = dataframe['id'].isin(descendants + [node])

    return dataframe[replies_to_descendant | part_of_thread], subtree

In [None]:
# Show the id that is currently used as the head of the conversation.
print(parent)

In [None]:
# Trace the whole conversation starting at the head tweet.
head_thread = parent #input('Enter a comment to look at the replies. ')
# Use df3: the tree in `root` was built from it and it is the frame that holds
# the sentiment, cluster and bias columns the cells below read. (df2 has none
# of them, so the bias calculation on the traced rows could not work.)
conversationDF, conversationTree = traceConversation(df3, root, head_thread, False)

In [None]:
print("About the tweet\n")

print("Existence of links:")
# checkLink means "no comment with links seen yet". It has to start as True:
# it used to start as False and was only ever set to False, so the
# "No links" message could never be printed.
checkLink = True

for i in df['link_title']:
    # Comments with links carry a list of host names; the others an empty string.
    if isinstance(i, list):
        print(i)
        checkLink = False

if checkLink:
    print("No links")

else:
  print("\nThe exact link in the comments:")
  for i in df['url']:
      if len(i) > 0:
          print(i)

In [None]:
# Print the complete conversation tree, starting at the root node.
print("Conversation Tree")
printGraph(root)

## Results of confirmation bias analysis

In [None]:
# Bias score of the traced conversation and the number of comments that were
# flagged (potential_bias == 1).
print("Confirmation Bias Score for Entire Conversation:", calculateBias(conversationDF))
print("Number of potentially bias comments:", len(conversationDF[conversationDF['potential_bias'] == 1]))

## List of potentially biased comments

In [None]:
# Print the id and text of every flagged comment in the traced conversation.
for index, row in conversationDF[conversationDF['potential_bias'] == 1].iterrows():
    print(row['id'], row['comment'])