In [None]:
import json

data_path = 'data/coaid/userRelation.json'

# Load the user-relation records: the file holds one JSON document per line.
# UTF-8 is requested explicitly so decoding does not depend on the platform's
# default locale encoding, and whitespace-only lines (e.g. a trailing newline
# at the end of the file) are skipped instead of crashing json.loads.
data = []
with open(data_path, 'r', encoding='utf-8') as f:
    for line in f:
        if not line.strip():
            continue
        data.append(json.loads(line))

In [None]:
# Collect the distinct user ids (as strings) that appear in the loaded records.
# Kept as a dict of id -> True so later cells can test membership with `in`.
users = {str(record['user_id']): True for record in data}

print('number of users = {}'.format(len(users)))

In [None]:
# Map each user id to the integer label predicted by the classifier.
# The file is read as UTF-8 (independent of the platform default) and
# whitespace-only lines are skipped so a trailing newline cannot crash the load.
# NOTE(review): keys are stored exactly as they appear in the file, whereas the
# other cells key users by str(user_id) -- confirm the types match before joining.
predicted_label = {}
with open('output/fake-news-classifier-v11-dev.json1', 'r', encoding='utf-8') as f:
    for line in f:
        if not line.strip():
            continue
        record = json.loads(line)
        predicted_label[record['user_id']] = int(record['predicted_label'])

In [None]:
def _valid_neighbors(raw_entries, own_id, known_users):
    """Return the distinct known user ids referenced by ``raw_entries``.

    Each entry may be either a single id or a list of ids; both shapes are
    flattened. Ids are compared as strings. Self-references (``own_id``) and
    ids that are not in ``known_users`` are discarded. The result keeps
    first-seen order so the output is deterministic across runs.
    """
    found = {}
    for entry in raw_entries:
        candidates = entry if isinstance(entry, list) else [entry]
        for candidate in candidates:
            candidate = str(candidate)
            if candidate != own_id and candidate in known_users:
                found[candidate] = True
    return list(found)


# Normalise every raw record into string ids plus de-duplicated neighbour lists.
# Replies and mentions go through the same helper, so a scalar entry in
# 'mentioned_users' is handled the same way as a scalar entry in 'replied_to'
# instead of being iterated element by element.
cleaned_data = []
for d in data:
    user_id = str(d['user_id'])
    cleaned_data.append({
        'user_id': user_id,
        'replies': _valid_neighbors(d['replied_to'], user_id, users),
        'mentions': _valid_neighbors(d['mentioned_users'], user_id, users),
        'label': d['user_label'],
    })

In [None]:
import numpy as np

class DisseminationGraph:
    """Directed user graph used to simulate how a message spreads between users.

    Each record in ``data`` is a dict with the keys ``'user_id'``,
    ``'replies'``, ``'mentions'`` and ``'label'``. Following the comments in
    the propagation rules, label 1 is treated as a "spreader" and label 0 as
    a "checker".

    Edges:
        * for every ``u`` in ``record['replies']``: ``u -> record['user_id']``
        * for every ``u`` in ``record['mentions']``: ``record['user_id'] -> u``

    Edges whose endpoint is not one of the users in ``data`` are dropped, so
    the graph is self-contained and does not depend on any notebook-level
    variable.

    Parameters
    ----------
    data : list of dict
        User records as described above.
    prob1 : float
        Mean of the normal distribution (std 0.2) from which each user's
        acceptance score is drawn. Scores are only compared against
        ``threshold`` and are not clipped to [0, 1].
    prob2 : float
        Accepted for interface compatibility but currently unused: the score
        used when a checker receives a message from a spreader is fixed at 0
        for every user (it could instead be sampled from
        normal(prob2, 0.2)).
    threshold : float
        A user accepts a message when the relevant score is >= ``threshold``.
    seed : int or None
        When given, a private ``numpy.random.RandomState(seed)`` drives all
        sampling so results are reproducible. When ``None`` the global
        ``numpy.random`` state is used.
    """

    def __init__(self, data, prob1=0.6, prob2=0.4, threshold=0.5, seed=None):
        # Source of randomness: the global numpy state, or a private seeded one.
        self._rng = np.random if seed is None else np.random.RandomState(seed)

        # All users, in input order.
        self.users = [d['user_id'] for d in data]

        self.label = {d['user_id']: int(d['label']) for d in data}

        # Adjacency lists. Inner dicts are used as ordered sets so duplicates
        # are removed while keeping a deterministic first-seen order.
        known = set(self.users)
        neighbors = {u: {} for u in self.users}
        for d in data:
            user_id = d['user_id']
            for u in d['replies']:
                if u in known:
                    neighbors[u][user_id] = True
            for u in d['mentions']:
                if u in known:
                    neighbors[user_id][u] = True
        self.edges = {u: list(targets) for u, targets in neighbors.items()}

        # Score used when the user has no doubt about the message.
        sampled = self._rng.normal(prob1, 0.2, len(self.users))
        self.probs1 = dict(zip(self.users, sampled))

        # Score used when the user doubts the message (checker receiving from
        # a spreader). Fixed at 0, see the ``prob2`` note in the class docstring.
        self.probs2 = {u: 0 for u in self.users}

        self.threshold = threshold

    def _check(self, next_user_id, prev_user_id, type=0):
        """Return whether ``next_user_id`` accepts a message from ``prev_user_id``.

        With ``type == 0`` labels are ignored and only ``probs1`` is used.
        Otherwise:
            * a label-1 receiver uses ``probs1``;
            * a label-0 receiver uses ``probs2`` when the sender has label 1
              and ``probs1`` when the sender has label 0;
            * any other label combination is rejected (returns ``False``).
        """
        if type == 0:  # labels are not taken into account
            return bool(self.probs1[next_user_id] >= self.threshold)

        next_label = self.label[next_user_id]
        if next_label == 1:
            return bool(self.probs1[next_user_id] >= self.threshold)

        if next_label == 0:
            prev_label = self.label[prev_user_id]
            if prev_label == 1:  # checker gets the message from a spreader
                return bool(self.probs2[next_user_id] >= self.threshold)
            if prev_label == 0:  # checker gets the message from another checker
                return bool(self.probs1[next_user_id] >= self.threshold)

        # Labels other than 0/1: explicit rejection instead of an implicit None.
        return False

    def propagate(self, start_user_id, type=0):
        """Spread a message from ``start_user_id`` and return the accepted ratio.

        Every user reachable from the start is visited, whether or not they
        accepted the message. A user counts as accepting if at least one
        visited predecessor passes ``_check`` for them; the start user always
        counts as accepting. The return value is
        ``accepted users / reached users``.

        With ``type == 1`` a label-0 start user whose ``probs2`` score is
        below the threshold does not spread at all and 0.0 is returned.
        """
        if (type == 1 and self.label[start_user_id] == 0
                and self.probs2[start_user_id] < self.threshold):
            return 0.0

        # flag[u] is True if u accepted, False if u was reached but rejected.
        flag = {start_user_id: True}
        pending = [start_user_id]

        while pending:
            user_id = pending.pop()
            for next_user in self.edges[user_id]:
                if self._check(next_user, user_id, type):
                    # Newly accepted, or upgraded from an earlier rejection:
                    # (re)visit so the acceptance is recorded.
                    if not flag.get(next_user, False):
                        flag[next_user] = True
                        pending.append(next_user)
                elif next_user not in flag:
                    flag[next_user] = False
                    pending.append(next_user)

        return sum(flag.values()) / len(flag)

    def run(self, num_samples=100):
        """Average the accepted ratio over randomly chosen start users.

        ``num_samples`` start users are drawn uniformly with replacement.
        Returns a tuple ``(without_label, with_label)``: the mean of
        ``propagate(start, 0)`` and of ``propagate(start, 1)`` over the
        sampled starts.

        Raises ``ValueError`` if ``num_samples`` is not positive or the graph
        has no users.
        """
        if num_samples <= 0:
            raise ValueError('num_samples must be a positive integer')
        if not self.users:
            raise ValueError('cannot run a simulation on a graph without users')

        ids = self._rng.randint(low=0, high=len(self.users), size=num_samples)
        without_label = 0.0
        with_label = 0.0

        for idx in ids:
            start = self.users[idx]
            without_label += self.propagate(start, 0)
            with_label += self.propagate(start, 1)

        return without_label / num_samples, with_label / num_samples
            
        
# Seed NumPy's global generator so the sampled acceptance scores and start
# users (and therefore the reported ratios) are identical on every Run All.
np.random.seed(42)

g = DisseminationGraph(cleaned_data)
g.run()