In [2]:
import json

data_path = 'data/coaid/userRelation.json'

# The file is read as JSON Lines: one JSON document per line.
# - The encoding is pinned to UTF-8 so decoding does not depend on the
#   platform's locale default.
# - Blank lines (e.g. a trailing newline at the end of the file) are skipped
#   instead of crashing json.loads.
data = []
with open(data_path, 'r', encoding='utf-8') as f:
    for line in f:
        line = line.strip()
        if not line:
            continue
        data.append(json.loads(line))

In [40]:
# Collect every distinct user id (normalised to str) as the keys of a lookup
# table; the values are a constant True and only membership matters.
users = {str(record['user_id']): True for record in data}

print('number of users = {}'.format(len(users)))

number of users = 96099


In [187]:
# Map user_id -> predicted label (as int), read from a JSON Lines file.
# - The encoding is pinned to UTF-8 so decoding does not depend on the
#   platform's locale default; blank lines are skipped instead of crashing.
# NOTE(review): keys are stored exactly as they appear in the file, while other
# cells normalise user ids with str() -- confirm the id type before joining.
predicted_label = {}
with open('output/fake-news-classifier-v11-dev.json1', 'r', encoding='utf-8') as f:
    for line in f:
        line = line.strip()
        if not line:
            continue
        record = json.loads(line)
        predicted_label[record['user_id']] = int(record['predicted_label'])

In [192]:
def _known_other_ids(entries, self_id, known_users):
    """Return the sorted, de-duplicated ids in ``entries`` worth keeping.

    Each element of ``entries`` may be a single id or a list of ids; both
    shapes are accepted. Ids are normalised to ``str``. An id is kept only
    when it differs from ``self_id`` (no self-loops) and is present in
    ``known_users``. The result is sorted so repeated runs give the same order.
    """
    kept = set()
    for entry in entries:
        candidates = entry if isinstance(entry, list) else [entry]
        for candidate in candidates:
            candidate = str(candidate)
            if candidate != self_id and candidate in known_users:
                kept.add(candidate)
    return sorted(kept)


# One cleaned record per raw record: ids as strings, relations restricted to
# known users, and the user's own id removed from its relations.
cleaned_data = []
for d in data:
    user_id = str(d['user_id'])
    cleaned_data.append({
        'user_id': user_id,
        'replies': _known_other_ids(d['replied_to'], user_id, users),
        'mentions': _known_other_ids(d['mentioned_users'], user_id, users),
        'label': d['user_label'],
    })

In [193]:
import numpy as np

class DisseminationGraph:
    """Directed user graph used to simulate how a message spreads.

    Each record in ``data`` is a dict with the keys ``'user_id'``,
    ``'replies'``, ``'mentions'`` and ``'label'``. Edges are built as follows:

    * for every ``u`` in a record's ``'replies'``: ``u -> user_id`` (the
      message travels from the replier to the current user);
    * for every ``u`` in a record's ``'mentions'``: ``user_id -> u``.

    Edges whose other endpoint is not a user present in ``data`` are dropped,
    because such a user has no label or probability to evaluate.

    Labels are integers; in the label-aware mode, label ``1`` is treated as a
    "spreader" and label ``0`` as a "checker".

    Parameters
    ----------
    data : list of dict
        Cleaned user records (see above).
    prob1 : float
        Mean of the normal distribution (std 0.2) from which each user's
        acceptance score ``probs1`` is drawn.
    prob2 : float
        Currently unused: ``probs2`` (the score applied when a checker
        receives a message from a spreader) is fixed to 0 for every user.
    threshold : float
        A score greater than or equal to this value means "accepts".
    seed : int or None
        When given, a private ``numpy.random.RandomState(seed)`` is used so
        results are reproducible. When ``None`` the global ``numpy.random``
        state is used.
    """

    def __init__(self, data, prob1=0.6, prob2=0.4, threshold=0.5, seed=None):
        # Global NumPy RNG by default; a private seeded one on request.
        self._rng = np.random if seed is None else np.random.RandomState(seed)

        self.users = [d['user_id'] for d in data]
        self.label = {d['user_id']: int(d['label']) for d in data}

        # Adjacency is keyed by the users of *this* graph, not by a notebook
        # global, so the class works on any data set passed in.
        adjacency = {u: set() for u in self.users}
        for d in data:
            user_id = d['user_id']
            for u in d['replies']:
                if u in adjacency:  # replier -> current user
                    adjacency[u].add(user_id)
            for u in d['mentions']:
                if u in adjacency:  # current user -> mentioned user
                    adjacency[user_id].add(u)
        self.edges = {u: list(targets) for u, targets in adjacency.items()}

        # Score used when the receiver has no reason to doubt the message.
        probs1 = self._rng.normal(prob1, 0.2, len(self.users))
        self.probs1 = dict(zip(self.users, probs1))

        # Score used when a checker receives a message from a spreader.
        # Fixed to 0, so with threshold > 0 that hand-over never succeeds.
        self.probs2 = {u: 0 for u in self.users}

        self.threshold = threshold

    def _check(self, next_user_id, prev_user_id, type=0):
        """Return whether ``next_user_id`` accepts a message from ``prev_user_id``.

        ``type == 0`` ignores labels and compares ``probs1`` with the
        threshold. Any other ``type`` is label-aware: a checker (label 0)
        receiving from a spreader (label 1) is judged with ``probs2``; every
        other 0/1 combination uses ``probs1``. Label combinations outside
        0/1 yield ``False``.
        """
        if type == 0:
            return self.probs1[next_user_id] >= self.threshold

        next_label = self.label[next_user_id]
        if next_label == 1:
            return self.probs1[next_user_id] >= self.threshold
        if next_label == 0:
            prev_label = self.label[prev_user_id]
            if prev_label == 1:  # checker gets the message from a spreader
                return self.probs2[next_user_id] >= self.threshold
            if prev_label == 0:  # checker gets the message from a checker
                return self.probs1[next_user_id] >= self.threshold
        return False

    def propagate(self, start_user_id, type=0):
        """Spread a message from ``start_user_id``; return the accepted fraction.

        Every user reachable through ``self.edges`` is visited, whether or
        not they accepted the message. The result is the number of reached
        users flagged as accepting, divided by the number of reached users
        (the start user counts as accepting).

        With ``type == 1`` a start user with label 0 whose ``probs2`` is below
        the threshold does not start the spread at all and ``0.0`` is returned.
        """
        if (type == 1 and self.label[start_user_id] == 0
                and self.probs2[start_user_id] < self.threshold):
            return 0.0

        accepted = {start_user_id: True}
        stack = [start_user_id]  # list used as a LIFO stack

        while stack:
            user_id = stack.pop()
            for next_user in self.edges[user_id]:
                if self._check(next_user, user_id, type):
                    # First acceptance (new user, or previously rejecting):
                    # flag and expand again from this user.
                    if not accepted.get(next_user, False):
                        accepted[next_user] = True
                        stack.append(next_user)
                elif next_user not in accepted:
                    # Rejecting users are still reached and expanded once.
                    accepted[next_user] = False
                    stack.append(next_user)

        return sum(accepted.values()) / len(accepted)

    def run(self, n_trials=100):
        """Run ``n_trials`` random-start simulations in both modes.

        Start users are sampled uniformly with replacement. After each trial
        the running averages (label-unaware, label-aware) are printed.

        Returns
        -------
        tuple of float
            ``(without_label, with_label)``: the *sums* (not averages) of the
            accepted fractions over all trials. ``(0.0, 0.0)`` when the graph
            has no users.
        """
        without_label = 0.0
        with_label = 0.0
        if not self.users:
            return without_label, with_label

        ids = self._rng.randint(low=0, high=len(self.users), size=n_trials)
        for step, idx in enumerate(ids):
            start_user = self.users[idx]
            without_label += self.propagate(start_user, 0)
            with_label += self.propagate(start_user, 1)
            print(without_label/(step+1), with_label/(step+1))

        return without_label, with_label
            
        
# Build the graph from the cleaned records with the default parameters and run
# the simulation. run() prints running averages per trial and returns the
# (without_label, with_label) totals, shown as this cell's output.
g = DisseminationGraph(cleaned_data)
g.run()
# Alternative per-user inspection, kept for reference (disabled):
# for i in range(0, 100):
#     print(g.propagate(g.users[i], 0), g.propagate(g.users[i], 1), g.label[g.users[i]])

0.6927483121213622 0.0
0.6927400584360917 0.0
0.6927373072076682 0.0
0.6927359315934565 0.16515624226217007
0.6927384076990377 0.13212499380973605
0.7439486730825314 0.11010416150811338
0.7805274340707412 0.0943749955783829
0.7695529804057512 0.16515624226217007
0.7951582048051121 0.14680554867748452
0.815642384324601 0.13212499380973605
0.8044686952724391 0.12011363073612369
0.7951586633431827 0.1651576178763818
0.810915689239861 0.15245318573204475
0.8244217114370137 0.21299224389404156
0.8361269306745461 0.19879276096777213
0.846368997507387 0.24886821340728638
0.8373315155805302 0.23422890673626953
0.8463686536038341 0.22121618969536566
0.8382833724731776 0.24434383636504228
0.8310057940870598 0.2651578929992242
0.8244213184043817 0.2525313266659278
0.818435661970412 0.2710812923240944
0.8263297636238723 0.3027734100491338
0.8207631820041619 0.31768389167411487
0.8156419269140283 0.33140153476909745
0.8109146145231356 0.3440639745490814
0.8065380848045515 0.3313208643805969
0.81344

(80.94966644511695, 36.80078935123206)

In [97]:


# Debug: show the outgoing edges of the user at index 3 of g.users.
print(g.edges[g.users[3]])

90577989


AttributeError: 'list' object has no attribute 'push'

In [146]:
import numpy as np
# Sanity check: draw 10 samples from a normal distribution with mean 0.6 and
# standard deviation 0.2 (the values DisseminationGraph uses by default for probs1).
np.random.normal(0.6, 0.2, 10)

KeyError: '73476'

In [150]:
# Debug search for user 73476 (as int or str) anywhere in the cleaned records:
# prints 'asd' when it is a record's own id, and prints the whole record once
# per occurrence in that record's replies and then in its mentions.
target_ids = (73476, '73476')
for d in cleaned_data:
    if d['user_id'] in target_ids:
        print('asd')
    for field in ('replies', 'mentions'):
        for u in d[field]:
            if u in target_ids:
                print(d)