# load packages

In [1]:
from pulp import *
import pandas as pd
import numpy as np
import os

# load data

In [2]:
# Aggregated link table. Its columns are Source, website, page_url, TRUE, FALSE.
# NOTE(review): later cells read page_url as the per-link total and TRUE / FALSE
# as the true / fake counts (by column position) - confirm against the data source.
pol_df = pd.read_csv('../data/pol_agg.csv')

In [3]:
# Source nodes that keep at least one outgoing link once self-links
# (Source == website) are excluded. Applying the mask here keeps node_list
# aligned with the per-node dictionaries built from the filtered table,
# regardless of whether this cell runs before or after the self-link filter.
# dict.fromkeys de-duplicates in first-appearance order, so node_list (and
# anything indexed by position, e.g. node_list[0]) is the same on every run,
# unlike iterating over a set of strings.
_has_real_link = pol_df['Source'] != pol_df['website']
node_list = list(dict.fromkeys(pol_df.loc[_has_real_link, 'Source']))

In [4]:
len(node_list)  # number of distinct source nodes in node_list

166

In [5]:
# Drop self-links: keep only rows whose source differs from the linked website.
is_external_link = pol_df['Source'] != pol_df['website']
pol_df = pol_df.loc[is_external_link]

In [6]:
# Preview only the first rows instead of rendering the whole table.
pol_df.head(10)

Unnamed: 0,Source,website,page_url,TRUE,FALSE
1,www.facebook.com,www.politifact.com,22,0,22
2,nationalreport.net,www.whitehouse.gov,14,0,14
6,www.naturalnews.com,www.cdc.gov,8,0,8
7,www.facebook.com,www.snopes.com,8,1,7
9,www.infowars.com,www.cdc.gov,6,0,6
10,twitter.com,www.politifact.com,6,6,0
12,www.facebook.com,www.nytimes.com,6,0,6
13,dailycaller.com,apps.who.int,5,0,5
15,americannews.com,www.politifact.com,5,0,5
16,www.facebook.com,www.youtube.com,5,0,5


In [7]:
pol_df.shape  # (rows, columns) after dropping self-links; one row per link

(1286, 5)

In [8]:
# Column order matters: the idx* constants defined below address row fields by position.
pol_df.columns

Index(['Source', 'website', 'page_url', 'TRUE', 'FALSE'], dtype='object')

# parse nodes / links information 

In [9]:
# Lookup tables filled while parsing pol_df.
# Keyed by source node:
capacityNodes, costTrueNode, costFakeNode = {}, {}, {}
# Keyed by (source, destination) link id:
capacityLink, costTrueLink, costFakeLink = {}, {}, {}

In [10]:
# Positional indices of the row fields, matching the column order
# Source, website, page_url, TRUE, FALSE.
idxSource, idxDestination, idxTotal, idxTrue, idxFake = range(5)

In [11]:
# source node -> list of the (source, destination) link ids leaving that node
nodes_map = dict()

In [12]:
# Build the per-link and per-node lookup tables from pol_df.
#
# Start from empty tables: node totals are accumulated with + and nodes_map
# is appended to, so re-running this cell without clearing would double-count
# capacities and duplicate link ids.
for _table in (nodes_map, capacityNodes, capacityLink,
               costTrueLink, costFakeLink, costTrueNode, costFakeNode):
    _table.clear()

# itertuples yields each row as a plain tuple in column order, so the idx*
# positions apply directly and no per-field .iloc lookup is needed.
for this_row in pol_df.itertuples(index=False, name=None):
    this_source = this_row[idxSource]
    this_destination = this_row[idxDestination]
    this_total = this_row[idxTotal]
    this_true = this_row[idxTrue]
    this_fake = this_row[idxFake]

    this_link_id = (this_source, this_destination)
    nodes_map.setdefault(this_source, []).append(this_link_id)

    # Link level: capacity is the total count; the costs are the true / fake
    # shares of that total.
    capacityLink[this_link_id] = this_total
    costTrueLink[this_link_id] = float(this_true) / float(this_total)
    costFakeLink[this_link_id] = float(this_fake) / float(this_total)

    # Node level: running sums over the node's outgoing links. These are raw
    # counts here; the following cell divides them by the node capacity.
    capacityNodes[this_source] = capacityNodes.get(this_source, 0) + this_total
    costTrueNode[this_source] = costTrueNode.get(this_source, 0) + this_true
    costFakeNode[this_source] = costFakeNode.get(this_source, 0) + this_fake

In [13]:
# Turn each node's accumulated true / fake counts into shares of its capacity.
# NOTE: the counts are overwritten in place, so run this once per parse;
# running it again would divide the shares by the capacity a second time.
for x in node_list:
    node_capacity = float(capacityNodes[x])
    costTrueNode[x] = float(costTrueNode[x]) / node_capacity
    costFakeNode[x] = float(costFakeNode[x]) / node_capacity

# set up symbolic model: link based

In [14]:
# All (source, destination) link ids, in capacityLink's insertion order.
link_list = [*capacityLink]

In [18]:
def linkbasedNetworkOpt(lambda_True, maxNewsControl, solver=None):
    """Build and solve the link-based news-network linear program.

    Decision variables are a non-negative flow per link and per node, plus one
    'news control' variable. The objective minimises the fake-cost-weighted
    link flow minus ``lambda_True`` times the true-cost-weighted link flow.
    News control is not penalised in the objective; it is only capped by
    ``maxNewsControl``.

    Reads the notebook-level ``link_list``, ``node_list``, ``nodes_map``,
    ``capacityLink``, ``capacityNodes``, ``costTrueLink`` and ``costFakeLink``.

    Args:
        lambda_True: weight of the true-flow term in the objective.
        maxNewsControl: upper bound on total news control, i.e. the sum over
            all nodes of (node capacity - node flow).
        solver: optional PuLP solver instance forwarded to
            ``LpProblem.solve``; ``None`` keeps PuLP's default solver.

    Returns:
        Tuple ``(linkProbLink, nodeProbLink, controlProbLink, probLink)``:
        the link-variable dict, the node-variable dict, the news-control
        variable and the solved ``LpProblem``.

    Warns:
        RuntimeWarning: if the solver does not report an optimal solution
            (for example when ``maxNewsControl`` makes the model infeasible);
            the returned ``varValue`` attributes are not meaningful then.
    """
    import warnings

    # problem
    probLink = LpProblem('LinkBased_NewsNetwork', LpMinimize)

    # variables (all with lower bound 0)
    linkProbLink = LpVariable.dicts('link', link_list, 0)
    nodeProbLink = LpVariable.dicts('node', node_list, 0)
    controlProbLink = LpVariable('news control', 0)

    # objective: fake-weighted flow minus lambda_True * true-weighted flow
    fake_flow = lpSum([linkProbLink[x] * costFakeLink[x] for x in link_list])
    true_flow = lpSum([lambda_True * linkProbLink[x] * costTrueLink[x] for x in link_list])
    probLink += fake_flow - true_flow

    # link constraints: flow on a link cannot exceed its capacity
    for x in link_list:
        probLink += linkProbLink[x] <= capacityLink[x]

    # node constraints: flow through a node cannot exceed its capacity
    for x in node_list:
        probLink += nodeProbLink[x] <= capacityNodes[x]

    # flow conservation: a node's flow equals the sum over its outgoing links
    for x in node_list:
        probLink += lpSum([linkProbLink[y] for y in nodes_map[x]]) == nodeProbLink[x]

    # news control = total unused node capacity, capped by maxNewsControl
    probLink += lpSum([capacityNodes[x] - nodeProbLink[x] for x in node_list]) == controlProbLink
    probLink += controlProbLink <= maxNewsControl

    probLink.solve(solver)

    # Surface infeasible / unbounded / unsolved models instead of silently
    # returning variables whose values do not describe a valid solution.
    if probLink.status != LpStatusOptimal:
        warnings.warn(
            'linkbasedNetworkOpt: solver status is %r, not Optimal; '
            'variable values may be meaningless'
            % (LpStatus.get(probLink.status, probLink.status),),
            RuntimeWarning,
        )

    return linkProbLink, nodeProbLink, controlProbLink, probLink

# run optimization and print the result

In [19]:
# lambdaTrue: weight of the true-flow term in the objective (0 -> only fake flow is minimised).
# maxNewsControl: cap on total news control passed to the optimiser.
lambdaTrue, maxNewsControl = 0, 300

In [20]:
# aa: link-flow variables, bb: node-flow variables,
# cc: news-control variable, dd: the solved LpProblem.
aa, bb, cc, dd = linkbasedNetworkOpt(lambda_True=lambdaTrue, maxNewsControl=maxNewsControl)

In [21]:
# Value of the 'news control' variable in the solution returned by the solver.
cc.varValue

300.0

In [22]:
# Solved flow value for the first node in node_list.
bb[node_list[0]].varValue

2.0

# compare optimization result with network structures