# Simulation on the Ugandan village data (Section 4.2)

In [1]:
import csv
import numpy as np
from tqdm import tqdm

In [2]:
# Base directory that the dataset folder path is built from (here: the current directory).
home_folder = './'

In [3]:
def debug(str):
    """Return the source text of a print call that shows an expression and its value.

    For an expression string such as "x" the result is the text
    print("x","=",eval("x")); nothing is evaluated here.
    NOTE(review): presumably the returned text is meant to be passed to exec()
    by the caller -- confirm against usage.
    The parameter keeps its original name `str` so keyword callers still work.
    """
    return f"print(\"{str}\",\"=\",eval(\"{str}\"))"

## Load the dataset

In [4]:
# Folder that holds the village CSV files; readData opens ties.csv and nodes.csv from it.
uganVlgFolder = home_folder + 'ugandan-villages/'

In [5]:
def _parseInt(field, missing=-1):
    # Parse a base-10 integer CSV field; the 'NA' marker maps to `missing`.
    return missing if field == 'NA' else int(field, 10)


def _blankVertex():
    # New attribute record with every field set to -1 (the "unknown" value).
    return {'gender': -1,
            'income': -1,
            'leader': -1,
            'heard': -1,
            'adopted': -1,
            'edu': -1,
            'village': -1}


def readData(typ = ('friend',)):
    """Load the tie graph and the per-node attributes from the CSV files.

    Reads `ties.csv` and `nodes.csv` from `uganVlgFolder`. The first row of
    each file is treated as a header and skipped. Raw node ids are mapped to
    sequential indices in order of first appearance (ties first, then nodes).

    Args:
        typ: collection of tie-type labels. A row of ties.csv is kept only if
            its column 3 is contained in `typ`.

    Returns:
        (g, v) with len(g) == len(v):
          g[i] -- list of node indices; for every kept tie row the node in
                  column 1 is appended to the list of the node in column 2
                  (the edge is stored reversed so that len(g[i]) can be used
                  as an in-degree).
          v[i] -- dict with keys 'gender', 'income', 'leader', 'heard',
                  'adopted', 'edu', 'village', read from nodes.csv columns
                  6, 7, 15, 2, 3, 8 and 0. A field equal to 'NA' becomes -1,
                  and nodes that occur in a tie but not in nodes.csv keep -1
                  everywhere.

    Raises:
        ValueError: if an id or attribute field is neither 'NA' (attributes
            only) nor a base-10 integer.
    """
    g = []   # g[i]: reversed adjacency list of node i
    mp = {}  # raw node id -> sequential index
    e = 0    # number of ties kept

    # newline='' is what the csv module asks for when opening files to read.
    with open(uganVlgFolder + 'ties.csv', encoding='utf-8', newline='') as csv_file:
        # tqdm wraps the whole reader, so its counter includes the header row.
        for rowNo, r in enumerate(tqdm(csv.reader(csv_file, delimiter=','))):
            if rowNo == 0: continue        # header
            if r[3] not in typ: continue   # only read relevant ties

            ends = (int(r[1], 10), int(r[2], 10))

            # register unseen endpoints, column 1 before column 2
            for nodeId in ends:
                if nodeId not in mp:
                    mp[nodeId] = len(g)
                    g.append([])

            # store the edge reversed: the column-2 node lists the column-1 node
            g[mp[ends[1]]].append(mp[ends[0]])
            e += 1

    n = len(g)
    print(f'Tot edges: {e}, tot nodes: {n}.')

    # vertex descriptions, one per node seen in the ties so far
    v = [_blankVertex() for _ in range(n)]

    with open(uganVlgFolder + 'nodes.csv', encoding='utf-8', newline='') as csv_file:
        for rowNo, r in enumerate(tqdm(csv.reader(csv_file, delimiter=','))):
            if rowNo == 0: continue        # header

            nodeId = int(r[1], 10)
            rec = {'gender': _parseInt(r[6]),
                   'income': _parseInt(r[7]),
                   'leader': _parseInt(r[15]),
                   'heard': _parseInt(r[2]),
                   'adopted': _parseInt(r[3]),
                   'edu': _parseInt(r[8]),
                   'village': _parseInt(r[0])}

            if nodeId in mp:
                v[mp[nodeId]] = rec
            else:
                # node that does not appear in any selected tie
                mp[nodeId] = len(v)
                v.append(rec)

    # nodes that appear in no selected tie get an empty edge list
    g.extend([] for _ in range(len(v) - len(g)))
    print(len(v), len(g))

    return g,v

## Helper functions

In [6]:
def _statRate(a, b):
    # min(a/b, b/a), with 1e-10 added to b so the first division never hits 0.
    ratio = a / (b + 1e-10)
    if ratio == 0:
        # min(ratio, 1/ratio) would also be 0 here, but 1/ratio would first
        # emit a divide-by-zero RuntimeWarning.
        return 0.0
    return min(ratio, 1 / ratio)


def getOutcomeMeasure(g, v, netM, selecAtt='leader'):
    """Print how the selected vertices split across the two gender codes.

    A vertex is selected when v[i][selecAtt] == 1, or unconditionally when
    selecAtt == -1. Selected vertices whose 'gender' is -1 are ignored. Two
    totals are kept, indexed by the gender code (0 or 1): a plain count and
    the sum of netM[i]. For each total the function prints the normalised
    proportions and the "stat rate" min(t0/t1, t1/t0).
    NOTE(review): the original comment labelled the two slots
    [male_cnt, female_cnt]; which code is which is not visible here -- confirm
    against the dataset codebook.

    Args:
        g: edge list; not used by this function, kept for the call signature.
        v: sequence of vertex dicts with a 'gender' key and, unless
            selecAtt == -1, a `selecAtt` key.
        netM: node-level network metric, indexed like `v`.
        selecAtt: selection attribute name, or -1 to select every vertex.

    Returns:
        None. Results are printed. If nothing is selected the proportions are
        nan (0/0) and both rates are 0.
    """
    cnt = np.zeros(2)     # number of selected vertices per gender code
    netCnt = np.zeros(2)  # sum of netM over selected vertices per gender code

    for i, u in enumerate(v):
        if selecAtt != -1 and u[selecAtt] != 1: continue
        gender = u['gender']
        # -1 would silently index the last slot, so it has to be skipped
        if gender == -1: continue
        cnt[gender] += 1
        netCnt[gender] += netM[i]

    print(f'Prop. without net: {cnt / np.sum(cnt)}.')
    print(f'Prop. with net: {netCnt / np.sum(netCnt)}.')
    print('')
    statRateNoNet = _statRate(cnt[0], cnt[1])
    statRateNet = _statRate(netCnt[0], netCnt[1])
    print(f'Stat rate without net: {statRateNoNet}.')
    print(f'Stat rate with net: {statRateNet}.')

In [7]:
# Read the graph restricted to 'solver' ties.
# Other tie types listed in the original comment: 'geo', 'friend', 'family', 'lender', 'Contgame'.
g, v = readData(['solver'])

# readData stores each edge reversed, so the length of a node's list is its in-degree.
deg = list(map(len, g))

383674it [00:00, 668244.65it/s]
3185it [00:00, 156081.49it/s]

Tot edges: 7352, tot nodes: 3322.
3396 3396





In [8]:
# Print gender proportions and stat rates among vertices with leader == 1,
# using the in-degree list `deg` as the node-level network metric.
getOutcomeMeasure(g,v,deg,'leader')

Prop. without net: [0.59447005 0.40552995].
Prop. with net: [0.9578973 0.0421027].

Stat rate without net: 0.6821705426360465.
Stat rate with net: 0.04395325203254573.
