# Imports and Data

In [9]:
import pandas as pd
import numpy as np

# Source data
source = {}
for c in ['uk', 'it']:
    source[c] = pd.read_csv(f"source{c.upper()}.csv", sep="\t", parse_dates=['time'])
# Transient pairs of ego-alter
pairs = {}
for c in ['uk', 'it']:
    pairs[c] = pd.read_csv(f"transients{c.upper()}.csv", sep="\t")

## Custom histogram function

In [19]:
def histogram(array, bins, log=False, countZero=False):
    xl = sorted(list(array))
    xo = xl[0]
    xf = xl[-1]
    if log:
        lmu = np.log10(xf / xo) / bins
        mu = 10**lmu
    dx = (xf - xo) / bins
    h = {}
    if log:
        for x in xl:
            if x == xf:
                h[bins - 1] = h.get(bins - 1, 0) + 1
            else:
                i = np.log10(x / xo) // lmu
                h[i] = h.get(i, 0) + 1
    else:
        for x in xl:
            if x == xf:
                h[bins - 1] = h.get(bins - 1, 0) + 1
            else:
                i = int((x - xo) // dx)
                h[i] = h.get(i, 0) + 1
    df = pd.DataFrame.from_dict(h, orient='index', columns=['h'])
    if countZero:
        df = df.reindex(range(bins), fill_value=0)
    df['pmf'] = df['h'].div(sum(df['h']))
    df['pdf'] = df['pmf'] / dx
    for i in df.index:
        if log:
            df.at[i, 'label'] = xo*(mu**i)
        else:
            df.at[i, 'label'] = xo + (dx * (i + 0.5))
    return df

# Fig. 1

In [15]:
# C_o classification by ego
stbyego = {}
for c in pairs.keys():
    stbyego[c] = {'stW': [], 'stE': []}
    df = pairs[c].copy()
    for ego in df['ego'].unique():
        df1 = df[df['ego'] == ego].copy()
        stW = df1[df1['stE'] == 0]
        stbyego[c]['stW'].append(len(stW)/len(df1))
        stE = df1[df1['stE'] == 1]
        stbyego[c]['stE'].append(len(stE)/len(df1))

In [18]:
# Average daily new alter
fcc1 = {}
for c in ['uk', 'it']:
    df = source[c].copy()
    df = df[df['pair'].isin(pairs[c]['pair'])]
    fcc1[c] = pd.DataFrame()
    for ego in df['ego'].unique():
        df1 = df[df['ego'] == ego]
        start = np.datetime64(df1['time'].min(), 'D')
        end = np.datetime64(df1['time'].max(), 'D')
        df1W = df1[df1['DiW'].isin(range(5))]
        if len(df1W) > 0:
            numtiesW = len(df1W[df1W['a'] == 0]['alter'].unique())
            numdaysW = np.busday_count(start, end)
            fcc1[c].at[ego, 'W'] = numtiesW / numdaysW
        else:
            fcc1[c].at[ego, 'W'] = 0
        df1E = df1[df1['DiW'].isin(range(5,7))]
        if len(df1E) > 0:
            numtiesE = len(df1E[df1E['a'] == 0]['alter'].unique())
            numdaysE = np.busday_count(start, end, weekmask='0000011')
            fcc1[c].at[ego, 'E'] = numtiesE / numdaysE
        else:
            fcc1[c].at[ego, 'E'] = 0

  fcc1[c].at[ego, 'W'] = numtiesW / numdaysW


In [14]:
# C_f classification by ego
byego = {}
for c in pairs.keys():
    byego[c] = {'W': [], 'E': []}
    df = pairs[c].copy()
    for ego in df['ego'].unique():
        df1 = df[df['ego'] == ego].copy()
        W = df1[df1['E'] == 0]
        byego[c]['W'].append(len(W)/len(df1))
        E = df1[df1['E'] == 1]
        byego[c]['E'].append(len(E)/len(df1))

# Fig 2

In [20]:
# Lifetime distributions
lftdist = {}
for c in ['uk', 'it']:
    lftdist[c] = {}
    df = pairs[c].copy()
    for i in range(2):
        dfW = df[df[['stE', 'E'][i]] == 0]
        HW = histogram(dfW['ell'], 10)
        dfE = df[df[['stE', 'E'][i]] == 1]
        HE = histogram(dfE['ell'], 10)
        lftdist[c][['i', 'f'][i]] = {'W': HW, 'E': HE}

# Fig. 3

In [22]:
def settle_a(tiesdata, callslabel):
    res = pd.DataFrame()
    calls = source[callslabel].copy()
    calls['DiW'] = calls['time'].map(lambda i: i.day_of_week)
    pairs = tiesdata.copy()
    for tie in pairs.index:
        df = calls.loc[calls['pair'] == tie]
        ell = pairs['ell'][tie]
        weekend = pairs['E'][tie]
        date = np.datetime64(df.loc[df['a'] == 0].iloc[0]['time'], 'D')
        if np.is_busday(date):
            Wcount = 1
            Ecount = 0
        else:
            Wcount = 0
            Ecount = 1
        if pairs['stE'][tie] != weekend:
            if (pairs['stE'][tie] == 0) and (weekend == 1):
                res.at[tie, 'WE'] = 1
                res.at[tie, 'EW'] = 0
            elif (pairs['stE'][tie] == 1) and (weekend == 0):
                res.at[tie, 'WE'] = 0
                res.at[tie, 'EW'] = 1
            for a in range(ell):
                df1 = df.loc[df['a'] <= a]
                if Wcount > 0:
                    dW = df1.loc[df1['DiW'].isin(range(5))][['duration']].sum().iloc[0] / Wcount + 0.0001
                else:
                    dW = 0.0001
                if Ecount > 0:
                    dE = df1.loc[df1['DiW'].isin(range(5,7))][['duration']].sum().iloc[0] / Ecount + 0.0001
                else:
                    dE = 0.0001
                ratio = dE / dW
                if (ratio > 1) and (weekend):
                    res.at[tie, 'a'] = a
                    res.at[tie, 'ellp'] = a / ell
                    break
                elif (ratio < 1) and (not weekend):
                    res.at[tie, 'a'] = a
                    res.at[tie, 'ellp'] = a / ell
                    break
                date += 1
                if np.is_busday(date):
                    Wcount += 1
                else:
                    Ecount += 1
    return res

In [None]:
settling = {}
for c in ['uk', 'it']:
    settling[c] = settle_a(pairs[c]['all'], {'uk': 'ukc', 'it': 'it'}[c])

# Fig. 4

In [None]:
from sklearn.metrics import mutual_info_score as MI

In [None]:
# WARNING: This loop takes about 3 hours to run
Ibootk = {'uk': {'i': [], 'f': []}, 'it': {'i': [], 'f': []}}
for c in ['uk', 'it']:
    df = pairs[c].copy()
    Ibootk[c]['i'].append(MI(df['stE'], df['ell']) * np.log2(np.e))
    Ibootk[c]['f'].append(MI(df['E'], df['ell']) * np.log2(np.e))
    for rep in range(5000):
        df1 = df.sample(frac=500, replace=True, ignore_index=True)
        Ibootk[c]['i'].append(MI(df1['stE'], df1['ell']) * np.log2(np.e))
        Ibootk[c]['f'].append(MI(df1['E'], df1['ell']) * np.log2(np.e))