## First create the time_id dict

In [None]:
import pandas as pd
from datetime import date
from dateutil.relativedelta import relativedelta
from tqdm import tqdm


def half_year_windows(start: date, end: date):
    """Yield consecutive six-month windows as ``(window_start, window_end)``.

    The first window begins at ``start``; every following window begins
    where the previous one ended. Windows are produced for as long as the
    window start is on or before ``end``, so the final window may reach
    past ``end``.
    """
    six_months = relativedelta(months=6)
    window_start = start
    while not window_start > end:
        window_end = window_start + six_months
        yield window_start, window_end
        # Step from the previous boundary (not from `start`) so consecutive
        # windows share their edge.
        window_start = window_end
import pickle

# Overall period covered by the network.
lo = date(1350, 1, 1)
hi = date(1849, 12, 31)

# time_id:   running window number -> window start date
# time_dict: window start date     -> running window number
window_starts = [w_start for w_start, _ in half_year_windows(lo, hi)]
time_id = dict(enumerate(window_starts))
time_dict = {w_start: number for number, w_start in time_id.items()}

# Persist the number -> date lookup.
with open("time_id_final.pkl", 'wb') as out_file:
    pickle.dump(time_id, out_file)

In [None]:
## Helper function for creating adjacency matrices
def adj_from_groups(group_tuples, node_ids, weighted=False, normalize_group=False):
    """Build a sparse adjacency matrix from group co-membership (clique projection).

    Every group contributes an undirected edge between each pair of its
    members that appear in ``node_ids``.

    Args:
        group_tuples: Iterable of groups, each an iterable of hashable node
            ids. Ids that are not in ``node_ids`` are ignored, and an id that
            is repeated inside one group is counted once.
        node_ids: Sequence of node ids; the position of an id is its
            row/column index in the matrix.
        weighted: If True, an edge weight is the sum of the per-group weights
            of all groups that contain both endpoints. If False, every edge
            has weight 1.0.
        normalize_group: If True, a group with ``k`` retained members
            contributes ``1 / (k - 1)`` per pair instead of 1.0. Only has an
            effect together with ``weighted=True``.

    Returns:
        Tuple ``(W, idx, inv_idx)``: ``W`` is a symmetric ``n x n`` CSR matrix
        with no diagonal entries (``n = len(node_ids)``), ``idx`` maps node id
        to matrix index and ``inv_idx`` maps matrix index back to node id.
    """
    # Function-scope imports, executed once per call rather than once per group.
    from itertools import combinations
    from scipy.sparse import coo_matrix

    idx = {pid: i for i, pid in enumerate(node_ids)}
    inv_idx = {i: pid for pid, i in idx.items()}

    # Accumulated weight per unordered index pair (smaller index first).
    acc = {}
    for group in group_tuples:
        # dict.fromkeys drops repeated ids while keeping their order, so a
        # repeated member neither inflates k nor produces a self-pair.
        members = [idx[pid] for pid in dict.fromkeys(group) if pid in idx]
        k = len(members)
        if k < 2:
            continue

        w = 1.0 / (k - 1) if normalize_group else 1.0
        for ia, ib in combinations(members, 2):
            key = (ia, ib) if ia < ib else (ib, ia)
            acc[key] = acc.get(key, 0.0) + w

    # Emit each edge in both directions to make the matrix symmetric.
    rows, cols, data = [], [], []
    for (ia, ib), w in acc.items():
        if not weighted:
            w = 1.0
        rows += [ia, ib]
        cols += [ib, ia]
        data += [w, w]

    n = len(node_ids)
    # Members are distinct within a group, so no diagonal entry is ever
    # created and no explicit zero needs to be removed afterwards.
    W = coo_matrix((data, (rows, cols)), shape=(n, n), dtype=float).tocsr()

    return W, idx, inv_idx



## load the CAC events and construct layer 1

In [None]:
# Matched CAC events and the authority file shared by CAC and ALMA.
df_events = pd.read_pickle("CAC_matched.pkl")
df_names = pd.read_parquet("authority_file_cac_alma.parquet")

# Only authority rows that carry a CAC id can be used for the lookup
# cac_id -> final_id.
df_names_cac = df_names.dropna(subset=["cac_id"])
cac_id_dict = dict(
    zip(df_names_cac["cac_id"].to_list(), df_names_cac["final_id"])
)

In [None]:
output_dict = {}  # output_dict[window_start] = list of person-id groups (one group per institution with more than one person in that window)


In [None]:

start_col = "date_start"
end_col = "date_end"
inst_col = "event_place_parent_id"
person_col = "person_id"

# Work on a copy so df_events keeps its original date columns.
d = df_events.copy()

# Reduce the values to plain datetime.date so they compare directly against
# the date-based window boundaries.
d[start_col] = d[start_col].apply(lambda x: x.date())
d[end_col] = d[end_col].apply(lambda x: x.date())

# Half-year windows, materialised as a list: half_year_windows() returns a
# generator, which would be exhausted after the first institution and leave
# every later institution without any window.
win_starts = []
if not d.empty:
    first_day = d[start_col].min()
    # Snap the first window to the Jan 1 / Jul 1 grid. The window starts are
    # later looked up in time_dict, whose keys lie on that grid; a start that
    # is already on the grid is left unchanged.
    grid_start = first_day.replace(month=1 if first_day.month <= 6 else 7, day=1)
    win_starts = list(half_year_windows(grid_start, d[end_col].max()))

for inst, g in d.groupby(inst_col, sort=False):
    for ws, we in win_starts:
        # interval overlap test: [s,e] overlaps [ws,we] iff s <= we and e >= ws
        m = (g[start_col] <= we) & (g[end_col] >= ws)
        cur_people = set(g.loc[m, person_col])
        # A single person cannot form an edge, so such windows are skipped.
        if len(cur_people) > 1:
            # Translate CAC person ids to the shared final ids.
            group = [cac_id_dict[pid] for pid in cur_people]
            output_dict.setdefault(ws, []).append(group)

In [None]:
# One adjacency matrix per window that has at least one group, keyed by the
# running window number from time_dict.
matrices_layer1 = {}
for window_start, groups in output_dict.items():
    # Node set of this window: every person that occurs in any group.
    node_ids = sorted(set().union(*groups))
    matrix, list_indexes, _ = adj_from_groups(groups, node_ids, weighted=False)
    layer_entry = {
        "time": window_start,
        "matrix": matrix,
        "ids_pos_mat": list_indexes,
    }
    matrices_layer1[time_dict[window_start]] = layer_entry

## Load bibliographic data and create layer 2

In [None]:
alma_df = pd.read_pickle("ALMA_matched.pkl")

# Keep only records that list more than one name id; a record with a single
# id cannot contribute an edge.
has_several_names = alma_df["all_names_final_id"].map(len) > 1
df_combined = alma_df[has_several_names]

In [None]:
positions = []
matrices_layer2 = {}
ids = []

# Pull the columns out once; they do not change between windows.
rec_start = df_combined["date_start"]
rec_end = df_combined["date_end"]
rec_names = df_combined["all_names_final_id"]

# Same window sequence as used for time_id, so t_count matches its keys.
for t_count, (p_start, p_end) in enumerate(tqdm(half_year_windows(lo, hi))):
    # Vectorised interval overlap: a record overlaps the window iff it starts
    # on or before the window end and ends on or after the window start.
    # This replaces a row-wise DataFrame.apply that ran once per window.
    overlaps = (rec_start <= p_end) & (rec_end >= p_start)
    current_match = rec_names[overlaps].to_list()
    if not current_match:
        continue

    node_ids = sorted({pid for grp in current_match for pid in grp})
    ids.append(node_ids)
    matrix, list_indexes, _ = adj_from_groups(current_match, node_ids, weighted=False)
    matrices_layer2[t_count] = {
        "time": p_start,
        "matrix": matrix,
        "ids_pos_mat": list_indexes,
    }

            


    


## Unite layers and export CHExNet

In [None]:
# Bundle both layers under their layer names.
chexnet = dict(layer_1=matrices_layer1, layer_2=matrices_layer2)

# Write the combined network to disk.
with open("CHExNet.pkl", "wb") as out_file:
    pickle.dump(chexnet, out_file)