In [8]:

import numpy as np
import networkx as nx
import pandas as pd
import pickle

# data
# Read the survey table and keep only the rows that have a value in every one
# of the six opinion columns listed below; rows with a missing value in any of
# them are dropped.
df = pd.read_csv("df_normalizedcleanfinalscaled.csv")
opinion_cols1 = ["affrmact_norm1b", "gunlaw_norm1b", "partyid_norm1b"]
opinion_cols2 = ["affrmact_norm2", "gunlaw_norm2", "partyid_norm2"]
required_cols = opinion_cols1 + opinion_cols2
df_clean = df.dropna(subset=required_cols)

# One row per retained respondent, one column per opinion item, in the order
# given by opinion_cols1 / opinion_cols2.
A_initial = df_clean.loc[:, opinion_cols1].values
A_final = df_clean.loc[:, opinion_cols2].values
N = len(A_initial)  # number of retained rows == number of graph nodes

# Scale constant of the edge-probability formula used further down in this
# cell: exp(-squared_distance / (4 * lambda_val)).
lambda_val = 0.5

#graph using prob of similarity
# Fill a symmetric N x N matrix of connection probabilities. For every
# unordered pair (i, j) the squared Euclidean distance between the two initial
# opinion rows is turned into exp(-sq_dist / (4 * lambda_val)), so identical
# rows give 1 and the value decays as rows move apart. The diagonal is never
# written and therefore stays 0.
prob_matrix = np.zeros((N, N))
for i, row_i in enumerate(A_initial):
    for j in range(i + 1, N):
        sq_dist = np.linalg.norm(row_i - A_initial[j])**2
        similarity = np.exp(-sq_dist / (4 * lambda_val))
        # Mirror the value so the matrix can be read in either index order.
        prob_matrix[i, j] = similarity
        prob_matrix[j, i] = similarity

# Sample an undirected graph on nodes 0..N-1. Each pair i < j receives an edge
# when a fresh uniform draw falls below prob_matrix[i, j]. The generator is
# seeded with 42 and pairs are visited with i ascending, then j ascending, so
# exactly one draw is consumed per pair in a fixed order.
G = nx.Graph()
G.add_nodes_from(range(N))
rng = np.random.default_rng(42)
G.add_edges_from(
    (i, j)
    for i in range(N)
    for j in range(i + 1, N)
    if rng.random() < prob_matrix[i, j]
)

#saving
# Persist the sampled graph as it stands at this point in the notebook.
with open("fixed_graph.pkl", "wb") as graph_file:
    pickle.dump(G, graph_file)
#for verification
# Report node count, edge count and the mean node degree.
print(f"Graph saved with {G.number_of_nodes()} nodes and {G.number_of_edges()} edges and {np.mean([d for _, d in G.degree()])} average degree.")


Graph saved with 282 nodes and 14374 edges and 101.94326241134752 average degree.


In [9]:

#individual's yearid and opinion vector in the graph for access
# Node k corresponds to the k-th row of df_clean, so the k-th 'yearid' value
# and the k-th row of A_initial are stored on node k as the attributes
# "yearid" and "initial_opinion". The nodes already exist, so this only
# updates their attribute dictionaries.
# NOTE(review): fixed_graph.pkl is written by the earlier cell, before these
# attributes are attached, so that file does not contain them.
year_ids = df_clean['yearid'].values
G.add_nodes_from(
    (node, {"yearid": year_ids[node], "initial_opinion": A_initial[node]})
    for node in range(N)
)


In [10]:

#node-to-yearid mapping and initial opinions for access
# Build one table row per graph node holding the node id, the "yearid"
# attribute and the three components of the "initial_opinion" attribute, then
# write the table to CSV without the index column. The component order
# (index 0, 1, 2) follows the column order used when the opinion vectors were
# attached to the nodes.
node_ids = list(G.nodes)
# Look each node's attribute dict up once instead of once per output column.
node_attrs = [G.nodes[n] for n in node_ids]
node_info = pd.DataFrame({
    "node_id": node_ids,
    "yearid": [attrs["yearid"] for attrs in node_attrs],
    "initial_affirm": [attrs["initial_opinion"][0] for attrs in node_attrs],
    "initial_gunlaw": [attrs["initial_opinion"][1] for attrs in node_attrs],
    "initial_party": [attrs["initial_opinion"][2] for attrs in node_attrs],
})
node_info.to_csv("[DIRECTORY]/node_yearid_mapping.csv", index=False)
# The output recorded for this cell shows this confirmation message, but the
# cell contained no statement that prints it; emit it so a fresh run
# reproduces the recorded output.
print("Saved node-to-yearid mapping to 'node_yearid_mapping.csv'")

Saved node-to-yearid mapping to 'node_yearid_mapping.csv'
