In [1]:
import numpy as np
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt
import json
import openai
import random
import os
import collections
import copy

# Load the OpenAI credentials from a local JSON file so that no secret is
# hard-coded in the notebook. The file must provide the keys
# 'OPENAI_API_KEY' and 'OPENAI_ORG'.
with open('params.json') as params_file:
    params = json.loads(params_file.read())

# Configure the openai module globally; get_response() relies on this.
openai.api_key = params['OPENAI_API_KEY']
openai.organization = params['OPENAI_ORG']

def get_response(prompt, model='gpt-3.5-turbo', temperature=0.9):
    """Send ``prompt`` to the OpenAI chat-completion API and return the reply text.

    Args:
        prompt: Text sent as the single user message.
        model: Name of the chat model to query.
        temperature: Sampling temperature forwarded to the API.

    Returns:
        The ``content`` of the message in the first returned choice.
    """
    # The system message fixes the persona used for every request.
    system_message = {"role": "system", "content": "You are mimicking a real-life person who wants to make friends."}
    user_message = {"role": "user", "content": prompt}

    completion = openai.ChatCompletion.create(
        model=model,
        temperature=temperature,
        messages=[system_message, user_message],
    )

    first_choice = completion.choices[0]
    return first_choice['message']['content']

def network_growth(n0, temperature, method='llm'):
    """Grow a two-community network by letting every node add one new edge.

    The initial graph is a stochastic block model with two blocks of
    ``n0 // 2`` nodes each (edge probability 0.5 inside a block, 0.1 across
    blocks, fixed seed 0). Nodes are then visited in order and each visited
    node ``t`` is connected to one node chosen according to ``method``:

    * ``'llm'``    -- the node returned by ``select_neighbor``.
    * ``'random'`` -- a uniformly random node that is neither ``t`` nor
      already adjacent to ``t``.
    * ``'winner'`` -- the non-adjacent node sharing the most neighbors with
      ``t`` (the first such node in node order on ties).

    Args:
        n0: Total number of nodes (split evenly across the two blocks).
        temperature: Sampling temperature forwarded to ``select_neighbor``;
            only used when ``method == 'llm'``.
        method: One of ``'llm'``, ``'random'`` or ``'winner'``.

    Returns:
        A tuple ``(Gs, results)``. ``Gs`` holds one copy of the graph per
        visited node, taken after that node's step whether or not an edge
        was added. ``results`` holds one dict per added edge with the keys
        ``'name'``, ``'common_friends'`` and ``'reason'``.

    Raises:
        ValueError: If ``method`` is not one of the supported names.
    """
    if method not in ('llm', 'random', 'winner'):
        raise ValueError(f"Unknown method {method!r}; expected 'llm', 'random' or 'winner'")

    G = nx.stochastic_block_model([n0 // 2, n0 // 2], [[0.5, 0.1], [0.1, 0.5]], seed=0)

    Gs = []
    results = []

    # Iterate over a snapshot of the nodes so that the loop never depends on
    # the live node view while the graph is being modified.
    for t in list(G.nodes()):

        if method == 'llm':
            result = select_neighbor(G, t, temperature)
            if result:
                v = result['name']
                G.add_edge(t, v)
                results.append(result)
        else:
            neighbors_t = set(G.neighbors(t))
            # Valid targets exclude t itself (no self-loops) and nodes that
            # are already connected to t.
            candidates = [u for u in G.nodes() if u != t and u not in neighbors_t]

            # A node adjacent to everyone has nobody left to befriend; skip
            # it, mirroring the 'llm' branch when no selection is returned.
            if candidates:
                if method == 'random':
                    v = random.choice(candidates)
                else:
                    # max() keeps the first maximal element, so ties are
                    # broken by node order; a candidate is chosen even when
                    # the best overlap is zero.
                    v = max(candidates, key=lambda u: len(set(G.neighbors(u)) & neighbors_t))

                G.add_edge(t, v)
                results.append({'name' : v, 'common_friends' : list(set(G.neighbors(v)) & set(G.neighbors(t))), 'reason' : method})

        Gs.append(G.copy())

    return Gs, results

def select_neighbor(G, t, temperature, max_attempts=10):
    """Ask the language model which non-neighbor node ``t`` should befriend.

    Every node that is neither ``t`` nor already adjacent to ``t`` is listed
    in the prompt together with the neighbors it shares with ``t``. The
    model's reply is parsed as JSON and accepted only when its ``'name'`` is
    one of those listed candidates.

    Args:
        G: Graph exposing ``nodes()`` and ``neighbors(node)``.
        t: The node that is choosing a new friend.
        temperature: Sampling temperature forwarded to ``get_response``.
        max_attempts: Maximum number of model queries before giving up.

    Returns:
        The parsed JSON reply (as returned by ``json.loads``) of the first
        valid answer, or ``None`` when there is no candidate or no valid
        answer was obtained within ``max_attempts`` queries.
    """
    neighbors_t = set(G.neighbors(t))
    common_friends = [
        {'name' : v, 'common_friends' : list(set(G.neighbors(v)) & neighbors_t)}
        for v in G.nodes()
        if v != t and v not in neighbors_t
    ]

    # Nothing to choose from: do not spend API calls on an empty list.
    if not common_friends:
        return None

    # Only these names are acceptable answers; this rules out t itself and
    # nodes that are already friends with t.
    candidates = {entry['name'] for entry in common_friends}

    # NOTE(review): the prompt text mentions the keys 'common friends' and
    # 'number_of_common_friends' while the data uses 'common_friends'. The
    # wording is kept unchanged so previously collected results stay
    # comparable -- confirm whether it should be corrected.
    prompt = f"""
    # Task
    Your task is to select a person to be friends with.

    # Input
    The input is a list of dictionaries. Each dictionary has two keys: 'name' and 'common friends'.
    'name' is the name of the person and 'number_of_common_friends' are the common friends you have with the other person.
    The data is given below after chevrons:
    <DEGREES>
    {json.dumps(common_friends, indent=4)}
    </DEGREES>

    # Output
    The output should be given in JSON format with the following structure

    {{
        "name" : name of the person you selected,
        "common_friends" : the common friends of the person you selected,
        "reason" : reason for selecting the person
    }}

    # Notes
    - The output must be a valid JSON object.

    ```json
    """

    for _ in range(max_attempts):
        try:
            # The prompt ends with an opening code fence, so the JSON is
            # expected before the closing fence in the reply.
            result = json.loads(get_response(prompt, temperature=temperature).split('```')[0])
            if result['name'] in candidates:
                print('COMMON FRIENDS', common_friends)
                print('NEW EDGE', result)
                return result
        except Exception as e:
            # Best-effort retry loop: API errors and malformed replies are
            # reported and the query is repeated.
            print(e)

    return None
      

def run_network_formation_experiment(n_min, n_max, n_step, num_simulations, outfile, temperatures, method):
    """Run the network-growth simulations and append the results to ``outfile``.

    One simulation is run for every combination of network size
    ``n`` in ``range(n_min, n_max + 1, n_step)``, repetition index
    ``i`` in ``range(num_simulations)`` and temperature. Each finished
    simulation is written as one JSON line with the keys ``'n'``,
    ``'temperature'``, ``'simulation'``, ``'graphs'`` and ``'reasons'``.

    Scenarios already present in ``outfile`` (matched on
    ``(n, simulation, temperature)``) are skipped, so an interrupted run can
    be resumed by calling the function again.

    Args:
        n_min: Smallest network size.
        n_max: Largest network size (inclusive).
        n_step: Step between network sizes.
        num_simulations: Number of repetitions per configuration.
        outfile: Path of the JSON-lines results file (appended to).
        temperatures: Sequence of temperatures to run.
        method: Growth method forwarded to ``network_growth``.

    Returns:
        None.
    """
    saved_scenarios = set()

    if os.path.exists(outfile):
        with open(outfile) as f:
            for line in f:
                # Tolerate blank lines, e.g. a trailing newline left by an editor.
                if not line.strip():
                    continue
                scenario = json.loads(line)
                saved_scenarios.add((scenario['n'], scenario['simulation'], scenario['temperature']))

    # Make sure the directory of the results file exists before appending.
    out_dir = os.path.dirname(outfile)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    # Only the 'llm' method is run for every temperature; for the other
    # methods a single run (recorded under the first temperature) is made.
    temperatures = list(temperatures)
    active_temperatures = temperatures if method == 'llm' else temperatures[:1]

    with open(outfile, 'a+') as f:
        for n in range(n_min, n_max + 1, n_step):
            for i in range(num_simulations):
                for temperature in active_temperatures:
                    if (n, i, temperature) in saved_scenarios:
                        print(f'Skipping simulation for n={n}, i={i}, temperature={temperature}')
                        continue

                    print(f'Running simulation for n={n}, i={i}, temperature={temperature}')

                    Gs, reasons = network_growth(n, temperature=temperature, method=method)

                    temp = {
                        'n' : n,
                        'temperature' : temperature,
                        'simulation' : i,
                        'graphs' : [nx.to_dict_of_lists(G) for G in Gs],
                        'reasons' : reasons
                    }

                    f.write(json.dumps(temp) + '\n')
                    # Flush after every simulation so finished results
                    # survive an interruption of a long run.
                    f.flush()

def draw_graph(G, ax, G0=None):
    """Draw ``G`` on ``ax``, optionally highlighting edges that are not in ``G0``.

    Nodes whose label is below half the node count are drawn red, all other
    nodes blue. Positions come from a spring layout of ``G``.

    Args:
        G: Graph to draw.
        ax: Matplotlib axes to draw on.
        G0: Optional reference graph. When given, the edges of ``G0`` are
            drawn thin and black and the edges of ``G`` that are absent from
            ``G0`` are drawn thicker and red.
    """
    pos = nx.spring_layout(G)

    # Comparing against the threshold directly avoids a list-membership
    # test for every node.
    half = len(G.nodes()) // 2
    node_color = ['red' if n < half else 'blue' for n in G.nodes()]

    # Test for None explicitly: a graph object is falsy when it has no nodes.
    if G0 is None:
        nx.draw(G, pos, ax=ax, node_size=10, width=0.1, node_color=node_color, alpha=0.7, edge_color='black')
        return

    G0_edges = list(G0.edges())
    # An undirected edge may be reported as (u, v) by one graph and (v, u)
    # by the other, so compare with has_edge() instead of tuple equality.
    G_edges = [(u, v) for u, v in G.edges() if not G0.has_edge(u, v)]

    nx.draw_networkx_edges(G, pos, edgelist=G0_edges, width=0.1, alpha=0.5, edge_color='black', ax=ax)
    nx.draw_networkx_edges(G, pos, edgelist=G_edges, width=0.5, alpha=1, edge_color='red', ax=ax)

    nx.draw_networkx_nodes(G, pos, nodelist=list(G.nodes()), node_size=10, node_color=node_color, alpha=0.7, ax=ax)

    ax.set_axis_off()

def _rebuild_graphs(record):
    """Rebuild the list of graphs stored as adjacency dicts in one result record."""
    graphs = []
    for adjacency in record['graphs']:
        G = nx.Graph()

        for k, v in adjacency.items():
            # JSON object keys are always strings; convert them back to ints.
            k = int(k)
            G.add_node(k)
            for n in v:
                G.add_edge(k, n)

        graphs.append(G)
    return graphs


def _mean_and_ci(trajectories):
    """Return the pointwise mean and 1.96-standard-error half-width of the runs."""
    values = np.array(trajectories)
    mean = values.mean(axis=0)
    ci = 1.96 * values.std(axis=0) / np.sqrt(len(values))
    return mean, ci


def _plot_band(axis, trajectories, color, label, linestyle='-', **fill_kwargs):
    """Plot the mean trajectory on ``axis`` with a shaded band around it."""
    mean, ci = _mean_and_ci(trajectories)
    axis.plot(mean, color=color, linestyle=linestyle, label=label)
    axis.fill_between(np.arange(len(mean)), mean - ci, mean + ci, alpha=0.2, color=color, **fill_kwargs)
    return mean


def analyze_experiments(filename):
    """Plot the simulations stored in ``filename`` and compare them with null models.

    For every record of the JSON-lines file a three-panel figure is saved
    (first graph, last graph, and the average clustering coefficient and
    algebraic connectivity over time). Afterwards one overall figure is
    saved with one panel per ``(n, temperature)`` pair showing the mean
    trajectories with a band of 1.96 standard errors for the stored runs
    and for the ``'random'`` and ``'winner'`` null models, which are
    simulated here via ``network_growth`` once per stored record.

    Figures are written to ``figures/principle_2``.

    Args:
        filename: Path of the JSON-lines file written by the experiment.

    Returns:
        None.
    """
    with open(filename) as f:
        # Skip blank lines so a trailing newline does not break parsing.
        data = [json.loads(line) for line in f.read().splitlines() if line.strip()]

    if not data:
        # Without records there is nothing to plot (and no panels to create).
        print(f'No experiments found in {filename}')
        return

    os.makedirs('figures/principle_2', exist_ok=True)

    clustering_coefficients = collections.defaultdict(list)
    algebraic_connectivities = collections.defaultdict(list)

    # Per-simulation figures
    for d in data:
        Gs = _rebuild_graphs(d)

        fig, ax = plt.subplots(1, 3, figsize=(15, 5))

        fig.suptitle(f'Graph created based on Principle 2 with $n = {d["n"]}$, temperature={d["temperature"]}')

        # First and last snapshot; edges absent from the first one are highlighted.
        for i, t in enumerate([0, len(Gs) - 1]):
            ax[i].set_title(f'$t = {t}$')
            draw_graph(Gs[t], ax=ax[i], G0=Gs[0])

        clustering_coefficient = [nx.average_clustering(G) for G in Gs]
        algebraic_connectivity = [nx.algebraic_connectivity(G) for G in Gs]

        ax[-1].set_title('Metrics')
        ax[-1].plot(clustering_coefficient, label='Clustering Coefficient', color='r')

        # The two metrics live on different scales, hence the twin y-axis.
        ax_y = ax[-1].twinx()

        ax_y.plot(algebraic_connectivity, label='Algebraic Connectivity', color='b')
        ax[-1].set_xlabel('t')
        ax[-1].set_ylabel('Average Clustering Coefficient', color='r')
        ax_y.set_ylabel('Algebraic Connectivity', color='b')

        clustering_coefficients[d['n'], d['temperature']].append(clustering_coefficient)
        algebraic_connectivities[d['n'], d['temperature']].append(algebraic_connectivity)

        fig.tight_layout()
        fig.savefig(f'figures/principle_2/principle_2_{d["n"]}_{d["simulation"]}_{d["temperature"]}.png')

    # Null models: one simulated run per stored record and method.
    null_methods = ['random', 'winner']
    null_linestyles = {'random' : '--', 'winner' : ':'}
    clustering_coefficients_null = {method : collections.defaultdict(list) for method in null_methods}
    algebraic_connectivities_null = {method : collections.defaultdict(list) for method in null_methods}

    for d in data:
        for method in null_methods:
            Gs, _ = network_growth(d['n'], d['temperature'], method=method)

            clustering_coefficients_null[method][d['n'], d['temperature']].append(
                [nx.average_clustering(G) for G in Gs])
            algebraic_connectivities_null[method][d['n'], d['temperature']].append(
                [nx.algebraic_connectivity(G) for G in Gs])

    # Overall figure. A single sorted key list is used for every curve so
    # that all curves of one (n, temperature) pair land on the same panel.
    keys = sorted(clustering_coefficients)

    fig, ax = plt.subplots(1, len(keys), figsize=(5 * len(keys), 5), squeeze=False)
    ax_x = [ax[0, i].twinx() for i in range(len(keys))]

    for i, k in enumerate(keys):
        left, right = ax[0, i], ax_x[i]

        # Stored (LLM) runs: clustering on the left axis, connectivity on the twin axis.
        mean = _plot_band(left, clustering_coefficients[k], color='r', label='LLM')
        left.set_title(f'$n = {k[0]}$, temperature = {k[1]}')
        left.set_xlabel('t')
        left.set_ylabel('Average Clustering Coefficient', color='r')
        left.set_ylim(0.35, 0.5)
        left.set_xlim(0, len(mean) - 1)

        mean = _plot_band(right, algebraic_connectivities[k], color='b', label='LLM')
        right.set_ylabel('Algebraic Connectivity', color='b')
        right.set_ylim(3.5, 6)
        right.set_xlim(0, len(mean) - 1)

        # Null models share the colors and are distinguished by line style.
        for method in null_methods:
            _plot_band(left, clustering_coefficients_null[method][k], color='r',
                       label=method.capitalize(), linestyle=null_linestyles[method])
            _plot_band(right, algebraic_connectivities_null[method][k], color='b',
                       label=method.capitalize(), linestyle=null_linestyles[method], hatch='||')

        left.legend(loc='upper left')
        right.legend(loc='upper right')

    fig.tight_layout()

    fig.savefig('figures/principle_2/principle_2_overall.png')

# Experiment driver: simulate networks of 50 nodes, 10 repetitions for each
# of three temperatures using the LLM method, then analyze the saved results.
results_path = 'outputs/principle_2.jsonl'

run_network_formation_experiment(
    n_min=50,
    n_max=50,
    n_step=1,
    num_simulations=10,
    outfile=results_path,
    temperatures=[0.5, 1.0, 1.5],
    method='llm',
)
analyze_experiments(filename=results_path)

Skipping simulation for n=50, i=0, temperature=0.5
Skipping simulation for n=50, i=0, temperature=1.0
Skipping simulation for n=50, i=0, temperature=1.5
Skipping simulation for n=50, i=1, temperature=0.5
Skipping simulation for n=50, i=1, temperature=1.0
Skipping simulation for n=50, i=1, temperature=1.5
Skipping simulation for n=50, i=2, temperature=0.5
Skipping simulation for n=50, i=2, temperature=1.0
Skipping simulation for n=50, i=2, temperature=1.5
Skipping simulation for n=50, i=3, temperature=0.5
Skipping simulation for n=50, i=3, temperature=1.0
Skipping simulation for n=50, i=3, temperature=1.5
Skipping simulation for n=50, i=4, temperature=0.5
Skipping simulation for n=50, i=4, temperature=1.0
Skipping simulation for n=50, i=4, temperature=1.5
Skipping simulation for n=50, i=5, temperature=0.5
Skipping simulation for n=50, i=5, temperature=1.0
Skipping simulation for n=50, i=5, temperature=1.5
Skipping simulation for n=50, i=6, temperature=0.5
Skipping simulation for n=50, i

