In [None]:
import matplotlib.pyplot as plt
import json
import argparse
import numpy as np
import seaborn as sns

from plot_utils import extract_measures, extract_utterance, get_distributions, get_x_positions, order_lists

# from plot_graphs import plot_reward  

# Apply seaborn's default styling to all matplotlib figures drawn below.
sns.set()
# plt.set
# plt.figure.

%matplotlib inline

# Base directory of the log files.
# NOTE(review): `dir_` is not referenced by any cell visible in this notebook.
dir_ = 'final/logs/'

# Maps '<reward scheme>_<channel>' keys (e.g. 'self_both', 'prosoc_linguistic')
# to the training log that is analysed for that configuration.
filenames = {
    'self_both': 'final/new/log_20191102_173823final_fixed_disable-prosocial.log',
    'prosoc_both': 'final/new/log_20191101_231223final_fixed.log',
    'prosoc_linguistic': 'final/logs/log_20191029_191658final_disable-proposal.log' # old
}


In [None]:

def plot_reward(logfile, min_y, max_y, title, max_x, labels=None, save_path='/tmp/out-reward.png'):
    """
    Plot train/test reward curves read from one or more JSON-lines log files.

    logfiles separated by : are combined
    logfiles separated by , go in separate plots
    (: binds tighter than ,)

    The first line of every log file is printed and skipped; every other
    non-empty line must be a JSON object with the keys 'episode',
    'avg_reward_0', 'agent0_test_reward' and 'agent1_test_reward', and
    optionally 'test_reward'.

    Parameters:
        logfile: log file specification string (see above).
        min_y: lower y-limit; None is treated as 0.
        max_y: upper y-limit; when None the y-limits are left untouched.
        title: plot title, or None for no title.
        max_x: records whose 'episode' exceeds this value are ignored
            (None keeps everything).
        labels: optional comma-separated legend prefixes, one per ','-group.
            Groups without a matching label get no prefix.
        save_path: where the figure is saved; None skips saving.

    Series longer than 200 points are repeatedly halved by averaging adjacent
    pairs, so every curve is smoothed the same way and stays aligned with the
    averaged episode axis.
    """
    def _pair_mean(values):
        # Average adjacent pairs; a trailing unpaired element is dropped.
        return [(values[i] + values[i + 1]) / 2 for i in range(0, len(values) - 1, 2)]

    split_logfiles = logfile.split(',')
    label_list = labels.split(',') if labels else []

    for j, logfile_group in enumerate(split_logfiles):
        epoch = []
        reward = []
        test_reward_0 = []
        test_reward_1 = []
        test_reward = []
        for path in logfile_group.split(':'):
            with open(path, 'r') as f:
                for n, line in enumerate(f):
                    if n == 0:
                        print(path, line)
                        continue  # skip first line
                    line = line.strip()
                    if line == '':
                        continue
                    d = json.loads(line)
                    if max_x is not None and d['episode'] > max_x:
                        continue
                    epoch.append(int(d['episode']))
                    reward.append(float(d['avg_reward_0']))
                    test_reward_0.append(float(d['agent0_test_reward']))
                    test_reward_1.append(float(d['agent1_test_reward']))
                    if 'test_reward' in d:
                        test_reward.append(d['test_reward'])

        # Downsample until at most 200 points remain.
        while len(epoch) > 200:
            reward = _pair_mean(reward)
            test_reward = _pair_mean(test_reward)
            test_reward_0 = _pair_mean(test_reward_0)
            test_reward_1 = _pair_mean(test_reward_1)
            epoch = [(epoch[i] + epoch[i + 1]) // 2 for i in range(0, len(epoch) - 1, 2)]

        if min_y is None:
            min_y = 0
        if max_y is not None:
            plt.ylim([min_y, max_y])

        # Only number the curves when there is more than one group to tell apart.
        suffix = ' %s' % (j + 1) if len(split_logfiles) > 1 else ''
        x = np.array(epoch) / 1000
        if len(test_reward) > 0:
            label = label_list[j] + ' ' if j < len(label_list) else ''
            plt.plot(x, reward, label=label + 'train' + suffix)
            plt.plot(x, test_reward, label=label + 'test' + suffix)
            plt.plot(x, test_reward_0, label=label + 'test 0' + suffix)
            plt.plot(x, test_reward_1, label=label + 'test 1' + suffix)
        else:
            # Logs without a joint 'test_reward' only get the training curve.
            plt.plot(x, reward, label='reward' + suffix)

    if title is not None:
        plt.title(title)
    plt.xlabel('Episodes of 128 games (thousands)')
    plt.ylabel('Reward')
    plt.legend()
    if save_path is not None:
        print('saving file')
        plt.savefig(save_path)

In [None]:
# Reward curves for the 'self_both' log: y-axis 0..1, title 'Final', episodes capped at 200000.
# NOTE(review): two labels are passed but there is only one logfile group, so
# 'final_memory-comp' is never used.
plot_reward(filenames['self_both'], 0, 1, 'Final', 200000, 'final,final_memory-comp')

In [None]:
def joint_reward_success(**kwargs):

    """
    FOR TABLE 2

    keys:
    self_proposal, self_linguistic, self_both, self_none,
    prosoc_proposal, prosoc_linguistic, prosoc_both, prosoc_none

    values:
    filenames

    Joint reward success and average number of turns taken for paired agents negotiating
    with random game termination, varying the agent reward scheme and communication channel.

    For every key with a (truthy) filename the 'test_reward' measure is read
    via `extract_measures` and averaged; keys without a filename get the
    sentinel value -1. Each configuration is printed next to the value
    reported in the paper.

    Returns:
        dict mapping every key to {'joint_reward': <mean test reward or -1>}.
    """

    from_paper = {'self_proposal': 0.87, 'self_linguistic': 0.75, 'self_both': 0.87, 'self_none': 0.77,
                  'prosoc_proposal': 0.93,  'prosoc_linguistic': 0.99, 'prosoc_both': 0.92, 'prosoc_none': 0.95}

    socialities = ['self', 'prosoc']
    channels = ['proposal', 'linguistic', 'both', 'none']
    data = {}

    for sociality in socialities:
        for channel in channels:
            key = sociality + '_' + channel
            filename = kwargs.pop(key, None)

            if filename:
                # Use the file passed by the caller, not a notebook-level global.
                extracted = extract_measures(filename, ['test_reward'])
                joint_reward = np.mean(extracted['test_reward'])
            else:
                joint_reward = -1
            data[key] = {'joint_reward': joint_reward}

    for sociality in socialities:
        for channel in channels:
            key = sociality + '_' + channel
            # Header line so the two indented values can be attributed to a configuration.
            print(key)
            print('\tour:       {}'.format(data[key]['joint_reward']))
            print('\tfrom paper: {}'.format(from_paper[key]))
    return data
    
    

In [None]:
# Every entry of `filenames` is passed as a keyword argument; configurations
# that have no entry are reported with a joint reward of -1.
joint_reward_success(**filenames)

In [None]:
def plot_training_curve(filename, min_y=0, max_y=1, title='', max_x=200000, labels=None):
    """
    Plot the per-agent test reward curves stored in a single JSON-lines log.

    The first line of the file is skipped; every other non-empty line must be
    a JSON object with the keys 'episode', 'agent0_test_reward' and
    'agent1_test_reward'.

    Parameters:
        filename: path of the log file.
        min_y: lower y-limit; None is treated as 0.
        max_y: upper y-limit; when None the y-limits are left untouched.
        title: plot title, or None for no title.
        max_x: records whose 'episode' exceeds this value are ignored
            (None keeps everything).
        labels: optional legend prefix. A comma-separated string (the format
            plot_reward takes) or a sequence; only the first entry is used
            because a single file is plotted.

    Series longer than 200 points are repeatedly halved by averaging adjacent
    pairs, which keeps the rewards aligned with the averaged episode axis.
    """
    epoch = []
    test_reward_0 = []
    test_reward_1 = []
    with open(filename, 'r') as f:
        for n, line in enumerate(f):
            if n == 0:
                continue  # skip first line
            line = line.strip()
            if line == '':
                continue
            d = json.loads(line)
            if max_x is not None and d['episode'] > max_x:
                continue
            epoch.append(int(d['episode']))
            test_reward_0.append(float(d['agent0_test_reward']))
            test_reward_1.append(float(d['agent1_test_reward']))

    def _pair_mean(values):
        # Average adjacent pairs; a trailing unpaired element is dropped.
        return [(values[i] + values[i + 1]) / 2 for i in range(0, len(values) - 1, 2)]

    # Downsample until at most 200 points remain.
    while len(epoch) > 200:
        test_reward_0 = _pair_mean(test_reward_0)
        test_reward_1 = _pair_mean(test_reward_1)
        epoch = [(epoch[i] + epoch[i + 1]) // 2 for i in range(0, len(epoch) - 1, 2)]

    if min_y is None:
        min_y = 0
    if max_y is not None:
        plt.ylim([min_y, max_y])

    prefix = ''
    if labels:
        first_label = labels.split(',')[0] if isinstance(labels, str) else labels[0]
        prefix = first_label + ' '

    x = np.array(epoch) / 1000
    plt.plot(x, test_reward_0, label=prefix + 'test 0')
    plt.plot(x, test_reward_1, label=prefix + 'test 1')

    if title is not None:
        plt.title(title)
    plt.xlabel('Episodes of 128 games (thousands)')
    plt.ylabel('Reward')
    plt.legend()
#     print('saving file')
#     plt.savefig('/tmp/out-reward.png')

def training_curves(filenames):
    """
    FOR FIGURE 2a

    Training curves for SELF-INTERESTED agents learning to negotiate under
    the various communication channels.

    Placeholder: the per-channel log key is derived for each channel, but
    nothing is loaded or plotted yet and `filenames` is not read.
    """
    for channel in ('proposal', 'linguistic', 'both', 'none'):
        # TODO: look up and plot the log registered under this key.
        key = 'self_{}'.format(channel)

    return None

In [None]:
# Per-agent test reward curves for the 'self_both' log, using the function's
# default y-limits, title and episode cap.
plot_training_curve(filenames['self_both'])

In [None]:
def plot_utterance(distribution, turn, vocab_len=10, utter_len=6):
    """
    Draw a grouped bar chart of symbol usage per utterance position for one turn.

    Parameters:
        distribution: array indexed as [turn, position, symbol]; for every
            symbol the values over all positions of `turn` become one bar series.
        turn: index of the turn to plot.
        vocab_len: number of symbols (one bar series per symbol).
        utter_len: number of positions in an utterance (one bar group per position).

    Bar x-coordinates come from `get_x_positions`; after transposing, row i is
    used as the x-coordinates of symbol i. The x-ticks are placed at the centre
    of each position's bar group so the tick labels '0'..str(utter_len - 1)
    line up with the groups.
    """
    labels = [str(i) for i in range(utter_len)]

    width = 4
    positions = get_x_positions(vocab_len, utter_len, width=width, outer_width=2).T

    fig, ax = plt.subplots()

    for i in range(vocab_len):
        ax.bar(positions[i], distribution[turn, :, i], width, label=str(i))
    ax.legend()

    ax.set_xlabel('Position in utterance')
    ax.set_ylabel('Occurance')
    ax.set_title('Symbols distribution in position')
    # Tick labels need matching tick locations: use the mean x of each group's bars.
    ax.set_xticks(np.asarray(positions).mean(axis=0))
    ax.set_xticklabels(labels)
    fig.tight_layout()


def unigram_statistics(filenames):
    """
    FOR FIGURE 3a

    Unigram statistics of symbol usage, broken down by turn and by position
    within the utterance, for prosocial agents communicating via the
    linguistic channel.

    Placeholder: not implemented yet; `filenames` is ignored.
    """
    return None


In [None]:
# Read the utterances logged in the 'prosoc_both' run and convert them with
# get_distributions. NOTE(review): the result is presumably indexed as
# [turn, position, symbol], which is how plot_utterance reads it — confirm in plot_utils.
extracted = extract_utterance(filenames['prosoc_both'])
distribution = get_distributions(extracted)

In [None]:
# Bar chart for turn index 0, with the default vocab_len and utter_len.
plot_utterance(np.array(distribution), 0)

In [None]:
def bigram_statistics(filenames):
    """
    FOR FIGURE 3b

    Bigram counts for prosocial agents communicating via the linguistic channel, sorted by frequency.

    Parameters:
        filenames: mapping that must contain the key 'prosoc_linguistic'
            (path of the log to analyse).

    Only the odd-indexed entries of what `extract_utterance` returns are
    counted. NOTE(review): these look like the second agent's messages
    (the even-indexed ones were computed before but never used) — confirm
    whether both agents should be included in the figure.

    A bigram is the string concatenation of two neighbouring symbols of a
    message. Counts are passed through `order_lists` and drawn as a bar chart
    on the current pyplot axes.

    Raises:
        ValueError: if no messages are found for the selected entries.
    """
    filename = filenames['prosoc_linguistic']
    extracted = extract_utterance(filename)

    # One row per message, symbols converted to strings so they can be concatenated.
    messages_b = np.array([list(map(str, msg)) for sublist in extracted[1::2] for msg in sublist])
    if messages_b.ndim < 2:
        raise ValueError('no utterances found in {}'.format(filename))

    bigrams_b = []
    for i in range(messages_b.shape[1] - 1):
        # Element-wise concatenation of column i with column i + 1.
        bigrams_b += list(np.char.add(messages_b[:, i], messages_b[:, i + 1]))

    unique, counts = np.unique(bigrams_b, return_counts=True)
    counts, unique = order_lists(counts, unique)
    x = np.arange(len(counts))
    plt.bar(x, counts)

# Uses filenames['prosoc_linguistic'], the entry marked '# old' in the filenames dict.
bigram_statistics(filenames)