# Results analysis

In [1]:
import json
from pathlib import Path
from shutil import copy

import matplotlib.pyplot as plt
import pandas as pd
import plotly.graph_objects as go
import seaborn as sns
from cltl.dialogue_evaluation.graph_evaluation import GraphEvaluator
from cltl.dialogue_evaluation.metrics.brain_measures import *
from cltl.dialogue_evaluation.metrics.graph_measures import *
from rdflib import ConjunctiveGraph
from rdflib.extras.external_graph_libs import rdflib_to_networkx_multidigraph
from sklearn.preprocessing import MinMaxScaler  #, StandardScaler, MaxAbsScaler


% matplotlib inline

UsageError: Line magic function `%` not found.


In [4]:
ABSOLUTE_PATH = '.'
DATA_FOLDER = ABSOLUTE_PATH + f'/../resources/'

RDF_FOLDERS = [
    Path(f'../resources/pilot/Total-triples_2_tae/2022-03-07-17-20/').resolve(),
    Path(f'../resources/pilot/Ratio-conflicts-to-triples_2_thomas/2022-03-07-14-25/').resolve(),
    Path(f'../resources/pilot/Ratio-conflicts-to-statements_2_lea/2022-03-07-14-25/').resolve(),
    Path(f'../resources/pilot/Ratio-perspectives-to-statements_2_piek/2022-03-07-15-17/').resolve()
]

SCENARIOS_FOLDER = Path(f'../resources/pilot/').resolve()

CONVO_IDS = ['Total-triples', 'Ratio-conflicts-to-triples',
             'Ratio-conflicts-to-statements', 'Ratio-perspectives-to-statements']

THOUGHT_TYPES = ['complement_conflict', 'negation_conflict',
                 'subject_gap', 'object_gap',
                 'no_statement_novelty', 'statement_novelty', 'entity_novelty', 'overlap']

METRICS = ['Average degree', 'Shortest path', 'Sparseness',
           'Average degree centrality', 'Number of strong components']


## Automatic evaluation

### Evaluate all scenarios individually

In [5]:
# Gather all scenarios
RDF_FOLDERS_ALL_CONVOS = []

for rdf_folder in RDF_FOLDERS:
    # Find folders relates to this reward condition
    reward = rdf_folder.parent.stem.split('_')[0]
    scenarios_with_reward = sorted([path for path in SCENARIOS_FOLDER.glob(f'*{reward}*')])
    df_for_reward = pd.DataFrame(columns=['Mention ID', 'Turn', 'Speaker', 'Response', 'rdf_file'])

    for scenario in scenarios_with_reward:
        # Read files from this conversation
        scenario_rdf_folder = [f for f in scenario.iterdir() if f.is_dir() and f.stem[0].isdigit()][0]
        files = sorted([path for path in scenario_rdf_folder.glob('*.trig')])

        # Gather all folders for automatic evaluation of each conversation
        RDF_FOLDERS_ALL_CONVOS.append(scenario_rdf_folder)

        # Make directory per agent merging all conversations
        processed_folder = Path(DATA_FOLDER) / 'processed_pilot' / reward
        processed_folder.mkdir(parents=True, exist_ok=True)
        processed_folder_rdf = processed_folder / 'rdf'
        processed_folder_rdf.mkdir(parents=True, exist_ok=True)

        # Copy rdf files
        for file in files:
            copy(file, processed_folder_rdf / file.stem)

        # Merge turn to trig
        full_df = pd.read_json(scenario / f'turn_to_trig_file.json')
        print(scenario)
        print(full_df.head())


/Users/sbaez/Documents/PhD/research/thought-selection/resources/pilot/Total-triples_1_lea
   Mention ID  Turn Speaker                     Response  \
0           0     0     lea   I'm doing well, thank you.   
1           1     1     lea  I like going to the cinema.   
2           2     2     lea        I don't own a cinema.   
3           3     3     lea      I ate sour candy today.   
4           4     4     lea   I recently read Jane Eyre.   

                                            rdf_file  
0  [brain_log_2022-03-04-14-23-26.trig, brain_log...  
1               [brain_log_2022-03-04-14-24-17.trig]  
2               [brain_log_2022-03-04-14-28-07.trig]  
3               [brain_log_2022-03-04-14-29-15.trig]  
4               [brain_log_2022-03-04-14-30-14.trig]  
/Users/sbaez/Documents/PhD/research/thought-selection/resources/pilot/Total-triples_2_tae
   Mention ID  Turn Speaker                              Response  \
0           0     0     tae               Russia invaded Ukr

In [6]:
# Evaluate each conversation
graph_evaluator = GraphEvaluator()
print(RDF_FOLDERS_ALL_CONVOS)
for SCENARIO_FOLDER in RDF_FOLDERS_ALL_CONVOS:
    graph_evaluator.evaluate_conversation(SCENARIO_FOLDER.parent, rdf_folder=Path(SCENARIO_FOLDER))

2023-01-08 19:57:07 -     INFO -                      cltl.dialogue_evaluation.GraphEvaluator - Booted


[PosixPath('/Users/sbaez/Documents/PhD/research/thought-selection/resources/Total-triples_1_lea/2022-03-04-14-13'), PosixPath('/Users/sbaez/Documents/PhD/research/thought-selection/resources/Total-triples_2_tae/2022-03-07-17-20'), PosixPath('/Users/sbaez/Documents/PhD/research/thought-selection/resources/Ratio-conflicts-to-triples_1_piek/2022-03-04-14-48'), PosixPath('/Users/sbaez/Documents/PhD/research/thought-selection/resources/Ratio-conflicts-to-triples_2_thomas/2022-03-07-14-25'), PosixPath('/Users/sbaez/Documents/PhD/research/thought-selection/resources/Ratio-conflicts-to-statements_1_dimitris/2022-03-06-19-32'), PosixPath('/Users/sbaez/Documents/PhD/research/thought-selection/resources/Ratio-conflicts-to-statements_2_lea/2022-03-07-14-25'), PosixPath('/Users/sbaez/Documents/PhD/research/thought-selection/resources/Ratio-perspectives-to-statements_1_thomas/2022-03-04-13-30'), PosixPath('/Users/sbaez/Documents/PhD/research/thought-selection/resources/Ratio-perspectives-to-statemen

  ent = np.nansum(dist * np.log2(1.0 / dist))
  ent = np.nansum(dist * np.log2(1.0 / dist))


	Calculating RDF graph metrics
	Calculating interaction metrics
	Found RDF, cumulative: 2
	Adding triples
	Calculating graph metrics
	Calculating RDF graph metrics
	Calculating interaction metrics
Processing turn 1/20
	Found RDF, cumulative: 3
	Adding triples
	Calculating graph metrics
	Calculating RDF graph metrics
	Calculating interaction metrics
Processing turn 2/20
	Found RDF, cumulative: 4
	Adding triples
	Calculating graph metrics
	Calculating RDF graph metrics
	Calculating interaction metrics
Processing turn 3/20
	Found RDF, cumulative: 5
	Adding triples
	Calculating graph metrics
	Calculating RDF graph metrics
	Calculating interaction metrics
Processing turn 4/20
	Found RDF, cumulative: 6
	Adding triples
	Calculating graph metrics
	Calculating RDF graph metrics
	Calculating interaction metrics
Processing turn 5/20
	Found RDF, cumulative: 7
	Adding triples
	Calculating graph metrics
	Calculating RDF graph metrics
	Calculating interaction metrics
Processing turn 6/20
	Found RDF, 

## Accumulated episodic memories


### Create brains

In [5]:
brains_as_graph = []
brains_as_netx = []
graph_evaluator = GraphEvaluator()
RDF_FOLDERS_ALL_CONVOS = []

for rdf_folder in RDF_FOLDERS:
    # Find folders relates to this reward condition
    scenarios = rdf_folder.parent.parent
    reward = rdf_folder.parent.stem.split('_')[0]
    scenarios_with_reward = sorted([path for path in scenarios.glob(f'*{reward}*')])

    # Create brain for this reward condition
    print(f"\nCreate new brain: {reward}")
    brain_as_graph = ConjunctiveGraph()
    brain_as_netx = None

    for scenario in scenarios_with_reward:
        # Read files from this conversation
        scenario_rdf_folder = [f for f in scenario.iterdir() if f.is_dir() and f.stem[0].isdigit()][0]
        files = sorted([path for path in scenario_rdf_folder.glob('*.trig')])

        # Gather all folders for automatic evaluation of each conversation
        RDF_FOLDERS_ALL_CONVOS.append(scenario_rdf_folder)

        # Make directory per agent merging all conversations
        evaluation_folder = Path(scenario_folder / 'evaluation/')
        evaluation_folder.mkdir(parents=True, exist_ok=True)

        print(f"\tAdding triples to brain: {files[-1].parent.parent}")
        brain_as_graph.parse(files[-1], format='trig')
        brain_as_netx = rdflib_to_networkx_multidigraph(brain_as_graph)

        # Calculate metrics (only when needed! otherwise copy row)
        # full_df = graph_evaluator._calculate_metrics(brain_as_graph, brain_as_netx, full_df, idx)

    brains_as_graph.append(brain_as_graph)
    brains_as_netx.append(brain_as_netx)

print(f"\nFound {len(brains_as_graph)} final graphs")


Create new brain: Total-triples
	Adding triples to brain: /Users/sbaez/Documents/PhD/research/thought-selection/resources/Total-triples_1_lea
	Adding triples to brain: /Users/sbaez/Documents/PhD/research/thought-selection/resources/Total-triples_2_tae

Create new brain: Ratio-conflicts-to-triples
	Adding triples to brain: /Users/sbaez/Documents/PhD/research/thought-selection/resources/Ratio-conflicts-to-triples_1_piek
	Adding triples to brain: /Users/sbaez/Documents/PhD/research/thought-selection/resources/Ratio-conflicts-to-triples_2_thomas

Create new brain: Ratio-conflicts-to-statements
	Adding triples to brain: /Users/sbaez/Documents/PhD/research/thought-selection/resources/Ratio-conflicts-to-statements_1_dimitris
	Adding triples to brain: /Users/sbaez/Documents/PhD/research/thought-selection/resources/Ratio-conflicts-to-statements_2_lea

Create new brain: Ratio-perspectives-to-statements
	Adding triples to brain: /Users/sbaez/Documents/PhD/research/thought-selection/resources/Rat

### Score brains

In [11]:
def score_brains(brains_as_graph, brains_as_netx):
    graphs_scores = []
    for brain_as_graph, brain_as_netx in zip(brains_as_graph, brains_as_netx):
        print(f"\nEvaluating brain")

        print(f"\tCrude statistics")
        turns = get_number_utterances(brain_as_graph)
        print(f"\t\tTurns: {turns}")
        claims = get_number_statements(brain_as_graph)
        print(f"\t\tClaims: {claims}")
        perspectives = get_number_perspectives(brain_as_graph)
        print(f"\t\tPerspectives: {perspectives}")

        print(f"\tGraph metrics")
        ad = get_avg_degree(brain_as_netx)
        print(f"\t\t{METRICS[0]}: {ad}")
        sp = get_shortest_path(brain_as_netx)
        print(f"\t\t{METRICS[1]}: {sp}")
        s = get_sparseness(brain_as_netx)
        print(f"\t\t{METRICS[2]}: {s}")
        nc = get_avg_degree_centr(brain_as_netx)
        print(f"\t\t{METRICS[3]}: {nc}")
        comp = get_number_strong_comp(brain_as_netx)
        print(f"\t\t{METRICS[4]}: {comp}")

        scores = [ad, nc, sp, comp, s]
        graphs_scores.append(scores)

    graphs_scores = np.array(graphs_scores)

    return graphs_scores

In [12]:
graphs_scores = score_brains(brains_as_graph, brains_as_netx)


Evaluating brain
	Crude statistics
		Turns: 47
		Claims: 45
		Perspectives: 43
	Graph metrics
		Average degree: 5.538548432646145
		Shortest path: 2.7748566257317115
		Sparseness: 0.0007811028927205342
		Average degree centrality: 0.0015645617041373293
		Number of strong components: 3266

Evaluating brain
	Crude statistics
		Turns: 54
		Claims: 49
		Perspectives: 48
	Graph metrics
		Average degree: 5.574527252502781
		Shortest path: 2.776659479322653
		Sparseness: 0.0007740184681780894
		Average degree centrality: 0.0015506334499312327
		Number of strong components: 3305

Evaluating brain
	Crude statistics
		Turns: 61
		Claims: 56
		Perspectives: 55
	Graph metrics
		Average degree: 5.549288208434058
		Shortest path: 2.780774172986828
		Sparseness: 0.0007444774369371193
		Average degree centrality: 0.0014909425600306446
		Number of strong components: 3384

Evaluating brain
	Crude statistics
		Turns: 59
		Claims: 50
		Perspectives: 53
	Graph metrics
		Average degree: 5.579868708971554
	

In [32]:
graphs_scores

array([[5.53854843e+00, 1.56456170e-03, 2.77485663e+00, 3.26600000e+03,
        7.81102893e-04],
       [5.57452725e+00, 1.55063345e-03, 2.77665948e+00, 3.30500000e+03,
        7.74018468e-04],
       [5.54928821e+00, 1.49094256e-03, 2.78077417e+00, 3.38400000e+03,
        7.44477437e-04],
       [5.57986871e+00, 1.52663987e-03, 2.77905529e+00, 3.34500000e+03,
        7.61914110e-04]])

### Star Plot

In [30]:
def plot_spider_scores(graphs_scores, graph_ids):
    scaler = MinMaxScaler()
    scaled_graph_scores = scaler.fit_transform(graphs_scores)

    fig = go.Figure()

    for scores, id in zip(scaled_graph_scores, graph_ids):
        fig.add_trace(go.Scatterpolar(r=scores,
                                      theta=METRICS,
                                      fill='toself',
                                      name=f'{id}'))

    fig.update_layout(polar=dict(radialaxis=dict(visible=True), ), showlegend=True)

    fig.show()
    fig.write_image(Path(DATA_FOLDER).resolve() / f'aggregation' / f'final_graph_starplot.png')


In [31]:
plot_spider_scores(graphs_scores, CONVO_IDS)

In [10]:

from cltl.dialogue_evaluation.metrics_plotting import Plotter

plotter = Plotter()
plotter.plot_conversations(Path(DATA_FOLDER) / 'pilot',
                           metrics=['GROUP A - Average degree', 'GROUP A - Sparseness',
                                    # 'GROUP A - Shortest path',
                                    'GROUP A - Number of components', 'GROUP A - Centrality entropy',
                                    'GROUP B - Average population'])

2023-01-09 04:33:40 -     INFO -                             cltl.dialogue_evaluation.Plotter - Booted


Plotting metric GROUP A - Average degree
Plotting metric GROUP A - Sparseness
Plotting metric GROUP A - Number of components
Plotting metric GROUP A - Centrality entropy
Plotting metric GROUP B - Average population


## Utility estimates

### Read policies

In [None]:
types_dfs, content_dfs = [], []
for rdf_folder, id in zip(RDF_FOLDERS, CONVO_IDS):
    thoughts_file = rdf_folder.parent / "thoughts.json"
    with open(thoughts_file, "r") as file:
        data = json.load(file)

    df = pd.DataFrame(data["data"])

    # Filter columns
    missing_columns = [col for col in THOUGHT_TYPES if col not in df.columns]
    for col in missing_columns:
        df[col] = 0
    types_df = df[THOUGHT_TYPES]
    content_df = df.drop(THOUGHT_TYPES, axis=1)

    # Merge statement novelties
    types_df['statement_novelty'] = types_df['statement_novelty'] + types_df['no_statement_novelty']
    types_df.drop('no_statement_novelty', axis=1, inplace=True)

    types_dfs.append(types_df)
    content_dfs.append(content_df)

### Pie plot

In [None]:
def plot_pie_counts(dfs):
    fig, axes = plt.subplots(1, 4, figsize=(12, 12))
    labels = THOUGHT_TYPES
    if 'no_statement_novelty' in labels:
        labels.remove('no_statement_novelty')

    for idx, df in enumerate(dfs):
        def fmt(x):
            total = df.loc['count'].sum()
            return '{:.1f}% ({:.0f})'.format(x, total * x / 100)

        # Plot
        # ax = axes[idx // 2, idx % 2]
        ax = axes[idx]
        fig.add_subplot(ax)
        plt.pie(df.loc['count'], autopct=fmt, textprops={'fontsize': 9})

        # Format plot
        ax.set_title(CONVO_IDS[idx])
        fig.subplots_adjust(wspace=.01, hspace=.01)
        plt.axis('off')

    fig.legend(labels, loc="center left")

    # fig.show()
    fig.savefig(Path(DATA_FOLDER).resolve() / f'aggregation' / f'thought_types_piechart.png', dpi=300,
                transparent=False,
                bbox_inches='tight')
    # plt.close()


In [None]:
plot_pie_counts(types_dfs)

In [None]:
def plot_bar_utility(content_dfs):
    # Massage data
    agg_df = pd.DataFrame()
    for id, content_df in zip(CONVO_IDS, content_dfs):
        if len(agg_df) == 0:
            agg_df[id] = content_df.loc['value']
        else:
            agg_df = pd.concat([agg_df, content_df.loc['value']], axis=1)
            agg_df.rename(columns={'value': id}, inplace=True)

    # Scale cause not all rewards are on the same range
    scaler = MinMaxScaler()  #MaxAbsScaler()
    scaled = pd.DataFrame(scaler.fit_transform(agg_df), columns=agg_df.columns)

    # Prepare for plotting
    scaled['Action'] = agg_df.index
    scaled = scaled.sort_index(key=scaled.sum(1).get, ascending=False)
    df1 = pd.melt(scaled, id_vars=['Action'])

    # Plot
    fig = plt.figure(figsize=(10, 5), tight_layout=True)
    plt.ylabel("$Value$ $(Q)$")
    plt.xlabel("$Actions$ $(a)$")
    plt.xticks(rotation=45, ha="right")
    # fig.legend(loc="center left")

    sns.barplot(x='Action', y='value', hue='variable', data=df1)
    fig.savefig(Path(DATA_FOLDER).resolve() / f'aggregation' / f'thought_content_barchart.png', dpi=300,
                transparent=False,
                bbox_inches='tight')


In [None]:
plot_bar_utility(content_dfs)