In [1]:
# Step two directories up so the relative 'graph_neurawkes/...' paths used below resolve.
# NOTE: the path is relative, so re-running this cell without restarting the kernel moves up again.
%cd ../..

/home/jovyan/work/mjozwiak/mgr_experiments


In [2]:
import json

from functools import wraps
from itertools import product

import tensorflow as tf
tf.enable_eager_execution()

from graph_neurawkes.experiments import calculate_everything
from graph_neurawkes.data.edgelist_data import utils as edge_utils
from graph_neurawkes.src.models import Neurawkes, GraphNeurawkes

  from ._conv import register_converters as _register_converters


## Utils

In [3]:
def df_to_list(df):
    """Convert a three-column frame into a list of (int, int, value) tuples.

    Every row of ``df.values.tolist()`` is unpacked into exactly three items;
    the first two are cast to ``int`` and the third is passed through as-is.
    Presumably the columns are (sender, recipient, time) -- confirm against
    the loader in ``edge_utils``.
    """
    events = []
    for first, second, third in df.values.tolist():
        events.append((int(first), int(second), third))
    return events

def reset_tf_graph_decorator(func):
    """Decorate ``func`` so that ``tf.reset_default_graph()`` runs before each call.

    The returned callable forwards all positional and keyword arguments to
    ``func`` unchanged and returns its result; ``wraps`` copies the original
    function's metadata onto it.
    """
    def call_on_fresh_graph(*call_args, **call_kwargs):
        # Reset first, then delegate -- the order matters for callers that
        # build a model inside ``func``.
        tf.reset_default_graph()
        outcome = func(*call_args, **call_kwargs)
        return outcome

    return wraps(func)(call_on_fresh_graph)

## Constants

In [4]:
# Fraction of the gap-separated chunks (counted from the start of the list)
# that get_longest_val_sequence selects its sequence from.
VAL_RATIO = 0.2
# Fraction of a sequence's tail that split_seq_into_seed_and_true holds out
# as the "true" continuation; the rest becomes the seed.
PRED_RATIO = 0.2

#### Fb-forum

In [5]:
# Edge-list CSV of the fb-forum dataset (relative to the working directory).
DATA_PATH_FB = 'graph_neurawkes/data/edgelist_data/fb-forum/data.csv'
# Passed as gap_size to edge_utils.cut_on_big_gaps; units are not visible here --
# presumably those of the CSV timestamps, TODO confirm.
GAP_SIZE_FB = 5000
# Vertex count handed to the model constructors below.
NUM_VERTICES_FB = 899

# Locations passed as saved_path to model.generate for the graph ("full")
# and plain ("naive") models respectively.
SAVE_PATH_FULL_FB = 'graph_neurawkes/saves/gnh_fb-forum/1'
SAVE_PATH_NAIVE_FB = 'graph_neurawkes/saves/nh_fb-forum/naive_basic_fb'

In [6]:
@reset_tf_graph_decorator
def get_gnh_model_fb():
    """Build a new GraphNeurawkes for fb-forum; the decorator resets the default TF graph first."""
    # NOTE(review): the meaning of the positional arguments 64, 50 and True is
    # defined by GraphNeurawkes and is not visible in this notebook.
    gnh_model = GraphNeurawkes(64, NUM_VERTICES_FB, 50, True)
    return gnh_model

@reset_tf_graph_decorator
def get_nh_model_fb():
    """Build a new Neurawkes for fb-forum; the decorator resets the default TF graph first."""
    # One slot per ordered vertex pair, self-pairs included.
    num_pairs = NUM_VERTICES_FB ** 2
    return Neurawkes(64, num_pairs)

#### Radoslaw

In [7]:
# Edge-list CSV of the ia-radoslaw-email dataset (relative to the working directory).
DATA_PATH_RAD = 'graph_neurawkes/data/edgelist_data/ia-radoslaw-email/data.csv'
# Passed as gap_size to edge_utils.cut_on_big_gaps; units are not visible here --
# presumably those of the CSV timestamps, TODO confirm.
GAP_SIZE_RAD = 45000
# Vertex count handed to the model constructors below.
NUM_VERTICES_RAD = 167

# Locations passed as saved_path to model.generate for the graph ("full")
# and plain ("naive") models respectively.
SAVE_PATH_FULL_RAD = 'graph_neurawkes/saves/gnh_radoslaw/25'
SAVE_PATH_NAIVE_RAD = 'graph_neurawkes/saves/nh_radoslaw/naive_basic_radoslaw'

In [8]:
@reset_tf_graph_decorator
def get_gnh_model_radoslaw():
    """Build a new GraphNeurawkes for radoslaw; the decorator resets the default TF graph first."""
    # NOTE(review): the meaning of the positional arguments 64, 50 and True is
    # defined by GraphNeurawkes and is not visible in this notebook.
    gnh_model = GraphNeurawkes(64, NUM_VERTICES_RAD, 50, True)
    return gnh_model

@reset_tf_graph_decorator
def get_nh_model_radoslaw():
    """Build a new Neurawkes for radoslaw; the decorator resets the default TF graph first."""
    # One slot per ordered vertex pair, self-pairs included.
    num_pairs = NUM_VERTICES_RAD ** 2
    return Neurawkes(64, num_pairs)

#### Hypertext

In [9]:
# Edge-list CSV of the ia-contacts_hypertext2009 dataset (relative to the working directory).
DATA_PATH_HYP = 'graph_neurawkes/data/edgelist_data/ia-contacts_hypertext2009/data.csv'
# Passed as gap_size to edge_utils.cut_on_big_gaps; units are not visible here --
# presumably those of the CSV timestamps, TODO confirm.
GAP_SIZE_HYP = 75
# Vertex count handed to the model constructors below.
NUM_VERTICES_HYP = 113

# Locations passed as saved_path to model.generate for the graph ("full")
# and plain ("naive") models respectively.
SAVE_PATH_FULL_HYP = 'graph_neurawkes/saves/gnh_hypertext/13'
SAVE_PATH_NAIVE_HYP = 'graph_neurawkes/saves/nh_hypertext/naive_basic_ia'

In [10]:
@reset_tf_graph_decorator
def get_gnh_model_hypertext():
    """Build a new GraphNeurawkes for hypertext; the decorator resets the default TF graph first."""
    # Unlike the fb-forum and radoslaw builders, the last argument is False here.
    # NOTE(review): presumably this flag toggles self-loops (the naive hypertext
    # model also drops the N self-pairs) -- confirm against GraphNeurawkes.
    gnh_model = GraphNeurawkes(64, NUM_VERTICES_HYP, 50, False)
    return gnh_model
    
@reset_tf_graph_decorator
def get_nh_model_hypertext():
    """Build a new Neurawkes for hypertext; the decorator resets the default TF graph first."""
    # Ordered vertex pairs minus the N self-pairs (differs from the other datasets,
    # which use the full N ** 2).
    num_pairs = NUM_VERTICES_HYP ** 2 - NUM_VERTICES_HYP
    return Neurawkes(64, num_pairs)

## Processing functions

In [11]:
def get_longest_val_sequence(data_path, gap_size):
    """Return the longest validation chunk of a dataset as a list of event tuples.

    The CSV at ``data_path`` is loaded with ``edge_utils._get_df_from_csv`` and
    cut into chunks by ``edge_utils.cut_on_big_gaps(df, gap_size)``. The first
    ``int(len(chunks) * VAL_RATIO)`` chunks form the validation set; the one
    with the most rows is converted with ``df_to_list`` and returned.

    Args:
        data_path: path of the edge-list CSV.
        gap_size: forwarded unchanged to ``cut_on_big_gaps``.

    Returns:
        The list produced by ``df_to_list`` for the longest validation chunk.

    Raises:
        ValueError: if the validation slice is empty, i.e. there are fewer than
            ``1 / VAL_RATIO`` chunks. The bare ``max()`` used before raised the
            same exception type, only with an uninformative message.
    """
    df = edge_utils._get_df_from_csv(data_path)

    dfs = edge_utils.cut_on_big_gaps(df, gap_size)
    val_count = int(len(dfs) * VAL_RATIO)
    val_dfs = dfs[:val_count]

    if not val_dfs:
        raise ValueError(
            f'No validation chunks for {data_path!r}: {len(dfs)} chunk(s) '
            f'with VAL_RATIO={VAL_RATIO} yields an empty validation set'
        )

    return df_to_list(max(val_dfs, key=len))

def split_seq_into_seed_and_true(seq):
    """Split ``seq`` into a seed prefix and a held-out "true" suffix.

    The suffix holds ``int(len(seq) * PRED_RATIO)`` items and the prefix holds
    the rest.

    Args:
        seq: any sliceable sequence with a length.

    Returns:
        ``(seed_seq, true_seq)`` such that ``seed_seq + true_seq == seq``.
    """
    true_len = int(len(seq) * PRED_RATIO)
    # Slice by an explicit index rather than by -true_len: when true_len is 0,
    # seq[:-0] is empty and seq[-0:] is the whole sequence, which swaps the
    # roles of seed and truth for short inputs.
    split_at = len(seq) - true_len
    seed_seq, true_seq = seq[:split_at], seq[split_at:]
    return seed_seq, true_seq

def generate_sequence(seed_seq, true_seq, mode, model, save_path):
    """Ask ``model`` to generate a continuation of ``seed_seq``.

    Args:
        seed_seq: events passed to ``model.generate`` as ``seed``.
        true_seq: held-out events; only used to size the generation budget.
        mode: ``'length'`` to generate ``len(true_seq)`` events
            (``max_events``), or ``'duration'`` to generate up to the third
            field of the last event in ``true_seq`` (``max_time``).
        model: object exposing ``generate(saved_path=..., seed=..., ...)``.
        save_path: passed to ``model.generate`` as ``saved_path``.

    Returns:
        Whatever ``model.generate`` returns.

    Raises:
        ValueError: if ``mode`` is neither ``'length'`` nor ``'duration'``.
            Previously this surfaced as an ``UnboundLocalError`` on return.
        IndexError: in ``'duration'`` mode when ``true_seq`` is empty.
    """
    if mode == 'length':
        return model.generate(saved_path=save_path, seed=seed_seq, max_events=len(true_seq))
    if mode == 'duration':
        return model.generate(saved_path=save_path, seed=seed_seq, max_time=true_seq[-1][2])

    raise ValueError(f"Unknown mode {mode!r}; expected 'length' or 'duration'")

## THE MEAT

### Jaccard-esque

In [12]:
# One entry per dataset, unpacked by the experiment loop in this order:
# (name, data_path, gap_size, gnh_model_func, full_savepath, nh_model_func, naive_savepath)
EXPERIMENT_SPEC = [
    (
        'hyper',
        DATA_PATH_HYP,
        GAP_SIZE_HYP,
        get_gnh_model_hypertext,
        SAVE_PATH_FULL_HYP,
        get_nh_model_hypertext,
        SAVE_PATH_NAIVE_HYP,
    ),
    (
        'radoslaw',
        DATA_PATH_RAD,
        GAP_SIZE_RAD,
        get_gnh_model_radoslaw,
        SAVE_PATH_FULL_RAD,
        get_nh_model_radoslaw,
        SAVE_PATH_NAIVE_RAD,
    ),
    (
        'fb',
        DATA_PATH_FB,
        GAP_SIZE_FB,
        get_gnh_model_fb,
        SAVE_PATH_FULL_FB,
        get_nh_model_fb,
        SAVE_PATH_NAIVE_FB,
    ),
]

In [14]:
# Number of sequences sampled per (dataset, model, generation mode) combination.
NUM_GENERATIONS = 50

for name, data_path, gap_size, gnh_model_func, full_savepath, nh_model_func, naive_savepath in EXPERIMENT_SPEC:
    # Seed / held-out split of the longest validation chunk of this dataset.
    val_seq = get_longest_val_sequence(data_path, gap_size)
    seed_seq, true_seq = split_seq_into_seed_and_true(val_seq)

    for gen_mode, model_mode in product(['duration', 'length'], ['full', 'naive']):
        if model_mode == 'full':
            model_func = gnh_model_func
            savepath = full_savepath
        else:
            model_func = nh_model_func
            savepath = naive_savepath

        # The target file depends only on the combination, so resolve and open
        # it once instead of once per generated sequence.
        results_path = f'graph_neurawkes/experiments/jaccard_results/{name}_{model_mode}_{gen_mode}.txt'

        # Append mode: re-running this cell adds lines to existing result files.
        with open(results_path, 'a') as f:
            for _ in range(NUM_GENERATIONS):
                # model_func() builds a fresh model (and resets the TF graph) per sample.
                generated_seq = generate_sequence(seed_seq, true_seq, gen_mode, model_func(), savepath)
                # .item() turns the float32 timestamp into a built-in float;
                # json.dumps raises TypeError on float32.
                generated_seq = [(s, r, t.item()) for s, r, t in generated_seq]

                # One JSON document per line; flush so finished samples reach
                # disk even if a later generation is interrupted.
                f.write(json.dumps(generated_seq) + '\n')
                f.flush()

INFO:tensorflow:Restoring parameters from graph_neurawkes/saves/gnh_hypertext/13
events: 1241/None	time: 7.8e+03/7761.0


TypeError: Object of type 'float32' is not JSON serializable

In [21]:
# Timestamp of the first generated event. The experiment loop already converts
# timestamps with .item(), so this is a built-in float; calling .item() on it
# again would raise AttributeError on a fresh run.
generated_seq[0][2]

4901.15087890625

### Synthetic tests