## Distribution of conflicting gradients 

In [None]:
import pickle
import numpy as np 
import matplotlib.pyplot as plt 
import seaborn as sns
import os 

In [None]:
from plot.acronyms import get_acronym

def show_conflicting_gradients_per_parameter(results_dir, dataset, model):
    """Plot one histogram of gradient cosine values per model parameter.

    Loads the pickle ``{results_dir}/{dataset}/{dataset}_{model}_split0.txt``,
    reads ``data['train'][-2]['dot_products']`` (a mapping from parameter name
    to a sequence of values) and draws one histogram per entry on a grid that
    is 10 columns wide. Each legend reports the share of non-negative (``PG``)
    and negative (``NG``) values.

    Args:
        results_dir: Root directory that contains the per-dataset results.
        dataset: Dataset name (used both as sub-directory and file prefix).
        model: Model identifier embedded in the file name.
    """
    # NOTE: pickle.load can execute arbitrary code; only load trusted files.
    file = f'{results_dir}/{dataset}/{dataset}_{model}_split0.txt'
    with open(file, 'rb') as f:
        data = pickle.load(f)
    cos = data['train'][-2]['dot_products']
    ncols = 10
    # Ceil division so that an exact multiple of ``ncols`` does not produce an
    # extra empty row; always keep at least one row.
    nrows = max(1, -(-len(cos) // ncols))
    # squeeze=False keeps ``axes`` two-dimensional even when there is a single
    # row; otherwise ``axes[irow, icol]`` fails for fewer than 10 parameters.
    fig, axes = plt.subplots(nrows, ncols, figsize=(30,10), sharey=True,
                             squeeze=False)
    for i, (name, dot) in enumerate(cos.items()):
        dot = np.array(dot)
        irow, icol = divmod(i, ncols)
        ax = axes[irow, icol]
        prop_pos = np.round((dot >= 0).sum()/len(dot),2)
        prop_neg = np.round((dot < 0).sum()/len(dot),2)
        ax.hist(dot, label=f'PG:{prop_pos} \n NG:{prop_neg}')
        ax.set_title(name)
        ax.legend()
    plt.show()

def show_conflicting_gradients_per_layer(results_dir, dataset, model):
    """Plot encoder and decoder histograms of the stored gradient cosines.

    Reads ``data['train'][-2]['dot_products']`` from the pickle
    ``{results_dir}/{dataset}/{dataset}_{model}_split0.txt``. Values whose
    parameter name contains ``'encoder'`` are pooled into the encoder panel,
    everything else into the decoder panel. Each panel is annotated with the
    share of non-negative (``PG``) and negative (``NG``) values.

    Args:
        results_dir: Root directory that contains the per-dataset results.
        dataset: Dataset name (used both as sub-directory and file prefix).
        model: Model identifier embedded in the file name.
    """
    path = f'{results_dir}/{dataset}/{dataset}_{model}_split0.txt'
    with open(path, 'rb') as handle:
        results = pickle.load(handle)
    modules = ['encoder', 'decoder']
    pooled = {'encoder':[], 'decoder':[]}
    per_parameter = results['train'][-2]['dot_products']
    fig, axes = plt.subplots(1, 2, figsize=(10,5))
    sns.set_style("whitegrid")

    # Pool the per-parameter values by the module the parameter belongs to.
    for param_name, values in per_parameter.items():
        target = 'encoder' if 'encoder' in param_name else 'decoder'
        pooled[target].extend(values)

    for panel, module in zip(axes, modules):
        values = np.array(pooled[module])
        total = len(values)
        prop_pos = np.round((values >= 0).sum()/total,2)
        prop_neg = np.round((values < 0).sum()/total,2)
        sns.histplot(
            x=values,
            stat='proportion',
            ax=panel,
            bins=15,
            label=f'PG:{prop_pos} \n NG:{prop_neg}',
            color='sandybrown',
        )
        panel.set_title(module, fontsize=16)
        # Boxed summary placed at a fixed offset (in points) inside the axes.
        panel.annotate(
            f'PG:{prop_pos}\nNG:{prop_neg}',
            xy=(240, 250),
            xycoords='axes points',
            size=14,
            ha='right',
            va='top',
            bbox=dict(boxstyle='round', fc='w'),
        )
        for which in ('major', 'minor'):
            panel.tick_params(axis='both', which=which, labelsize=14)

    axes[0].set_ylabel('Proportion', fontsize=16)
    axes[1].set_ylabel('')
    # Single x label shared by both panels.
    fig.text(0.5, 0, r'$cos \phi_{ij}$', ha='center', fontsize=16)
    plt.show()


def show_conflicting_gradients_all(results_dir, dataset, model, save_dir=None, splits=3):
    """Plot one histogram of all stored gradient cosines, pooled over splits.

    For every split the pickle
    ``{results_dir}/{dataset}/{dataset}_gru_{model}_temporal_with_labels_split{split}.txt``
    is loaded and the ``dot_products``, ``grad_sim`` and ``grad_ind`` entries
    of ``data['train'][-2]`` are pooled over all parameters. The legend shows:

    * ``CG``: share of negative values among the non-zero cosine values,
    * ``GMS``: mean of ``grad_sim`` over the entries with a negative cosine,
    * ``TPI``: mean of ``grad_ind`` over the entries with a negative cosine.

    Args:
        results_dir: Root directory that contains the per-dataset results.
        dataset: Dataset name (used both as sub-directory and file prefix).
        model: Model key; must be known to ``acronyms_grad``.
        save_dir: If given, the figure is written to
            ``{save_dir}/conflicting_gradients_all/{dataset}_{model}.pdf``.
        splits: Number of split files to pool (``split0`` .. ``split{splits-1}``).
    """
    fig, axes = plt.subplots(1, 1, figsize=(5,5))
    sns.set_style("whitegrid")
    cos_per_layer = []
    sim_per_layer = []
    ind_per_layer = []
    for split in range(splits):
        file = f'{results_dir}/{dataset}/{dataset}_gru_{model}_temporal_with_labels_split{split}.txt'
        # NOTE: pickle.load can execute arbitrary code; only load trusted files.
        with open(file, 'rb') as f:
            data = pickle.load(f)
        cos = data['train'][-2]['dot_products']
        sim = data['train'][-2]['grad_sim']
        ind = data['train'][-2]['grad_ind']
        for name, dot in cos.items():
            cos_per_layer.extend(dot)
            sim_per_layer.extend(sim[name])
            # Fixed: this previously extended with ``sim[name]``, which made
            # the reported TPI a copy of GMS.
            ind_per_layer.extend(ind[name])
    dot = np.array(cos_per_layer)
    sim = np.array(sim_per_layer)
    ind = np.array(ind_per_layer)

    # Statistics restricted to the entries whose cosine is negative.
    neg_dot_mask = dot < 0
    neg_sim = np.round(np.mean(sim[neg_dot_mask]),2)
    neg_ind = np.round(np.mean(ind[neg_dot_mask]),2)

    # Exact zeros are excluded from the denominator of the proportion.
    non_zero = np.sum(dot != 0)
    prop_neg = np.round(neg_dot_mask.sum()/non_zero,2)

    label = f'CG:{prop_neg}\nGMS:{neg_sim}\nTPI:{neg_ind}'
    sns.histplot(x=dot, stat='proportion', ax=axes, bins=15, label=label)

    axes.legend(fontsize=12)
    axes.tick_params(axis='both', which='major', labelsize=14)
    axes.tick_params(axis='both', which='minor', labelsize=14)

    fig.suptitle(acronyms_grad(model), fontsize=24, ha='center', va='top')
    axes.set_ylabel('Proportion', fontsize=16)
    axes.set_xlabel(r'$cos~\phi_{TM}$', fontsize=16)
    if save_dir is not None:
        save_dir = f'{save_dir}/conflicting_gradients_all'
        # exist_ok avoids the check-then-create race of os.path.exists.
        os.makedirs(save_dir, exist_ok=True)
        fig.savefig(f'{save_dir}/{dataset}_{model}.pdf', bbox_inches='tight')
    plt.show()
    

def show_conflicting_gradients_per_layer_joint(results_dir, dataset, model, save_dir=None, splits=3):
    """Overlay gradient-cosine histograms of a model and its ``-jd`` variant.

    For each file-name suffix (``''`` and ``'-jd'``; only ``'-jd'`` for
    ``'smurf-thp'``) the pickles
    ``{results_dir}/{dataset}/{dataset}_gru_{model}{suffix}_temporal_with_labels_split{split}.txt``
    are loaded for every split, and the ``dot_products``, ``grad_sim`` and
    ``grad_ind`` entries of ``data['train'][-2]`` are pooled into an encoder
    group (parameter name contains ``'encoder'``) and a decoder group
    (everything else).

    Models in ``('log-normal-mixture', 'rmtpp', 'smurf-thp')`` get a single
    encoder panel; all other models get an encoder and a decoder panel. For
    the ``'-jd'`` suffix only the encoder histogram is drawn. Each legend
    entry reports ``CG`` (share of negative cosines), ``GMS`` (mean
    ``grad_sim`` over negative-cosine entries) and ``TPI`` (mean ``grad_ind``
    over negative-cosine entries).

    The figure is not shown explicitly (no ``plt.show()`` call).

    Args:
        results_dir: Root directory that contains the per-dataset results.
        dataset: Dataset name (used both as sub-directory and file prefix).
        model: Model key; must be known to ``acronyms_grad``.
        save_dir: If given, the figure is written to
            ``{save_dir}/conflicting_gradients_joint/{dataset}_{model}.pdf``.
        splits: Number of split files to pool (``split0`` .. ``split{splits-1}``).
    """
    # Computed once instead of repeating the literal list at every use site.
    encoder_only = model in ('log-normal-mixture', 'rmtpp', 'smurf-thp')
    suffixes = ['-jd'] if model == 'smurf-thp' else ['', '-jd']
    if encoder_only:
        layers = ['encoder']
        fig, axes = plt.subplots(1, 1, figsize=(5,5), sharey=True)
        # Wrap the single Axes so it can be indexed like the two-panel case.
        axes = [axes]
    else:
        layers = ['encoder', 'decoder']
        fig, axes = plt.subplots(1, 2, figsize=(10,5), sharey=True)
    sns.set_style("whitegrid")

    # ``suffix`` replaces the former loop variable ``type`` (shadowed a builtin).
    for suffix in suffixes:
        cos_per_layer = {'encoder':[], 'decoder':[]}
        sim_per_layer = {'encoder':[], 'decoder':[]}
        ind_per_layer = {'encoder':[], 'decoder':[]}
        for split in range(splits):
            file = f'{results_dir}/{dataset}/{dataset}_gru_{model}{suffix}_temporal_with_labels_split{split}.txt'
            # NOTE: pickle.load can execute arbitrary code; only load trusted files.
            with open(file, 'rb') as f:
                data = pickle.load(f)
            cos = data['train'][-2]['dot_products']
            sim = data['train'][-2]['grad_sim']
            ind = data['train'][-2]['grad_ind']
            for name, dot in cos.items():
                group = 'encoder' if 'encoder' in name else 'decoder'
                cos_per_layer[group].extend(dot)
                sim_per_layer[group].extend(sim[name])
                ind_per_layer[group].extend(ind[name])

        for ax, layer in zip(axes, layers):
            dot = np.array(cos_per_layer[layer])
            sim = np.array(sim_per_layer[layer])
            ind = np.array(ind_per_layer[layer])
            neg_dot_mask = dot < 0
            neg_sim = np.round(np.mean(sim[neg_dot_mask]),2)
            neg_ind = np.round(np.mean(ind[neg_dot_mask]),2)
            prop_neg = np.round(neg_dot_mask.sum()/len(dot),2)

            if suffix == '':
                label = r'$\bf{Base}$' + f':\n CG:{prop_neg}\n GMS:{neg_sim}\n TPI:{neg_ind}'
                sns.histplot(x=dot, stat='proportion', ax=ax, bins=15, label=label)
            elif layer == 'encoder':
                # For the '-jd' files only the encoder histogram is drawn.
                label = r'$\bf{+}$' + f':\n CG:{prop_neg}\n GMS:{neg_sim}\n TPI:{neg_ind}'
                sns.histplot(x=dot, stat='proportion', ax=ax, bins=15, label=label)

            if not encoder_only:
                ax.set_title('Enc' if layer == 'encoder' else 'Dec', fontsize=20)
            ax.legend(fontsize=12)
            ax.tick_params(axis='both', which='major', labelsize=14)
            ax.tick_params(axis='both', which='minor', labelsize=14)

    axes[0].set_ylabel('Proportion', fontsize=16)
    if not encoder_only:
        axes[1].set_ylabel('')
        fig.suptitle(acronyms_grad(model), fontsize=24, ha='center', va='top')
    else:
        axes[0].set_title(f'{acronyms_grad(model)}-Enc', fontsize=24)
    fig.text(0.5, 0, r'$cos~\phi_{TM}$', ha='center', fontsize=16)
    if save_dir is not None:
        save_dir = f'{save_dir}/conflicting_gradients_joint'
        # exist_ok avoids the check-then-create race of os.path.exists.
        os.makedirs(save_dir, exist_ok=True)
        fig.savefig(f'{save_dir}/{dataset}_{model}.pdf', bbox_inches='tight')



def acronyms_grad(model):
    """Return the display acronym used in figure titles for a model key.

    Args:
        model: One of ``'thp'``, ``'sahp'``, ``'mlp-cm'``,
            ``'log-normal-mixture'``, ``'rmtpp'`` or ``'smurf-thp'``.

    Returns:
        The upper-case acronym associated with ``model``.

    Raises:
        KeyError: If ``model`` is not one of the known keys.
    """
    model_keys = ('thp', 'sahp', 'mlp-cm', 'log-normal-mixture', 'rmtpp', 'smurf-thp')
    display_names = ('THP', 'SAHP', 'FNN', 'LNM', 'RMTPP', 'STHP')
    lookup = dict(zip(model_keys, display_names))
    return lookup[model]


In [None]:
# Configuration for the histogram figures of this notebook.
results_dir = 'results/neurips2'
# Single-dataset default; overwritten by the loop variable at the end of this cell.
dataset = 'lastfm_filtered'
# NOTE(review): `model_joint` is not used anywhere in the visible cells.
model_joint = 'thp'
# Root directory under which the plotting functions save their PDFs.
save_dir = 'figures/gradients'

# Datasets to iterate over (comment entries in or out to select them).
datasets = [
    'lastfm_filtered',
    'mooc_filtered',
    'stack_overflow_filtered',
    #'hawkes_sum_exponential_mutual',
    'reddit_filtered_short',
    'github_filtered'
    ]


# Model keys to iterate over; each must be a key known to `acronyms_grad`.
models = [
    #'thp',
    #'sahp',
    #'mlp-cm',
    #'log-normal-mixture',
    #'rmtpp',
    'smurf-thp'
]
#split = 0

# Alternative selections and single-figure calls kept for manual experiments.
#models = ['sahp']
#datasets = ['mooc_filtered']
#show_conflicting_gradients_per_parameter(results_dir, dataset, model)
#show_conflicting_gradients_per_layer(results_dir, dataset, model)

# One joint figure per (dataset, model) pair, saved under `save_dir`.
for dataset in datasets:
    for model in models:
        show_conflicting_gradients_per_layer_joint(results_dir, dataset, model, save_dir=save_dir,splits=3)
        #show_conflicting_gradients_all(results_dir, dataset, model, save_dir=save_dir, splits=3)

In [None]:
from plot.acronyms import get_acronym

def show_evolution_conflicting_gradients_mean_module(results_dir, dataset, model, step=100, window=1000):
    """Plot windowed means of the stored gradient cosines per module.

    Reads ``data['train'][-2]['dot_products']`` from the pickle
    ``{results_dir}/{dataset}/{dataset}_{model}_split0.txt``. The
    per-parameter series are grouped into encoder (parameter name contains
    ``'encoder'``) and decoder (everything else), averaged element-wise across
    the parameters of each group, and then averaged over windows of ``window``
    consecutive entries that start every ``step`` entries.

    Args:
        results_dir: Root directory that contains the per-dataset results.
        dataset: Dataset name (used both as sub-directory and file prefix).
        model: Model identifier embedded in the file name; also passed to
            ``get_acronym`` for the figure title.
        step: Offset between the starts of consecutive windows.
        window: Number of entries averaged in each window (windows near the
            end of the series are shorter because slicing truncates).
    """
    file = f'{results_dir}/{dataset}/{dataset}_{model}_split0.txt'
    # NOTE: pickle.load can execute arbitrary code; only load trusted files.
    with open(file, 'rb') as f:
        data = pickle.load(f)
    layers = [
        'encoder',
        'decoder'
    ]
    cos_per_layer = {'encoder':[], 'decoder':[]}
    cos = data['train'][-2]['dot_products']
    fig, axes = plt.subplots(1, 2, figsize=(10,5), sharey=True)
    sns.set_style("whitegrid")
    for name, dot in cos.items():
        if 'encoder' in name:
            cos_per_layer['encoder'].append(np.array(dot))
        else:
            cos_per_layer['decoder'].append(np.array(dot))
    for i, layer in enumerate(layers):
        # Stack the per-parameter series and average across parameters.
        # assumes all series of a module have the same length — TODO confirm
        dot = np.mean(np.array(cos_per_layer[layer]), axis=0)
        # ``k`` is used for the window index so it is not confused with the
        # panel index ``i`` of the enclosing loop.
        mean_dot = [np.mean(dot[k*step:k*step+window]) for k in range(len(dot)//step)]
        # NOTE(review): x counts windows, i.e. one unit equals ``step``
        # entries of the series, while the axis is labelled 'Iteration'.
        x = np.arange(len(mean_dot))
        sns.lineplot(x=x, y=mean_dot, ax=axes[i])
        title = 'Enc' if layer == 'encoder' else 'Dec'
        axes[i].set_title(title, fontsize=20)
        axes[i].tick_params(axis='both', which='major', labelsize=14)
        axes[i].tick_params(axis='both', which='minor', labelsize=14)
        axes[i].set_ylim(-1,1)
    fig.suptitle(get_acronym([model])[0], fontsize=24, ha='center', va='top')
    axes[0].set_ylabel(r'$cos \phi_{ij}$', fontsize=16)
    axes[1].set_ylabel('')
    fig.text(0.5, 0, 'Iteration' , ha='center', fontsize=16)
    plt.show()

def show_evolution_conflicting_gradients_per_layer(results_dir, dataset, model):
    """Plot a sliding-window mean of the cosines of one encoder parameter.

    Reads ``data['train'][-2]['dot_products']`` from the pickle
    ``{results_dir}/{dataset}/{dataset}_{model}_split0.txt`` and, for every
    parameter whose name contains ``'encoder.rnn.weight_ih_l0'``, draws the
    mean over windows of up to 1000 consecutive entries, with the window start
    advancing by one entry.

    Args:
        results_dir: Root directory that contains the per-dataset results.
        dataset: Dataset name (used both as sub-directory and file prefix).
        model: Model identifier embedded in the file name.
    """
    file = f'{results_dir}/{dataset}/{dataset}_{model}_split0.txt'
    # NOTE: pickle.load can execute arbitrary code; only load trusted files.
    with open(file, 'rb') as f:
        data = pickle.load(f)
    cos = data['train'][-2]['dot_products']
    fig, axes = plt.subplots(1, 1, figsize=(10,5))
    sns.set_style("whitegrid")
    for name, dot in cos.items():
        if 'encoder.rnn.weight_ih_l0' not in name:
            continue
        # NOTE(review): the start index stops 100 entries before the end while
        # the window is 1000 wide, so the last windows are truncated by
        # slicing — confirm whether both constants were meant to match.
        mean_dot = [np.mean(dot[start:start+1000]) for start in range(len(dot)-100)]
        x = np.arange(len(mean_dot))
        sns.lineplot(x=x, y=mean_dot, ax=axes)
    plt.show()



In [None]:
# Configuration for the evolution plots of this notebook.
results_dir = 'results/neurips2'
dataset = 'mooc_filtered'
# Here `model` is the full identifier embedded in the results file name.
model = 'gru_sahp_temporal_with_labels'

# Candidate datasets; not iterated over yet (see the TODO below).
datasets = [
    'lastfm_filtered',
    'mooc_filtered',
    'stack_overflow_filtered',
    'hawkes_exponential_mutual'
]


# Candidate model identifiers; not iterated over yet (see the TODO below).
# NOTE(review): 'rmtpp' does not follow the 'gru_..._temporal_with_labels'
# pattern of the other entries — confirm this matches the file names on disk.
models = [
    'gru_thp_temporal_with_labels',
    'gru_sahp_temporal_with_labels',
    'gru_mlp-cm_temporal_with_labels', 
    'gru_log-normal-mixture_temporal_with_labels',
    'rmtpp'
]

# TODO: loop over `datasets` and `models` as the histogram cell does, instead
# of plotting only the single (dataset, model) pair selected above.


show_evolution_conflicting_gradients_mean_module(results_dir, dataset, model)
#show_evolution_conflicting_gradients_per_layer(results_dir, dataset, model)