In [2]:
import os
os.chdir('/home/gebhart/projects/rfunklab/network_structure')
import numpy as np
import networkx as nx
import matplotlib.pyplot as plt
import pandas as pd
from ast import literal_eval as make_tuple
from network_structure.homology_parser import *
import persim

In [3]:
# Notebook configuration.
# NOTE(review): graph_type is assigned here but not read by any active cell in
# this view — confirm whether it is still needed.
graph_type = 'knowledge'
# Directories scanned for per-file homology output. Only knowl_data_loc is used
# by the active cells; collab_data_loc is referenced solely from commented-out
# cells further down.
# NOTE(review): absolute, user-specific paths — the notebook will not run
# unchanged on another machine.
collab_data_loc = '/home/gebhart/projects/rfunklab/data/temporal/aps/1_1/collaboration/homology'
knowl_data_loc = '/home/gebhart/projects/rfunklab/data/temporal/aps/1_1/knowledge/homology'

In [4]:
def interval_to_numpy(interval):
    """Convert a collection of persistence intervals into a NumPy array.

    Parameters
    ----------
    interval : sequence of sequences
        Each entry is either a ``(birth, death)`` pair or a one-element
        ``(birth,)`` sequence. A one-element entry is given a death of
        ``np.inf``.

    Returns
    -------
    numpy.ndarray
        One ``[birth, death]`` row per entry, or an empty array
        (``size == 0``) when ``interval`` contains no entries.
    """
    # A diagram holding a single interval is still a valid diagram, so only a
    # truly empty input falls through to the empty array.
    if len(interval) > 0:
        return np.array([list(ival) if len(ival) > 1 else [ival[0], np.inf] for ival in interval])
    return np.array([])

In [5]:
def plot_diagram(diagram, title='', lims=None):
    """Scatter-plot a persistence diagram (birth on x, death on y) on a new figure.

    Parameters
    ----------
    diagram : numpy.ndarray
        Array whose first column is birth and second column is death. Rows
        with an infinite death are not drawn.
    title : str, optional
        Title placed on the axes.
    lims : sequence of two numbers, optional
        ``[low, high]`` applied to both axes and used to draw the diagonal.
        When omitted it is derived from the finite points as
        ``[min(birth) - 1, max(death) + 1]``.

    Returns
    -------
    None
        The figure is left as the current pyplot figure for the caller to show.
    """
    fig, ax = plt.subplots()

    # An empty diagram has no second column to index, so skip the filtering.
    finite = diagram[~np.isinf(diagram[:, 1])] if diagram.size > 0 else diagram

    if finite.shape[0] > 0:
        births = finite[:, 0]
        deaths = finite[:, 1]
        # NOTE(review): colour is death**2 - birth, not persistence
        # (death - birth) — confirm this is the intended colouring.
        ax.scatter(births, deaths, s=25, c=deaths**2 - births, cmap=plt.cm.coolwarm, zorder=10)

        if lims is None:
            lims = [np.min(births - 1), np.max(deaths + 1)]

        # Diagonal birth == death reference line, drawn beneath the points.
        ax.plot(lims, lims, 'k-', alpha=0.75, zorder=0)
        ax.set_xlim(lims)
        ax.set_ylim(lims)

    ax.set_aspect('equal')
    ax.set_xlabel('Birth')
    ax.set_ylabel('Death')
    ax.set_title(title)

In [7]:
# Sorted names of the regular files (sub-directories excluded) in knowl_data_loc.
files = sorted(
    entry
    for entry in os.listdir(knowl_data_loc)
    if os.path.isfile(os.path.join(knowl_data_loc, entry))
)

In [None]:
# Parse every homology file, collect its Betti numbers, cell counts and
# per-dimension interval arrays, and plot the diagram for dimension `doi`.
doi = 1  # homology dimension whose diagram is plotted for each file
npints = []       # per file: list of interval arrays, one per dimension
names = []        # per file: label taken from the first four filename characters
bettis = []       # per file: value returned by parse_betti
cell_counts = []  # per file: value returned by parse_cell_counts
for file in files:
    yr = file[:4]
    print(yr)
    path = os.path.join(knowl_data_loc, file)
    try:
        knowl_intervals = parse_intervals(path)
        knowl_bettis = parse_betti(path)
        knowl_cell_counts = parse_cell_counts(path)
    except IndexError as err:
        # Files the parsers cannot index into are skipped, but say so: a
        # silent skip leaves unexplained gaps in every downstream plot.
        print('Skipping {}: {}'.format(file, err))
        continue
    names.append(yr)
    print('Betti Numbers: ', knowl_bettis)
    print('Cell Counts: ', knowl_cell_counts)
    bettis.append(knowl_bettis)
    cell_counts.append(knowl_cell_counts)
    ints = [interval_to_numpy(dim_intervals) for dim_intervals in knowl_intervals]
    # Only show a figure when one was actually drawn for the dimension of interest.
    if len(ints) > doi and ints[doi].size > 0:
        plot_diagram(ints[doi], title='{} Knowledge Dimension {}'.format(yr, doi))
        plt.show()
    npints.append(ints)
    

In [None]:
# For every parsed file keep only the interval array of dimension `doi`;
# files that have no such dimension contribute an empty array.
doi = 1
diags = [
    per_file[doi] if len(per_file) > doi else np.array([])
    for per_file in npints
]

In [None]:
# Distance of every diagram from index `yr_ref` onwards to the reference
# diagram diags[yr_ref]. Empty diagrams are recorded with the sentinel -1.
yr_ref = 8
INF_DEATH_CAP = 50  # finite stand-in substituted for infinite values before comparing
bottlenecks = []
wassersteins = []
heats = []


def _cap_infinite(diagram, cap=INF_DEATH_CAP):
    """Return a copy of ``diagram`` with every ``np.inf`` entry replaced by ``cap``."""
    # np.where builds a new array, so the arrays stored in diags / npints keep
    # their infinite entries and remain valid for any later re-use.
    return np.where(diagram == np.inf, cap, diagram)


for i in range(yr_ref, len(diags)):
    print('{}/{}'.format(i, len(diags)))
    if diags[i].size > 0:
        reference = _cap_infinite(diags[yr_ref])
        current = _cap_infinite(diags[i])
        bottlenecks.append(persim.bottleneck(reference, current))
        wassersteins.append(persim.sliced_wasserstein(reference, current))
#         heats.append(persim.heat(reference, current))
    else:
        bottlenecks.append(-1)
        wassersteins.append(-1)
#         heats.append(-1)

In [None]:
# Bottleneck distance to the reference diagram, one point per file from
# index yr_ref onwards (entries of -1 mark files with an empty diagram).
ax = plt.gca()
ax.plot(names[yr_ref:], bottlenecks)
plt.xticks(rotation=60)
ax.set_ylabel('Bottleneck Distance')
ax.set_xlabel('Year')
ax.set_title('Bottleneck Distance ({} Reference)'.format(names[yr_ref]))

In [None]:
# Sliced Wasserstein distance to the reference diagram, one point per file
# from index yr_ref onwards (entries of -1 mark files with an empty diagram).
# The values come from persim.sliced_wasserstein, so the labels name that
# metric rather than the plain Wasserstein distance.
plt.plot(names[yr_ref:], wassersteins)
plt.xticks(rotation=60)
plt.ylabel('Sliced Wasserstein Distance')
plt.xlabel('Year')
plt.title('Sliced Wasserstein Distance ({} Reference)'.format(names[yr_ref]))

In [None]:
# plt.plot(names[yr_ref:], heats)
# plt.xticks(rotation=60)
# plt.ylabel('Heat Kernel Distance')
# plt.xlabel('Year')
# plt.title('Heat Kernel Distance ({} Reference)'.format(names[yr_ref]))

In [None]:
# One figure per homology dimension 0..3: that dimension's Betti number
# for every parsed file, in file order.
for dim in range(4):
    betti_series = [per_file[dim] for per_file in bettis]
    plt.plot(names, betti_series)
    plt.xticks(rotation=60)
    plt.ylabel('Betti_{}'.format(dim))
    plt.xlabel('Year')
    plt.title('Betti Number Dimension {}'.format(dim))
    plt.show()

In [None]:
# One figure per dimension 0..8: that dimension's cell count for every
# parsed file, in file order.
for d in range(9):
    plt.plot(names, [c[d] for c in cell_counts])
    plt.xticks(rotation=60)
    # The label has no placeholder, so it is a plain literal; the dimension
    # is already shown in the title.
    plt.ylabel('Cell Count')
    plt.xlabel('Year')
    plt.title('Cell Count Dimension {}'.format(d))
    plt.show()

In [None]:
# for file in files[:1]:
#     yr = file[:4]
#     print(yr)
    
#     collab_intervals = parse_intervals(os.path.join(collab_data_loc, file))
#     collab_bettis = parse_betti(os.path.join(collab_data_loc,file))
#     collab_cell_counts = parse_cell_counts(os.path.join(collab_data_loc,file))
#     collab_euler_characteristic = parse_euler_characteristic(os.path.join(collab_data_loc,file))
    
#     knowl_intervals = parse_intervals(os.path.join(knowl_data_loc, file))
#     knowl_bettis = parse_betti(os.path.join(knowl_data_loc,file))
#     knowl_cell_counts = parse_cell_counts(os.path.join(knowl_data_loc,file))
#     knowl_euler_characteristic = parse_euler_characteristic(os.path.join(knowl_data_loc,file))
    
#     print(collab_bettis, knowl_bettis)
#     print(collab_cell_counts, knowl_cell_counts)
    
#     for i in range(min(len(collab_intervals), len(knowl_intervals))):
#         collab_npint = interval_to_numpy(collab_intervals[i])
#         knowl_npint = interval_to_numpy(knowl_intervals[i])
#         if collab_npint.size > 0:
#             plot_diagram(collab_npint, title='{} Collaboration Persistence Diagram Dimension {}'.format(yr, i))
#         if knowl_npint.size > 0:
#             plot_diagram(knowl_npint, title='{} Knowledge Persistence Diagram Dimension {}'.format(yr, i))

In [None]:
# for i in range(len(collab_intervals)-1):
#     print(persim.bottleneck(interval_to_numpy(collab_intervals[i]), interval_to_numpy(knowl_intervals[i])))