In [1]:
# Standard-library module used to build the path to the project's source tree.
import os
# os.pardir is the relative path '..', so the project root is taken to be the
# parent of whatever directory the notebook is run from.
project_root = os.pardir
import sys
src_dir = os.path.join(project_root, 'src')
# Put <project_root>/src on the module search path. This has to run before the
# project-local imports below, which are expected to be found there.
sys.path.append(src_dir)

# Project-local modules (presumably located under src_dir -- verify).
import updated_deg_flows as udf
from data import make_dataset_classes as dat
# Third-party plotting and numerical libraries.
import matplotlib.pyplot as plt
import numpy as np



In [2]:
# Inputs for this analysis, named so they are easy to spot and change.
DATASET_LABEL = '1996-1997'
FLOW_YEAR = 1997

# Build the LFN object without its informational output, then compute the
# updated-degree flow result for FLOW_YEAR. The call returns two objects,
# unpacked as A and B; only A.shape is used later in this notebook.
lfn = dat.LFN(DATASET_LABEL, show_info=False)
A, B = udf.UpdatedDegreeFlowMatrix(lfn, year=FLOW_YEAR)



In [3]:
'''
Take the matrix of flows and create a new matrix where the rows
and columns now correspond to logarithmically scaled bins.

This cell only builds the logarithmic bin edges (bin_edges).
'''

# Logarithmic bin width b is defined such that
#     log(x_{i+1}) = log(x_i) + b,   i.e.   x_{i+1} = x_i * e^b,
# so the width of bin (i, i+1) is w = x_i * (e^b - 1): each bin's width is
# proportional to its lower limit.
b = 0.5
# e^b is the constant ratio between consecutive edges; compute it once.
growth = np.exp(b)

# The first two edges are -0.1 and 1 (not 0 and 1), so the first bin starts
# slightly below zero -- presumably so that a value of 0 lies strictly inside
# the first bin; confirm intent.
edges = [-0.1, 1]

# Keep adding edges until one exceeds the upper limit. The limit is the number
# of rows of A (NOTE(review): this assumes A's rows are indexed by the
# quantity being binned, rather than being the largest entry of A -- confirm).
upper_limit = A.shape[0]
assert upper_limit > 1

# Start from the edge at 1 and multiply by e^b each step. Edges are collected
# in a Python list and converted to an array once at the end, instead of
# reallocating the array with np.append on every iteration.
bin_edge = 1
while bin_edge <= upper_limit:
    bin_edge = bin_edge * growth
    edges.append(bin_edge)
    print(bin_edge)
    # Safety valve against a runaway loop.
    if len(edges) > 100:
        print('Too many bins')
        break

bin_edges = np.array(edges)

1.6487212707
2.71828182846
4.48168907034
7.38905609893
12.1824939607
20.0855369232
33.1154519587
54.5981500331
90.0171313005
148.413159103
244.691932264
403.428793493
665.141633044


In [4]:
# Display the array of bin edges as this cell's output.
bin_edges

array([ -1.00000000e-01,   1.00000000e+00,   1.64872127e+00,
         2.71828183e+00,   4.48168907e+00,   7.38905610e+00,
         1.21824940e+01,   2.00855369e+01,   3.31154520e+01,
         5.45981500e+01,   9.00171313e+01,   1.48413159e+02,
         2.44691932e+02,   4.03428793e+02,   6.65141633e+02])

In [5]:
# Updated degree of every node in the graph, in the order the graph yields them.
degs = list(map(lfn.UpdatedDegree, lfn.graph.nodes()))

In [6]:
# Histogram of the nodes' updated degrees over the logarithmic bins, on
# log-log axes. The explicit figure/axes interface is used so the plot can be
# labelled and stands alone when the notebook is skimmed.
# NOTE(review): the first bin edge (-0.1) is non-positive and cannot be placed
# on a logarithmic x-axis; check how the first bin is rendered.
fig, ax = plt.subplots()
ax.hist(degs, bins=bin_edges)
ax.set_yscale('log')
ax.set_xscale('log')
ax.set_xlabel('Updated degree')
ax.set_ylabel('Number of nodes')
ax.set_title('Updated degree distribution (logarithmic bins)')

In [None]:
# Display any open matplotlib figures.
plt.show()