In [1]:
import os
project_root = os.pardir
import sys
src_dir = os.path.join(project_root, 'src')
sys.path.append(src_dir)

import updated_deg_flows as udf
from data import make_dataset_classes as dat
import matplotlib.pyplot as plt
import numpy as np
%matplotlib inline

In [2]:
#build the LFN object for the '1996-1997' dataset
#NOTE(review): show_info=False presumably silences the loader's summary - confirm in dat.LFN
lfn = dat.LFN('1996-1997', show_info=False)
#the call yields two objects; they are unpacked as A and B
flow_matrices = udf.UpdatedDegreeFlowMatrix(lfn, year=1997)
A, B = flow_matrices
#bin edges running up to the number of rows of A
matrix_size = A.shape[0]
bin_edges = udf.CreateLogBins(upper_limit=matrix_size, b=0.5)




In [39]:
'''
Take the matrix of flows and create a new matrix where the rows
and columns now correspond to logarithmically scaled bins.
'''

#one row (and one column) per bin: a bin sits between two consecutive edges
number_rows = len(bin_edges) - 1
#structured dtype with one 32-bit integer field per bin; every field is
#named 'lower-upper' after the two edges of its bin
Dlist = [(str(lower) + '-' + str(upper), 'i4')
         for lower, upper in zip(bin_edges[:-1], bin_edges[1:])]

#zero-filled structured array: number_rows records, one field per bin
M = np.zeros(number_rows, dtype=np.dtype(Dlist))

def WhichBin(value, bin_edges):
    '''
    Find the bin appropriate for a value.

    Bins are open on the left and closed on the right: the value is placed
    in the bin whose upper edge is the first edge (after the first one)
    that is not smaller than the value.

    Parameters
    ----------
    value : number
        Value to place; must be strictly greater than bin_edges[0].
    bin_edges : sequence of numbers
        Edges of the bins, expected in ascending order.

    Returns
    -------
    str
        Label 'lower-upper' built from str() of the two enclosing edges.

    Raises
    ------
    AssertionError
        If value is not strictly greater than the first edge.
    ValueError
        If value is greater than the last edge. The search used to run off
        the end of bin_edges here and fail with a bare IndexError.
    '''
    assert value > bin_edges[0], 'value must be greater than the first bin edge'
    #walk the edges after the first one; the first edge that the value does
    #not exceed is the upper edge of its bin
    for k in range(1, len(bin_edges)):
        if not value > bin_edges[k]:
            bin_lower = bin_edges[k-1]
            bin_upper = bin_edges[k]
            return str(bin_lower) + '-' + str(bin_upper)
    #no edge is large enough: the value lies beyond the binned range
    raise ValueError('value %r is greater than the last bin edge %r'
                     % (value, bin_edges[-1]))

#Accumulate every flow A[i, j] into the cell of M that belongs to the bin
#of i (row) and the bin of j (column).
#Fixes over the previous draft of this loop:
#  * the column bin was computed from i instead of j
#  * the inner loop ran over the number of bins, not the columns of A
#  * the body only evaluated A[i_bin] (a string index into the flow matrix)
#    and never wrote anything into M
#NOTE(review): assumes A is a 2-D flow matrix supporting A[i, j] and that
#every column index is covered by bin_edges - TODO confirm against
#udf.UpdatedDegreeFlowMatrix
#NOTE(review): the fields of M are 'i4', so non-integer flows would be
#truncated on assignment - TODO confirm the flows are integer counts
bin_names = M.dtype.names
#look up the bin of each index once rather than once per matrix entry;
#rows of M are addressed by position, columns by field name
row_of = [bin_names.index(WhichBin(i, bin_edges)) for i in range(A.shape[0])]
col_bin = [WhichBin(j, bin_edges) for j in range(A.shape[1])]
for i in range(A.shape[0]):
    for j in range(A.shape[1]):
        M[col_bin[j]][row_of[i]] += A[i, j]

In [40]:
#spot check: 1 equals the upper edge of the first bin, and WhichBin only
#moves past an edge when the value is strictly greater than it, so the
#label displayed below is that of the first bin
WhichBin(1, bin_edges)

'-0.01-1.0'

In [30]:
#determine which columns (/rows) are in each bin
#np.histogram counts how many of the indices 0 .. A.shape[0]-1 fall in each bin
indices = np.arange(A.shape[0])
freq, bins = np.histogram(indices, bins=bin_edges)
#TODO: then go through the bins (this note was left unfinished)

[('-0.01-1.0', '<i4'), ('1.0-1.6487212707', '<i4'), ('1.6487212707-2.71828182846', '<i4'), ('2.71828182846-4.48168907034', '<i4'), ('4.48168907034-7.38905609893', '<i4'), ('7.38905609893-12.1824939607', '<i4'), ('12.1824939607-20.0855369232', '<i4'), ('20.0855369232-33.1154519587', '<i4'), ('33.1154519587-54.5981500331', '<i4'), ('54.5981500331-90.0171313005', '<i4'), ('90.0171313005-148.413159103', '<i4'), ('148.413159103-244.691932264', '<i4'), ('244.691932264-403.428793493', '<i4'), ('403.428793493-665.141633044', '<i4')]


In [None]:
#updated degree of every node in the graph, used by the degree histograms
degs = list(map(lfn.UpdatedDegree, lfn.graph.nodes()))

In [None]:
#histogram the updated degrees over the logarithmic bins
freq, bins = np.histogram(degs, bin_edges)
#np.histogram hands back integer counts; convert to float *before* dividing
#by the upper bin limits. Writing each quotient back into the integer array
#(as the element-wise loop used to do) silently truncated it, which zeroed
#the sparsely populated high-degree bins.
freq = freq.astype(float) / bins[1:]
#rescale so the bars enclose unit area: sum(bin width * bar height) == 1
bin_widths = np.diff(bins)
s = np.sum(bin_widths * freq)
freq = freq / s
#draw each bar from the left to the right edge of its own bin; the default
#fixed-width bars do not line up with logarithmic bins
plt.bar(bins[:-1], freq, width=bin_widths, align='edge', label='Log Bins', alpha=0.5, color='green')
#NOTE(review): `normed` was removed in matplotlib 3.1 in favour of `density`
#(available from 2.1); kept unchanged so that older installs keep working
plt.hist(degs, bins=100, label='Linear Bins', alpha=0.3, color='blue', normed=True)
plt.yscale('log')
plt.xscale('log')
plt.legend()
plt.title('Histogram of updated degrees of LFN. \n Log Bins Divided By Upper Bin Limit.')
plt.savefig(os.path.join(project_root, 'reports', 'figures', 'Histogram of updated degrees of LFN_logscale_bins_normalised.png'))

In [None]:
#overlay the log-binned and the linear-binned histograms of the updated
#degrees on doubly logarithmic axes, then save the figure
ax = plt.gca()
#lh keeps what hist returns for the log-binned histogram
lh = ax.hist(degs, bins=bin_edges, label='Log Bins', alpha=0.5)
ax.hist(degs, bins=100, label='Linear Bins', alpha=0.5)
ax.set_yscale('log')
ax.set_xscale('log')
ax.set_title('Histogram of updated degrees of LFN.')
ax.legend()
figure_path = os.path.join(project_root, 'reports', 'figures', 'Histogram of updated degrees of LFN_logscale_bins')
ax.figure.savefig(figure_path)