In [None]:
import time, scipy
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import wasserstein_distance
import scipy.signal
import time
import freeDeconvolution
#import subordination, sampling, elkaroui
import multiprocessing as mp
import json

# Thresholds t = 0, 0.2, ..., 9.8 on which CDFs / estimated measures are tabulated
T = np.arange( 0, 10, 0.2 )
# Complex points with real parts 0.0, 0.1, ..., 0.9, each paired with the
# imaginary parts 0.01 and 0.1 (20 points in total)
nu = np.array( [
    complex( 0.1*real_step, imag_part )
    for real_step in range( 10 )
    for imag_part in (0.01, 0.1)
] )

def gen_matrix(Scenario, p):
    """Return the population spectrum (p eigenvalues) of a benchmark scenario.

    Parameters
    ----------
    Scenario : str
        "Case1"   : all eigenvalues equal to 1.
        "Case2"   : atoms {1, 2} with equal weights.
        "Case2.2" : atoms {1, 1.3} with equal weights.
        "Case2.3" : atoms {1, 2, 3, 5, 6} with equal weights.
        "Case3"   : eigenvalues of the symmetric Toeplitz matrix whose
                    first row is 0.3**k, k = 0..p-1.
    p : int
        Number of eigenvalues to return.

    Returns
    -------
    numpy.ndarray
        Real array of shape (p,). For the atomic cases the atoms appear in
        consecutive blocks; for "Case3" the eigenvalues are sorted ascending.

    Raises
    ------
    ValueError
        If `Scenario` is not one of the names above.
    """
    # Local import so the function does not depend on `scipy.linalg`
    # having been loaded as a side effect of another scipy import.
    from scipy.linalg import toeplitz

    # Kept from the original code: re-seeds NumPy's global RNG from fresh entropy.
    np.random.seed(seed=None)

    # (support, unnormalised weights) of the purely atomic scenarios
    atomic_scenarios = {
        "Case2"  : ( [1, 2],          [1, 1] ),
        "Case2.2": ( [1, 1.3],        [1, 1] ),
        "Case2.3": ( [1, 2, 3, 5, 6], [1, 1, 1, 1, 1] ),
    }

    if Scenario == "Case1":
        population_spectrum = np.ones( p )

    elif Scenario in atomic_scenarios:
        support, weights = ( np.array(values) for values in atomic_scenarios[Scenario] )
        weights          = weights/np.sum( weights )
        population_cdf   = np.cumsum( weights )

        population_spectrum = np.zeros( (p,) )
        block_begin = 0
        last_atom   = len(weights) - 1
        for i in range( len(weights) ):
            # The last block always runs to p: the cumulative sum can land
            # just below 1.0 in floating point, which would otherwise leave
            # trailing zeros in the spectrum.
            block_end = p if i == last_atom else int( population_cdf[i]*p )
            population_spectrum[block_begin:block_end] = support[i]
            block_begin = block_end

    elif Scenario == "Case3":
        toeplitz_row = 0.3**np.arange( 0, p, 1)
        # The matrix is real symmetric: eigvalsh returns real eigenvalues,
        # already sorted in ascending order.
        population_spectrum = np.linalg.eigvalsh( toeplitz( toeplitz_row ) )

    else:
        raise ValueError( f"Please specify a scenario... (got {Scenario!r})" )

    return population_spectrum

# I. Generate experiments JSON

In [None]:
# Experimental design: problem sizes, repetitions per (scenario, N) pair, scenarios
N_range   = [ 64, 128, 192, 256, 320, 384, 448, 512]
nb_itr    = 10
Scenarios = [ "Case1", "Case2", "Case2.2", "Case2.3", "Case3"]

# Loop invariants, built once instead of at every iteration
T = np.arange(0,10, 0.2)   # thresholds at which the population CDF is tabulated
c = 1                      # ratio p/N

DOEs = []

# Loop over scenarios
for Scenario in Scenarios:
    print( f'''  |- Scenario {Scenario}''')
    # Loop over iterations
    for iteration in range(nb_itr):
        # Loop over N
        for N in N_range:
            p = int(c*N)

            population_spectrum = gen_matrix(Scenario, p)
            diag                = freeDeconvolution.sampling.sample_wishart( p, N, population_spectrum )

            # population_cdf[i] = #{ eigenvalues <= T[i] } / p.
            # On a sorted array, searchsorted(..., side="right") is exactly that count.
            population_cdf = np.searchsorted( np.sort(population_spectrum), T, side="right" )/p

            # tolist() yields plain Python scalars, which json can always serialise
            DOEs.append( {
                "Scenario": Scenario,
                "N"       : N,
                "observed_spec"    : np.asarray(diag).tolist(),
                "population_spec"  : np.asarray(population_spectrum).tolist(),
                "population_cdf"   : population_cdf.tolist(),
                "results_by_method": {}
            } )

In [None]:
# Persist the generated experiments as indented JSON
with open("./DOEs.json", mode="w") as f:
    f.write( json.dumps( DOEs, indent=4 ) )

# II. Loading and processing DOEs

## II.1. El Karoui's method

In [None]:
import os

# Resume from the accumulated results when they exist. On a fresh run nothing
# has created that file yet, so fall back to the raw experiments written by
# section I ("./DOEs.json").
doe_path = "./DOEs_with_results.json"
if not os.path.exists( doe_path ):
    doe_path = "./DOEs.json"
    print( f"No results file found, starting from {doe_path}" )
with open(doe_path, "r") as f:
    DOEs = json.load(f)
#
print( f'''Loaded {len(DOEs)} experiments...''')

In [None]:
# Names of the benchmarked methods; this section uses the first one
methods = ["convex_optim", "subordination", "CGSV"]
method = methods[0]

In [None]:
def compute_DOE_with_convex( DOE, norm="l2"):
    """Run El Karoui's convex-optimisation deconvolution on one experiment.

    Parameters
    ----------
    DOE : dict
        Experiment record. The keys "observed_spec" and "population_spec"
        are read; "results_by_method" is copied if present.
    norm : str
        Forwarded unchanged to
        `freeDeconvolution.elkaroui.perform_cvx_optimization`.

    Returns
    -------
    dict
        A copy of `DOE` whose "results_by_method" dict has a new
        "convex_optim" entry with the keys "error" (Wasserstein distance to
        the population spectrum), "timing" (seconds), "weights" and
        "support". The input record is left untouched.
    """
    c = 1
    T = np.arange(0,10, 0.2)
    nu = np.array( [complex(0.1*a,b) for a in range(0  , 10) for b in [0.01,0.1]] )

    # RMT data
    diag = np.array( DOE["observed_spec"] )
    population_spectrum = np.array( DOE["population_spec"] )

    ## El Karoui
    tic = time.perf_counter()
    Z = freeDeconvolution.elkaroui.build_dictionary( nu, c, diag)
    # Consistency check: recompute nu from Z and compare with the requested nu
    nu_check = -(1-c)/Z + c*freeDeconvolution.elkaroui.stieltjes(Z, diag)
    nu_errors = np.abs(nu - nu_check)
    # flatnonzero gives a plain index array, so `.size` really tells whether
    # anything has to be removed (len(np.where(...)) is always 1).
    bad_indices = np.flatnonzero(nu_errors > 1e-5)

    # Clean-up if necessary
    if bad_indices.size:
        Z  = np.delete( Z , bad_indices )
        nu = np.delete( nu, bad_indices )
    dictionary = (Z, nu)

    # Perform optimization
    weights_convex, objective_value = freeDeconvolution.elkaroui.perform_cvx_optimization( dictionary, T, c, norm, verbose=False)
    weights_convex = np.abs(weights_convex)
    timing = time.perf_counter() - tic
    print( f'''Timing for convex optimization by el Karoui {str(timing)}''' )

    ## Done
    uniform_weights = np.ones(len(population_spectrum))/len(population_spectrum)
    error   = wasserstein_distance( T, population_spectrum,  weights_convex, uniform_weights)

    # DOE.copy() is shallow: copy the nested results dict as well, otherwise
    # the caller's record would be modified in place.
    new_DOE = DOE.copy()
    new_DOE["results_by_method"] = dict( DOE.get("results_by_method", {}) )
    new_DOE["results_by_method"]["convex_optim"] = {
        "error": error,
        "timing": timing,
        "weights": list(weights_convex),
        "support": list(T)
    }

    return new_DOE

In [None]:
# Dispatch every experiment to a pool of worker processes
print( "Loop over DOEs using multiprocessing... ")
num_processes = 12 # Hard-coded number of worker processes
with mp.Pool( num_processes ) as pool:
    results = pool.map( compute_DOE_with_convex, DOEs )

In [None]:
# Store the experiments, now carrying this method's results, as indented JSON
with open("./DOEs_with_results.json", mode="w") as f:
    f.write( json.dumps( results, indent=4 ) )

## II.2. Tarrago's method

In [None]:
# Reload the experiments together with the results stored so far
with open("./DOEs_with_results.json", mode="r") as f:
    DOEs = json.loads( f.read() )
print( f"Loaded {len(DOEs)} experiments..." )

In [None]:
## II.2. Processing with the subordination method
# Names of the methods available at this stage; this section uses the second one
methods = ["convex_optim", "subordination"]
method = methods[1]

In [None]:
def compute_DOE_with_subordination( DOE):
    """Run the subordination (Tarrago) deconvolution on one experiment.

    Parameters
    ----------
    DOE : dict
        Experiment record. The keys "observed_spec" and "population_spec"
        are read; "results_by_method" is copied if present.

    Returns
    -------
    dict
        A copy of `DOE` whose "results_by_method" dict has a new
        "subordination" entry with the keys "error" (Wasserstein distance to
        the population spectrum), "timing" (seconds), "weights" and
        "support". The input record is left untouched.
    """
    # RMT data
    diag = np.array( DOE["observed_spec"] )
    population_spectrum = np.array( DOE["population_spec"] )

    ## Tarrago
    tic  = time.perf_counter()
    y, R = freeDeconvolution.subordination.freedeconvolutionresult(diag)
    # Normalise by the total mass of |R| so the stored weights are
    # non-negative and sum to one even when R has negative entries
    # (dividing by sum(R) does not guarantee either).
    magnitudes = np.abs(R)
    weights_subordination = magnitudes/np.sum(magnitudes)
    timing = time.perf_counter() - tic
    print( f'''Timing for subordination method by Tarrago {str(timing)}''' )

    ## Done
    uniform_weights = np.ones(len(population_spectrum))/len(population_spectrum)
    error   = wasserstein_distance( y, population_spectrum,  weights_subordination, uniform_weights)

    # DOE.copy() is shallow: copy the nested results dict as well, otherwise
    # the caller's record would be modified in place.
    new_DOE = DOE.copy()
    new_DOE["results_by_method"] = dict( DOE.get("results_by_method", {}) )
    new_DOE["results_by_method"]["subordination"] = {
        "error"  : error,
        "timing" : timing,
        "weights": list(weights_subordination),
        "support": list(y)
    }

    return new_DOE

In [None]:
# Dispatch every experiment to a pool of worker processes
print( "Loop over DOEs using multiprocessing... ")
num_processes = 12 # Hard-coded number of worker processes
with mp.Pool( num_processes ) as pool:
    results = pool.map( compute_DOE_with_subordination, DOEs )

In [None]:
# Store the experiments, now carrying this method's results, as indented JSON
with open("./DOEs_with_results.json", mode="w") as f:
    f.write( json.dumps( results, indent=4 ) )

## II.3. Our method

In [None]:
# Reload the experiments together with the results stored so far
with open("./DOEs_with_results.json", mode="r") as f:
    DOEs = json.loads( f.read() )
print( f"Loaded {len(DOEs)} experiments..." )

In [None]:
## II.3. Processing with our method
# Names of the benchmarked methods; this section uses the third one
methods = ["convex_optim", "subordination", "our method"]
method = methods[2]

In [None]:
def compute_DOE_with_our_method( DOE, debug=False):
    """Deconvolve one experiment with the moment-based ("our") method.

    Steps, as implemented below:
      1. Wrap the observed spectrum in a `DiscreteMeasure`.
      2. Count roots inside a hand-tuned bounding box by integrating
         `Markov_Krein_prime / Markov_Krein` along the box boundary, and
         check the count against p - 1.
      3. Search for a first sub-box containing roots (the outcome,
         `boxes_with_roots`, is not used by the later steps).
      4. On the rectangular contour given by the bounding box, build the
         deconvolved transform, extract 2*moments_count + 2 centred moments
         by contour integration and turn them into a quadrature
         (support, weights) through the Jacobi coefficients.
      5. Compute the Wasserstein error against the population spectrum and
         dump the enriched record to ./dump/DOE_<id>.json.

    Parameters
    ----------
    DOE : dict
        Experiment record. The keys "N", "observed_spec", "population_spec"
        and "id" are read; "results_by_method" is copied if present.
    debug : bool
        When True, print intermediate diagnostics.

    Returns
    -------
    dict
        A copy of `DOE` whose "results_by_method" dict has a new
        "our method" entry with the keys "error", "timing", "weights" and
        "support". The input record is left untouched.

    Raises
    ------
    AssertionError
        If the root count of the bounding box does not match p - 1.
    KeyError
        If `DOE` has no "id" (needed for the dump file name).
    """
    import os
    from freeDeconvolution import boxes

    c = 1
    N = DOE['N']
    p = int(c*N)

    # RMT data
    diag = np.array( DOE["observed_spec"] )
    population_spectrum = np.array( DOE["population_spec"] )

    tic  = time.perf_counter()

    ## Init
    mu_observed = freeDeconvolution.core.DiscreteMeasure( diag, None)
    # NOTE(review): mu_signal and the two zeroes_* reads are not used further
    # in this function; kept in case construction / attribute access matters.
    mu_signal   = freeDeconvolution.core.DiscreteMeasure( population_spectrum, None)

    mu_observed.compute_second_kind()

    zeroes_first_kind   = mu_observed.zeroes_first_kind
    zeroes_second_kind  = mu_observed.zeroes_second_kind

    ## Find bounding box
    degree = len(diag)

    if debug:
        print("Eigenvalues")
        print("min: ", np.min(diag))
        print("max: ", np.max(diag))

    def index_integrand(z):
        # Ratio integrated along contours to count roots
        # (Markov_Krein_prime is presumably the derivative of Markov_Krein).
        values = mu_observed.Markov_Krein_prime(z)/mu_observed.Markov_Krein(z)
        return values

    def compute_index( box, mesh_size, plot=True, color='b'):
        # Riemann sum of index_integrand over the segments listed in
        # boxes.box_segments_enum, divided by 2*pi*i.
        # `plot` and `color` are accepted but not used.
        interval =  np.linspace( 0,1, mesh_size)
        integral = 0
        for segment in boxes.box_segments_enum:
            vector = box[ segment[1] ] - box[ segment[0] ]
            origin = box[ segment[0] ]
            s = origin + interval*vector
            #
            values = index_integrand(s)
            dz = ( s[-1]-s[0] )/mesh_size
            integral = integral + np.sum( values*dz )
        return integral/(2*np.pi*1.0j)

    # TODO: Make it more versatile. Here tuning by hand of radius.
    if debug:
        print("Finding bounding box...")

    radius = 4
    mesh_size = int(1e4)
    bounding_box = {
        'top_left'    : np.min(diag) - 0.3 + radius*1.0j,
        'bottom_right': np.max(diag) + 0.3 - radius*1.0j,
    }
    bounding_box   = boxes.extend_box(bounding_box)
    index = compute_index( bounding_box, mesh_size)
    index = np.real(index+2*degree)
    root_count = int( np.round( index ) )
    if debug:
        print( "Index     : ", index)
        print( "Root count: ", "2x", 0.5*root_count)
        print( "p         : ", p)
        print( "")

    # If false, the bounding box missed roots
    assert p-1 == int(0.5*root_count), "bounding box missed roots"

    ## Find contour
    box = bounding_box.copy()
    box['bottom_left'] = box['top_left'] # For initialization, bottom_left needs to be the previous top_left
    radius = box['height']/2
    stop_at_first_nonempty = True        # Stop at first found box with roots

    # Loop for multiple passes and more
    boxes_with_roots = []
    root_counter = 0
    total_roots  = p-1 # Total number of roots in upper half plane
    i = 0
    while( root_counter < total_roots ):
        i = i+1
        radius = radius/2
        box = {
            'top_left'    : box['bottom_left'],
            'bottom_right': np.real( box['bottom_right'] ) + radius*1.0j,
        }
        box   = boxes.extend_box(box)
        index = compute_index( box, mesh_size, plot=False)
        index = np.real(index)
        root_count = int( np.round( index ) )
        root_counter = root_counter + root_count
        if debug:
            print(f"Pass {i}:")
            print( "Index: ", index)
            print( "Root count / Total: ", root_count, '/', root_counter)
            print( "Found:", root_counter, " / ", total_roots )
            print( "")
        #
        if root_count>0:
            box['root_count'] = root_count
            boxes_with_roots.append( box )
            if stop_at_first_nonempty:
                break
    #

    ## Setup arrays
    contour_type = "rectangle"
    mesh_size    = int(1e5)
    mid_point    = 0.5*np.min(diag) + 0.5*np.max(diag)

    if contour_type=="rectangle":
        path = boxes.box_to_path( bounding_box, mesh_size )
        z_array = np.array( path )

    m_array = mu_observed.M_empirical( z_array )
    s_array = (1+m_array)/(m_array*z_array)

    # Various arrays
    s_signal_array = s_array
    s_noise_array  = 1/(c*m_array + 1)
    s_deconv_array = s_signal_array/s_noise_array
    m_deconv_array = 1/( s_deconv_array*z_array - 1)
    g_deconv_array = (m_deconv_array+1)/z_array

    ## Compute moments
    # Increments along the closed path (roll wraps the last point to the first)
    dz_array  = z_array-np.roll(z_array, shift=1)
    def cauchy_integral_g_deconv( f ):
        value = g_deconv_array*f*dz_array
        return value.sum()/(2*np.pi*1.0j)
    moments_count = 8
    mom_array = np.zeros( 2*moments_count + 2)
    for mom_index in range( len(mom_array) ):
        value = cauchy_integral_g_deconv( (z_array-mid_point)**mom_index ) # Centering stabilizes
        mom_array[ mom_index ] = np.real(value)
    # end for

    ## Inverse moment problem
    jacobi_a, jacobi_b = freeDeconvolution.oprl.jacobi_from_moments( mom_array )
    support, weights   = freeDeconvolution.quadrature_from_jacobi( jacobi_a, jacobi_b)
    support = support + mid_point   # undo the centering used for the moments

    timing = time.perf_counter() - tic
    print( f'''Timing for our method {str(timing)}''' )

    ## Done
    # The error is the plain Wasserstein distance, as for the other methods
    # (the previous constant offset of 10 made the errors incomparable).
    uniform_weights = np.ones(len(population_spectrum))/len(population_spectrum)
    error   = wasserstein_distance( support, population_spectrum, weights, uniform_weights)

    # DOE.copy() is shallow: copy the nested results dict as well, otherwise
    # the caller's record would be modified in place.
    new_DOE = DOE.copy()
    new_DOE["results_by_method"] = dict( DOE.get("results_by_method", {}) )
    new_DOE["results_by_method"]["our method"] = {
        "error"  : error,
        "timing" : timing,
        "weights": list(weights),
        "support": list(support)
    }

    # Per-experiment dump; create the directory on first use
    os.makedirs( "./dump", exist_ok=True )
    with open( f'''./dump/DOE_{DOE['id']}.json''', "w") as f:
        json.dump( new_DOE, f, indent=4)

    print( "Error  : ", error)
    print( "Weights: ", weights)
    print( "Support: ", support)
    print( "" )
    return new_DOE

In [None]:
import os
import re
import string

# Tag DOEs for identification if bugs
id_offset = 31000
for i in range( len(DOEs) ):
    DOE       = DOEs[i]
    DOE['id'] = id_offset+i

# The workers write one file per experiment into ./dump; make sure it exists
os.makedirs( './dump', exist_ok=True )

# Lazy mode: skip the experiments that already have a dump file.
# NOTE: in lazy mode `results` below only holds the experiments computed in
# this run; earlier ones live in ./dump only.
lazy = True
if lazy:
    print( "Working in lazy mode...")
    files = os.listdir('./dump')
    # Only names of the exact form DOE_<digits>.json are dump files; anything
    # else (hidden files, checkpoints, ...) is ignored instead of crashing int().
    dump_pattern = re.compile( r"DOE_(\d+)\.json" )
    matches      = [ dump_pattern.fullmatch(name) for name in files ]
    identifiers  = sorted( int(m.group(1))-id_offset for m in matches if m )
    print( "Found files: ", identifiers )
    all_indices      = set( range(len(DOEs)) )
    # Sorted so that the processing order is deterministic
    selected_indices = sorted( all_indices.difference( identifiers ) )
    print( f'''Number of selected indices: {len(selected_indices)}/{len(DOEs)}''')
    print( "Selected indices:", selected_indices)
    selected_DOEs = [ DOEs[i] for i in selected_indices ]
    print( "")
else:
    selected_DOEs = DOEs

# Loop over experiments
print( "Loop over DOEs using multiprocessing... ")
num_processes = 6 # Hard-coded number of worker processes
with mp.Pool(processes=num_processes) as pool:
    results = pool.map( compute_DOE_with_our_method, selected_DOEs)
# Serial alternative, handy for debugging:
# results = [ compute_DOE_with_our_method( DOE ) for DOE in selected_DOEs ]

In [None]:
# Store the list returned by the pool above as indented JSON
with open("./DOEs_with_results.json", mode="w") as f:
    f.write( json.dumps( results, indent=4 ) )

## II.4. Fusion

In [None]:
import os
import string

# Gather every per-experiment dump of ./dump into a single list.
# Sorted listing gives a deterministic order; non-JSON entries are skipped.
files = sorted( os.listdir('./dump') )
DOEs_fused = []
for filename in files:
    if not filename.endswith( ".json" ):
        continue
    # `with` closes each handle; the previous bare open() leaked one per file
    with open( f'''./dump/{filename}''', 'r') as f:
        DOE = json.load( f )
    DOEs_fused.append( DOE )
# end for

with open("./DOEs_fused.json", "w") as f:
    json.dump( DOEs_fused, f, indent=4)


# III. Plotting

In [None]:
# Load the experiments and their stored results for plotting
with open("./DOEs_with_results.json", mode="r") as f:
    DOEs = json.loads( f.read() )
print( f"Loaded {len(DOEs)} experiments..." )

In [None]:
# Aggregate the stored results per method, then draw the benchmark figures
methods = ["convex_optim", "subordination", "our method"]

aggregated_results = freeDeconvolution.plots.aggregate_benchmarks(
    DOEs,
    methods,
)
freeDeconvolution.plots.make_plots(
    aggregated_results,
    methods,
)

# IV. Analysis

Here we dump the data to a CSV file for further analysis.

In [None]:
# Load the experiments and their stored results for the CSV export
with open("./DOEs_with_results.json", mode="r") as f:
    DOEs = json.loads( f.read() )
print( f"Loaded {len(DOEs)} experiments..." )

In [None]:
import csv

# One row per experiment, restricted to the results of "our method"
extracts = []
for DOE in DOEs:
    result   = DOE['results_by_method']['our method']
    error    = result['error']
    timing   = result['timing']
    extract = [ DOE['Scenario'], DOE['N'], error, timing, result["weights"], result["support"] ]
    extracts.append( extract )
# end for

col_names = ["Scenario", "N", "error", "timing", "weights", "support"]

# newline="" is what the csv module requires of the file object; without it,
# extra blank lines appear on platforms that translate "\n" on write.
with open("./extracts.csv", "w", newline="") as f:
    writer = csv.writer(f)
    writer.writerow( col_names )
    writer.writerows(extracts)