In [None]:
import numpy as np
import matplotlib.pyplot as plt
import random
from tqdm import tqdm
import os
import matplotlib.font_manager as font_manager
import copy
from scipy.stats import linregress
from matplotlib.ticker import LogLocator, MultipleLocator, MaxNLocator
title_font = font_manager.FontProperties(family='Times New Roman', size=16, weight='normal')
label_font = font_manager.FontProperties(family='Times New Roman', size=16, weight='bold')
legend_font = font_manager.FontProperties(family='Times New Roman', size=14, weight='normal')
legend_small_font = font_manager.FontProperties(family='Times New Roman', size=12, weight='normal')

size_marker=90
special_tick_size=15

#output ticks (base10)
def tranform_ticks_log(log_value_min,log_value_max,max_to_show=3):
    """Choose tick values (powers of ten) for an axis given its log10 range.

    Parameters
    ----------
    log_value_min, log_value_max : number
        Lower and upper end of the range, expressed as base-10 exponents.
    max_to_show : int, default 3
        Maximum number of ticks to return.

    Returns
    -------
    list
        ``10**e`` for each selected integer exponent ``e``.  When the range
        spans at most 1.3 decades a single tick at the truncated mid
        exponent is returned.
    """
    span = log_value_max - log_value_min
    if span <= 1.3:
        # Narrow range: one tick at the (truncated) centre exponent.
        return [10 ** int((log_value_min + log_value_max) / 2)]

    first_exp = int(log_value_min)
    last_exp = int(log_value_max)
    candidates = list(range(first_exp, last_exp + 1))
    if len(candidates) <= max_to_show:
        exponents = candidates
    else:
        # Too many integer exponents: step through them with a fixed stride,
        # starting at the lowest one, until max_to_show ticks are collected.
        stride = int(span / max_to_show) + 1
        exponents = [first_exp + k * stride for k in range(max_to_show)]
    return [10 ** e for e in exponents]

#output ticks (base2)
def tranform_ticks_log2(log_value_min,log_value_max,max_to_show=3):
    """Choose tick values (powers of two) for an axis given its log2 range.

    Parameters
    ----------
    log_value_min, log_value_max : number
        Lower and upper end of the range, expressed as base-2 exponents.
    max_to_show : int, default 3
        Maximum number of ticks to return.

    Returns
    -------
    list
        ``2**e`` for each selected integer exponent ``e``.
    """
    first_exp = int(log_value_min)
    candidates = list(range(first_exp, int(log_value_max) + 1))
    if len(candidates) <= max_to_show:
        exponents = candidates
    else:
        # Too many integer exponents: keep max_to_show of them, evenly strided
        # from the lowest exponent upwards.
        stride = int((log_value_max - log_value_min) / max_to_show) + 1
        exponents = [first_exp + k * stride for k in range(max_to_show)]
    # Base 2, matching the function name; the previous body raised 10 to the
    # exponent, duplicating the base-10 helper.
    return [2 ** e for e in exponents]



def partitionGroundTruthSamples(groundTruthSamples,seperation=20):
    """
    Split an empirical distribution into ``seperation`` intervals that each
    hold (approximately) the same number of samples.

    Parameters
    ----------
    groundTruthSamples : iterable of numbers
        Empirical samples.  The input is NOT modified; a sorted copy is used.
    seperation : int, default 20
        Number of equal-mass intervals.

    Returns
    -------
    list
        The ``seperation - 1`` interior cut points, in ascending order.

    Raises
    ------
    IndexError
        If ``groundTruthSamples`` is empty and ``seperation`` > 1.
    """
    ordered = sorted(groundTruthSamples)
    n_samples = len(ordered)
    # Integer arithmetic for the rank: (k/seperation)*n in floating point can
    # fall just below the exact integer (e.g. (1/49)*49) and truncate one
    # index too low.
    return [ordered[(k * n_samples) // seperation] for k in range(1, seperation)]

def KL_div_calGivenInterpoints(interpoints,samples,groundTruthSamples):
    """
    Kullback-Leibler divergence between two sample sets, after binning both
    on the same cut points.

    The cut points are extended with -inf and +inf so that every sample falls
    into exactly one half-open bin ``[lower, upper)``.  ``P`` is the binned
    frequency of ``samples`` and ``Q`` that of ``groundTruthSamples``; the
    result is ``basic_module_for_KL(P, Q)``.
    """
    edges = [-float('inf'), *interpoints, float('inf')]
    P = []  # discrete distribution of `samples` over the bins
    Q = []  # discrete distribution of `groundTruthSamples` over the bins
    for lower, upper in zip(edges[:-1], edges[1:]):
        hits_samples = sum(1 for x in samples if lower <= x < upper)
        P.append(hits_samples / len(samples))
        hits_truth = sum(1 for x in groundTruthSamples if lower <= x < upper)
        Q.append(hits_truth / len(groundTruthSamples))
    return basic_module_for_KL(P, Q)
def basic_module_for_KL(P,Q):
    """
    Kullback-Leibler divergence D(P || Q) of two discrete distributions.

    Input: two discrete probability distributions of equal length.
    Return: sum over i of P[i] * log(P[i] / Q[i]), natural logarithm.

    Terms with P[i] == 0 contribute nothing.  If some P[i] > 0 while
    Q[i] == 0 the divergence is infinite and ``float('inf')`` is returned
    (instead of failing on the division by zero).

    Raises AssertionError when the two inputs differ in length.
    """
    assert(len(P)==len(Q))
    kl=0
    for p, q in zip(P, Q):
        if p == 0:
            continue
        if q == 0:
            # P puts mass where Q has none: divergence is unbounded.
            return float('inf')
        kl += p*np.log(p/q)
    return kl

# Scaling relationships fitting

In [None]:
# System sizes under study: 100 * 2**k for k = 2..9 (400, 800, ..., 51200).
N_list = 100 * 2 ** np.arange(2, 10)
#N_list=[400,565,800,1131,1600,2262,3200,4525,6400,9050,12800,18101,25600,36203,51200]
print(N_list[:1])

In [None]:
# data processing: read node lifetime file and output network lifetime
# For every system size N, each per-run log ./node_lifetime_log/N=<N>/nodeLife_N=<N>_id=<k>.txt
# (one integer lifetime per line, k = 0, 1, 2, ... until a file is missing) is reduced to a
# single number: the entry at rank int(alpha*N) of the ascending-sorted node lifetimes.
# One such number per run is written to ./net_percentile_lifetime/PercentileLife_N=<N>.txt.
# NOTE(review): with an ascending sort, rank int(0.8*N) is the lifetime of the node ranked
# 80% from the shortest-lived one; the original comments described the network lifetime as
# "the time when 20% nodes dead" -- confirm which convention is intended.

alpha=0.8 # rank fraction used to pick the network lifetime from the sorted node lifetimes
for N in N_list:
    file_name_to_write='./net_percentile_lifetime/PercentileLife_N='+str(N)+'.txt'
    # 'w' truncates results of a previous run; the handle stays open for all runs of this N
    # and is closed by the context manager even if a log fails to parse.
    with open(file_name_to_write,'w') as f_out:
        id_count=0
        file_name_to_read='./node_lifetime_log/N='+str(N)+'/nodeLife_N='+str(N)+"_id="+str(id_count)+'.txt'
        while(os.path.exists(file_name_to_read)):
            with open(file_name_to_read,'r') as f_in:
                # Blank lines (e.g. a trailing newline) are skipped instead of crashing int().
                node_lifetime=sorted(int(line) for line in f_in if line.strip())
            # Raises IndexError if the log holds fewer than int(alpha*N)+1 lifetimes.
            net_life=node_lifetime[int(alpha*N)]
            f_out.write(str(net_life)+"\n")
            id_count+=1
            file_name_to_read='./node_lifetime_log/N='+str(N)+'/nodeLife_N='+str(N)+"_id="+str(id_count)+'.txt'

In [None]:
#Read NetLife and demonstrate their distribution, mean value and standard deviation
# For every system size: histogram of the (first 150) network lifetimes, then
# log-log plot of their standard deviation and semi-log plot of their mean versus N.
N_list=100*2**np.array(range(2,10))
#N_list=[400,565,800,1131,1600,2262,3200,4525,6400,9050,12800,18101,25600,36203,51200]

std_all_list=[]   # np.std of the lifetimes, one entry per size in N_list
mean_all_list=[]  # np.mean of the lifetimes, one entry per size in N_list
for N in N_list:
    file_PercentileNetLife='./net_percentile_lifetime/PercentileLife_N='+str(N)+'.txt'
    # Context manager closes the file even if a line fails to parse; blank lines are skipped.
    with open(file_PercentileNetLife,'r') as f:
        net_lifetime_list=[int(line) for line in f if line.strip()]
    net_lifetime_list=net_lifetime_list[:150]  # use at most the first 150 samples per size
    std_all_list.append(np.std(net_lifetime_list))
    mean_all_list.append(np.mean(net_lifetime_list))
    plt.hist(net_lifetime_list,bins=20)
    plt.title("N="+str(N))
    plt.xlabel("Lifetime",)
    plt.ylabel("Distribution",)
    plt.show()

plt.scatter(N_list,std_all_list)
#plt.plot(N_list,500*np.array(N_list)**(-0.4))
plt.xscale('log')
plt.yscale('log')
# The plotted quantity is np.std, i.e. the standard deviation (label previously read
# 'Standard Variation').
plt.ylabel('Standard Deviation')
plt.xlabel("System size N")
plt.show()

plt.scatter(N_list,mean_all_list)
#plt.plot(N_list,500*np.array(N_list)**(-0.4))
plt.xscale('log')
#plt.yscale('log')
plt.ylabel('mean value')
plt.xlabel("System size N")
plt.show()

# Scaling law

In [None]:
from scipy.optimize import minimize
from scipy.integrate import quad

In [None]:
# --- Figure styling shared by the plots of this section ---
line_width_marker_set = 2
dpi_set = 1000
max_tick_num = 4

# --- Split the sizes into the ones used for fitting and the ones held out ---
num_train = 5  # the first num_train sizes of N_list are "known"; the rest are predicted
L_list_train, L_list_predict = N_list[:num_train], N_list[num_train:]
mean_list, mean_predict_list = mean_all_list[:num_train], mean_all_list[num_train:]
std_list, std_predict_list = std_all_list[:num_train], std_all_list[num_train:]
L_show_list = np.array([*L_list_train, *L_list_predict])

# --- Inputs of the mean-lifetime fit ---
x_to_train = np.array(L_list_train, dtype='float')
x_to_predict = np.array(L_list_predict, dtype='float')
y_to_train = mean_list
y_to_predict = np.array(mean_predict_list)

delta = 1  # parameter: convergence rate delta (exponent of the finite-size correction)
power_for_weight_of_size = 2  # exponent applied to the system size in the loss weights
renormalized_x_to_train = x_to_train / np.sum(x_to_train)  # training sizes normalised to sum to 1

def target_function(x,y0,c1):
    """Scaling ansatz ``y0 + c1 * x**(-delta)``.

    ``delta`` is read from the module-level variable of that name at call
    time.  ``y0`` is the asymptotic value as ``x`` grows and ``c1`` the
    amplitude of the correction term.
    """
    finite_size_correction = c1 * x ** (-delta)
    return y0 + finite_size_correction
def loss_SQ(parms):
    """Size-weighted sum of squared residuals of the scaling ansatz.

    ``parms`` is ``[y0, c1]``.  The residual between ``y0 + c1*x**(-delta)``
    and ``y_to_train`` at each training size is multiplied by a weight
    proportional to ``renormalized_x_to_train**power_for_weight_of_size``
    (weights normalised to sum to 1) before squaring, so larger systems
    dominate the fit.  All data and hyper-parameters are module-level
    variables.
    """
    y0, c1 = parms[0], parms[1]
    residual = (y0 + c1 * x_to_train ** (-delta)) - y_to_train
    size_weight = renormalized_x_to_train ** power_for_weight_of_size
    weighted_residual = residual * size_weight / np.sum(size_weight)
    return np.sum(weighted_residual ** 2)

# Fit the two free parameters [y0, c1] of the mean-lifetime ansatz on the training sizes.
initial_guess = [0, 1]
result = minimize(loss_SQ, initial_guess, method='BFGS')

params = result.x
best_mean_limit, c1 = params #parameters fitted from small-scale network lifetime data

# Log-log view of the gap |t_inf - <t_N>| for known (circles) and held-out (triangles) sizes.
hollow_marker_style = dict(edgecolors='#1f77b4', facecolors='none',
                           linewidth=line_width_marker_set, s=size_marker)
plt.scatter(L_list_train, np.abs(best_mean_limit-mean_list), label='Known', marker='o', **hollow_marker_style)
plt.scatter(L_list_predict, np.abs(best_mean_limit-mean_predict_list), label='Unknown', marker='^', **hollow_marker_style)

x_to_show = np.linspace(np.min(x_to_train), np.max(x_to_predict), 1000)
y_predict = target_function(x_to_show, best_mean_limit, c1)
fitted_gap = np.abs(y_predict-best_mean_limit)
plt.plot(x_to_show, fitted_gap, color='red', label='Fit', linestyle='--')

# NOTE(review): this correlates the x grid with the fitted curve itself, not the fit
# with the measured means, so the printed "R2" is not a goodness-of-fit -- confirm intent.
r_value = np.corrcoef(x_to_show, fitted_gap)[0][1]
r_squared_text = str(round(r_value**2, 3))
print("R2="+r_squared_text)

plt.xscale('log')
plt.yscale('log')

plt.xlabel("System Size "+r"$N$", fontproperties=label_font)
plt.ylabel(r"$<t_{\infty}>-<t_N>$", fontproperties=label_font)
plt.legend(prop=legend_font)
tick_style = dict(fontproperties='Times New Roman', size=special_tick_size)
plt.xticks(**tick_style)
plt.yticks(**tick_style)

filename = 'Mean_loglog.png'
plt.savefig("./fig_case/"+filename, dpi=dpi_set, bbox_inches='tight')
plt.show()

# Mean lifetime versus system size on linear axes, with the fitted ansatz overlaid.
# The curve starts at the size where the ansatz equals the smallest training mean
# (obtained by inverting y0 + c1*x**(-delta)) and ends at the largest held-out size.
x_min_to_show = ((np.min(mean_list)-best_mean_limit)/c1)**(1/(-delta))
x_show_list = np.linspace(x_min_to_show, L_list_predict[-1], 100)

hollow_marker_style = dict(edgecolors='#1f77b4', facecolors='none',
                           linewidth=line_width_marker_set, s=size_marker)
plt.scatter(L_list_train, mean_list, label='Known', marker='o', **hollow_marker_style)
plt.scatter(L_list_predict, mean_predict_list, label='Unknown', marker='^', **hollow_marker_style)
plt.plot(x_show_list, target_function(x_show_list, best_mean_limit, c1),
         color='red', label='Fit', linestyle='--')

plt.xlabel("System Size "+r"$N$", fontproperties=label_font)
plt.ylabel(r"$<t_N>$", fontproperties=label_font)
plt.legend(prop=legend_font)

# At most four major ticks per axis.
ax = plt.gca()
ax.yaxis.set_major_locator(MaxNLocator(nbins=4))
ax.xaxis.set_major_locator(MaxNLocator(nbins=4))
tick_style = dict(fontproperties='Times New Roman', size=special_tick_size)
plt.xticks(**tick_style)
plt.yticks(**tick_style)

filename = 'Mean_predict.png'
plt.savefig("./fig_case/"+filename, dpi=dpi_set, bbox_inches='tight')
plt.show()

# Standard deviation versus system size on log-log axes, with a power law of fixed
# exponent -1/2 whose prefactor is chosen so the line passes through the centroid of
# the training points in log-log space.
x_list = np.log(L_list_train)
y_list = np.log(std_list)
slope = -1/2
intercept = np.mean(y_list) - slope*np.mean(x_list)

x_show_list = np.log(np.array(L_show_list))
y_show_list = slope*x_show_list + intercept
regression_line = slope*x_list + intercept

# Third parameter: the scaling exponent alpha, used by the later rescaling cells.
# Note that this rebinds `alpha`, which the data-processing cell set to 0.8.
alpha = slope

hollow_marker_style = dict(edgecolors='#1f77b4', facecolors='none',
                           linewidth=line_width_marker_set, s=size_marker)
plt.scatter(L_list_train, std_list, label='Known', marker='o', **hollow_marker_style)
plt.scatter(L_list_predict, std_predict_list, label='Unknown', marker='^', **hollow_marker_style)
plt.plot(np.exp(x_show_list), np.exp(y_show_list), color='red', label="Fit", linestyle='--')

# Fitted law, printed as "<prefactor>$N^{<exponent>}$".
prefactor_text = str(round(np.exp(intercept), 4))
print(prefactor_text + r'$N^{{{}}}$'.format(round(slope, 4)))

plt.xscale('log')
plt.yscale('log')

plt.xlabel("System Size "+r"$N$", fontproperties=label_font)
plt.ylabel(r"Standard deviation $\chi_t(N)$", fontproperties=label_font)
tick_style = dict(fontproperties='Times New Roman', size=special_tick_size)
plt.xticks(**tick_style)
plt.yticks(**tick_style)
plt.legend(prop=legend_font)

filename = 'std_predict.png'
plt.savefig("./fig_case/"+filename, dpi=dpi_set, bbox_inches='tight')
plt.show()

# Data collapse: for each training size, estimate the lifetime density on 19 equal-width
# bins between the smallest and largest sample, then plot it with the deviation from the
# mean divided by N**alpha and the density multiplied by N**alpha.
marker_list=['^','v','>','<','o','d','*','1','2','3','4','+','x','|','_']
for index_L, L in enumerate(np.array(list(L_list_train))):
    marker=marker_list[index_L % len(marker_list)]  # cycle if there are more sizes than markers
    N=L
    file_name='./net_percentile_lifetime/PercentileLife_N='+str(L)+'.txt'
    # Context manager closes the file even if a line fails to parse; blank lines are skipped.
    with open(file_name,'r') as f:
        lifetime_data=[float(line) for line in f if line.strip()]
    lifetime_data=np.array(lifetime_data[:150])  # use at most the first 150 samples per size

    cut_list=np.linspace(np.min(lifetime_data),np.max(lifetime_data),20)
    mid_list=(cut_list[:-1]+cut_list[1:])/2
    # np.histogram counts every sample exactly once (bins are [lo, hi), the last one
    # [lo, hi]).  The previous strict "> lo and < hi" test dropped every sample sitting on
    # a bin edge -- always including the minimum and maximum -- so the density did not
    # integrate to 1.
    counts=np.histogram(lifetime_data,bins=cut_list)[0]
    distri_value_list=counts/(len(lifetime_data)*np.diff(cut_list))

    rescaled_deviation=(mid_list-np.mean(lifetime_data))/N**alpha
    rescaled_density=distri_value_list*N**alpha
    plt.scatter(rescaled_deviation,rescaled_density,label='N='+str(N),marker=marker,s=size_marker)
    plt.plot(rescaled_deviation,rescaled_density,linewidth=1)

ax = plt.gca()
ax.yaxis.set_major_locator(MaxNLocator(nbins=4))
ax.xaxis.set_major_locator(MaxNLocator(nbins=4))
#plt.xticks([-50,0,50])
#plt.xlim(-100,150)
plt.legend(loc='upper right',prop=legend_small_font)
plt.xlabel("Rescaled Deviation "+r"$(t_N-<t_N>)$"+r'$N^{{{}}}$'.format(round(-alpha,4)),fontproperties=label_font)
plt.ylabel("Rescaled Probability "+r"$P(t)/$"+r'$N^{{{}}}$'.format(round(-alpha,4)),fontproperties=label_font)
plt.xticks(fontproperties = 'Times New Roman', size = special_tick_size)
plt.yticks(fontproperties = 'Times New Roman', size = special_tick_size)
filename='universalDistribution.png'
plt.savefig("./fig_case/"+filename,dpi=dpi_set,bbox_inches='tight')
plt.show()

# Summary of the fitted / chosen scaling parameters.
print('c1='+str(round(c1,4))+' delta='+str(round(delta,4))+' alpha='+str(round(-alpha,4)))

In [None]:
# Asymptotic mean lifetime (the fitted y0 of the scaling ansatz).
print("t_infinity=", best_mean_limit, sep="")

In [None]:
#The rescaled samples belong to one universal distribution
# For each training size the lifetimes are centred on their mean and divided by N**alpha.
# The rescaled samples are stored per size, pooled into all_scaled_samples, and their
# empirical cumulative distribution is plotted.

rescaled_lifetime_dict=dict()  # N -> sorted array of rescaled lifetimes
all_scaled_samples=[]          # rescaled lifetimes pooled over all training sizes
for L in L_list_train:
    N=L
    file_name='./net_percentile_lifetime/PercentileLife_N='+str(L)+'.txt'
    # Context manager closes the file even if a line fails to parse; blank lines are skipped.
    with open(file_name,'r') as f:
        lifetime_data=[float(line) for line in f if line.strip()]
    lifetime_data=lifetime_data[:150]  # use at most the first 150 samples per size

    # Explicit array conversion instead of relying on list-minus-numpy-scalar coercion.
    lifetime_array=np.array(lifetime_data)
    rescaled_lifetime_data=np.sort((lifetime_array-np.mean(lifetime_array))/N**(alpha))
    rescaled_lifetime_dict[N]=rescaled_lifetime_data
    all_scaled_samples.extend(rescaled_lifetime_data)

    # Empirical CDF: the i-th smallest sample gets cumulative probability i/n.
    acc_list=np.arange(1,len(rescaled_lifetime_data)+1)/len(rescaled_lifetime_data)
    plt.plot(rescaled_lifetime_data,acc_list,label='N='+str(N))

plt.legend(prop=legend_font)
ax = plt.gca()
ax.yaxis.set_major_locator(MaxNLocator(nbins=4))
ax.xaxis.set_major_locator(MaxNLocator(nbins=4))

plt.ylabel("Cumulative probability",fontproperties=label_font)
plt.xlabel("Rescaled Deviation "+r"$(t_N-<t_N>)$"+r'$N^{{{}}}$'.format(round(-alpha,4)),fontproperties=label_font)
plt.show()

In [None]:
#calculate the quantile of rescaled distribution
# kappa_list[i] is the pooled rescaled sample at rank int((1 - R_i) * n), where R_i runs
# over the reliability grid 0.99, 0.98, ... (step -0.01, stopping before 0.01).

Reliability_list = np.arange(0.99, 0.01, -0.01)

all_scaled_samples.sort()
n_pooled = len(all_scaled_samples)
kappa_list = []
for Reliability in Reliability_list:
    rank = int((1-Reliability)*n_pooled)
    kappa_list.append(all_scaled_samples[rank])

In [None]:
#predicting cumulative probability function of large-scale network based on universal function and scaling laws
# For every held-out size the predicted lifetime at each reliability level is
#   target_function(N) + kappa * N**alpha
# and is compared with the simulated lifetimes: (1) reliability curve, (2) cumulative
# distribution, (3) histogram overlay, (4) KL divergence on equal-mass bins.

# Loop-invariant inputs: the reliability grid matching kappa_list, and the pooled samples.
Reliability_list=np.arange(0.99,0.01,-0.01)
all_scaled_samples.sort()

for L_to_predict in np.array(list(L_list_predict)):
    N_to_predict=L_to_predict
    #predict result
    predicted_mean=target_function(float(N_to_predict),best_mean_limit,c1)
    t_frac=predicted_mean+np.array(kappa_list)*N_to_predict**(alpha)

    # Read the simulated lifetimes once; the context manager closes the file even if a
    # line fails to parse, and blank lines are skipped.
    file_name='./net_percentile_lifetime/PercentileLife_N='+str(L_to_predict)+'.txt'
    with open(file_name,'r') as f:
        simulated_lifetimes=[float(line) for line in f if line.strip()]

    # Panels 1 and 2 use at most the first 300 simulated samples.
    lifetime_data=sorted(simulated_lifetimes[:300])

    #1. draw reliablity function (scatter: empirical; line: prediction)
    # This figure is closed without being shown or saved (its savefig is commented out).
    plt.plot(t_frac,Reliability_list,label='predict',c='r',linestyle='--')
    Reliability_discret_list=np.arange(0.99,0.01,-0.02)
    t_frac_list=[lifetime_data[int((1-Reliability)*len(lifetime_data))] for Reliability in Reliability_discret_list]
    Reliability_show_list=list(Reliability_discret_list)
    plt.scatter(t_frac_list,Reliability_show_list,label='Simulation',s=40,marker='o',edgecolors='#1f77b4', facecolors='none',linewidth=line_width_marker_set)
    plt.title("N="+str(N_to_predict),fontproperties=label_font)
    plt.legend(prop=legend_font)
    plt.ylabel("Reliability",fontproperties=label_font)
    plt.xlabel("Time / days",fontproperties=label_font)
    ax = plt.gca()
    ax.yaxis.set_major_locator(MaxNLocator(nbins=4))
    ax.xaxis.set_major_locator(MaxNLocator(nbins=4))
    filename='predictedReliability_N='+str(N_to_predict)+'.png'
    #plt.savefig("./fig_case1/"+filename,dpi=2000,bbox_inches='tight')
    plt.close()

    #2. draw cumulative function (scatter: empirical; line: prediction)
    cumulative_discret_list=np.arange(0.01,0.99,0.05)
    t_frac_list=[lifetime_data[int(cumulative*len(lifetime_data))] for cumulative in cumulative_discret_list]
    cumulative_show_list=list(cumulative_discret_list)
    plt.scatter(t_frac_list,cumulative_show_list,label='Simulation',marker='o',edgecolors='#1f77b4', facecolors='none',linewidth=line_width_marker_set,s=size_marker)
    plt.plot(t_frac,1-Reliability_list,label='Predict',c='r',linestyle='--')
    plt.title("N="+str(N_to_predict),fontproperties=label_font)
    plt.legend(prop=legend_font)
    plt.ylabel("Cumulative Probability",fontproperties=label_font)
    plt.xlabel("Time / days",fontproperties=label_font)
    ax = plt.gca()
    plt.xticks(fontproperties = 'Times New Roman', size = special_tick_size)
    plt.yticks(fontproperties = 'Times New Roman', size = special_tick_size)
    filename='predictedCumulative_N='+str(N_to_predict)+'.png'
    plt.savefig("./fig_case/"+filename,dpi=dpi_set,bbox_inches='tight')
    plt.show()

    #3. histogram overlay: simulated lifetimes versus the pooled rescaled samples mapped
    #   back to this system size.  Uses at most the first 500 simulated samples.
    predict_distributions=predicted_mean+np.array(all_scaled_samples)*N_to_predict**(alpha)
    lifetime_data=sorted(simulated_lifetimes[:500])
    plt.hist(lifetime_data,alpha=0.5,density=True,bins=20,label='GroundTruth')
    plt.hist(predict_distributions,alpha=0.5,density=True,bins=20,label='Predict')
    plt.legend()
    plt.title("N="+str(N_to_predict),fontproperties=label_font)
    plt.ylabel("Probability Density",fontproperties=label_font)
    plt.xlabel("Time / days",fontproperties=label_font)
    plt.show()

    #4. KL divergence of the prediction from the simulation on equal-mass bins of the
    #   simulated samples.
    groundTruthSamples=lifetime_data
    # NOTE(review): 999900 is not among the sizes built in this notebook's N_list, so the
    # 5-bin branch looks like a leftover special case -- confirm before relying on it.
    if(N_to_predict!=999900):
        interpoints=partitionGroundTruthSamples(groundTruthSamples,seperation=10)
    else:
        interpoints=partitionGroundTruthSamples(groundTruthSamples,seperation=5)
    KL_predict=KL_div_calGivenInterpoints(interpoints,predict_distributions,groundTruthSamples)
    print("KL divergence for N="+str(N_to_predict)+" is "+str(KL_predict))

In [None]:
#Predicting percentile lifetime of large-scale network
# For each reliability level R: scatter the empirical percentile lifetime of the held-out
# sizes and of the training sizes, and draw the predicted curve
#   target_function(N) + kappa_R * N**alpha
# where kappa_R is the pooled rescaled sample at rank int((1-R)*n).
color_list=['#1f77b4','orange','green','c']
marker_list_known=['o','s','D']
marker_list_unknown=['^','v','>']

Reliability_test_list=[0.01,0.5,0.99]


def _empirical_percentile_lifetimes(size_list,reliability,max_samples=500):
    """Empirical lifetime at rank int((1-reliability)*n) for each system size.

    For every size the lifetimes are read from
    ./net_percentile_lifetime/PercentileLife_N=<size>.txt (one number per line), cut to
    the first ``max_samples`` values and sorted ascending.  Returns one lifetime per
    entry of ``size_list``, in the same order.
    """
    percentiles=[]
    for size in size_list:
        file_name='./net_percentile_lifetime/PercentileLife_N='+str(size)+'.txt'
        # Context manager closes the file even if a line fails to parse.
        with open(file_name,'r') as f:
            lifetime_data=[float(line) for line in f if line.strip()]
        lifetime_data=sorted(lifetime_data[:max_samples])
        rank=int((1-reliability)*len(lifetime_data))
        percentiles.append(lifetime_data[rank])
    return percentiles


for index_reliability, Reliability in enumerate(Reliability_test_list):
    color=color_list[index_reliability]
    marker_known=marker_list_known[index_reliability]
    marker_unknown=marker_list_unknown[index_reliability]

    #1. empirical percentile lifetime (large-scale ; triangle)
    t_critical_empirical=_empirical_percentile_lifetimes(L_list_predict,Reliability)
    plt.scatter(L_list_predict,t_critical_empirical,marker=marker_unknown,edgecolors=color, facecolors='none',label='R='+str(Reliability)+"(unknown)",linewidth=line_width_marker_set,s=size_marker)

    #2. empirical percentile lifetime (small-scale ), which is used in fitting scaling law
    # NOTE(review): up to 500 samples are used here, while the fitting cells cap the
    # training data at 150 samples -- confirm the difference is intended.
    t_critical_empirical=_empirical_percentile_lifetimes(L_list_train,Reliability)
    plt.scatter(L_list_train,t_critical_empirical,marker=marker_known,edgecolors=color, facecolors='none',label='R='+str(Reliability)+"(known)",linewidth=line_width_marker_set,s=size_marker)

    #3.prediction result (line)
    N_test=np.logspace(start=2.7, stop=5.5, num=60)
    rank=int((1-Reliability)*len(all_scaled_samples))
    kappa=all_scaled_samples[rank]
    t_critical_predict=target_function(np.array(N_test,dtype=float),best_mean_limit,c1)+kappa*N_test**(alpha)
    plt.plot(N_test,t_critical_predict,linewidth=1,linestyle='--',label='R='+str(Reliability)+"(predict)")

plt.xscale('log')
plt.xlabel("System Size "+r"$N$",fontproperties=label_font)
plt.ylabel("Percentile Lifetime / days",fontproperties=label_font)
plt.legend(prop=legend_font)
ax = plt.gca()
ax.yaxis.set_major_locator(MaxNLocator(nbins=4))
plt.xticks([10**3,10**4,10**5])
plt.xticks(fontproperties = 'Times New Roman', size = special_tick_size)
plt.yticks(fontproperties = 'Times New Roman', size = special_tick_size)

plt.title("Percentile Lifetime",fontproperties=label_font)
filename='PercentileLifetime.png'
plt.savefig("./fig_case/"+filename,dpi=dpi_set,bbox_inches='tight')
plt.show()

# Computation Time and Accuracy

In [None]:
def partitionGroundTruthSamples(groundTruthSamples,seperation=20):
    """
    Split an empirical distribution into ``seperation`` intervals that each
    hold (approximately) the same number of samples.

    This re-defines (and therefore shadows) the function of the same name
    declared near the top of the notebook.

    Parameters
    ----------
    groundTruthSamples : iterable of numbers
        Empirical samples.  The input is NOT modified; a sorted copy is used.
    seperation : int, default 20
        Number of equal-mass intervals.

    Returns
    -------
    list
        The ``seperation - 1`` interior cut points, in ascending order.

    Raises
    ------
    IndexError
        If ``groundTruthSamples`` is empty and ``seperation`` > 1.
    """
    ordered = sorted(groundTruthSamples)
    n_samples = len(ordered)
    # Integer arithmetic for the rank: (k/seperation)*n in floating point can
    # fall just below the exact integer (e.g. (1/49)*49) and truncate one
    # index too low.
    return [ordered[(k * n_samples) // seperation] for k in range(1, seperation)]
# Test of this function: partition one million draws from the uniform distribution
# U[0,1] into twenty equal-mass intervals; the 19 printed cut points should lie close
# to 0.05, 0.10, ..., 0.95.
groundTruthSamples_test = list(np.random.random(size=1_000_000))
interpoints = partitionGroundTruthSamples(groundTruthSamples_test, seperation=20)
print(interpoints)

In [None]:
# Simulation time needed to draw ONE sample, keyed by system size N (values in seconds).
# These figures were recorded on the author's computer and are used below to estimate
# computation time.
time_cost_each_sample = {
    100: 0.32,
    200: 0.6,
    400: 1.7,
    800: 5.28,
    1600: 17.67,
    3200: 68.86,
    6400: 308,
    12800: 1320,
    25600: 4800,
    51200: 19380,
}

Accuracy and time cost of our method based on the universal scaling function

In [None]:
# Number of samples drawn per training size in each experiment.
employ_sample_size_list = [10, 14, 20, 28, 40, 55, 70, 90, 110, 140, 170, 200, 250, 300]

# System sizes: the first five are used for fitting, the remaining ones are predicted.
N_list = 100 * 2 ** np.array(range(2, 10))
L_list_train, L_list_predict = N_list[:5], N_list[5:]
L_show_list = np.array([*L_list_train, *L_list_predict])

# Simulation cost (seconds) of drawing one sample at every training size ...
each_trian_sample_time = 0
for N in L_list_train:
    each_trian_sample_time += time_cost_each_sample[N]
predict_error_dict = dict()
# ... scaled by the number of samples employed in each experiment.
total_train_time_list = each_trian_sample_time * np.array(employ_sample_size_list)

# One KL accumulator per held-out size, with one slot per employed sample size.
KL_predict_dict = dict()
for N_to_predict in L_list_predict:
    KL_predict_dict[N_to_predict] = np.zeros(len(employ_sample_size_list))

repeat_num = 30 # Repeat sampling and average to make result stable

for index_employ_sample_size in range(len(employ_sample_size_list)):
    employ_sample_size=employ_sample_size_list[index_employ_sample_size]
    #Read NetLife
    for _ in range(repeat_num):
        std_all_list=[]
        mean_all_list=[]
        for N in N_list:
            net_lifetime_list=[]
            file_PercentileNetLife='./net_percentile_lifetime/PercentileLife_N='+str(N)+'.txt'
            f=open(file_PercentileNetLife,'r')
            line=f.readline()
            while(line!=''):
                net_lifetime_list.append(int(line))
                line=f.readline()
            f.close()
            if(N in L_list_train):
                net_lifetime_list=random.sample(net_lifetime_list,k=employ_sample_size)
            else:
                net_lifetime_list=net_lifetime_list
            mean_all_list.append(np.mean(net_lifetime_list))
            std_all_list.append(np.std(net_lifetime_list))
        line_width_marker_set=2
        dpi_set=1000
        max_tick_num=4
        mean_list=mean_all_list[:5]
        std_list=std_all_list[:5]
        mean_predict_list=mean_all_list[5:]
        std_predict_list=std_all_list[5:]

        x_to_train=np.array(L_list_train,dtype='float')
        y_to_train=mean_list
        x_to_predict=np.array(L_list_predict,dtype='float')
        y_to_predict=np.array(mean_predict_list)

        delta=1 #——————————————————————————————parameter：delta, the rate of convergence
        power_for_weight_of_size=2  #——————————————————the weight of loss function 
        renormalized_x_to_train=x_to_train/np.sum(x_to_train)

        def target_function(x,y0,c1):
            return y0+c1*x**(-delta)
        def loss_SQ(parms):
            y0=parms[0]
            c1=parms[1]
            y_predict_list=y0+c1*x_to_train**(-delta)
            res=y_predict_list-y_to_train
            res=res*renormalized_x_to_train**power_for_weight_of_size/np.sum(renormalized_x_to_train**power_for_weight_of_size)
            return np.sum(res**2)

        initial_guess=[0,1]
        result = minimize(loss_SQ, initial_guess, method='BFGS')

        params=result.x
        best_mean_limit,c1=params #detremining t_inf and c1.

        plt.scatter(L_list_train,np.abs(best_mean_limit-mean_list),label='Known',edgecolors='#1f77b4', facecolors='none',marker='o',linewidth=line_width_marker_set,s=size_marker)
        plt.scatter(L_list_predict,np.abs(best_mean_limit-mean_predict_list),label='Unknown',edgecolors='#1f77b4', facecolors='none',marker='^',linewidth=line_width_marker_set,s=size_marker)
        x_to_show=np.linspace(np.min(x_to_train),np.max(x_to_predict),1000)
        y_predict=target_function(x_to_show,best_mean_limit,c1)
        plt.plot(x_to_show, np.abs(y_predict-best_mean_limit), color='red', label='Fit',linestyle='--')
        r_value=np.corrcoef(x_to_show,np.abs(y_predict-best_mean_limit))[0][1]
        print("R2="+str(round(r_value**2,3)))


        plt.xscale('log')
        plt.yscale('log')
        #ticks=tranform_ticks_log(np.min(value_list),np.max(value_list),max_tick_num)
        #ticks=[0.2,0.3,0.4,0.6]
        #plt.yticks(ticks)  


        plt.xlabel("System Size "+r"$N$",fontproperties=label_font)
        plt.ylabel(r"$<t_{\infty}>-<t_N>$",fontproperties=label_font)
        #plt.title(net_name_show+" ("+"power="+str(power)+")",fontproperties=label_font)
        plt.legend(prop=legend_font)
        plt.xticks(fontproperties = 'Times New Roman', size = special_tick_size)
        plt.yticks(fontproperties = 'Times New Roman', size = special_tick_size)
        filename='Mean_loglog.png'
        plt.show()
        
        x_min_to_show=((np.min(mean_list)-best_mean_limit)/c1)**(1/(-delta))
        x_show_list=np.linspace(x_min_to_show,L_list_predict[-1],100)

        plt.scatter(L_list_train,mean_list,label='Known',edgecolors='#1f77b4', facecolors='none',marker='o',linewidth=line_width_marker_set,s=size_marker)
        plt.scatter(L_list_predict,mean_predict_list,label='Unknown',edgecolors='#1f77b4', facecolors='none',marker='^',linewidth=line_width_marker_set,s=size_marker)

        plt.plot(x_show_list, target_function(x_show_list,best_mean_limit,c1), color='red', label='Fit',linestyle='--')
        print(r'$<t_{\infty}>-$'+str(round(np.exp(intercept),4))+r'$N^{{{}}}$'.format(round(slope,4)))
        plt.xlabel("System Size "+r"$N$",fontproperties=label_font)
        plt.ylabel(r"$<t_N>$",fontproperties=label_font)
        #plt.title(net_name_show+" ("+"power="+str(power)+")",fontproperties=label_font)
        plt.legend(prop=legend_font)

        ax = plt.gca()
        ax.yaxis.set_major_locator(MaxNLocator(nbins=4))
        ax.xaxis.set_major_locator(MaxNLocator(nbins=4))
        plt.xticks(fontproperties = 'Times New Roman', size = special_tick_size)
        plt.yticks(fontproperties = 'Times New Roman', size = special_tick_size)
        filename='Mean_predict.png'
        plt.show()

        plt.scatter(L_list_train,std_list,label='Known',edgecolors='#1f77b4', facecolors='none',marker='o',linewidth=line_width_marker_set,s=size_marker)
        plt.scatter(L_list_predict,std_predict_list,label='Unknown',edgecolors='#1f77b4', facecolors='none',marker='^',linewidth=line_width_marker_set,s=size_marker)

        x_list=np.log(L_list_train)
        y_list=np.log(std_list)
        slope=-1/2
        intercept=np.mean(y_list)-slope*np.mean(x_list)

        x_show_list=np.log(np.array(L_show_list))
        y_show_list=slope * x_show_list + intercept

        alpha=copy.deepcopy(slope) #——————————————————————————————————parameter： the scaling exponent for standard deviation. We set to 1/2 here.
        regression_line = slope * x_list + intercept
        #plt.plot(np.exp(x_list), np.exp(regression_line), color='red', label=str(round(np.exp(intercept),4))+r'$N^{{{}}}$'.format(round(slope,4)))
        plt.plot(np.exp(x_show_list), np.exp(y_show_list), color='red', label="Fit",linestyle='--')
        print(str(round(np.exp(intercept),4))+r'$N^{{{}}}$'.format(round(slope,4)))

        plt.xscale('log')
        plt.yscale('log')

        #value_list=np.abs(best_mean_limit-mean_list)
        #ticks=tranform_ticks_log(np.min(value_list),np.max(value_list),max_tick_num)
        #plt.yticks(yticks)  

        plt.xlabel("System Size "+r"$N$",fontproperties=label_font)
        plt.ylabel(r"Standard deviation $\chi_t(N)$",fontproperties=label_font)
        plt.xticks(fontproperties = 'Times New Roman', size = special_tick_size)
        plt.yticks(fontproperties = 'Times New Roman', size = special_tick_size)
        #plt.title(net_name_show+" ("+"power="+str(power)+")",fontproperties=label_font)
        plt.legend(prop=legend_font)
        filename='std_predict.png'
        plt.show()

        # Data-collapse figure: for every training size N, estimate the lifetime density
        # on 19 equal-width bins spanning [min, max] of the first 500 samples, shift the
        # bin centres by the sample mean, and rescale both axes with N**alpha so the
        # curves for different N can be compared on one set of axes.
        marker_list=['^','v','>','<','o','d','*','1','2','3','4','+','x','|','_']
        for count_L, L in enumerate(np.array(list(L_list_train))):
            # Cycle through the markers so more than len(marker_list) sizes cannot raise IndexError.
            marker=marker_list[count_L % len(marker_list)]
            N=L
            file_name='./net_percentile_lifetime/PercentileLife_N='+str(L)+'.txt'
            # One lifetime per line; the context manager closes the file even when a
            # line fails to parse, and blank lines are skipped instead of crashing float().
            with open(file_name,'r') as f:
                lifetime_data=[float(line) for line in f if line.strip()]
            lifetime_data=np.array(lifetime_data[:500])

            cut_list=np.linspace(np.min(lifetime_data),np.max(lifetime_data),20)
            mid_list=(cut_list[:-1]+cut_list[1:])/2
            # np.histogram uses half-open bins [down, up) with the last bin closed, so every
            # sample (including the minimum, the maximum and values sitting exactly on an
            # edge) is counted exactly once and the density integrates to 1.
            distri_value_list,_=np.histogram(lifetime_data,bins=cut_list,density=True)

            rescaled_deviation=(mid_list-np.mean(lifetime_data))/N**alpha
            rescaled_density=distri_value_list*N**alpha
            plt.scatter(rescaled_deviation,rescaled_density,label='N='+str(N),marker=marker,s=size_marker)
            plt.plot(rescaled_deviation,rescaled_density,linewidth=1)
        ax = plt.gca()
        ax.yaxis.set_major_locator(MaxNLocator(nbins=4))
        ax.xaxis.set_major_locator(MaxNLocator(nbins=4))
        #plt.xticks([-50,0,50])
        #plt.xlim(-100,150)
        plt.legend(loc='upper right',prop=legend_small_font)
        plt.xlabel("Rescaled Deviation "+r"$(t_N-<t_N>)$"+r'$N^{{{}}}$'.format(round(-alpha,4)),fontproperties=label_font)
        plt.ylabel("Rescaled Probability "+r"$P(t)/$"+r'$N^{{{}}}$'.format(round(-alpha,4)),fontproperties=label_font)
        plt.xticks(fontproperties = 'Times New Roman', size = special_tick_size)
        plt.yticks(fontproperties = 'Times New Roman', size = special_tick_size)
        # NOTE(review): filename is assigned but this figure is only shown, not saved, here.
        filename='universalDistribution.png'
        plt.show()


        print('c1='+str(round(c1,4))+' delta='+str(round(delta,4))+' alpha='+str(round(-alpha,4)))
        # For every training size: centre the first employ_sample_size lifetimes on their
        # mean, divide by N**alpha, and keep the sorted result both per size
        # (rescaled_lifetime_dict) and pooled over all sizes (all_scaled_samples).
        # The pooled samples are what the prediction step further down rescales.
        rescaled_lifetime_dict=dict()
        all_scaled_samples=[]
        for L in L_list_train:
            N=L
            file_name='./net_percentile_lifetime/PercentileLife_N='+str(L)+'.txt'
            # One lifetime per line; the context manager closes the file even when a
            # line fails to parse, and blank lines are skipped instead of crashing float().
            with open(file_name,'r') as f:
                lifetime_data=[float(line) for line in f if line.strip()]
            lifetime_data=np.array(lifetime_data[:employ_sample_size])

            rescaled_lifetime_data=np.sort((lifetime_data-np.mean(lifetime_data))/N**(alpha))
            rescaled_lifetime_dict[N]=rescaled_lifetime_data
            # extend() appends in place instead of rebuilding the pooled list for every size.
            all_scaled_samples.extend(rescaled_lifetime_data)

            # Empirical CDF: the i-th smallest sample (1-based) has cumulative probability i/n.
            acc_list=np.arange(1,len(rescaled_lifetime_data)+1)/len(rescaled_lifetime_data)
            plt.plot(rescaled_lifetime_data,acc_list,label='N='+str(N))
        plt.legend(prop=legend_font)

        ax = plt.gca()
        ax.yaxis.set_major_locator(MaxNLocator(nbins=4))
        ax.xaxis.set_major_locator(MaxNLocator(nbins=4))

        plt.ylabel("Cumulative probability",fontproperties=label_font)
        plt.xlabel("Rescaled Deviation "+r"$(t_N-<t_N>)$"+r'$N^{{{}}}$'.format(round(-alpha,4)),fontproperties=label_font)
        # The CDF figure is discarded without being displayed; replace with plt.show() to inspect it.
        plt.close()

        # Predict the lifetime distribution at each held-out size and score it against
        # the simulated samples with a KL divergence.
        for L_to_predict in L_list_predict:
            N_to_predict=L_to_predict
            # Predicted samples: target_function(...) evaluated at N_to_predict plus the
            # pooled rescaled deviations scaled back by N_to_predict**alpha.
            predict_distributions=target_function(float(N_to_predict),best_mean_limit,c1)+np.array(all_scaled_samples)*N_to_predict**(alpha)
            file_name='./net_percentile_lifetime/PercentileLife_N='+str(N_to_predict)+'.txt'
            # One lifetime per line; the context manager closes the file even when a
            # line fails to parse, and blank lines are skipped instead of crashing float().
            with open(file_name,'r') as f:
                lifetime_data=[float(line) for line in f if line.strip()]
            # Ground truth: the first 500 simulated lifetimes, sorted ascending (kept as a list).
            lifetime_data=sorted(lifetime_data[:500])

            # Overlay histograms of ground truth and prediction; the figure is closed
            # without being displayed (replace plt.close() with plt.show() to inspect it).
            plt.hist(lifetime_data,alpha=0.5,density=True,bins=20,label='GroundTruth')
            plt.hist(predict_distributions,alpha=0.5,density=True,bins=20,label='Predict')
            plt.legend()
            plt.close()

            # Partition the ground truth (seperation=10) and evaluate the KL divergence on
            # those intervals; the result is accumulated as an average over repeat_num runs.
            groundTruthSamples=lifetime_data
            interpoints=partitionGroundTruthSamples(groundTruthSamples,seperation=10)
            KL_predict=KL_div_calGivenInterpoints(interpoints,predict_distributions,groundTruthSamples)
            print("KL="+str(KL_predict))
            KL_predict_dict[N_to_predict][index_employ_sample_size]+=KL_predict/repeat_num
    

In [None]:
# Error versus computation time for the USF method: one scatter series per predicted size.
usf_axes=plt.gca()
for N in L_list_predict:
    usf_hours=total_train_time_list/3600  # divide by 3600 to match the "hours" axis label
    usf_axes.scatter(KL_predict_dict[N],usf_hours,label='USF Method(N={})'.format(N))
usf_axes.legend()
usf_axes.set_xlabel("Error (KL divergence with Ground Truth)")
usf_axes.set_ylabel("Computation Time / hours")
# Both axes are logarithmic.
usf_axes.set_xscale('log')
usf_axes.set_yscale('log')
plt.show()

Accuracy and time cost of direct simulation

In [None]:
# Baseline: estimate the distribution at each predicted size directly from a small
# random subset of the simulated lifetimes, and record the KL divergence against
# the first 500 samples together with the corresponding simulation cost.
direct_samples_num=[5,10,14,20,28,40,56,79,100] # subset sizes to evaluate
time_direct_dict=dict()
KL_direct_dict=dict()
repeat_num= 30 # Repeat sampling to make result stable
# NOTE(review): this rebinds the notebook-level repeat_num that the USF cell also reads.

for N in L_list_predict:
    KL_direct_dict[N]=[]
    # Cost of drawing k samples = per-sample cost at size N times k
    # (presumably in seconds, since the plots divide by 3600 -- TODO confirm).
    time_direct_dict[N]=time_cost_each_sample[N]*np.array(direct_samples_num)

for L_to_predict in L_list_predict:
    N_to_predict=L_to_predict
    file_name='./net_percentile_lifetime/PercentileLife_N='+str(N_to_predict)+'.txt'
    # One lifetime per line; the context manager closes the file even when a
    # line fails to parse, and blank lines are skipped instead of crashing float().
    with open(file_name,'r') as f:
        lifetime_data=[float(line) for line in f if line.strip()]
    lifetime_data=lifetime_data[:500]

    for direct_sample_num in direct_samples_num:
        KL_direct_ave=0
        for _ in range(repeat_num):
            # Draw a subset without replacement and score it against the full ground truth.
            direct_data=random.sample(lifetime_data,direct_sample_num)
            groundTruthSamples=lifetime_data
            interpoints=partitionGroundTruthSamples(groundTruthSamples,seperation=10)
            KL_direct=KL_div_calGivenInterpoints(interpoints,direct_data,groundTruthSamples)
            KL_direct_ave+=KL_direct
        KL_direct_ave/=repeat_num
        # Report the averaged value that is stored, not the last single draw.
        print("KL="+str(KL_direct_ave))
        KL_direct_dict[N_to_predict].append(KL_direct_ave)

In [None]:
# Error versus computation time for direct simulation: one scatter series per predicted size.
direct_axes=plt.gca()
for N in L_list_predict:
    direct_hours=time_direct_dict[N]/3600  # divide by 3600 to match the "hours" axis label
    direct_axes.scatter(KL_direct_dict[N],direct_hours,label='Direct(N={})'.format(N))
direct_axes.legend()
direct_axes.set_xlabel("Error (KL divergence with Ground Truth)")
direct_axes.set_ylabel("Computation Time / hours")
# Both axes are logarithmic.
direct_axes.set_xscale('log')
direct_axes.set_yscale('log')
plt.show()

Results of direct simulation and the USF method in one figure

In [None]:
#Comparison between USF method and direct simulation (legend drawn inside the axes)


# Fixed palette: one color per predicted size, shared by the Direct and USF series of that size.
default_colors=['#1f77b4', '#ff7f0e', '#2ca02c']
for index_N, N in enumerate(L_list_predict):
    # Cycle through the palette so more than len(default_colors) sizes cannot raise IndexError.
    color_to_show=default_colors[index_N % len(default_colors)]
    # Hollow circles: direct simulation; hollow triangles: USF method.
    plt.scatter(KL_direct_dict[N],time_direct_dict[N]/3600,label='Direct(N='+str(N)+")",marker='o',edgecolors=color_to_show, facecolors='none',linewidth=line_width_marker_set,s=size_marker)
    plt.scatter(KL_predict_dict[N],total_train_time_list/3600,label='USF(N='+str(N)+")",marker='^',edgecolors=color_to_show, facecolors='none',linewidth=line_width_marker_set,s=size_marker)
plt.subplots_adjust(right=1.0)
plt.legend(prop=legend_font)
plt.xlabel("Error (KL divergence)",fontproperties=label_font)
plt.ylabel("Computation Time / hours",fontproperties=label_font)
plt.xscale('log')
plt.yscale('log')
plt.xticks(fontproperties = 'Times New Roman', size = special_tick_size)
plt.yticks(fontproperties = 'Times New Roman', size = special_tick_size)
filename='ComputationTimeAndAccuracy_legendIn.png'
# savefig does not create missing directories; make sure the output folder exists.
os.makedirs("./fig_case",exist_ok=True)
plt.savefig("./fig_case/"+filename,dpi=dpi_set,bbox_inches='tight')
plt.show()

In [None]:
#Comparison between USF method and direct simulation (legend anchored outside the axes)


# Fixed palette: one color per predicted size, shared by the Direct and USF series of that size.
default_colors=['#1f77b4', '#ff7f0e', '#2ca02c']
for index_N, N in enumerate(L_list_predict):
    # Cycle through the palette so more than len(default_colors) sizes cannot raise IndexError.
    color_to_show=default_colors[index_N % len(default_colors)]
    # Hollow circles: direct simulation; hollow triangles: USF method.
    plt.scatter(KL_direct_dict[N],time_direct_dict[N]/3600,label='Direct(N='+str(N)+")",marker='o',edgecolors=color_to_show, facecolors='none',linewidth=line_width_marker_set,s=size_marker)
    plt.scatter(KL_predict_dict[N],total_train_time_list/3600,label='USF(N='+str(N)+")",marker='^',edgecolors=color_to_show, facecolors='none',linewidth=line_width_marker_set,s=size_marker)
plt.subplots_adjust(right=1.0)
# loc=8 is 'lower center'; that point of the legend box is pinned at axes coordinates
# (1.2, 0.5), i.e. to the right of the plotting area.
plt.legend(prop=legend_font,loc=8,bbox_to_anchor=(1.2,0.5))
plt.xlabel("Error (KL divergence)",fontproperties=label_font)
plt.ylabel("Computation Time / hours",fontproperties=label_font)
plt.xscale('log')
plt.yscale('log')
plt.xticks(fontproperties = 'Times New Roman', size = special_tick_size)
plt.yticks(fontproperties = 'Times New Roman', size = special_tick_size)
filename='ComputationTimeAndAccuracy_legendOut.png'
# savefig does not create missing directories; make sure the output folder exists.
os.makedirs("./fig_case",exist_ok=True)
plt.savefig("./fig_case/"+filename,dpi=dpi_set,bbox_inches='tight')
plt.show()