In [5]:
import numpy as np
from scipy import stats
import distTools
import matplotlib.pyplot as plt
import os

In [6]:
'''
    Gives num_samples samples from each of num_its different
    normal distributions.
    Returns a matrix where the rows are the data from each iteration
'''
def norm(num_samples, num_its):
    data = []
    loc = 0
    for scale in np.linspace(1,10,num_its):
        data.append((stats.norm.rvs(loc,scale,num_samples),(loc,scale)))
    return data

'''
    Gives num_samples samples from each of num_its different
    rayleigh distributions.
    Returns a matrix where the rows are the data from each iteration
'''
def rayleigh(num_samples, num_its):
    data = []
    loc = 0
    for scale in np.linspace(1,10,num_its):
        data.append((stats.rayleigh.rvs(loc,scale,num_samples),(loc,scale)))
    return data

'''
    Gives num_samples samples from each of num_its different
    exponential distributions.
    Returns a matrix where the rows are the data from each iteration
'''
def expon(num_samples, num_its):
    data = []
    loc = 0
    for scale in np.linspace(1,10,num_its):
        data.append((stats.expon.rvs(loc,scale,num_samples),(loc,scale)))
    return data

'''
    Gives num_samples samples from each of num_its different
    pareto distributions.
    Returns a matrix where the rows are the data from each iteration
'''
def pareto(num_samples, num_its):
    data = []
    loc = 0
    scale = 1
    for b in np.linspace(1,100,num_its):
        data.append((stats.pareto.rvs(b,loc,scale,num_samples),(b,loc,scale)))
    return data

'''
    Gives num_samples samples from each of num_its different
    cauchy distributions.
    Returns a matrix where the rows are the data from each iteration
'''
def norm(num_samples, num_its):
    data = []
    loc = 0
    for scale in np.linspace(1,10,num_its):
        data.append((stats.cauchy.rvs(loc,scale,num_samples),(loc,scale)))
    return data

In [7]:
'''
    Writes a file for the sample data
'''
def logFile(samples,path,num_nodes,num_samps,dist,it):
    file = format("%s%s_%04dnodes_%04dsamples_%02d.dat"%
                  (path,dist,num_nodes,num_samps,it))
    outfile = open(file, 'w')
    print(file)
    
    outfile.write("#ground_truth\n")
    outfile.write(format("dist_name: %s\n"%dist))
    outfile.write("dist_params: ")
    outfile.write(str(samples[1]))
    outfile.write("\n")
    
    outfile.write("#gev_params\n")
    maxima = [np.max(samples[0][i*num_nodes:(i+1)*num_nodes]) for i in range(num_samps)]
    shape,loc,scale = distTools.gev_fit(maxima)
    outfile.write(format("shape: %s\n"%str(shape)))
    outfile.write(format("location: %s\n"%str(loc)))
    outfile.write(format("scale: %s\n"%str(scale)))
    
    outfile.write("#dist_guess\n")
    dists = distTools.fit_dist(samples[0])
    for dist in dists:
        outfile.write(format("%s: %s %s\n"%(dist[0],str(dist[1]),str(dist[2]))))
    
    outfile.write("#data\n")
    outfile.write("run,max,")
    for i in range(num_nodes):
        outfile.write(format("node%d"%i))
        if i+1 < num_nodes:
            outfile.write(",")
            
    outfile.write("\n")
    for i in range(num_samps):
        line = samples[0][i*num_nodes:(i+1)*num_nodes]
        outfile.write(format("%d,%s,"%(i,str(np.max(line)))))
        for j,point in enumerate(line):
            outfile.write(format("%s"%str(point)))
            if j+1 < line.size:
                outfile.write(",")
        outfile.write("\n")
    outfile.close()

In [9]:
size = 10000
num_nodes = [10,50] #[10,50,100,1000]
num_samps = [10,50] #[10,50,100,1000]
dists = ['norm', 'rayleigh', 'expon', 'pareto', 'cauchy']

n = 0
for node in num_nodes:
    for samples in num_samps:
        num_its = size // (node * samples)
        if num_its == 0:
            continue
        path = format("%04dnodes_%04dsamples/"%(node,samples))
        if not os.path.exists(path):
            os.makedirs(path)
        for dist in dists:
            bigData = []
            if dist == 'norm':
                bigData = norm(samples*node,num_its)
            elif dist == 'rayleigh':
                bigData = rayleigh(samples*node,num_its)
            elif dist == 'expon':
                bigData = expon(samples*node,num_its)
            elif dist == 'pareto':
                bigData = pareto(samples*node,num_its)
            elif dist == 'cauchy':
                bigData = cauchy(samples*node,num_its)
            else:
                continue
            
            for it,data in enumerate(bigData):
                logFile(data,path,node,samples,dist,it)
                n += 1
print(n)

0010nodes_0010samples/norm_0010nodes_0010samples_00.dat
0010nodes_0010samples/norm_0010nodes_0010samples_01.dat
0010nodes_0010samples/norm_0010nodes_0010samples_02.dat
0010nodes_0010samples/norm_0010nodes_0010samples_03.dat
0010nodes_0010samples/norm_0010nodes_0010samples_04.dat


  Lhat = muhat - Shat*mu


0010nodes_0010samples/norm_0010nodes_0010samples_05.dat
0010nodes_0010samples/norm_0010nodes_0010samples_06.dat
0010nodes_0010samples/norm_0010nodes_0010samples_07.dat
0010nodes_0010samples/norm_0010nodes_0010samples_08.dat
0010nodes_0010samples/norm_0010nodes_0010samples_09.dat
0010nodes_0010samples/norm_0010nodes_0010samples_10.dat
0010nodes_0010samples/norm_0010nodes_0010samples_11.dat
0010nodes_0010samples/norm_0010nodes_0010samples_12.dat
0010nodes_0010samples/norm_0010nodes_0010samples_13.dat
0010nodes_0010samples/norm_0010nodes_0010samples_14.dat
0010nodes_0010samples/norm_0010nodes_0010samples_15.dat
0010nodes_0010samples/norm_0010nodes_0010samples_16.dat
0010nodes_0010samples/norm_0010nodes_0010samples_17.dat
0010nodes_0010samples/norm_0010nodes_0010samples_18.dat
0010nodes_0010samples/norm_0010nodes_0010samples_19.dat
0010nodes_0010samples/norm_0010nodes_0010samples_20.dat
0010nodes_0010samples/norm_0010nodes_0010samples_21.dat
0010nodes_0010samples/norm_0010nodes_0010samples

0010nodes_0010samples/rayleigh_0010nodes_0010samples_48.dat
0010nodes_0010samples/rayleigh_0010nodes_0010samples_49.dat
0010nodes_0010samples/rayleigh_0010nodes_0010samples_50.dat
0010nodes_0010samples/rayleigh_0010nodes_0010samples_51.dat
0010nodes_0010samples/rayleigh_0010nodes_0010samples_52.dat
0010nodes_0010samples/rayleigh_0010nodes_0010samples_53.dat
0010nodes_0010samples/rayleigh_0010nodes_0010samples_54.dat
0010nodes_0010samples/rayleigh_0010nodes_0010samples_55.dat
0010nodes_0010samples/rayleigh_0010nodes_0010samples_56.dat
0010nodes_0010samples/rayleigh_0010nodes_0010samples_57.dat
0010nodes_0010samples/rayleigh_0010nodes_0010samples_58.dat
0010nodes_0010samples/rayleigh_0010nodes_0010samples_59.dat
0010nodes_0010samples/rayleigh_0010nodes_0010samples_60.dat
0010nodes_0010samples/rayleigh_0010nodes_0010samples_61.dat
0010nodes_0010samples/rayleigh_0010nodes_0010samples_62.dat
0010nodes_0010samples/rayleigh_0010nodes_0010samples_63.dat
0010nodes_0010samples/rayleigh_0010nodes

0010nodes_0010samples/expon_0010nodes_0010samples_92.dat
0010nodes_0010samples/expon_0010nodes_0010samples_93.dat
0010nodes_0010samples/expon_0010nodes_0010samples_94.dat
0010nodes_0010samples/expon_0010nodes_0010samples_95.dat
0010nodes_0010samples/expon_0010nodes_0010samples_96.dat
0010nodes_0010samples/expon_0010nodes_0010samples_97.dat
0010nodes_0010samples/expon_0010nodes_0010samples_98.dat
0010nodes_0010samples/expon_0010nodes_0010samples_99.dat
0010nodes_0010samples/pareto_0010nodes_0010samples_00.dat
0010nodes_0010samples/pareto_0010nodes_0010samples_01.dat
0010nodes_0010samples/pareto_0010nodes_0010samples_02.dat
0010nodes_0010samples/pareto_0010nodes_0010samples_03.dat
0010nodes_0010samples/pareto_0010nodes_0010samples_04.dat
0010nodes_0010samples/pareto_0010nodes_0010samples_05.dat
0010nodes_0010samples/pareto_0010nodes_0010samples_06.dat
0010nodes_0010samples/pareto_0010nodes_0010samples_07.dat
0010nodes_0010samples/pareto_0010nodes_0010samples_08.dat
0010nodes_0010samples/

NameError: name 'cauchy' is not defined