In [1]:
import os

# Get the current working directory
current_directory = os.getcwd()
print("Current Directory:", current_directory)

# Move to the parent directory so that the `modules` package imported by the
# following cells can be resolved from the working directory.
# The move is skipped when `modules` is already present here: without this
# guard, re-running the cell would climb one more level on every execution
# and the later `from modules...` imports would stop resolving.
parent_directory = os.path.dirname(current_directory)
if not os.path.isdir(os.path.join(current_directory, "modules")):
    os.chdir(parent_directory)

# Print the updated working directory
updated_directory = os.getcwd()
print("Updated Directory:", updated_directory)

Current Directory: /mnt/c/users/rjmay/code/research_ms/misc
Updated Directory: /mnt/c/users/rjmay/code/research_ms


Demonstration of how to use the bounds modules, timing each bound estimator on simulated data.


In [2]:

import numpy as np
from scipy.stats import norm
from modules.bounds_calculator import bounds_calculator

from modules.data_gen_mv import data_gen_multivariate
from modules.data_gen_gauss_mix import data_gen_gauss_mix
from modules.data_gen import data_gen

import pandas as pd 



Running the bounds

In [3]:
default = 7


def compute_time(data_generator, sample_size, eng, runs=default):
    """Return the per-column median of the timings reported by ``bounds_calculator``.

    Parameters
    ----------
    data_generator : object
        Anything exposing ``sample(sample_size)`` that returns a pair of
        datasets (the pair is unpacked into two positional arguments).
    sample_size : int
        Forwarded unchanged to ``data_generator.sample``.
    eng : object
        Passed to ``bounds_calculator`` as ``MATLAB=eng``.
    runs : int, optional
        Number of independent datasets to draw and time (defaults to ``default``).

    Returns
    -------
    pandas.Series
        ``DataFrame.median()`` over the collected timing records, i.e. one
        median per column.
        NOTE(review): each timing record is presumably a mapping of
        estimator name -> elapsed time; confirm against ``bounds_calculator``.
    """
    # All datasets are drawn before any timing starts, so sampling is never
    # interleaved with the calls being timed.
    datasets = []
    for _ in range(runs):
        datasets.append(data_generator.sample(sample_size))

    timings = []
    for first, second in datasets:
        # Only the timing half of the returned pair is kept.
        _, elapsed = bounds_calculator(first, second, MATLAB=eng, Timer=True)
        timings.append(elapsed)

    return pd.DataFrame(timings).median()

Normals

In [4]:

def norm_compute_time(dim, sample_size, eng, runs=default):
    """Time the bounds on two ``dim``-dimensional Gaussians with identity covariance.

    The first class is centred at the origin; the second has its mean shifted
    by 2.56 along the first coordinate only.

    Parameters
    ----------
    dim : int
        Dimensionality of both Gaussians.
    sample_size : int
        Total sample budget; half of it (truncated) is requested per class.
    eng : object
        Forwarded to ``compute_time``.
    runs : int, optional
        Number of repetitions, forwarded to ``compute_time``.

    Returns
    -------
    Whatever ``compute_time`` returns for this generator.
    """
    per_class = int(sample_size / 2)  # sampling for each class

    mean_sep = 2.56
    sampler = np.random.multivariate_normal

    # Both classes share the same identity covariance matrix.
    identity_cov = np.identity(dim)

    shifted_mean = np.zeros(dim)
    shifted_mean[0] = mean_sep

    # NOTE(review): `boundary` is set to the midpoint between the two means;
    # how data_gen_multivariate uses it is not visible here.
    generator = data_gen_multivariate(
        sampler,
        sampler,
        {'mean': np.zeros(dim), 'cov': identity_cov},
        {'mean': shifted_mean, 'cov': identity_cov},
        boundary=mean_sep / 2,
    )

    return compute_time(generator, per_class, eng, runs=runs)

In [5]:
def gaussian_mix_compute_time(dim, sample_size, eng, runs=default):
    """Time the bounds on a two-component Gaussian mixture versus a single Gaussian.

    Parameters
    ----------
    dim : int
        Dimensionality used for the single-Gaussian class (zero mean,
        identity covariance).
    sample_size : int
        Total sample budget; half of it (truncated) is requested per class.
    eng : object
        Forwarded to ``compute_time``.
    runs : int, optional
        Number of repetitions, forwarded to ``compute_time``.

    Returns
    -------
    Whatever ``compute_time`` returns for this generator.
    """
    per_class = int(sample_size / 2)  # sampling for each class

    mean_sep = 2.56

    # NOTE(review): the mixture components are specified as one-dimensional
    # (means at -2.56 and +2.56, unit variance) regardless of `dim`; how
    # data_gen_gauss_mix extends them to `dim` dimensions is not visible here.
    mixture_params = {
        'means': [[-mean_sep], [mean_sep]],
        'covariances': [[[1]], [[1]]],
    }
    gaussian_params = {'mean': np.zeros(dim), 'cov': np.identity(dim)}

    generator = data_gen_gauss_mix(
        mixture_params,
        gaussian_params,
        boundary=[-1.55, 1.55],
    )

    return compute_time(generator, per_class, eng, runs=runs)

In [6]:

def uniform2_compute_time(dim, sample_size, eng, runs=default):
    """Time the bounds on two overlapping uniform classes.

    One class is configured with ``low=0, high=1`` and the other with
    ``low=.8, high=1.8``; both use ``np.random.uniform``.

    Parameters
    ----------
    dim : int
        Passed positionally to ``data_gen`` after the two parameter dicts.
    sample_size : int
        Total sample budget; half of it (truncated) is requested per class.
    eng : object
        Forwarded to ``compute_time``.
    runs : int, optional
        Number of repetitions, forwarded to ``compute_time``.

    Returns
    -------
    Whatever ``compute_time`` returns for this generator.
    """
    per_class = int(sample_size / 2)  # sampling for each class

    generator = data_gen(
        np.random.uniform,
        np.random.uniform,
        {'low': 0, 'high': 1},
        {"low": .8, "high": 1.8},
        dim,
        boundary=.9,
    )

    return compute_time(generator, per_class, eng, runs=runs)


def uniform_norm_compute_time(dim, sample_size, eng, runs=default):
    """Time the bounds on a uniform class versus a standard normal class.

    The uniform class is configured with ``low=.5, high=3``; the normal class
    with ``loc=0, scale=1``.

    Parameters
    ----------
    dim : int
        Passed positionally to ``data_gen`` after the two parameter dicts.
    sample_size : int
        Total sample budget; half of it (truncated) is requested per class.
    eng : object
        Forwarded to ``compute_time``.
    runs : int, optional
        Number of repetitions, forwarded to ``compute_time``.

    Returns
    -------
    Whatever ``compute_time`` returns for this generator.
    """
    per_class = int(sample_size / 2)  # sampling for each class

    uniform_params = {'low': .5, 'high': 3}
    normal_params = {"loc": 0, "scale": 1}

    generator = data_gen(
        np.random.uniform,
        np.random.normal,
        uniform_params,
        normal_params,
        dim,
        boundary=.5,
    )

    return compute_time(generator, per_class, eng, runs=runs)


def norm_beta_compute_time(dim, sample_size, eng, runs=default):
    """Time the bounds on a standard normal class versus a Beta(20, 20) class.

    Parameters
    ----------
    dim : int
        Passed positionally to ``data_gen`` after the two parameter dicts.
    sample_size : int
        Total sample budget; half of it (truncated) is requested per class.
    eng : object
        Forwarded to ``compute_time``.
    runs : int, optional
        Number of repetitions, forwarded to ``compute_time``.

    Returns
    -------
    Whatever ``compute_time`` returns for this generator.
    """
    per_class = int(sample_size / 2)  # sampling for each class

    normal_params = {"loc": 0, "scale": 1}
    beta_params = {'a': 20, 'b': 20}

    # NOTE(review): the two boundary values are hard-coded; their derivation
    # is not shown in this notebook.
    boundary = [0.3219999999942793, 0.6839999999940787]

    generator = data_gen(
        np.random.normal,
        np.random.beta,
        normal_params,
        beta_params,
        dim,
        boundary=boundary,
    )

    return compute_time(generator, per_class, eng, runs=runs)




### Starting the computation


In [7]:

# Dimensionalities and total sample sizes covered by the timing grid.
# The timing cells below file results by the literal values 3, 8 and 15.
dims = [
    3,
    8,
    15,
]
samples = [
    50,
    5000,
]

In [8]:

# Start a MATLAB engine session. `eng` is the handle passed to the timing
# helpers, which forward it to bounds_calculator as MATLAB=eng.
import matlab.engine
eng = matlab.engine.start_matlab()

In [9]:
# norm_compute_time(3, 50, eng).median()

In [10]:
# One accumulator per dimensionality; the timing cells append one result
# per (simulation, sample size) pair to the list matching the dimension.
dim3, dim8, dim15 = [], [], []

In [11]:


# Time the two-Gaussian simulation for every (dimension, sample size) pair
# and file each result under the accumulator for its dimension.
# Dimensions without an accumulator are timed but not stored.
buckets = {3: dim3, 8: dim8, 15: dim15}
for d in dims:
    for s in samples:
        run_time = norm_compute_time(d, s, eng)
        bucket = buckets.get(d)
        if bucket is not None:
            bucket.append(run_time)

1  was calculated for k. 3 is chosen for k for variance purposes.
1  was calculated for k. 3 is chosen for k for variance purposes.
1  was calculated for k. 3 is chosen for k for variance purposes.
1  was calculated for k. 3 is chosen for k for variance purposes.
1  was calculated for k. 3 is chosen for k for variance purposes.
1  was calculated for k. 3 is chosen for k for variance purposes.
1  was calculated for k. 3 is chosen for k for variance purposes.


Gaussian Mixture

In [12]:

# Time the Gaussian-mixture simulation for every (dimension, sample size)
# pair and file each result under the accumulator for its dimension.
# Dimensions without an accumulator are timed but not stored.
buckets = {3: dim3, 8: dim8, 15: dim15}
for d in dims:
    for s in samples:
        run_time = gaussian_mix_compute_time(d, s, eng)
        bucket = buckets.get(d)
        if bucket is not None:
            bucket.append(run_time)


1  was calculated for k. 3 is chosen for k for variance purposes.
1  was calculated for k. 3 is chosen for k for variance purposes.
1  was calculated for k. 3 is chosen for k for variance purposes.
1  was calculated for k. 3 is chosen for k for variance purposes.
1  was calculated for k. 3 is chosen for k for variance purposes.
1  was calculated for k. 3 is chosen for k for variance purposes.
1  was calculated for k. 3 is chosen for k for variance purposes.


Paired Uniforms 

In [13]:

# Time the paired-uniform simulation for every (dimension, sample size)
# pair and file each result under the accumulator for its dimension.
# Dimensions without an accumulator are timed but not stored.
buckets = {3: dim3, 8: dim8, 15: dim15}
for d in dims:
    for s in samples:
        run_time = uniform2_compute_time(d, s, eng)
        bucket = buckets.get(d)
        if bucket is not None:
            bucket.append(run_time)

1  was calculated for k. 3 is chosen for k for variance purposes.
1  was calculated for k. 3 is chosen for k for variance purposes.
1  was calculated for k. 3 is chosen for k for variance purposes.
1  was calculated for k. 3 is chosen for k for variance purposes.
1  was calculated for k. 3 is chosen for k for variance purposes.
1  was calculated for k. 3 is chosen for k for variance purposes.
1  was calculated for k. 3 is chosen for k for variance purposes.


Normal Beta

In [14]:
# Time the normal-vs-beta simulation for every (dimension, sample size)
# pair and file each result under the accumulator for its dimension.
# Dimensions without an accumulator are timed but not stored.
buckets = {3: dim3, 8: dim8, 15: dim15}
for d in dims:
    for s in samples:
        run_time = norm_beta_compute_time(d, s, eng)
        bucket = buckets.get(d)
        if bucket is not None:
            bucket.append(run_time)

1  was calculated for k. 3 is chosen for k for variance purposes.
1  was calculated for k. 3 is chosen for k for variance purposes.
1  was calculated for k. 3 is chosen for k for variance purposes.
1  was calculated for k. 3 is chosen for k for variance purposes.
1  was calculated for k. 3 is chosen for k for variance purposes.
1  was calculated for k. 3 is chosen for k for variance purposes.
1  was calculated for k. 3 is chosen for k for variance purposes.


Uniform Normal

In [15]:

# Time the uniform-vs-normal simulation for every (dimension, sample size)
# pair and file each result under the accumulator for its dimension.
# Dimensions without an accumulator are timed but not stored.
buckets = {3: dim3, 8: dim8, 15: dim15}
for d in dims:
    for s in samples:
        run_time = uniform_norm_compute_time(d, s, eng)
        bucket = buckets.get(d)
        if bucket is not None:
            bucket.append(run_time)

1  was calculated for k. 3 is chosen for k for variance purposes.
1  was calculated for k. 3 is chosen for k for variance purposes.
1  was calculated for k. 3 is chosen for k for variance purposes.
1  was calculated for k. 3 is chosen for k for variance purposes.
1  was calculated for k. 3 is chosen for k for variance purposes.
1  was calculated for k. 3 is chosen for k for variance purposes.
1  was calculated for k. 3 is chosen for k for variance purposes.


In [16]:
# Shut down the MATLAB engine session; no later cell in this notebook uses `eng`.
eng.quit()

### Building the result tables

In [17]:
# Simulation labels, listed in the same order in which the timing cells ran.
# NOTE(review): "Guassian_Mix" is a misspelling of "Gaussian_Mix"; it is kept
# as-is so the labels keep matching the recorded outputs.
sims = [
    "Normals",
    "Guassian_Mix",
    "Pair_Uniforms",
    "Normal_Beta",
    "Uniform_Normal",
]

# One row label per (simulation, sample size) pair, sample size varying fastest.
index = [f"{names}_{num}" for names in sims for num in samples]

index

['Normals_50',
 'Normals_5000',
 'Guassian_Mix_50',
 'Guassian_Mix_5000',
 'Pair_Uniforms_50',
 'Pair_Uniforms_5000',
 'Normal_Beta_50',
 'Normal_Beta_5000',
 'Uniform_Normal_50',
 'Uniform_Normal_5000']

In [22]:
# Column order used when displaying the timing tables.
ordered_columns = [
    "Bhattacharyya",
    "Bha_knn",
    "Influence",
    "Dp",
    "EnDive",
]

In [34]:
# Timing table for dim = 3: one row per (simulation, sample size) label,
# columns restricted to and ordered by `ordered_columns`.
dim3_df = pd.DataFrame(dim3, index=index)[ordered_columns]

# Display the timings scaled by 1000 and rounded to two decimals.
# NOTE(review): presumably converts seconds to milliseconds; confirm the
# unit reported by bounds_calculator.
(dim3_df * 1000).round(2)

Unnamed: 0,Bhattacharyya,Bha_knn,Influence,Dp,EnDive
Normals_50,0.6,2.33,35.23,2.19,111.42
Normals_5000,0.69,86.91,1228.81,4932.94,609.74
Guassian_Mix_50,0.4,2.24,25.17,1.88,44.12
Guassian_Mix_5000,0.56,70.63,1775.88,5217.33,628.61
Pair_Uniforms_50,0.44,2.2,26.9,2.7,43.64
Pair_Uniforms_5000,0.69,86.12,1725.72,5441.72,652.97
Normal_Beta_50,0.39,1.94,26.65,1.72,42.05
Normal_Beta_5000,0.54,76.33,1781.59,5569.49,592.89
Uniform_Normal_50,0.36,1.94,26.89,1.43,47.88
Uniform_Normal_5000,0.5,57.67,1705.85,5119.89,615.22


In [36]:
# Timing table for dim = 8: one row per (simulation, sample size) label,
# columns restricted to and ordered by `ordered_columns`.
dim8_df = pd.DataFrame(dim8, index=index)[ordered_columns]

# Display the timings scaled by 1000 and rounded to two decimals.
# NOTE(review): presumably converts seconds to milliseconds; confirm the
# unit reported by bounds_calculator.
(dim8_df * 1000).round(2)

Unnamed: 0,Bhattacharyya,Bha_knn,Influence,Dp,EnDive
Normals_50,0.44,1.74,22.39,1.48,70.53
Normals_5000,14.13,270.58,1820.15,5395.44,717.77
Guassian_Mix_50,0.86,2.05,30.45,2.14,77.2
Guassian_Mix_5000,15.37,248.82,1848.03,5425.06,703.66
Pair_Uniforms_50,0.53,2.24,21.58,1.64,79.16
Pair_Uniforms_5000,11.1,247.88,1824.12,5475.9,695.2
Normal_Beta_50,0.79,2.46,24.78,1.66,81.7
Normal_Beta_5000,14.66,195.69,1729.87,5375.32,664.86
Uniform_Normal_50,0.46,1.77,21.41,1.41,76.98
Uniform_Normal_5000,11.44,210.72,1814.39,5575.84,677.74


In [33]:
# Timing table for dim = 15: one row per (simulation, sample size) label,
# columns restricted to and ordered by `ordered_columns`.
dim15_df = pd.DataFrame(dim15, index=index)[ordered_columns]

# Display the timings scaled by 1000 and rounded to two decimals.
# NOTE(review): presumably converts seconds to milliseconds; confirm the
# unit reported by bounds_calculator.
(dim15_df * 1000).round(2)

Unnamed: 0,Bhattacharyya,Bha_knn,Influence,Dp,EnDive
Normals_50,7.27,2.63,28.24,1.5,88.13
Normals_5000,14.17,492.92,1903.0,6311.53,783.16
Guassian_Mix_50,7.45,4.78,22.55,1.45,80.09
Guassian_Mix_5000,33.36,458.75,1764.79,5541.54,763.04
Pair_Uniforms_50,5.78,4.12,28.2,1.75,84.83
Pair_Uniforms_5000,17.17,541.62,1771.72,6065.73,802.4
Normal_Beta_50,5.1,2.8,21.21,1.51,96.2
Normal_Beta_5000,21.55,450.41,1750.01,5662.34,752.35
Uniform_Normal_50,9.33,4.96,25.06,1.92,102.57
Uniform_Normal_5000,30.59,406.63,1831.83,5844.46,736.97
