In [4]:
import pandas as pd
import numpy as np
import seaborn as sns

In [5]:
# Dimensionalities to probe: 2..5, then 10, 20, ..., 100.
k_list = [2, 3, 4, 5, *range(10, 101, 10)]
# gen_data() draws arrays of shape (q, n, k); the statistics are reduced over
# the n axis and then averaged over the q axis.
n = 100
q = 100
p_list = [0.5, 1, 2, 5, None] # None instead of inf to calculate Chebyshev distance with Lp_norm()

# The data below is drawn from NumPy's global random state. Fix the seed so a
# fresh "Restart & Run All" reproduces the same sample (the value is arbitrary).
SEED = 42
np.random.seed(SEED)

In [6]:
def gen_data(q, n, k, rng=None):
  """Draw q sets of n points with coordinates uniform on [0, 1) in k dimensions.

  Any point that np.allclose considers equal to the origin is redrawn until it
  is not, so every returned point is distinguishable from the origin.

  Args:
    q: number of point sets (first axis of the result).
    n: number of points per set (second axis).
    k: number of coordinates per point (last axis).
    rng: optional random source exposing ``uniform(low, high, size)``, e.g.
      ``np.random.default_rng(seed)``. When omitted, the global ``np.random``
      state is used, exactly as before.

  Returns:
    ndarray of shape (q, n, k).
  """
  sampler = np.random if rng is None else rng
  pts = sampler.uniform(0, 1, size=(q, n, k))

  # np.allclose(point, 0) holds exactly when every coordinate is np.isclose to
  # 0, so one vectorized pass finds the (normally zero) points to redraw
  # instead of calling np.allclose q*n times from Python.
  near_origin = np.isclose(pts, 0.0).all(axis=-1)

  # np.argwhere yields indices in row-major order, i.e. the same (i, j) order
  # the original nested loops visited, so the random stream is consumed
  # identically.
  for i, j in np.argwhere(near_origin):
    while np.allclose(pts[i, j], 0.0):
      pts[i, j] = sampler.uniform(0, 1, size=k)
  return pts

def Lp_norm(p, a, b):
  """Return the L_p distance between a and b.

  Args:
    p: exponent of the norm. ``None`` or infinity selects the Chebyshev
      distance (largest absolute coordinate difference).
    a, b: arrays that can be subtracted from each other.

  Returns:
    ``max(|a - b|)`` for the Chebyshev case, otherwise
    ``(sum(|a - b| ** p)) ** (1 / p)``, reduced over all elements.
  """
  diff = np.abs(a - b)
  # Infinity must take the Chebyshev branch: plugging it into the power
  # formula evaluates to inf ** 0 == 1.0 rather than the maximum.
  if p is None or p == np.inf: # Chebyshev distance
    # np.max (unlike the builtin max) also handles multi-dimensional input.
    return np.max(diff)
  return np.sum(diff**p)**(1/p)

def distance(p, pts, q, n, k):
  """L_p distance from the origin for every point in pts.

  Applies the same formula as Lp_norm(p, point, zeros(k)) to the whole array
  at once instead of making q*n separate Python calls.

  Args:
    p: exponent of the norm; ``None`` or infinity selects the Chebyshev
      distance.
    pts: array of shape (q, n, k); only the first q sets and first n points
      of each set are used.
    q: number of point sets to process.
    n: number of points per set to process.
    k: number of coordinates per point. Kept for interface compatibility; the
      reduction runs over the last axis of pts.

  Returns:
    ndarray of shape (q, n) holding the distance of each point to the origin.

  Raises:
    IndexError: if pts holds fewer than q sets or fewer than n points per set.
  """
  pts = np.asarray(pts, dtype=float)
  if pts.shape[0] < q or pts.shape[1] < n:
    raise IndexError(f"pts has shape {pts.shape}; need at least ({q}, {n}, k)")

  # Distance to the origin is the norm of the coordinates themselves.
  magnitudes = np.abs(pts[:q, :n])
  if p is None or p == np.inf: # Chebyshev distance
    return magnitudes.max(axis=-1)
  return (magnitudes**p).sum(axis=-1)**(1/p)

def distances(p_list, pts, q, n, k):
  """Compute origin distances and summary statistics for every p in p_list.

  For each p, distance(p, pts, q, n, k) gives a (q, n) array. Each statistic
  is first reduced along axis 1 (one value per set) and then averaged over
  the sets.

  Returns:
    dict mapping p -> {"dist": the (q, n) distance array,
                       "stats": dict with the per-set arrays "Dmin", "Dmax",
                                "Dmean", "Dvar" (sample variance, ddof=1) and
                                "Ctr" = (Dmax - Dmin) / Dmin, plus their means
                                "Min_avg", "Max_avg", "Avg_avg", "Var_avg",
                                "Ctr_avg"}.
  """
  # (per-set key, averaged key, reducer applied along the point axis)
  reducers = (
    ("Dmin", "Min_avg", lambda arr: np.min(arr, axis=1)),
    ("Dmax", "Max_avg", lambda arr: np.max(arr, axis=1)),
    ("Dmean", "Avg_avg", lambda arr: np.mean(arr, axis=1)),
    ("Dvar", "Var_avg", lambda arr: np.var(arr, axis=1, ddof=1)),
  )

  results = {}
  for p in p_list:
    norms = distance(p, pts, q, n, k)

    summary = {}
    for per_set_key, averaged_key, reduce_points in reducers:
      summary[per_set_key] = reduce_points(norms)
      summary[averaged_key] = np.mean(summary[per_set_key])

    # Relative contrast: spread of the distances relative to the nearest one.
    spread = summary["Dmax"] - summary["Dmin"]
    summary["Ctr"] = np.divide(spread, summary["Dmin"])
    summary["Ctr_avg"] = np.mean(summary["Ctr"])

    results[p] = {"dist": norms, "stats": summary}
  return results

In [7]:
# One random sample per dimensionality: k -> array produced by gen_data(q, n, k).
data = {k: gen_data(q, n, k) for k in k_list}

# k -> result of distances() on that sample (distances and stats for every p).
dists_and_stats = {
  k: distances(p_list, data[k], q, n, k)
  for k in k_list
}

In [15]:
# DataFrame column -> key of the averaged statistic stored by distances().
stat_for_column = {
    "min": "Min_avg",
    "max": "Max_avg",
    "avg": "Avg_avg",
    "var": "Var_avg",
    "ctr": "Ctr_avg",
}

dfs = {}
for p in p_list:
    rows = []
    for k in k_list:
        averaged = dists_and_stats[k][p]['stats']
        rows.append([averaged[key] for key in stat_for_column.values()])

    # One row per dimensionality k, one column per averaged statistic.
    df = pd.DataFrame(rows, index = k_list, columns = list(stat_for_column), dtype = float)
    dfs[p] = df

Unnamed: 0,min,max,avg,var,ctr
2,0.211107,3.738963,1.883893,0.730359,24.724948
3,0.848036,7.960536,4.193285,2.564335,9.852801
4,2.038328,13.462528,7.337452,6.187481,6.489011
5,3.709465,20.042507,11.398558,12.012315,4.875443
10,22.833316,69.962378,44.9411,97.6023,2.116926
20,112.160189,250.692958,178.891004,773.006196,1.25671
30,276.511092,534.545041,402.042777,2726.273585,0.941974
40,526.094033,910.555538,712.249267,6156.350848,0.7377
50,844.287755,1400.902239,1113.810378,12661.17109,0.663388
60,1252.179345,1976.494186,1603.746287,21365.699415,0.58283
