In [1]:
import numpy as np, pandas as pd, seaborn as sns, matplotlib.pyplot as plt

## Explore skew

In [2]:
def normalized_skew(x):
    """Return the third central moment of ``x`` divided by its std cubed.

    Parameters
    ----------
    x : sequence or array of numbers

    Returns
    -------
    float
        ``mean((x - mean(x))**3) / std(x)**3`` using NumPy's default
        (population) standard deviation. A constant input has zero standard
        deviation, so the quotient is not finite in that case.
    """
    deviations = x - np.mean(x)
    third_central_moment = np.mean(deviations**3)
    spread_cubed = np.std(x)**3
    return third_central_moment / spread_cubed

def normalized_skew_v2(x):
    """Return the skewness of ``x`` as the mean cubed z-score.

    Each entry is standardised first (subtract the mean, divide by the
    population standard deviation) and the cubes are then averaged. This is
    algebraically the same quantity as ``mean((x - mean)**3) / std**3``.

    Parameters
    ----------
    x : sequence or array of numbers

    Returns
    -------
    float
        Positive when the long tail lies above the mean, negative when it
        lies below. A constant input has zero standard deviation, so the
        result is not finite in that case.
    """
    x = np.asarray(x, dtype=float)
    # Deviations must be (x - mean): cubing keeps the sign, so the reversed
    # difference (mean - x) would flip the sign of the result.
    z_scores = (x - np.mean(x)) / np.std(x)
    return np.mean(z_scores**3)

In [3]:
def skew(x):
    """Return the raw (uncentred) third moment of ``x``.

    The input is converted to a NumPy array, every entry is cubed, and the
    cubes are averaged. No mean is subtracted and no scaling is applied.
    """
    values = np.array(x)
    cubed = values**3
    return cubed.mean()

In [4]:
# Five-component vector with nearly all of its mass on the last entry.
probe = [0, 0, 0.01, 0.01, 0.98]
print(skew(probe))

0.18823879999999998


In [5]:
# Same shape as the previous probe, with slightly more mass on the last entry.
probe = [0, 0, 0.005, 0.01, 0.985]
print(skew(probe))

0.19113455


In [6]:
# Four-component vectors ranging from uniform to one-hot; one score is
# printed per vector, in the order listed.
probes = [
    [0.25, 0.25, 0.25, 0.25],
    [0.25, 0.25, 0.2, 0.3],
    [0, 0, 0.5, 0.5],
    [0.2, 0.2, 0.2, 0.4],
    [0.1, 0.1, 0.1, 0.7],
    [0.04, 0.05, 0.06, 0.85],
    [0.02, 0.02, 0.02, 0.94],
    [0.01, 0.01, 0.02, 0.96],
    [0.0001, 0.00015, 0.0002, 0.99955],
    [0, 0, 0, 1],
]
for probe in probes:
    print(skew(probe))

0.015625
0.0165625
0.0625
0.022000000000000006
0.08649999999999998
0.15363249999999998
0.20765199999999998
0.22118649999999995
0.24966265185531253
0.25


In [7]:
# Two seven-component vectors dominated by their last entry.
# NOTE(review): the second vector's entries sum to 1.018, not 1 - confirm
# whether it was meant to be a probability vector.
probes = [
    [0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.94],
    [0.001, 0.002, 0.005, 0.05, 0.01, 0.02, 0.93],
]
for probe in probes:
    print(skew(probe))

0.11865571428571427
0.11492730485714286


In [8]:
# Three-component vectors, from near-uniform to almost one-hot; one score
# is printed per vector, in the order listed.
probes = [
    [0.33, 0.33, 0.33],
    [0.1, 0.2, 0.7],
    [0.03, 0.48, 0.49],
    [0.05, 0.05, 0.9],
    [0.005, 0.005, 0.99],
]
for probe in probes:
    print(skew(probe))

0.035937000000000004
0.1173333333333333
0.07608933333333333
0.24308333333333335
0.32343308333333337


In [9]:
# Centered (mean-subtracted) third moment; not divided by std**3

def skew(x):
    """Return the third central moment of ``x``.

    The mean of ``x`` is subtracted from every entry, the deviations are
    cubed, and the cubes are averaged. The result is not divided by the
    standard deviation.
    """
    values = np.array(x)
    deviations = values - values.mean()
    return np.mean(deviations**3)

In [11]:
def l2(x):
    """Return the Euclidean (L2) length of ``x``: sqrt of the sum of squares."""
    values = np.array(x)
    sum_of_squares = np.square(values).sum()
    return np.sqrt(sum_of_squares)

In [12]:
# L2 length of a uniform vector versus one split evenly over two entries.
for probe in ([0.25, 0.25, 0.25, 0.25], [0, 0, 0.5, 0.5]):
    print(l2(probe))

0.5
0.7071067811865476


In [13]:
# Population standard deviation of the same two vectors, for comparison.
for probe in ([0.25, 0.25, 0.25, 0.25], [0, 0, 0.5, 0.5]):
    print(np.std(probe))

0.0
0.25


In [14]:
def scorer(x):
    """Return the mean squared complement of ``x``, i.e. ``mean((1 - x)**2)``."""
    complement = 1 - np.array(x)
    return np.mean(complement**2)

# Score a uniform vector and one split evenly over two entries.
for probe in ([0.25, 0.25, 0.25, 0.25], [0, 0, 0.5, 0.5]):
    print(scorer(probe))

0.5625
0.625


## Plot skew on the probability simplex

In [35]:
def centered_skew(x):
    """Return the third central moment of ``x`` (no scaling by the std).

    Parameters
    ----------
    x : sequence or array of numbers

    Returns
    -------
    float
        ``mean((x - mean(x))**3)``.
    """
    values = np.array(x)
    deviations = values - values.mean()
    return np.mean(deviations**3)

def normalized_skew(x):
    """Return the third central moment of ``x`` divided by its std cubed.

    Parameters
    ----------
    x : sequence or array of numbers

    Returns
    -------
    float
        ``mean((x - mean(x))**3) / std(x)**3`` with NumPy's default
        (population) standard deviation. A constant input has zero standard
        deviation, so the quotient is not finite in that case.
    """
    values = np.array(x)
    third_central_moment = np.mean((values - values.mean())**3)
    return third_central_moment / values.std()**3

def norm(x, lp = 2):
    """Return the Lp norm of ``x``: ``(sum(|x_i|**lp)) ** (1 / lp)``.

    Parameters
    ----------
    x : sequence or array of numbers
    lp : number, default 2
        Order of the norm; it is used both as the exponent and in ``1 / lp``.

    Returns
    -------
    float
        The Lp norm. For non-negative inputs this equals
        ``sum(x**lp) ** (1 / lp)``.
    """
    x = np.array(x)
    # Take magnitudes first: with an odd lp, negative entries would otherwise
    # cancel positive ones (lp=1) or make the fractional power undefined (lp=3).
    magnitudes = np.abs(x)
    return (np.sum(magnitudes**lp))**(1/lp)

def custom_score(x):
    """Return the mean squared complement of ``x``, i.e. ``mean((1 - x)**2)``."""
    complement = 1 - np.array(x)
    return np.mean(complement**2)

In [36]:
from collections import defaultdict

res = 0.01  # requested spacing between neighbouring grid points

# Enumerate the 2-simplex on an integer lattice rather than with float-stepped
# np.arange calls. Float steps make the number of rows depend on round-off and
# leave out the z == 0 edge and the (1, 0, 0) / (0, 1, 0) vertices, while the
# (0, 0, 1) vertex is kept. Counting in whole steps covers all three edges and
# vertices symmetrically and keeps every coordinate non-negative.
# If 1 / res is not an integer, the effective spacing becomes 1 / n_steps.
n_steps = int(round(1 / res))

dd = defaultdict(list)
for a in range(n_steps + 1):
    for b in range(n_steps + 1 - a):
        c = n_steps - a - b  # whatever mass is left, so a + b + c == n_steps
        i, j, k = a / n_steps, b / n_steps, c / n_steps
        vec = [i, j, k]

        dd['x'].append(i)
        dd['y'].append(j)
        dd['z'].append(k)
        dd['Unnormalized skew'].append(centered_skew(vec))
        # The uniform point (on the lattice only when n_steps is a multiple of
        # 3) has zero standard deviation, so 'Skew' is not finite there.
        dd['Skew'].append(normalized_skew(vec))
        dd['L1 norm'].append(norm(vec, lp = 1))
        dd['L2 norm'].append(norm(vec, lp = 2))
        dd['L3 norm'].append(norm(vec, lp = 3))
        dd['Custom score'].append(custom_score(vec))

# One row per grid point: its coordinates plus every score computed above.
df = pd.DataFrame(dd)

# Add color

# max_skew = max(df['Skew'])
# num_steps = 100
# palette = sns.color_palette('hls', num_steps)

# dd = defaultdict(list)
# for idx, row in df.iterrows():
#     color = palette[int((row['Skew'] / max_skew) * num_steps) - 1]
#     dd['Color'].append(color)
    
# df['Color'] = dd['Color']

In [37]:
# Preview the first five grid points and their scores.
df.head(n=5)

Unnamed: 0,x,y,z,Unnormalized skew,Skew,L1 norm,L2 norm,L3 norm,Custom score
0,0.0,0.0,1.0,0.074074,0.707107,1.0,1.0,1.0,0.666667
1,0.0,0.01,0.99,0.070774,0.706861,1.0,0.990051,0.99,0.660067
2,0.0,0.02,0.98,0.067541,0.706093,1.0,0.980204,0.980003,0.6536
3,0.0,0.03,0.97,0.064374,0.704754,1.0,0.970464,0.97001,0.647267
4,0.0,0.04,0.96,0.061274,0.702794,1.0,0.960833,0.960023,0.641067


In [26]:

import plotly.express as px

# Interactive 3-D scatter of the simplex grid held in df; marker colour
# encodes the 'Unnormalized skew' column. Only a Plotly figure is created,
# so no matplotlib clean-up is needed after showing it.
fig = px.scatter_3d(
    df,
    x = 'x',
    y = 'y',
    z = 'z',
    color = 'Unnormalized skew',
    title = 'Unnormalized skew on the probability simplex',
)

fig.show()

In [27]:

import plotly.express as px

# Interactive 3-D scatter of the simplex grid held in df; marker colour
# encodes the 'Skew' column. Only a Plotly figure is created, so no
# matplotlib clean-up is needed after showing it.
fig = px.scatter_3d(
    df,
    x = 'x',
    y = 'y',
    z = 'z',
    color = 'Skew',
    title = 'Skew on the probability simplex',
)

fig.show()

In [33]:

import plotly.express as px

# Interactive 3-D scatter of the simplex grid held in df; marker colour
# encodes the 'L2 norm' column. Only a Plotly figure is created, so no
# matplotlib clean-up is needed after showing it.
fig = px.scatter_3d(
    df,
    x = 'x',
    y = 'y',
    z = 'z',
    color = 'L2 norm',
    title = 'L2 norm on the probability simplex',
)

fig.show()

In [34]:

import plotly.express as px

# Interactive 3-D scatter of the simplex grid held in df; marker colour
# encodes the 'L3 norm' column. Only a Plotly figure is created, so no
# matplotlib clean-up is needed after showing it.
fig = px.scatter_3d(
    df,
    x = 'x',
    y = 'y',
    z = 'z',
    color = 'L3 norm',
    title = 'L3 norm on the probability simplex',
)

fig.show()

In [38]:

import plotly.express as px

# Interactive 3-D scatter of the simplex grid held in df; marker colour
# encodes the 'Custom score' column. Only a Plotly figure is created, so no
# matplotlib clean-up is needed after showing it.
fig = px.scatter_3d(
    df,
    x = 'x',
    y = 'y',
    z = 'z',
    color = 'Custom score',
    title = 'Custom score on the probability simplex',
)

fig.show()