<a href="https://colab.research.google.com/github/microprediction/precise/blob/main/examples_colab_notebooks/luck_versus_skill_m6.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install --upgrade git+https://github.com/microprediction/precise.git

**Luck versus skill in the M6 Forecasting Competition**

In [3]:
from precise.skatertools.syntheticdata.factor import create_factor_dataset
n_dim = 100
n_train = 5000
n_test = 5000
X = create_factor_dataset(n_train+n_test, n_dim)
X_train = X[:n_train]
X_test = X[n_train:]
X_test[:2,:5]

array([[-0.58497763,  1.73311784, -0.58108863, -2.25389507,  0.81054599],
       [-1.2741325 ,  0.9157971 , -2.86096307, -0.74286836,  0.53774385]])

In [4]:
import numpy as np 
from precise.skatertools.m6.quintileprobabilities import mvn_quintile_probabilities

def estimate_quintile_probs(n_learn):
   np.random.shuffle(X_train)
   cov = np.cov(X_train[:n_learn,:], rowvar=False)
   return mvn_quintile_probabilities(sgma=cov, n_samples=100*1000)

# For example...
p1 = estimate_quintile_probs(n_learn=100)
p2 = estimate_quintile_probs(n_learn=120)


In [5]:
# Convert the test data set into quintile results
from precise.skatertools.m6.quintileprobabilities import scores_to_quintiles
qin_test = scores_to_quintiles(X_test)
out_test = [ [] for _ in range(5)]
len(qin_test)
qin_test[0][:12]


array([1, 4, 1, 0, 3, 1, 3, 2, 2, 2, 2, 0])

In [9]:
def win_record(p1,p2,n_test):
  wins1 = list()
  for q in qin_test[:n_test]:
      brier_sum_1 = 0 
      brier_sum_2 = 0
      for i,qi in enumerate(q):
          # qi is realized quintile for asset i
          qi_hot = [0 for _ in range(5)]
          qi_hot[qi] = 1 
          pi1 = [ p1[j][i] for j in range(5) ]
          pi2 = [ p2[j][i] for j in range(5) ]
          brier1 = np.linalg.norm( np.array(qi_hot) - np.array(pi1) )
          brier2 = np.linalg.norm( np.array(qi_hot) - np.array(pi2) )
          brier_sum_1 += brier1
          brier_sum_2 += brier2
      wins1.append(brier_sum_1<brier_sum_2)
  return wins1 

wp = win_record(p1, p2, n_test=36)
print(wp)
print(np.mean(wp))
        

[True, False, False, True, False, True, True, True, True, False, False, True, True, False, False, True, True, False, False, False, False, False, False, True, True, True, True, False, False, False, True, True, False, True, False, False]
0.4722222222222222


In [16]:
from itertools import islice
 
def chunk(arr_range, arr_size):
    arr_range = iter(arr_range)
    return iter(lambda: tuple(islice(arr_range, arr_size)), ())
   
def beat_down(win_rec, months):
    # Overall win reacord assuming 12 realizations 
    chunks = list(chunk(win_rec,months))
    return [ 1 if sum(c)>months/2+0.001 else 0.5 if abs(sum(c)-months/2)<0.001 else 0 for c in chunks ]

list(chunk(range(12),3))

[(0, 1, 2), (3, 4, 5), (6, 7, 8), (9, 10, 11)]

In [None]:
# How often would one competitor beat another in the M6 rank forecasts? 

wps12 = list()
wps3 = list()
for _ in range(1000):
   p1 = estimate_quintile_probs(n_learn=25)
   p2 = estimate_quintile_probs(n_learn=5000)
   wr = win_record(p1, p2, n_test=4992)
   wps12.extend(beat_down(wr, months=12))
   wps3.extend(beat_down(wr, months=3))
   if False:
     print({'wr':wr,'wps12':wps12,'wps3':wps3})
   print({'12':np.mean(wps12),'3':np.mean(wps3),'n3':len(wps3)})




{'12': 0.08533653846153846, '3': 0.2361778846153846, 'n3': 1664}
{'12': 0.15504807692307693, '3': 0.2926682692307692, 'n3': 3328}
{'12': 0.16466346153846154, '3': 0.3010817307692308, 'n3': 4992}
{'12': 0.15745192307692307, '3': 0.29627403846153844, 'n3': 6656}
{'12': 0.1485576923076923, '3': 0.2875, 'n3': 8320}
{'12': 0.16306089743589744, '3': 0.29667467948717946, 'n3': 9984}
{'12': 0.16260302197802198, '3': 0.29447115384615385, 'n3': 11648}
{'12': 0.1603064903846154, '3': 0.2930438701923077, 'n3': 13312}
{'12': 0.14970619658119658, '3': 0.28338675213675213, 'n3': 14976}
{'12': 0.1467548076923077, '3': 0.2820913461538462, 'n3': 16640}
{'12': 0.14423076923076922, '3': 0.28086756993006995, 'n3': 18304}
{'12': 0.14022435897435898, '3': 0.27754407051282054, 'n3': 19968}
{'12': 0.14164201183431951, '3': 0.27773668639053256, 'n3': 21632}
{'12': 0.13555975274725274, '3': 0.27146291208791207, 'n3': 23296}
{'12': 0.1329326923076923, '3': 0.26959134615384617, 'n3': 24960}
{'12': 0.12868088942307

## Results - Overall

 - P(1000) beats P(1100)  49.5% of the time
 - P(100) beats P(110) 49% of the time
 - P(25) beats P(5000) 14% of the time

## Results - Quarterly

- P(25) beats P(5000) 27% of the time