In [1]:
import os

# Step up to the parent directory (presumably so the `sim` import in the next
# cell resolves from there). The path is relative, so re-running this cell
# climbs one more level each time -- run it exactly once per kernel.
os.chdir(os.pardir)

In [2]:
import seaborn as sns
import pandas as pd

from sim import run_monte_carlo, Params
# Monte Carlo setting shared by every run below. Presumably the number of
# simulated seasons per run_monte_carlo call (the text and output below refer
# to 1000 seasons) -- confirm against sim.Params.
Params.ITERATIONS = 1000

# Monte Carlo sims

The previous page's data all came from a single simulated season of 100 teams and 100 games. This is insufficient to determine small effects with low but non-zero correlations, so we're going to need stronger tools.

From now on, every result is computed across 1000 simulated seasons. Teams and players are regenerated each time.

Let's start by asking: what's the actual correlation between attributes and hit rate? This is a repeat of the previous page's charts, except with 1000x as much data.

*Note: ave_dist measures the average distance between teams in matchmaking*

In [3]:
def get_correlations(team_df, player_df) -> dict:
    """Summarise one season as attribute-vs-average correlations.

    Args:
        team_df: frame with an ``average_distance`` column.
        player_df: frame with ``is_pitcher``, ``average``, ``goodness`` and
            ``attr_0`` .. ``attr_3`` columns.

    Returns:
        A dict with one correlation per attribute (``goodness``, then
        ``attr_0`` .. ``attr_3``), computed over non-pitchers only, plus
        ``ave_dist``: the mean of ``team_df["average_distance"]``.
    """
    # Only rows whose is_pitcher flag equals False take part in the correlations.
    not_pitcher = player_df["is_pitcher"].eq(False)
    batters = player_df[not_pitcher]

    attributes = ["goodness", *(f"attr_{i}" for i in range(4))]
    corrs = {
        # Off-diagonal cell of the 2x2 correlation matrix = corr(attr, average).
        attr: batters[[attr, "average"]].corr().iat[0, 1]
        for attr in attributes
    }
    corrs["ave_dist"] = team_df["average_distance"].mean()
    return corrs

# Run get_correlations through the Monte Carlo driver. Judging by the output
# below, the result has one row per simulated season and one column per key of
# the dict that get_correlations returns.
print("Here are the correlations for each season across 1000 seasons:")
df = run_monte_carlo(get_correlations)
display(df)

# Collapse the per-season rows into one column-wise mean per metric.
print("And the final correlations, averaged across all iterations:")
display(df.mean())

Here are the correlations for each season across 1000 seasons:


Unnamed: 0,goodness,attr_0,attr_1,attr_2,attr_3,ave_dist
0,0.832549,0.740513,0.398142,0.110072,0.038099,3.8040
1,0.826040,0.739765,0.356056,0.117984,0.046072,3.8212
2,0.793712,0.691741,0.345529,0.130226,0.007621,3.8252
3,0.835634,0.725717,0.378051,0.141054,0.023234,3.8724
4,0.834244,0.721809,0.401363,0.155321,-0.016278,3.8356
...,...,...,...,...,...,...
995,0.837212,0.747205,0.371811,0.145441,-0.011089,3.8308
996,0.829325,0.733858,0.399053,0.111816,-0.037366,3.8340
997,0.833578,0.713950,0.401636,0.125392,-0.007426,3.8092
998,0.825825,0.742529,0.322757,0.105522,-0.030267,3.8584


And the final correlations, averaged across all iterations:


goodness    0.825350
attr_0      0.730286
attr_1      0.364458
attr_2      0.120619
attr_3     -0.000099
ave_dist    3.823793
dtype: float64

## Effects of matchmaking windows

The experiment above will now be repeated across several matchmaking windows to determine the effect matchmaking has on the stat–attribute correlations.

In [5]:
# Repeat the Monte Carlo run once per matchmaking window, keeping only the
# column-wise mean of each run.
# NOTE: Params.MATCHMAKING_SPREAD is a shared setting and is left at the last
# swept value (100) once this cell finishes.
window_correlations = {}
for window in (2, 6, 20, 100):
    Params.MATCHMAKING_SPREAD = window
    window_correlations[window] = run_monte_carlo(get_correlations).mean()

# One row per window, one column per metric.
df = pd.DataFrame(window_correlations).transpose()
display(df)

# Change in each metric between the widest window (100) and window 6.
display(df.loc[100] - df.loc[6])

Unnamed: 0,goodness,attr_0,attr_1,attr_2,attr_3,ave_dist
2,0.824952,0.729995,0.363286,0.121377,0.001319,1.657931
6,0.825749,0.729809,0.366024,0.121807,-0.000866,3.826063
20,0.828262,0.732283,0.366282,0.122749,0.000132,10.897526
100,0.836003,0.739837,0.367981,0.121474,0.002816,33.665666


goodness     0.010253
attr_0       0.010028
attr_1       0.001957
attr_2      -0.000333
attr_3       0.003682
ave_dist    29.839603
dtype: float64