# Sports Team Ranking
## Analysis
This notebook is used to run the different ranking methods after the data has been processed. <br/>
<br/>
Originally coded by Rhys-Jasper León <br/>
Last updated 2025 November 15

In [1]:
# import libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import numpy.linalg as la
import scipy.stats as stats

In [2]:
# set verbose mode
verbose = False

In [3]:
# import processing functions
from processing import *

# process data
mlb_season, nrows, ncols = import_league_data('mlb_2025_season.csv', verbose=verbose)
mlb_teams, mlb_list = import_teams_data('mlb_teams.csv', verbose=verbose)
mlb_scores, mlb_games, mlb_wins, mlb_A = process_game_data(mlb_season, mlb_list, method='distribute', verbose=verbose)

## Direct Method

In [4]:
# find largest real, positive eigenvalue and corresponding eigenvector
lam, E = la.eig(mlb_A)
# print(eigenvalues)
ind = np.argmax(lam.real)
max_lam = lam[ind].real
max_E = E[:, ind].real
print(f'Largest real, positive eigenvalue: {max_lam}')
print(f'Corresponding eigenvector: {max_E}')

Largest real, positive eigenvalue: 80.53689426691702
Corresponding eigenvector: [0.18042291 0.1733005  0.18042964 0.17138287 0.1942651  0.20035883
 0.16884818 0.18817026 0.18240401 0.13879442 0.18857258 0.18618744
 0.18127331 0.16426368 0.19691168 0.17415969 0.19999066 0.17210399
 0.18718459 0.20220568 0.19471476 0.17246904 0.18991156 0.18034351
 0.19162586 0.17646043 0.18798217 0.18987694 0.18903052 0.15876291]


In [5]:
# create ranking vector based on max_E
r_vec = max_E / np.sum(max_E)

# verify that ranking vector sums to 1
print(f'Ranking vector sums to: {np.sum(r_vec)}')

Ranking vector sums to: 1.0


In [6]:
# display final rankings
rankings = pd.DataFrame({'Team': mlb_list, 'Rank': r_vec})
print('Final Team Rankings:')
display(rankings.sort_values(by='Rank', ascending=False).reset_index(drop=True))

Final Team Rankings:


Unnamed: 0,Team,Rank
0,NYY,0.037018
1,CHC,0.03668
2,MIL,0.036612
3,LAD,0.036049
4,PHI,0.035646
5,BOS,0.035564
6,SEA,0.035081
7,SD,0.034767
8,TEX,0.034761
9,TOR,0.034606


## Non-linear method

In [10]:
# initialize ranking vector
r = np.ones(len(mlb_list))

In [None]:
e = pd.DataFrame(0., index=mlb_list, columns=mlb_list)
# build e matrix from scores
for i in range(len(mlb_list)):
    for j in range(len(mlb_list)):
        e.iloc[i, j] = (5 + mlb_scores.iloc[i, j] + (mlb_scores.iloc[i, j] ** (2/3))) / (5 + mlb_scores.iloc[j, i] + (mlb_scores.iloc[j, i] ** (2/3)))

display(e)

abbr,AZ,ATH,ATL,BAL,BOS,CHC,CWS,CIN,CLE,COL,...,PHI,PIT,SD,SF,SEA,STL,TB,TEX,TOR,WSH
abbr,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
AZ,1.0,1.211453,1.05183,1.543747,1.365204,1.021291,1.809718,0.730986,1.0,1.389069,...,0.945014,0.542751,0.844495,0.97181,2.137325,1.478124,1.0,1.148005,0.813462,0.895899
ATH,0.825455,1.0,1.172289,1.416786,0.663729,0.324708,1.556884,1.97811,0.899997,0.955801,...,0.853032,0.361092,1.281293,0.686206,1.018777,1.228877,0.968923,0.673557,0.683826,2.137325
ATL,0.950724,0.853032,1.0,0.707429,1.240406,0.961268,0.925507,1.239571,2.519912,1.382938,...,0.733001,0.765975,0.756907,0.642309,0.350709,0.973302,0.671476,0.485953,0.938254,1.580187
BAL,0.647774,0.705823,1.413569,1.0,0.741039,0.830606,1.568127,0.436951,1.028248,3.048808,...,0.503649,1.392433,1.529133,0.75763,1.485373,0.890751,1.01312,0.694007,1.130903,0.550359
BOS,0.732491,1.50664,0.806188,1.349456,1.0,0.711844,0.914986,1.241807,1.346761,3.14339,...,0.83575,0.687521,1.183601,0.853032,0.892385,1.663378,0.984546,1.0,1.051256,2.45366
CHC,0.979153,3.079688,1.040293,1.20394,1.404802,1.0,1.577613,1.034183,1.566956,1.319906,...,0.812607,1.912808,1.0,0.616399,0.705823,1.877755,1.13131,1.455048,0.813229,1.479225
CWS,0.552572,0.642309,1.080488,0.637703,1.092913,0.633869,1.0,1.0,0.606125,1.420174,...,1.489257,2.954059,0.750905,1.263074,0.656904,0.54425,1.318468,1.120593,0.791909,1.429591
CIN,1.368014,0.505533,0.80673,2.288584,0.805278,0.966947,1.0,1.0,1.040293,1.499222,...,1.306222,1.209532,1.4063,0.8835,0.915455,0.967508,2.273487,2.149565,0.81861,0.862195
CLE,1.0,1.111115,0.396839,0.972528,0.742522,0.63818,1.649825,0.961268,1.0,1.583125,...,0.766459,1.489257,0.313059,1.229666,0.622437,0.376446,1.042096,0.550359,0.778914,1.433105
COL,0.719907,1.046243,0.723098,0.327997,0.318128,0.75763,0.704139,0.667013,0.631662,1.0,...,0.421772,0.67777,0.484471,0.622003,0.456859,1.411999,0.844956,0.46521,0.193265,1.196907


In [26]:
# define continuous monotone increasing function f(x)
def f_inc(x):
    return (0.05*x + x**2) / (2 + 0.05*x + x**2)

In [27]:
# build f matrix using nonlinear function f(x)
fmat = pd.DataFrame(0., index=mlb_list, columns=mlb_list)
for i in range(len(mlb_list)):
    for j in range(len(mlb_list)):
        fmat.iloc[i, j] = f_inc(e.iloc[i, j] * r[j])

display(fmat)

abbr,AZ,ATH,ATL,BAL,BOS,CHC,CWS,CIN,CLE,COL,...,PHI,PIT,SD,SF,SEA,STL,TB,TEX,TOR,WSH
abbr,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
AZ,0.344262,0.433137,0.366876,0.551604,0.491358,0.353608,0.627253,0.222059,0.344262,0.499871,...,0.319798,0.138568,0.274152,0.331775,0.700376,0.530379,0.344262,0.407463,0.259915,0.297612
ATH,0.265421,0.344262,0.417398,0.509578,0.191502,0.057347,0.555726,0.667322,0.299473,0.324631,...,0.27806,0.069093,0.460302,0.201657,0.352509,0.440024,0.330489,0.195934,0.200579,0.700376
ATL,0.322358,0.27806,0.344262,0.211303,0.444541,0.327075,0.311019,0.444215,0.764038,0.497697,...,0.222981,0.2381,0.233938,0.181896,0.065653,0.332439,0.194994,0.11522,0.316761,0.562937
BAL,0.184339,0.210571,0.508461,0.344262,0.226662,0.267785,0.559222,0.096157,0.356645,0.825292,...,0.122362,0.501059,0.546969,0.234269,0.532776,0.295272,0.350031,0.205197,0.400387,0.141783
BOS,0.222748,0.539732,0.256574,0.485664,0.344262,0.213315,0.306266,0.445088,0.484683,0.83386,...,0.270144,0.202253,0.421981,0.27806,0.296015,0.587628,0.337432,0.344262,0.366628,0.754394
CHC,0.33504,0.828156,0.361883,0.430146,0.505404,0.344262,0.562147,0.35923,0.558859,0.47481,...,0.259523,0.652444,0.344262,0.170389,0.210571,0.644119,0.400557,0.522664,0.259808,0.530744
CWS,0.142722,0.181896,0.379167,0.179841,0.384446,0.178133,0.344262,0.344262,0.165865,0.510752,...,0.534055,0.816078,0.231184,0.453329,0.188433,0.1392,0.474276,0.396091,0.250014,0.513998
CIN,0.492368,0.12313,0.256823,0.727967,0.256156,0.329609,0.344262,0.344262,0.361883,0.537318,...,0.469711,0.432373,0.505928,0.291972,0.306478,0.329859,0.725366,0.70274,0.262279,0.282251
CLE,0.344262,0.392122,0.081441,0.332095,0.227341,0.180053,0.583716,0.327075,0.344262,0.563837,...,0.238322,0.534055,0.053773,0.440334,0.173058,0.074303,0.362665,0.141783,0.244043,0.515204
COL,0.216995,0.364461,0.218453,0.058372,0.055317,0.234269,0.209804,0.192981,0.177151,0.344262,...,0.090487,0.197838,0.114627,0.172866,0.103767,0.507915,0.274363,0.107016,0.022967,0.427333


In [29]:
# create ranking vector based on f matrix
r_new = fmat.sum(axis=1).values / mlb_games.sum(axis=1).values

print(r_new)

[0.06619413 0.05999924 0.06311984 0.06091583 0.07384491 0.07839072
 0.06140372 0.07232429 0.06218102 0.03773547 0.07207746 0.07438504
 0.06875044 0.05298639 0.08184621 0.06496618 0.07973621 0.05895553
 0.07030567 0.08236792 0.07759    0.06212821 0.06825988 0.06676964
 0.07388712 0.06555654 0.07181427 0.07472284 0.0744461  0.05007965]


In [30]:
# display final rankings
rankings_nl = pd.DataFrame({'Team': mlb_list, 'Rank': r_new})
print('Final Team Rankings:')
display(rankings_nl.sort_values(by='Rank', ascending=False).reset_index(drop=True))

Final Team Rankings:


Unnamed: 0,Team,Rank
0,NYY,0.082368
1,LAD,0.081846
2,MIL,0.079736
3,CHC,0.078391
4,PHI,0.07759
5,TEX,0.074723
6,TOR,0.074446
7,HOU,0.074385
8,SEA,0.073887
9,BOS,0.073845


In [31]:
# put it all together
max_iters = 1000
tolerance = 1e-6

# initialize ranking vector
r = np.ones(len(mlb_list))
# pre-allocate e matrix
e = pd.DataFrame(0., index=mlb_list, columns=mlb_list)
# build f matrix using nonlinear function f(x)
fmat = pd.DataFrame(0., index=mlb_list, columns=mlb_list)

# define continuous monotone increasing function f(x)
def f_inc(x):
    return (0.05*x + x**2) / (2 + 0.05*x + x**2)

for it in range(max_iters):
    # build e matrix from scores
    for i in range(len(mlb_list)):
        for j in range(len(mlb_list)):
            e.iloc[i, j] = (5 + mlb_scores.iloc[i, j] + (mlb_scores.iloc[i, j] ** (2/3))) / (5 + mlb_scores.iloc[j, i] + (mlb_scores.iloc[j, i] ** (2/3)))
            fmat.iloc[i, j] = f_inc(e.iloc[i, j] * r[j])

    # create ranking vector based on f matrix
    r_new = fmat.sum(axis=1).values / mlb_games.sum(axis=1).values

    # check for convergence
    if la.norm(r_new - r, ord=1) < tolerance:
        print(f'Converged after {it} iterations.')
        print(r_new)
        break
    
    r = r_new

    # if max_iters reached without convergence
    if it == max_iters - 1:
        print('Maximum iterations reached without convergence.')

Converged after 4 iterations.
[1.16341069e-10 1.06306704e-10 1.13201066e-10 1.09236329e-10
 1.28601461e-10 1.36601273e-10 1.10403042e-10 1.27744876e-10
 1.08753071e-10 7.35412901e-11 1.29745241e-10 1.35760297e-10
 1.19517494e-10 9.66354599e-11 1.42729943e-10 1.15088137e-10
 1.43843520e-10 1.06789880e-10 1.22583678e-10 1.43890920e-10
 1.36288204e-10 1.13645476e-10 1.20576488e-10 1.18380653e-10
 1.39107517e-10 1.18370948e-10 1.27405877e-10 1.32889652e-10
 1.34017557e-10 9.33910713e-11]


In [32]:
# display final rankings
rankings_nl = pd.DataFrame({'Team': mlb_list, 'Rank': r_new})
print('Final Team Rankings:')
display(rankings_nl.sort_values(by='Rank', ascending=False).reset_index(drop=True))

Final Team Rankings:


Unnamed: 0,Team,Rank
0,NYY,1.438909e-10
1,MIL,1.438435e-10
2,LAD,1.427299e-10
3,SEA,1.391075e-10
4,CHC,1.366013e-10
5,PHI,1.362882e-10
6,HOU,1.357603e-10
7,TOR,1.340176e-10
8,TEX,1.328897e-10
9,DET,1.297452e-10
