# Sports Team Ranking
## Analysis
This notebook is used to run the different ranking methods after the data has been processed. <br/>
<br/>
Originally coded by Rhys-Jasper León <br/>
Last updated 2025 November 15

In [37]:
# import libraries
import numpy as np
import pandas as pd
import numpy.linalg as la

In [38]:
# set verbose mode: when True, later cells print/display their intermediate results
# (e matrix, f matrix, ranking vectors); the flag is also forwarded to the processing helpers
verbose = False

In [39]:
# import processing functions
# (explicit names instead of a wildcard import, so it is clear which helpers this
# notebook takes from the project-local `processing` module)
from processing import import_league_data, import_teams_data, process_game_data

# process data
# - mlb_list is the team list used to label and size every matrix/vector below
# - mlb_A feeds the direct (eigenvector) method
# - mlb_scores and mlb_games feed the non-linear method
mlb_season, nrows, ncols = import_league_data('mlb_2025_season.csv', verbose=verbose)
mlb_teams, mlb_list = import_teams_data('mlb_teams.csv', verbose=verbose)
mlb_scores, mlb_games, mlb_wins, mlb_A = process_game_data(mlb_season, mlb_list, method='distribute', verbose=verbose)

## Direct Method

In [40]:
# Eigen-decomposition of mlb_A. The eigenvalue with the largest real part is selected,
# together with its eigenvector (column `ind` of E); only the real parts are kept.
lam, E = la.eig(mlb_A)
ind = lam.real.argmax()
max_lam = lam.real[ind]
max_E = E.real[:, ind]
print(f'Largest real, positive eigenvalue: {max_lam}')
print(f'Corresponding eigenvector: {max_E}')

Largest real, positive eigenvalue: 80.53689426691702
Corresponding eigenvector: [0.18042291 0.1733005  0.18042964 0.17138287 0.1942651  0.20035883
 0.16884818 0.18817026 0.18240401 0.13879442 0.18857258 0.18618744
 0.18127331 0.16426368 0.19691168 0.17415969 0.19999066 0.17210399
 0.18718459 0.20220568 0.19471476 0.17246904 0.18991156 0.18034351
 0.19162586 0.17646043 0.18798217 0.18987694 0.18903052 0.15876291]


In [41]:
# ranking vector for the direct method: max_E rescaled so that its entries add up to 1
r_vec = max_E / max_E.sum()

# sanity check: the printed total should be 1
print(f'Ranking vector sums to: {r_vec.sum()}')

Ranking vector sums to: 1.0


In [42]:
# direct-method results: one row per team, shown from highest to lowest rank value
# with a fresh 0..n-1 index
rankings = pd.DataFrame({'Team': mlb_list, 'Rank': r_vec})
print('Final Team Rankings:')
display(rankings.sort_values('Rank', ascending=False, ignore_index=True))

Final Team Rankings:


Unnamed: 0,Team,Rank
0,NYY,0.037018
1,CHC,0.03668
2,MIL,0.036612
3,LAD,0.036049
4,PHI,0.035646
5,BOS,0.035564
6,SEA,0.035081
7,SD,0.034767
8,TEX,0.034761
9,TOR,0.034606


## Non-linear Method

In [43]:
# initialize ranking vector: one entry per team in mlb_list, all equal to 1.0
# (starting guess for the non-linear iteration in the cells below)
r = np.ones(len(mlb_list))

In [44]:
# define continuous monotone increasing function f(x)
def f_inc(x):
    return (0.05*x + x**2) / (2 + 0.05*x + x**2)

In [45]:
# build e matrix from scores in one vectorised step (replaces the element-by-element
# .iloc loop, which recomputed both weights for every cell):
#   weight[i, j] = 5 + S[i, j] + S[i, j]^(2/3)
#   e[i, j]      = weight[i, j] / weight[j, i]
# S is the leading len(mlb_list) x len(mlb_list) block of mlb_scores, read by position
# exactly as the loop did.
n_teams = len(mlb_list)
scores = mlb_scores.iloc[:n_teams, :n_teams].to_numpy(dtype=float)
score_weight = 5 + scores + scores ** (2/3)
e = pd.DataFrame(score_weight / score_weight.T, index=mlb_list, columns=mlb_list)

if verbose:
    display(e)

In [46]:
# build f matrix using nonlinear function f(x): fmat[i, j] = f_inc(e[i, j] * r[j])
# Vectorised: multiplying the (n, n) array by the length-n vector r broadcasts r along
# the rows, so column j of e is scaled by r[j]; f_inc is then applied element-wise.
fmat = pd.DataFrame(f_inc(e.to_numpy() * r), index=mlb_list, columns=mlb_list)

if verbose:
    display(fmat)

In [47]:
# first-iteration ranking vector: each row total of fmat divided by the matching
# row total of mlb_games
f_row_sums = fmat.sum(axis=1).values
games_row_sums = mlb_games.sum(axis=1).values
r_new = f_row_sums / games_row_sums

# print first iteration ranking vector (verbose mode only)
if verbose:
    print(r_new)

In [48]:
# put it all together: iterate r <- rowsum(f_inc(e * r)) / rowsum(mlb_games) until the
# L1 change between successive ranking vectors drops below `tolerance`
max_iters = 1000
tolerance = 1e-6

n_teams = len(mlb_list)

# initialize ranking vector
r = np.ones(n_teams)

# build e matrix from scores ONCE: it depends only on mlb_scores, not on r, so it is
# loop-invariant.  weight[i, j] = 5 + S[i, j] + S[i, j]^(2/3); e[i, j] = weight[i, j] / weight[j, i]
scores = mlb_scores.iloc[:n_teams, :n_teams].to_numpy(dtype=float)
score_weight = 5 + scores + scores ** (2/3)
e_values = score_weight / score_weight.T
e = pd.DataFrame(e_values, index=mlb_list, columns=mlb_list)

# row totals of mlb_games are also independent of r
games_row_sums = mlb_games.sum(axis=1).values

# f_inc (the continuous monotone increasing function f(x)) is defined once, in its own
# cell earlier in this section, and is reused here rather than redefined.

# NOTE(review): e[i, i] == 1 for every team, so each row sum below includes a
# self-term f_inc(r[i]) -- confirm that is intended.
# NOTE(review): f_inc(0) == 0, so r = 0 is a fixed point of this iteration, and the
# absolute tolerance is also met when the iterate simply shrinks toward it. The ranks
# recorded in this notebook's output are of order 1e-10, which suggests that is what
# happens here; only their relative order is informative. Consider normalising r.

for it in range(max_iters):
    # build f matrix: fmat[i, j] = f_inc(e[i, j] * r[j]) (r broadcasts along rows)
    fmat = pd.DataFrame(f_inc(e_values * r), index=mlb_list, columns=mlb_list)

    # create ranking vector based on f matrix
    r_new = fmat.sum(axis=1).values / games_row_sums

    # check for convergence
    if la.norm(r_new - r, ord=1) < tolerance:
        print(f'Converged after {it + 1} iterations.')
        if verbose:
            print(r_new)
        break

    r = r_new
else:
    # loop ran out of iterations without hitting `break`
    print('Maximum iterations reached without convergence.')

Converged after 5 iterations.


In [49]:
# non-linear method results: one row per team, shown from highest to lowest rank value
# with a fresh 0..n-1 index
rankings_nl = pd.DataFrame({'Team': mlb_list, 'Rank': r_new})
print('Final Team Rankings:')
display(rankings_nl.sort_values('Rank', ascending=False, ignore_index=True))

Final Team Rankings:


Unnamed: 0,Team,Rank
0,NYY,1.438909e-10
1,MIL,1.438435e-10
2,LAD,1.427299e-10
3,SEA,1.391075e-10
4,CHC,1.366013e-10
5,PHI,1.362882e-10
6,HOU,1.357603e-10
7,TOR,1.340176e-10
8,TEX,1.328897e-10
9,DET,1.297452e-10
