# Sports Team Ranking
## Analysis
This notebook is used to run the different ranking methods after the data has been processed. <br/>
<br/>
Originally coded by Rhys-Jasper León <br/>
Last updated 2025 November 18

In [30]:
# import libraries
import numpy as np
import pandas as pd
import numpy.linalg as la

In [31]:
# Verbosity switch for the whole notebook: it is forwarded to the processing
# helpers (verbose=verbose) and, when True, later cells display/print their
# intermediate matrices and vectors.
verbose = False

In [32]:
# Import only the processing helpers this notebook actually calls. An explicit
# import list (instead of a wildcard import) shows where each name comes from
# and prevents the module from silently shadowing names such as np / pd / la.
from processing import import_league_data, import_teams_data, process_game_data

# process data
season, nrows, ncols = import_league_data('mlb_2025_season.csv', verbose=verbose)
teams, teams_list = import_teams_data('mlb_teams.csv', verbose=verbose)
# S, G, W and A are the matrices consumed by the ranking cells below
S, G, W, A = process_game_data(season, teams_list, method='distribute', verbose=verbose)

## Direct Method

In [33]:
# Eigen-decompose A and keep the eigenpair whose eigenvalue has the largest
# real part; only the real parts of that eigenvalue/eigenvector are retained.
lam, E = la.eig(A)
ind = lam.real.argmax()
max_lam, max_E = lam[ind].real, E[:, ind].real

print(f'Largest real, positive eigenvalue: {max_lam}')
print(f'Corresponding eigenvector: {max_E}')

Largest real, positive eigenvalue: 80.53689426691702
Corresponding eigenvector: [0.18042291 0.1733005  0.18042964 0.17138287 0.1942651  0.20035883
 0.16884818 0.18817026 0.18240401 0.13879442 0.18857258 0.18618744
 0.18127331 0.16426368 0.19691168 0.17415969 0.19999066 0.17210399
 0.18718459 0.20220568 0.19471476 0.17246904 0.18991156 0.18034351
 0.19162586 0.17646043 0.18798217 0.18987694 0.18903052 0.15876291]


In [34]:
# Scale the selected eigenvector by the sum of its entries to obtain the
# direct-method ranking vector.
eigvec_total = max_E.sum()
r_direct = max_E / eigvec_total

# sanity check: the printed total should be 1
print(f'Ranking vector sums to: {r_direct.sum()}')

Ranking vector sums to: 1.0


In [35]:
# Tabulate the direct-method scores and attach a dense rank
# (highest score -> rank 1, tied scores share a rank).
ranking = pd.DataFrame({'Team': teams_list, 'Score': r_direct})
ranking = ranking.assign(
    Rank=ranking['Score'].rank(method='dense', ascending=False).astype(int)
)

print('Final Team Rankings:')
ranking_sorted = ranking.sort_values(by='Rank').reset_index(drop=True)
display(ranking_sorted)

Final Team Rankings:


Unnamed: 0,Team,Score,Rank
0,NYY,0.037018,1
1,CHC,0.03668,2
2,MIL,0.036612,3
3,LAD,0.036049,4
4,PHI,0.035646,5
5,BOS,0.035564,6
6,SEA,0.035081,7
7,SD,0.034767,8
8,TEX,0.034761,9
9,TOR,0.034606,10


## Non-linear method

In [36]:
# starting guess for the iteration: the all-ones vector, one entry per team
n_teams = len(teams_list)
r = np.ones(n_teams)

In [37]:
# continuous, monotone increasing function f(x) used by the nonlinear method
def f_inc(x):
    """Evaluate f(x) = (0.05*x + x**2) / (2 + 0.05*x + x**2).

    Works element-wise on NumPy arrays as well as on plain scalars, because
    only arithmetic operators are applied to ``x``.
    """
    linear_term = 0.05*x
    square_term = x**2
    return (linear_term + square_term) / (2 + linear_term + square_term)

In [38]:
# Build the e matrix from the scores in one vectorised step instead of a
# Python double loop of .iloc reads/writes:
#   e[i, j] = (5 + S[i, j] + S[i, j]**(2/3)) / (5 + S[j, i] + S[j, i]**(2/3))
n_teams = len(teams_list)
# positional block of S covering the teams in teams_list (same cells the
# original index loops touched)
S_values = S.iloc[:n_teams, :n_teams].to_numpy(dtype=float)
strength = 5 + S_values + S_values ** (2/3)
# dividing by the transpose pairs entry (i, j) with entry (j, i)
e = pd.DataFrame(strength / strength.T, index=teams_list, columns=teams_list)

if verbose:
    display(e)

In [39]:
# Build the f matrix using the nonlinear function f(x):
#   fmat[i, j] = f_inc(e[i, j] * r[j])
# Multiplying the 2-D array by the 1-D vector r broadcasts r across rows,
# i.e. column j of e is scaled by r[j]; f_inc is then applied element-wise.
fmat = pd.DataFrame(
    f_inc(e.to_numpy() * r),
    index=teams_list,
    columns=teams_list,
)

if verbose:
    display(fmat)

In [40]:
# First-iteration ranking vector: row sums of the f matrix divided,
# element by element, by the row sums of G.
fmat_row_sums = fmat.sum(axis=1).values
G_row_sums = G.sum(axis=1).values
r_nonlinear = fmat_row_sums / G_row_sums

# show the first-iteration vector only in verbose mode
if verbose:
    print(r_nonlinear)

In [41]:
# Put it all together: repeat r <- rowsum(f(e * r)) / rowsum(G) until two
# successive iterates differ by less than `tolerance` in the 1-norm.
max_iters = 100
tolerance = 1e-6


# continuous monotone increasing function f(x); same formula as the standalone
# definition earlier in the notebook, repeated so this cell runs on its own
def f_inc(x):
    """Evaluate f(x) = (0.05*x + x**2) / (2 + 0.05*x + x**2) element-wise."""
    return (0.05*x + x**2) / (2 + 0.05*x + x**2)


n_teams = len(teams_list)

# initialize ranking vector
r = np.ones(n_teams)

# The e matrix depends only on the scores S, never on r, so it is built once
# here (vectorised) rather than being recomputed inside every iteration:
#   e[i, j] = (5 + S[i, j] + S[i, j]**(2/3)) / (5 + S[j, i] + S[j, i]**(2/3))
S_values = S.iloc[:n_teams, :n_teams].to_numpy(dtype=float)
strength = 5 + S_values + S_values ** (2/3)
e = pd.DataFrame(strength / strength.T, index=teams_list, columns=teams_list)
e_values = e.to_numpy()

# row sums of G are likewise independent of r
G_row_sums = G.sum(axis=1).values

for it in range(max_iters):
    # f matrix for the current iterate: fmat[i, j] = f_inc(e[i, j] * r[j]);
    # broadcasting scales column j of e by r[j]
    fmat = pd.DataFrame(f_inc(e_values * r), index=teams_list, columns=teams_list)

    # create ranking vector based on f matrix
    r_nonlinear = fmat.sum(axis=1).values / G_row_sums

    # check for convergence
    if la.norm(r_nonlinear - r, ord=1) < tolerance:
        print(f'Converged after {it + 1} iterations.')
        if verbose:
            print(r_nonlinear)
        break

    r = r_nonlinear
else:
    # the loop ran out of iterations without hitting `break`
    print('Maximum iterations reached without convergence.')

# NOTE(review): f_inc(0) == 0, so the zero vector is a fixed point of this
# update, and an absolute tolerance is also met when the iterates merely shrink
# toward zero. The scores displayed below are of order 1e-10, which suggests
# that is what happens here. Confirm whether the scaling by the row sums of G
# is intended, or whether r should be normalised each iteration.

Converged after 5 iterations.


In [42]:
# Tabulate the nonlinear-method scores and attach a dense rank
# (highest score -> rank 1, tied scores share a rank).
ranking_nl = pd.DataFrame({'Team': teams_list, 'Score': r_nonlinear})
nl_rank = ranking_nl['Score'].rank(method='dense', ascending=False)
ranking_nl = ranking_nl.assign(Rank=nl_rank.astype(int))

print('Final Team Rankings:')
ranking_nl_sorted = ranking_nl.sort_values(by='Rank').reset_index(drop=True)
display(ranking_nl_sorted)

Final Team Rankings:


Unnamed: 0,Team,Score,Rank
0,NYY,1.438909e-10,1
1,MIL,1.438435e-10,2
2,LAD,1.427299e-10,3
3,SEA,1.391075e-10,4
4,CHC,1.366013e-10,5
5,PHI,1.362882e-10,6
6,HOU,1.357603e-10,7
7,TOR,1.340176e-10,8
8,TEX,1.328897e-10,9
9,DET,1.297452e-10,10


## Compare rankings

In [43]:
# sum each row of W and keep the totals as a plain array
W_row_totals = W.sum(axis=1)
wins = W_row_totals.values
display(wins)

array([80, 76, 76, 75, 89, 92, 60, 83, 88, 43, 87, 87, 82, 72, 93, 79, 97,
       70, 83, 94, 96, 71, 90, 81, 90, 78, 77, 81, 94, 66])

In [44]:
# Side-by-side comparison table: win totals and both methods' scores per team
final_rank = pd.DataFrame({
    'Team': teams_list,
    'Wins': wins,
    'Direct Score': r_direct,
    'Nonlinear Score': r_nonlinear,
})

# rank column -> column it is derived from (dense rank, largest value = rank 1)
rank_sources = {
    'Wins Rank': 'Wins',
    'Direct Rank': 'Direct Score',
    'Nonlinear Rank': 'Nonlinear Score',
}
for rank_col, source_col in rank_sources.items():
    final_rank[rank_col] = final_rank[source_col].rank(method='dense', ascending=False).astype(int)

final_rank_by_wins = final_rank.sort_values(by='Wins Rank', ascending=True).reset_index(drop=True)
display(final_rank_by_wins)

Unnamed: 0,Team,Wins,Direct Score,Nonlinear Score,Wins Rank,Direct Rank,Nonlinear Rank
0,MIL,97,0.036612,1.438435e-10,1,3,2
1,PHI,96,0.035646,1.362882e-10,2,5,6
2,TOR,94,0.034606,1.340176e-10,3,10,8
3,NYY,94,0.037018,1.438909e-10,3,1,1
4,LAD,93,0.036049,1.427299e-10,4,4,3
5,CHC,92,0.03668,1.366013e-10,5,2,5
6,SD,90,0.034767,1.205765e-10,6,8,15
7,SEA,90,0.035081,1.391075e-10,6,7,4
8,BOS,89,0.035564,1.286015e-10,7,6,11
9,CLE,88,0.033393,1.087531e-10,8,16,25


In [45]:
# Attach the team metadata by matching teams['abbr'] to final_rank['Team'],
# then keep only the summary columns, ordered by wins (highest first).
result = pd.merge(teams, final_rank, left_on='abbr', right_on='Team')
summary_cols = ['team', 'Wins', 'Wins Rank', 'Direct Rank', 'Nonlinear Rank']
final_result = (
    result[summary_cols]
    .sort_values(by='Wins', ascending=False)
    .rename(columns={'team': 'Team'})
)
# hide the row index in the rendered table
display(final_result.style.hide(axis='index'))

Team,Wins,Wins Rank,Direct Rank,Nonlinear Rank
Milwaukee Brewers,97,1,3,2
Philadelphia Phillies,96,2,5,6
Toronto Blue Jays,94,3,10,8
New York Yankees,94,3,1,1
Los Angeles Dodgers,93,4,4,3
Chicago Cubs,92,5,2,5
San Diego Padres,90,6,8,15
Seattle Mariners,90,6,7,4
Boston Red Sox,89,7,6,11
Cleveland Guardians,88,8,16,25
