### Using the stationary distribution of an MDP to find teams' strength

In [1]:
# Import libraries
import numpy as np
import pandas as pd
import math

In [2]:
'''
Calculates the stationary distribution of M using eigenvectors

Args:
    M: the MDP transistion matrix
    
Returns:
    vec: the first eigenvector found
'''
def getStationaryDist(M):
    w,v = np.linalg.eig(M.transpose())
    
    for i in range(0,len(w)):
        if w[i] - 1 < 1e-8 and w[i]-1 > -1e-8:
            vec = v[:,i]
            sumProb = 0
            for j in range(0,len(vec)):
                sumProb += vec[j]
            vec = vec/sumProb
            return vec.astype(float)
            

# Testing getStationaryDist

M = np.zeros((3,3))
M[0][0] = 0.7
M[0][1] = 0.2
M[0][2] = 0.1
M[1][0] = 0.4
M[1][1] = 0.6
M[1][2] = 0.0
M[2][0] = 0.0
M[2][1] = 1.0
M[2][2] = 0.0
M2 = np.zeros((2,2))
M2[0][0] = 0.3
M2[0][1] = 0.7
M2[1][0] = 0.8
M2[1][1] = 0.2
getStationaryDist(M)



array([0.54054054, 0.40540541, 0.05405405])

In [3]:
all_data = pd.read_csv("results.csv")
training_data = all_data.iloc[25000:,:5]
minYear = int(training_data.iloc[0].date.split("-")[0])

'''
Converts a date to an integer

Args:
    date: a date in the form "YYYY-MM-DD"
    
Returns:
    finalDate: an equivalent int to represent the date
    
'''
def preprocessDate(date):
    arr = np.array(date.split("-"))
    arr = arr.astype(int)
    yearDiff = arr[0]-minYear
    finalDate = yearDiff*365+arr[1]*30+arr[2]
    return finalDate

training_data.date = training_data.date.apply(preprocessDate)
maxDate = 6060
print("Number of games: {}".format(training_data.shape[0]))
training_data.head(3)

Number of games: 14669


Unnamed: 0,date,home_team,away_team,home_score,away_score
25000,350,Czech Republic,Sweden,3,3
25001,350,Denmark,Poland,2,0
25002,350,Ecuador,Costa Rica,2,2


In [4]:
teams = training_data.home_team
teams = teams.append(training_data.away_team)
teams = teams.unique()
teams = teams.tolist()
S = len(teams)
print("Numer of unique teams: {}".format(S))

Numer of unique teams: 282


In [5]:
M = np.zeros((S,S))
dateParam = -1*0.0075

'''
Adds a game to the transition matrix M

Args:
    homeIndex: unique index of the home team
    awayIndex: unique index of the away team
    home_score: number of goals of the home team
    away_score: number of goals of the home team
    date: date converted to an integer

Returns:
    None
'''
def modifyMatrix(homeIndex,awayIndex,home_score,away_score,date):
    toAdd = 0
    dateFactor = math.exp(dateParam*float(date-maxDate))
    
    if home_score < away_score:
        M[awayIndex][awayIndex] += 1*dateFactor
        M[homeIndex][awayIndex] += 1*dateFactor
        toAdd += away_score
    elif home_score > away_score:
        M[homeIndex][homeIndex] += 1*dateFactor
        M[awayIndex][homeIndex] += 1*dateFactor
        toAdd += home_score
    if home_score+away_score > 0:
        toAdd = float(toAdd)/(home_score+away_score)
        M[awayIndex][awayIndex] += toAdd*dateFactor
        M[homeIndex][awayIndex] += toAdd*dateFactor
        M[homeIndex][homeIndex] += toAdd*dateFactor
        M[awayIndex][homeIndex] += toAdd*dateFactor

# Add games to the trasition matrix
training_data_len = len(training_data)
for i in range(5000,training_data_len):
    date = training_data.iloc[i].date
    home = training_data.iloc[i].home_team
    away = training_data.iloc[i].away_team
    home_score = training_data.iloc[i].home_score
    away_score = training_data.iloc[i].away_score
    homeIndex = teams.index(home)
    awayIndex = teams.index(away)
    modifyMatrix(homeIndex,awayIndex,home_score,away_score,date)
    
'''
Normalize the given matrix M

Args:
    M: transition matrix

Returns:
    M: normalized transition matrix
'''
def normalizeTransitionMatrix(M):
    for i in range(0,M.shape[0]):
        row = M[i]
        rowSum = np.sum(row)
        if rowSum > 0:
            row /= rowSum
        M[i] = row
    return M
M2 = normalizeTransitionMatrix(M)
stationaryDist = getStationaryDist(M2)



In [6]:
# Sort the final score and display the teams in the order of the strongest to the weakest

sortedDist = np.sort(stationaryDist)
sortedDist = np.flip(sortedDist,axis=0)
toSearch = stationaryDist.tolist()
countryNames = np.chararray(len(teams))
for i in range(0,len(sortedDist)):
    countryIndex = toSearch.index(sortedDist[i])
    countryName = teams[countryIndex]
    print(str(i+1)+": "+countryName)

1: Spain
2: Germany
3: Russia
4: Netherlands
5: Iran
6: England
7: Trinidad and Tobago
8: USA
9: Chile
10: Turkey
11: Croatia
12: Mexico
13: Saudi Arabia
14: Japan
15: Qatar
16: Honduras
17: Portugal
18: France
19: Uruguay
20: Switzerland
21: Costa Rica
22: Sweden
23: Italy
24: Nigeria
25: Bahrain
26: Australia
27: Ukraine
28: Jamaica
29: Venezuela
30: Brazil
31: Poland
32: Czech Republic
33: Paraguay
34: El Salvador
35: Cuba
36: Libya
37: Guatemala
38: Argentina
39: Oman
40: Lithuania
41: Syria
42: Korea DPR
43: Korea Republic
44: India
45: Greece
46: Wales
47: Algeria
48: Bolivia
49: Uzbekistan
50: Panama
51: Romania
52: Bermuda
53: Bosnia-Herzegovina
54: Serbia
55: Padania
56: Iraq
57: Peru
58: China
59: Grenada
60: Ghana
61: Austria
62: Gabon
63: Colombia
64: Ecuador
65: Thailand
66: Haiti
67: South Africa
68: Malawi
69: Jordan
70: Latvia
71: Kuwait
72: Barbados
73: Estonia
74: Ivory Coast
75: Egypt
76: Guadeloupe
77: Maldives
78: United Arab Emirates
79: Burkina Faso
80: Antigua a

Just looking at the teams, you can clearly see that the order is not random and there is a measure of strength.