# Markov Method

In [1]:
import pandas as pd
import numpy as np

def get_teams(df):
    """Return the sorted list of distinct team names found in ``df``.

    Names are gathered from both the ``t1`` and ``t2`` columns, so a team is
    listed exactly once regardless of which column(s) it appears in.
    """
    names = set(df.t1)
    names.update(df.t2)
    return sorted(names)

def get_stats(df, f1='s1', f2='s2'):
    """Build a square team-by-team frame of paired statistics.

    Parameters
    ----------
    df : DataFrame
        One row per game, with team columns ``t1`` and ``t2`` plus the two
        statistic columns named by ``f1`` and ``f2``.
    f1, f2 : str
        Column holding the statistic for the ``t1`` side and the ``t2`` side
        of a game row, respectively.

    Returns
    -------
    DataFrame
        Indexed and labelled by the sorted team names. Cell ``(a, b)`` is the
        tuple ``(a's value, b's value)`` taken from the first row in which
        ``a`` and ``b`` meet, or ``np.nan`` when no such row exists. Only the
        first matching row is used if the pair appears more than once.
    """
    def lookup(row_team, col_team):
        # Game listed with row_team in t1: values are already in (row, col) order.
        direct = df[(df.t1 == row_team) & (df.t2 == col_team)]
        if len(direct) > 0:
            game = direct.iloc[0]
            return game[f1], game[f2]

        # Game listed the other way round: swap so the row team's value comes first.
        flipped = df[(df.t1 == col_team) & (df.t2 == row_team)]
        if len(flipped) > 0:
            game = flipped.iloc[0]
            return game[f2], game[f1]

        # No row pairs these two teams.
        return np.nan

    teams = get_teams(df)
    grid = []
    for row_team in teams:
        grid.append([lookup(row_team, col_team) for col_team in teams])
    return pd.DataFrame(grid, index=teams, columns=teams)

# Game log used throughout the notebook.
fpath = './ranking/acc-2005-ncaaf.csv'

# Team-by-team matrix built from the default 's1'/'s2' columns; each cell is
# a (row team's value, column team's value) tuple, NaN where no game is listed.
df = pd.read_csv(fpath).pipe(get_stats)
df

Unnamed: 0,Duke,Miami,UNC,UVA,VT
Duke,,"(7, 52)","(21, 24)","(7, 38)","(0, 45)"
Miami,"(52, 7)",,"(34, 16)","(25, 17)","(27, 7)"
UNC,"(24, 21)","(16, 34)",,"(7, 5)","(3, 30)"
UVA,"(38, 7)","(17, 25)","(5, 7)",,"(14, 52)"
VT,"(45, 0)","(7, 27)","(30, 3)","(52, 14)",


In [2]:
def adjust(df):
    """Return a copy of ``df`` in which every all-zero row is filled with 1s.

    A row whose sum is 0 cannot be scaled to sum to 1, so each such row is
    replaced by a row of ones (equal weight in every column).

    The input frame is left untouched; the adjusted copy is returned. Callers
    should use the return value.

    Parameters
    ----------
    df : DataFrame
        Numeric vote matrix.

    Returns
    -------
    DataFrame
        Same index/columns as ``df``; rows that summed to 0 are all 1.
    """
    adjusted = df.copy()
    # Positional boolean mask of the rows whose total is exactly 0.
    zero_rows = (adjusted.sum(axis=1) == 0).to_numpy()
    adjusted.iloc[zero_rows, :] = 1
    return adjusted

def normalize(df):
    """Scale every row of ``df`` by the reciprocal of its row sum.

    For rows with a non-zero sum the result sums to 1. Rows that sum to 0 are
    not special-cased here (they end up as NaN), so run ``adjust`` on the
    frame first if that can happen.

    Parameters
    ----------
    df : DataFrame
        Numeric matrix.

    Returns
    -------
    DataFrame
        New frame with the same index and columns as ``df``.
    """
    # Multiply by the reciprocal (rather than dividing) in one vectorized,
    # row-aligned operation instead of rebuilding the frame row by row.
    row_scale = 1 / df.sum(axis=1)
    return df.mul(row_scale, axis=0)

def vote_by_loss(df):
    """Turn a frame of paired values into a 0/1 "lost to" vote matrix.

    Each non-missing cell of ``df`` is unpacked as a pair ``(first, second)``
    (as produced by ``get_stats``). The vote is 1 when ``first < second`` and
    0 otherwise (ties and missing cells give 0).

    Returns a new DataFrame with the same index and columns as ``df``.
    """
    def loss_vote(cell):
        # Missing pairing: nothing to vote on.
        if pd.isna(cell):
            return 0
        own, opp = cell
        return int(own < opp)

    votes = [[loss_vote(cell) for cell in row]
             for row in df.itertuples(index=False, name=None)]
    return pd.DataFrame(votes, index=df.index, columns=df.columns)

def vote_by_point_differential(df):
    """Turn a frame of paired values into a "margin lost by" vote matrix.

    Each non-missing cell of ``df`` is unpacked as a pair ``(first, second)``
    (as produced by ``get_stats``). The vote is ``second - first`` when that
    difference is not negative, and 0 when it is negative or the cell is
    missing.

    Returns a new DataFrame with the same index and columns as ``df``.
    """
    def margin_vote(cell):
        # Missing pairing: nothing to vote on.
        if pd.isna(cell):
            return 0
        own, opp = cell
        gap = opp - own
        if gap < 0:
            return 0
        return gap

    votes = [[margin_vote(cell) for cell in row]
             for row in df.itertuples(index=False, name=None)]
    return pd.DataFrame(votes, index=df.index, columns=df.columns)

def vote_by_opponent_stats(df):
    """Build a vote matrix from the second value of every cell pair.

    Each non-missing cell of ``df`` is unpacked as a pair ``(first, second)``
    (as produced by ``get_stats``, where the second value belongs to the
    column team). The vote is ``second``; missing cells vote 0.

    Returns a new DataFrame with the same index and columns as ``df``.
    """
    def opponent_value(cell):
        # Missing pairing: nothing to vote on.
        if pd.isna(cell):
            return 0
        return cell[1] if False else tuple(cell)[1] if False else _second(cell)

    def _second(cell):
        _, opp = cell
        return opp

    votes = [[opponent_value(cell) for cell in row]
             for row in df.itertuples(index=False, name=None)]
    return pd.DataFrame(votes, index=df.index, columns=df.columns)

def vote_by_team_stats(df):
    """Build a vote matrix from the first value of every cell pair.

    Each non-missing cell of ``df`` is unpacked as a pair ``(first, second)``
    (as produced by ``get_stats``, where the first value belongs to the row
    team). The vote is ``first``; missing cells vote 0.

    Returns a new DataFrame with the same index and columns as ``df``.
    """
    def own_value(cell):
        # Missing pairing: nothing to vote on.
        if pd.isna(cell):
            return 0
        own, _ = cell
        return own

    votes = [[own_value(cell) for cell in row]
             for row in df.itertuples(index=False, name=None)]
    return pd.DataFrame(votes, index=df.index, columns=df.columns)

def get_stationary_p(X):
    """Return the stationary distribution of the transition matrix ``X``.

    The stationary vector ``p`` satisfies ``p @ X == p``, i.e. it is an
    eigenvector of ``X.T`` for eigenvalue 1, rescaled so its entries sum to 1.

    Parameters
    ----------
    X : DataFrame
        Square matrix whose rows each sum to 1.

    Returns
    -------
    Series
        Stationary probabilities, indexed like ``X``.

    Raises
    ------
    ValueError
        If no eigenvalue of ``X.T`` is numerically close to 1 (for example
        because the rows of ``X`` were not normalized).

    Notes
    -----
    If several eigenvalues are close to 1, the first one returned by
    ``np.linalg.eig`` is used.
    """
    # Hand NumPy a plain float array rather than the DataFrame itself, so the
    # results below are guaranteed to be ndarrays.
    eigenvalues, eigenvectors = np.linalg.eig(np.asarray(X, dtype=float).T)

    unit_positions = np.flatnonzero(np.isclose(eigenvalues, 1))
    if unit_positions.size == 0:
        raise ValueError(
            "no eigenvalue close to 1: X does not look like a row-stochastic matrix")

    vec = eigenvectors[:, unit_positions[0]]
    # Eigenvectors are only defined up to scale; rescale to sum to 1 and drop
    # the (zero) imaginary part that appears when other eigenvalues are complex.
    r = (vec / vec.sum()).real
    return pd.Series(r, index=X.index)

In [3]:
# Ranking from win/loss votes only: build the 0/1 vote matrix, patch
# all-zero rows, row-normalize, then take the stationary vector.
(vote_by_loss(df)
 .pipe(adjust)
 .pipe(normalize)
 .pipe(get_stationary_p))

Duke     0.087591
Miami    0.437956
UNC      0.145985
UVA      0.109489
VT       0.218978
dtype: float64

In [4]:
# Ranking from point-differential votes: same pipeline as above, but each
# vote is weighted by the (non-negative) margin second - first.
(vote_by_point_differential(df)
 .pipe(adjust)
 .pipe(normalize)
 .pipe(get_stationary_p))

Duke     0.088304
Miami    0.441519
UNC      0.095039
UVA      0.110380
VT       0.264757
dtype: float64

In [5]:
# Read the game log once and reuse it for every statistic below; get_stats
# only filters and reads the frame, so sharing it is safe.
games_raw = pd.read_csv(fpath)

# Votes weighted by the column team's value in `df` (the 's1'/'s2' matrix).
S_p = normalize(adjust(vote_by_opponent_stats(df)))

# Votes weighted by the column team's 'y1'/'y2' value.
S_y = normalize(
        adjust(
            vote_by_opponent_stats(
                get_stats(games_raw, f1='y1', f2='y2'))))

# Votes weighted by the row team's own 'to1'/'to2' value.
S_t = normalize(
        adjust(
            vote_by_team_stats(
                get_stats(games_raw, f1='to1', f2='to2'))))

# Votes weighted by the row team's own 'p1'/'p2' value.
S_poss = normalize(
        adjust(
            vote_by_team_stats(
                get_stats(games_raw, f1='p1', f2='p2'))))

In [6]:
# Stationary ranking for S_p (votes from the 's1'/'s2' columns — presumably points scored).
S_p.pipe(get_stationary_p)

Duke     0.095384
Miami    0.296301
UNC      0.148504
UVA      0.215845
VT       0.243967
dtype: float64

In [7]:
# Stationary ranking for S_y (votes from the 'y1'/'y2' columns — presumably yardage).
S_y.pipe(get_stationary_p)

Duke     0.104596
Miami    0.248631
UNC      0.169855
UVA      0.259757
VT       0.217161
dtype: float64

In [8]:
# Stationary ranking for S_t (votes from the 'to1'/'to2' columns — presumably turnovers).
S_t.pipe(get_stationary_p)

Duke     0.189286
Miami    0.241413
UNC      0.212132
UVA      0.124894
VT       0.232275
dtype: float64

In [9]:
# Stationary ranking for S_poss (votes from the 'p1'/'p2' columns — presumably possession).
S_poss.pipe(get_stationary_p)

Duke     0.201862
Miami    0.197136
UNC      0.208133
UVA      0.187274
VT       0.205595
dtype: float64

In [10]:
# Blend the four matrices with the same weight `a` each (four weights of
# 1/4 sum to 1).
a = 1 / 4
S = (
    (a * S_p)
    .add(a * S_y)
    .add(a * S_t)
    .add(a * S_poss)
)
S

Unnamed: 0,Duke,Miami,UNC,UVA,VT
Duke,0.0,0.246596,0.202874,0.227091,0.323439
Miami,0.209417,0.0,0.321459,0.305492,0.163632
UNC,0.243919,0.32989,0.0,0.157833,0.268358
UVA,0.163728,0.2054,0.174977,0.0,0.455895
VT,0.100524,0.465297,0.18626,0.247918,0.0


In [11]:
# Stationary ranking for the blended matrix S.
S.pipe(get_stationary_p)

Duke     0.150692
Miami    0.243539
UNC      0.185036
UVA      0.194029
VT       0.226703
dtype: float64